Java Code Examples for org.apache.pig.PigServer#dumpSchema()

The following examples show how to use org.apache.pig.PigServer#dumpSchema(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You can also check out related API usage in the sidebar.
Example 1
Source File: TestProjectStarExpander.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Projects {@code *} twice in one {@code foreach} -- first with explicit
 * aliases {@code (aa, bb, cc)}, then without -- and checks both the
 * resulting schema of {@code f} and the rows it produces.
 *
 * @throws IOException propagated from the Pig calls made by this test
 * @throws ParserException propagated from the Pig calls made by this test
 */
@Test
public void testProjectStarMulti() throws IOException, ParserException {
    final PigServer server = new PigServer(ExecType.LOCAL);

    final String loadStmt =
        "  l1 = load '" + INP_FILE_5FIELDS + "' as (a : int, b : int, c : int);";
    final String projectStmt = "f = foreach l1 generate * as (aa, bb, cc), *;";
    Util.registerMultiLineQuery(server, loadStmt + projectStmt);

    // Aliased columns come first, followed by the original column names.
    final Schema wantedSchema = Utils.getSchemaFromString(
            "aa : int, bb : int, cc : int, a : int, b : int, c : int");
    assertEquals("Checking expected schema", wantedSchema, server.dumpSchema("f"));

    final String[] wantedRowStrings = {
        "(10,20,30,10,20,30)",
        "(11,21,31,11,21,31)",
    };
    final List<Tuple> wantedRows =
        Util.getTuplesFromConstantTupleStrings(wantedRowStrings);
    Util.checkQueryOutputsAfterSort(server.openIterator("f"), wantedRows);
}
 
Example 2
Source File: TestPigServer.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the schema reported by {@code dumpSchema} for the {@code cross}
 * of two loaded relations; the expected field names carry the
 * {@code a::}/{@code b::} prefixes and the untyped {@code field4} is
 * expected as {@code bytearray}.
 */
@Test
public void testDescribeCross() throws Throwable {
    final PigServer server = new PigServer(cluster.getExecType(), properties);
    final String[] statements = {
        "a = load 'a' as (field1: int, field2: float, field3: chararray );",
        "b = load 'b' as (field4, field5: double, field6: chararray );",
        "c = cross a, b;",
    };
    for (String statement : statements) {
        server.registerQuery(statement);
    }

    final Schema actual = server.dumpSchema("c");
    final Schema wanted = Utils.getSchemaFromString(
            "a::field1: int,a::field2: float,a::field3: chararray,b::field4: bytearray,b::field5: double,b::field6: chararray");
    assertEquals(wanted, actual);
}
 
Example 3
Source File: BoundScript.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Describes the schema of an alias in this pipeline by registering the
 * first bound query on a fresh {@link PigServer} and calling
 * {@code dumpSchema} for the alias. The value returned by
 * {@code dumpSchema} is not used here.
 * <p>
 * When no query is bound, an informational message is logged and the
 * method returns without creating a server.
 *
 * @param alias the alias to be described
 * @throws IOException if describe fails
 */
public void describe(String alias) throws IOException {
    if (!queries.isEmpty()) {
        final PigServer server = new PigServer(scriptContext.getPigContext(), false);
        registerQuery(server, queries.get(0));
        server.dumpSchema(alias);
        return;
    }
    LOG.info("No bound query to describe");
}
 
Example 4
Source File: TestPigServer.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the schema reported for {@code distinct a} has the same
 * three typed fields that were declared when loading {@code a}.
 */
@Test
public void testDescribeDistinct() throws Throwable {
    final PigServer server = new PigServer(cluster.getExecType(), properties);
    server.registerQuery("a = load 'a' as (field1: int, field2: float, field3: chararray );");
    server.registerQuery("b = distinct a ;");

    final Schema actual = server.dumpSchema("b");
    final String wantedText = "field1: int,field2: float,field3: chararray";
    final Schema wanted = Utils.getSchemaFromString(wantedText);
    assertEquals(wanted, actual);
}
 
Example 5
Source File: TestPigServer.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the schema reported after flattening a bag of tuples: the
 * flattened tuple fields are expected to appear with an
 * {@code itemCounts::} prefix alongside the untouched scalar fields.
 */
@Test
public void testDescribeComplex() throws Throwable {
    final PigServer server = new PigServer(cluster.getExecType(), properties);
    final String loadStmt =
        "a = load 'a' as (site: chararray, count: int, itemCounts: bag { itemCountsTuple: tuple (type: chararray, typeCount: int, f: float, m: map[]) } ) ;";
    final String flattenStmt = "b = foreach a generate site, count, FLATTEN(itemCounts);";
    server.registerQuery(loadStmt);
    server.registerQuery(flattenStmt);

    final Schema actual = server.dumpSchema("b");
    final String wantedText =
        "site: chararray,count: int,"
        + "itemCounts::type: chararray,itemCounts::typeCount: int,"
        + "itemCounts::f: float,itemCounts::m: map[ ]";
    final Schema wanted = Utils.getSchemaFromString(wantedText);
    assertEquals(wanted, actual);
}
 
Example 6
Source File: TestPigServer.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the schema reported for a plain {@code load ... as (...)}
 * matches the three typed fields declared in the load statement.
 */
// @Test was missing here although every sibling test carries it; without
// the annotation a JUnit 4 runner does not execute this method.
@Test
public void testDescribeLoad() throws Throwable {
    PigServer pig = new PigServer(cluster.getExecType(), properties);
    pig.registerQuery("a = load 'a' as (field1: int, field2: float, field3: chararray );") ;
    Schema dumpedSchema = pig.dumpSchema("a") ;
    Schema expectedSchema = Utils.getSchemaFromString("field1: int,field2: float,field3: chararray");
    assertEquals(expectedSchema, dumpedSchema);
}
 
Example 7
Source File: TestTypedMap.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Loads two map literals with a declared {@code map[int]} schema and checks
 * the reported schema as well as the key set and value types of each row.
 *
 * @throws IOException propagated from the Pig and file-system calls below
 * @throws ParserException propagated from the Pig calls below
 */
@Test
public void testSimpleLoad() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL, new Properties());
    String[] input = {
            "[key#1,key2#2]",
            "[key#2]",
    };

    Util.createInputFile(FileSystem.getLocal(new Configuration()), tmpDirName + "/table_testSimpleLoad", input);

    String query =
        "  a = load '" + tmpDirName + "/table_testSimpleLoad' as (m:map[int]);";
    Util.registerMultiLineQuery(pig, query);
    Schema sch = pig.dumpSchema("a");
    // JUnit order is (message, expected, actual); the original had the last
    // two swapped, which produces a misleading failure message.
    assertEquals("Checking expected schema", "{m: map[int]}", sch.toString());
    Iterator<Tuple> it = pig.openIterator("a");

    // First row: both keys present, values typed as Integer.
    Assert.assertTrue(it.hasNext());
    Tuple t = it.next();
    Assert.assertEquals(1, t.size());
    Assert.assertTrue(t.get(0) instanceof Map);
    Map<?, ?> firstMap = (Map<?, ?>) t.get(0);
    Assert.assertTrue(firstMap.containsKey("key"));
    Assert.assertTrue(firstMap.containsKey("key2"));
    Assert.assertTrue(firstMap.get("key") instanceof Integer);
    Assert.assertEquals("1", firstMap.get("key").toString());
    Assert.assertTrue(firstMap.get("key2") instanceof Integer);
    Assert.assertEquals("2", firstMap.get("key2").toString());

    // Second row: only "key" is checked.
    Assert.assertTrue(it.hasNext());
    t = it.next();
    Map<?, ?> secondMap = (Map<?, ?>) t.get(0);
    Assert.assertTrue(secondMap.containsKey("key"));
    Assert.assertTrue(secondMap.get("key") instanceof Integer);
    Assert.assertEquals("2", secondMap.get("key").toString());

    Assert.assertFalse(it.hasNext());
}
 
Example 8
Source File: TestUDF.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Registers three {@code foreach} statements, each invoking one of the
 * {@code MultiLevelDerivedUDF1..3} test UDFs, and checks that the first
 * field of each resulting schema has type {@code DataType.DOUBLE}.
 */
@Test
public void testUDFMultiLevelOutputSchema() throws Exception {
    final PigServer server = new PigServer(cluster.getExecType(), cluster.getProperties());
    final String[] statements = {
        "A = LOAD 'a.txt';",
        "B = FOREACH A GENERATE org.apache.pig.test.utils.MultiLevelDerivedUDF1();",
        "C = FOREACH A GENERATE org.apache.pig.test.utils.MultiLevelDerivedUDF2();",
        "D = FOREACH A GENERATE org.apache.pig.test.utils.MultiLevelDerivedUDF3();",
    };
    for (String statement : statements) {
        server.registerQuery(statement);
    }

    // Same check for every derived relation, in the order B, C, D.
    for (String alias : new String[] {"B", "C", "D"}) {
        final Schema described = server.dumpSchema(alias);
        assertTrue(described.getField(0).type == DataType.DOUBLE);
    }
}
 
Example 9
Source File: TestUnionOnSchema.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Test UNION ONSCHEMA where one input relation contains UDF outputs
 * ({@code CONCAT} and the nested {@code UDFTupleNullSchema} test UDF).
 * Checks the string form of the union's schema and the rows it produces.
 *
 * @throws IOException propagated from the Pig calls made by this test
 * @throws ParserException propagated from the Pig calls made by this test
 */
@Test
public void testUnionOnSchemaInputUdfs() throws IOException, ParserException {
    final PigServer server = new PigServer(ExecType.LOCAL);

    final String loadFirst = "  l1 = load '" + INP_FILE_2NUMS + "' as (i : int, j : chararray);";
    final String loadSecond = "l2 = load '" + INP_FILE_2NUMS + "' as (i : int, j : chararray);";
    final String withUdfs = "f1 = foreach l1 generate i, CONCAT(j,j) as cj, "
        + "org.apache.pig.test.TestUnionOnSchema\\$UDFTupleNullSchema(i,j) as uo;";
    final String union = "u = union onschema f1, l2;";
    Util.registerMultiLineQuery(server, loadFirst + loadSecond + withUdfs + union);

    final Schema described = server.dumpSchema("u");
    final String wantedSchemaText = "{i: int,cj: chararray,uo: (),j: chararray}";
    Assert.assertTrue(wantedSchemaText.equals(described.toString()));

    final Iterator<Tuple> rows = server.openIterator("u");
    final String[] wantedRowStrings = {
        "(1,null,null,'2')",
        "(5,null,null,'3')",
        "(1,'22',(1,'2'),null)",
        "(5,'33',(5,'3'),null)"
    };
    final List<Tuple> wantedRows =
        Util.getTuplesFromConstantTupleStrings(wantedRowStrings);
    Util.checkQueryOutputsAfterSort(rows, wantedRows);
}
 
Example 10
Source File: TestUnionOnSchema.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Test UNION ONSCHEMA where a common column has additional 'namespace' part
 *  in the column name in both the inputs. Both inputs are built by
 *  cogrouping the same two relations (in opposite order) and flattening.
 *
 * @throws IOException propagated from the Pig calls made by this test
 * @throws ParserException propagated from the Pig calls made by this test
 */
@Test
public void testUnionOnSchemaScopedColumnNameBothInp2() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL);
    // f2 now iterates cg2: the original iterated cg1 again, leaving cg2
    // defined but unused (apparent copy-paste slip). The generate clause
    // names its fields explicitly, so the expected schema and rows are the
    // same either way.
    String query =
        "   l1 = load '" + INP_FILE_2NUMS + "' as (i : int, j : int); " 
        + " l2 = load '" + INP_FILE_2NUMS + "' as (i : int, x : chararray); " 
        + " cg1 = cogroup l1 by i, l2 by i; "
        + " f1 = foreach cg1 generate group as gkey, flatten(l1), flatten(l2); "
        + " cg2 = cogroup l2 by i, l1 by i; "
        + " f2 = foreach cg2 generate group as gkey, flatten(l2), flatten(l1); "
        + "u = union onschema f1, f2; " ; 
    Util.registerMultiLineQuery(pig, query);

    Schema sch = pig.dumpSchema("u");
    Schema expectedSch = 
        Utils.getSchemaFromString("gkey: int, l1::i: int, l1::j: int, l2::i: int, l2::x: chararray");
    // JUnit order is (message, expected, actual).
    assertEquals("Checking expected schema", expectedSch, sch);

    Iterator<Tuple> it = pig.openIterator("u");
    List<Tuple> expectedRes = 
        Util.getTuplesFromConstantTupleStrings(
                new String[] {
                        "(1,1,2,1,'2')",
                        "(5,5,3,5,'3')",
                        "(1,1,2,1,'2')",
                        "(5,5,3,5,'3')",
                });
    Util.checkQueryOutputsAfterSort(it, expectedRes);
}
 
Example 11
Source File: TestUnionOnSchema.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Test UNION ONSCHEMA where a common column has additional 'namespace' part
 *  in the column name in both the inputs. Each input is built by grouping
 *  one loaded relation and flattening it, so the scoped names differ
 *  ({@code l1::} vs {@code l2::}) and only {@code gkey} is shared.
 *
 * @throws IOException propagated from the Pig calls made by this test
 * @throws ParserException propagated from the Pig calls made by this test
 */
@Test
public void testUnionOnSchemaScopedColumnNameBothInp1() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL);
    String query = 
    "  l1 = load '" + INP_FILE_2NUMS + "' as (i : int, j : int); " 
    + "g1 = group l1 by i; "
    + "f1 = foreach g1 generate group as gkey, flatten(l1); "
    + "l2 = load '" + INP_FILE_2NUMS + "' as (i : int, x : chararray); " 
    + "g2 = group l2 by i; "
    + "f2 = foreach g2 generate group as gkey, flatten(l2); "
    + "u = union onschema f1, f2; " ; 
    Util.registerMultiLineQuery(pig, query);

    Schema sch = pig.dumpSchema("u");
    Schema expectedSch = 
        Utils.getSchemaFromString("gkey: int, l1::i: int, l1::j: int, l2::i: int, l2::x: chararray");
    // JUnit order is (message, expected, actual); the original had the last
    // two swapped, which produces a misleading failure message.
    assertEquals("Checking expected schema", expectedSch, sch);

    Iterator<Tuple> it = pig.openIterator("u");
    // Columns absent from one input are expected as null in its rows.
    List<Tuple> expectedRes = 
        Util.getTuplesFromConstantTupleStrings(
                new String[] {
                        "(1,1,2,null,null)",
                        "(5,5,3,null,null)",
                        "(1,null,null,1,'2')",
                        "(5,null,null,5,'3')"
                });
    Util.checkQueryOutputsAfterSort(it, expectedRes);
}
 
Example 12
Source File: TestUnionOnSchema.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Test UNION ONSCHEMA where a common column has additional 'namespace' part
 *  in the column name in one of the inputs. The union is checked in both
 *  input orders ({@code f, l2} and {@code l2, f}) against the same expected
 *  schema and rows.
 *
 * @throws IOException propagated from the Pig calls made by this test
 * @throws ParserException propagated from the Pig calls made by this test
 */
@Test
public void testUnionOnSchemaScopedColumnName() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL);
    String query_prefix = 
    "  l1 = load '" + INP_FILE_2NUMS + "' as (i : int, j : int); " 
    + "g = group l1 by i; "
    + "f = foreach g generate flatten(l1); "
    + "l2 = load '" + INP_FILE_2NUMS + "' as (i : int, j : int); ";

    String query = query_prefix + "u = union onschema f, l2; " ; 
    Util.registerMultiLineQuery(pig, query);
    Schema sch = pig.dumpSchema("u");
    Schema expectedSch = Utils.getSchemaFromString("i: int, j: int");
    // JUnit order is (message, expected, actual); the original had the last
    // two swapped, which produces a misleading failure message.
    assertEquals("Checking expected schema", expectedSch, sch);
    Iterator<Tuple> it = pig.openIterator("u");

    List<Tuple> expectedRes = 
        Util.getTuplesFromConstantTupleStrings(
                new String[] {
                        "(1,2)",
                        "(5,3)",
                        "(1,2)",
                        "(5,3)"
                });
    Util.checkQueryOutputsAfterSort(it, expectedRes);

    // now try reversing the order of relation
    query = query_prefix + "u = union onschema l2, f; " ; 
    Util.registerMultiLineQuery(pig, query);
    sch = pig.dumpSchema("u");
    expectedSch = Utils.getSchemaFromString("i: int, j: int");
    assertEquals("Checking expected schema", expectedSch, sch);
    it = pig.openIterator("u");
    Util.checkQueryOutputsAfterSort(it, expectedRes);
}
 
Example 13
Source File: TestPigServer.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the schema reported by {@code dumpSchema} for a {@code join} of
 * two loaded relations; the expected field names carry the
 * {@code a::}/{@code b::} prefixes and the untyped {@code field4} is
 * expected as {@code bytearray}.
 */
@Test
public void testDescribeJoin() throws Throwable {
    final PigServer server = new PigServer(cluster.getExecType(), properties);
    final String[] statements = {
        "a = load 'a' as (field1: int, field2: float, field3: chararray );",
        "b = load 'b' as (field4, field5: double, field6: chararray );",
        "c = join a by field1, b by field4;",
    };
    for (String statement : statements) {
        server.registerQuery(statement);
    }

    final Schema actual = server.dumpSchema("c");
    final Schema wanted = Utils.getSchemaFromString(
            "a::field1: int,a::field2: float,a::field3: chararray,b::field4: bytearray,b::field5: double,b::field6: chararray");
    assertEquals(wanted, actual);
}
 
Example 14
Source File: TestUnionOnSchema.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Test UNION ONSCHEMA with operations after the union: a filter on the
 * unioned relation that references a column ({@code x}) present in only
 * one of the inputs.
 *
 * @throws IOException propagated from the Pig calls made by this test
 * @throws ParserException propagated from the Pig calls made by this test
 */
@Test
public void testUnionOnSchemaFilter() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL);
    String query =
        "  l1 = load '" + INP_FILE_2NUMS + "' as (i : int, x : int);"
        + "l2 = load '" + INP_FILE_2NUMS + "' as (i : int, j : int);"
        + "u = union onschema l1, l2;"
        + "fil = filter u by i == 5 and (x is null or x != 1);";
    Util.registerMultiLineQuery(pig, query);

    Schema sch = pig.dumpSchema("fil");
    Schema expectedSch = Utils.getSchemaFromString("i: int, x: int, j: int");
    // JUnit order is (message, expected, actual); the original had the last
    // two swapped, which produces a misleading failure message.
    assertEquals("Checking expected schema", expectedSch, sch);

    Iterator<Tuple> it = pig.openIterator("fil");
    List<Tuple> expectedRes = 
        Util.getTuplesFromConstantTupleStrings(
                new String[] {
                        "(5,null,3)",
                        "(5,3,null)"
                });
    Util.checkQueryOutputsAfterSort(it, expectedRes);
}
 
Example 15
Source File: TestPigServer.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that {@code dumpSchema} returns {@code null} for a
 * {@code foreach ... generate *} over a relation loaded without a
 * declared schema.
 */
@Test
public void testDescribeForeachNoSchema() throws Throwable {
    final PigServer server = new PigServer(cluster.getExecType(), properties);
    final String[] statements = {
        "a = load 'a' ;",
        "b = foreach a generate *;",
    };
    for (String statement : statements) {
        server.registerQuery(statement);
    }
    assertNull(server.dumpSchema("b"));
}
 
Example 16
Source File: TestUnion.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Unions the same input file loaded through two different loaders, casts
 * the bytearray fields after the union, and checks both the resulting
 * schema and the number of rows produced (3 input lines x 2 loads = 6).
 *
 * @throws Exception propagated from the Pig and file-creation calls below
 */
@Test
public void testCastingAfterUnionWithMultipleLoadersSameCaster()
    throws Exception {
    // Fields coming from different loaders but
    // having the same LoadCaster.
    File f1 = Util.createInputFile("tmp", "i1.txt", new String[] {"1\ta","2\tb","3\tc"});
    PigServer ps = new PigServer(ExecType.LOCAL, new Properties());
    // PigStorage and PigStorageWithStatistics have the same
    // LoadCaster(== Utf8StorageConverter)
    ps.registerQuery("A = load '" + Util.encodeEscape(f1.getAbsolutePath()) + "' as (a:bytearray, b:bytearray);");
    ps.registerQuery("B = load '" + Util.encodeEscape(f1.getAbsolutePath()) +
      "' using org.apache.pig.test.PigStorageWithStatistics() as (a:bytearray, b:bytearray);");
    ps.registerQuery("C = union onschema A,B;");
    ps.registerQuery("D = foreach C generate (int)a as a,(chararray)b as b;");
    // 'a' is coming from A and 'b' is coming from B; No overlaps.

    Schema dumpSchema = ps.dumpSchema("D");
    Schema expected = new Schema ();
    expected.add(new Schema.FieldSchema("a", DataType.INTEGER));
    expected.add(new Schema.FieldSchema("b", DataType.CHARARRAY));
    assertEquals(expected, dumpSchema);

    Iterator<Tuple> itr = ps.openIterator("D");
    int recordCount = 0;
    // Drive the loop with hasNext() rather than relying on next() returning
    // null at exhaustion, which the java.util.Iterator contract does not
    // promise (it specifies NoSuchElementException instead).
    while (itr.hasNext()) {
        itr.next();
        ++recordCount;
    }
    assertEquals(6, recordCount);
}
 
Example 17
Source File: TestPigServer.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the field types reported for a {@code foreach} that generates a
 * scalar followed by a two-element tuple expression: the first field is
 * expected to be an integer, the second a tuple.
 */
@Test
public void testDescribeTuple2Elem() throws Throwable {
    final PigServer server = new PigServer(cluster.getExecType(), properties);
    final String loadStmt = "a = load 'a' as (field1: int, field2: int, field3: int );";
    final String tupleStmt = "b = foreach a generate field1, (field2, field3);";
    server.registerQuery(loadStmt);
    server.registerQuery(tupleStmt);

    final Schema described = server.dumpSchema("b");
    assertTrue(described.getField(0).type == DataType.INTEGER);
    assertTrue(described.getField(1).type == DataType.TUPLE);
}
 
Example 18
Source File: TestTypedMap.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Loads untyped map literals, casts them to a map whose values are bags of
 * {@code (i:int, j:int)} tuples, and checks the reported schema plus the
 * keys and values of each row (including keys whose value is empty).
 *
 * @throws IOException propagated from the Pig and file-system calls below
 * @throws ParserException propagated from the Pig calls below
 */
@Test
public void testComplexCast() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL, new Properties());
    String[] input = {
            "[key#{(1,2),(1,3)},134#]",
            "[key2#]",
    };

    Util.createInputFile(FileSystem.getLocal(new Configuration()), tmpDirName + "/testComplexCast", input);

    String query = "a = load '" + tmpDirName + "/testComplexCast' as (m);" +
        "b = foreach a generate ([{(i:int,j:int)}])m;";
    Util.registerMultiLineQuery(pig, query);
    Schema sch = pig.dumpSchema("b");
    // JUnit order is (message, expected, actual); the original had the last
    // two swapped, which produces a misleading failure message.
    assertEquals("Checking expected schema", "{m: map[{(i: int,j: int)}]}", sch.toString());
    Iterator<Tuple> it = pig.openIterator("b");

    // First row: "key" holds a bag, "134" is present with a null value.
    Assert.assertTrue(it.hasNext());
    Tuple t = it.next();
    Assert.assertEquals(1, t.size());
    Assert.assertTrue(t.get(0) instanceof Map);
    Map<?, ?> firstMap = (Map<?, ?>) t.get(0);
    Assert.assertTrue(firstMap.containsKey("key"));
    Assert.assertTrue(firstMap.containsKey("134"));
    Assert.assertTrue(firstMap.get("key") instanceof DataBag);
    Assert.assertEquals("{(1,2),(1,3)}", firstMap.get("key").toString());
    Assert.assertNull(firstMap.get("134"));

    // Second row: "key2" is present with a null value.
    Assert.assertTrue(it.hasNext());
    t = it.next();
    Map<?, ?> secondMap = (Map<?, ?>) t.get(0);
    Assert.assertTrue(secondMap.containsKey("key2"));
    Assert.assertNull(secondMap.get("key2"));

    Assert.assertFalse(it.hasNext());
}
 
Example 19
Source File: TestUnionOnSchema.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Registers {@code query} on a fresh local-mode {@link PigServer} and
 * asserts that the schema dumped for alias {@code u} equals
 * {@code expectedSch}.
 *
 * @param query multi-line Pig script; it is expected to define alias {@code u}
 * @param expectedSch schema the alias {@code u} should have
 * @throws IOException propagated from the Pig calls below
 */
private void checkSchemaEquals(String query, Schema expectedSch) throws IOException {
    final PigServer server = new PigServer(ExecType.LOCAL);
    Util.registerMultiLineQuery(server, query);
    final Schema described = server.dumpSchema("u");
    assertEquals("Checking expected schema", expectedSch, described);
}
 
Example 20
Source File: TestUnionOnSchema.java    From spork with Apache License 2.0 4 votes vote down vote up
/**
 * Test UNION ONSCHEMA with udf whose default type is different from
 * final type - where udf is not in immediate input of union.
 * The union is described in both input orders; the row check runs against
 * the second (reversed) registration of {@code u}.
 *
 * @throws IOException propagated from the Pig calls made by this test
 * @throws ParserException propagated from the Pig calls made by this test
 */
@Test
public void testUnionOnSchemaUdfTypeEvolution2() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL);
    String query_prefix =
        "  l1 = load '" + INP_FILE_2NUM_1CHAR_1BAG + "' as " 
        + "  (i : int, c : chararray, j : int " 
        +       ", b : bag { t : tuple (c1 : int, c2 : chararray)}" 
        +       ", t : tuple (tc1 : int, tc2 : chararray) );"
        + " l2 = load '" + INP_FILE_2NUM_1CHAR_1BAG + "' as " 
        + "  (i : int, c : chararray, j : int " 
        +       ", b : bag { t : tuple (c1 : int, c2 : chararray)}" 
        +       ", t : tuple (tc1 : int, tc2 : chararray) );"
        + "f1 = foreach l1 generate i, MAX(b.c1) as mx;"
        + "f11 = foreach f1 generate i, mx;"
        + "f2 = foreach l2 generate i, COUNT(b.c1) as mx;"
        + "f22 = foreach f2 generate i, mx;";

    String query = query_prefix  + "u = union onschema f11, f22;";
    Util.registerMultiLineQuery(pig, query);
    Schema sch = pig.dumpSchema("u");
    Schema expectedSch = 
        Utils.getSchemaFromString("i: int, mx: long");
    // JUnit order is (message, expected, actual); the original had the last
    // two swapped, which produces a misleading failure message.
    assertEquals("Checking expected schema", expectedSch, sch);

    // verify schema for reverse order of relations as well
    query = query_prefix  + "u = union onschema f22, f11;";
    Util.registerMultiLineQuery(pig, query);
    sch = pig.dumpSchema("u");
    expectedSch = 
        Utils.getSchemaFromString("i: int, mx: long");
    assertEquals("Checking expected schema", expectedSch, sch);

    Iterator<Tuple> it = pig.openIterator("u");

    List<Tuple> expectedRes = 
        Util.getTuplesFromConstantTupleStrings(
                new String[] {
                        "(1,1L)",
                        "(5,2L)",
                        "(1,2L)",
                        "(5,2L)"
                });
    Util.checkQueryOutputsAfterSort(it, expectedRes);
}