org.apache.pig.parser.ParserException Java Examples

The following examples show how to use org.apache.pig.parser.ParserException. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestProjectRange.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Negative tests for range projection in foreach: a range whose start column
 * comes after its end column is expected to fail with a dedicated message,
 * both when the columns are given by position and when given by alias.
 *
 * @throws IOException
 * @throws ParserException
 */
@Test
public void testNegativeForeachWSchema() throws IOException, ParserException {
    final String expectedMessage =
            "start column appears after end column in range projection";

    // range by position: $3 .. $1
    String positionalQuery =
        "  l1 = load '" + INP_FILE_5FIELDS + "' as (a : int, b : float, c : int, d : int, e : int);"
        + "f = foreach l1 generate  $3 .. $1;";
    Util.checkExceptionMessage(positionalQuery, "f", expectedMessage);

    // range by alias: c .. b (c is declared after b in the load schema)
    String aliasQuery =
        "  l1 = load '" + INP_FILE_5FIELDS + "' as (a : int, b : float, c : int, d : int, e : int);"
        + "f = foreach l1 generate  c .. b;";
    Util.checkExceptionMessage(aliasQuery, "f", expectedMessage);
}
 
Example #2
Source File: TestTypeCheckingValidatorNewLP.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Checks the schema reported for an aggregate over a dereferenced bag column:
 * {@code MAX(b.jd)}, where {@code jd} is {@code j/10.1}, is expected to be
 * a single double field named {@code mx}.
 */
@Test
public void testDereferenceTypeSet() throws IOException, ParserException {
    String script =
        "a = load 'a' as (i : int, j : int);"
        + " b = foreach a generate i, j/10.1 as jd;"
        + " c = group b by i;"
        + " d = foreach c generate MAX(b.jd) as mx;";

    PigServer server = new PigServer(ExecType.LOCAL);
    Util.registerMultiLineQuery(server, script);

    Schema expected = Utils.getSchemaFromString("mx: double");
    Schema actual = server.dumpSchema("d");
    assertEquals("Checking expected schema", expected, actual);
}
 
Example #3
Source File: TestTypeCheckingValidatorNewLP.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Loads a tuple column declared with an empty inner schema ({@code t:tuple()}),
 * casts it to {@code tuple(int, long)} and checks the sum of its two fields.
 *
 * @throws IOException if the input file cannot be written (also declared for
 *         the Pig calls made below)
 * @throws ParserException
 */
@Test
public void testCastEmptyInnerSchema() throws IOException, ParserException{
    final String inputFile = "testCastEmptyInnerSchema.txt";
    PrintWriter w = new PrintWriter(new FileWriter(inputFile));
    try {
        w.println("(1,2)");
        w.println("(2,3)");
        // PrintWriter never throws on write, it only records the failure;
        // surface it here rather than running the query on a broken input file.
        if (w.checkError()) {
            throw new IOException("Failed to write test input file " + inputFile);
        }
    } finally {
        // always release the file handle, even if writing failed
        w.close();
    }
    PigServer pigServer = new PigServer(LOCAL);

    String query = "a = load '" + inputFile + "' as (t:tuple());" +
    "b = foreach a generate (tuple(int, long))t;" +
    "c = foreach b generate t.$0 + t.$1;";

    Util.registerMultiLineQuery(pigServer, query);

    List<Tuple> expectedRes =
        Util.getTuplesFromConstantTupleStrings(
                new String[] {
                        "(3L)",
                        "(5L)",
                });
    Iterator<Tuple> it = pigServer.openIterator("c");
    Util.checkQueryOutputs(it, expectedRes);
}
 
Example #4
Source File: TestTupleRecordConsumer.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a single group for a nested Pig schema (a long id, a tuple holding
 * two bags, and a bag of tuples that themselves contain a bag) and hands it,
 * together with the schema string, to {@code testFromGroups}.
 */
@Test
public void testArtSchema() throws ExecException, ParserException {

  final String schema =
          "DocId:long, " +
          "Links:(Backward:{(long)}, Forward:{(long)}), " +
          "Name:{(Language:{(Code:chararray,Country:chararray)}, Url:chararray)}";

  SimpleGroup document = new SimpleGroup(getMessageType(schema));
  document.add("DocId", 1L);

  // Links: one entry in each of the Backward and Forward bags
  Group linksGroup = document.addGroup("Links");
  Group backwardBag = linksGroup.addGroup("Backward").addGroup("bag");
  backwardBag.add(0, 1L);
  Group forwardBag = linksGroup.addGroup("Forward").addGroup("bag");
  forwardBag.add(0, 1L);

  // Name: one element carrying a Language bag entry and a Url
  Group nameBag = document.addGroup("Name").addGroup("bag");
  nameBag.addGroup("Language").addGroup("bag").append("Code", "en").append("Country", "US");
  nameBag.add("Url", "http://foo/bar");

  testFromGroups(schema, Arrays.<Group>asList(document));
}
 
Example #5
Source File: TestProjectStarExpander.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Test projecting {@code *} more than once in the same foreach: the first
 * star is given new aliases, the second keeps the load aliases.
 *
 * @throws IOException
 * @throws ParserException
 */
@Test
public void testProjectStarMulti() throws IOException, ParserException {
    PigServer server = new PigServer(ExecType.LOCAL);
    String script =
        "  l1 = load '" + INP_FILE_5FIELDS + "' as (a : int, b : int, c : int);"
        + "f = foreach l1 generate * as (aa, bb, cc), *;";

    Util.registerMultiLineQuery(server, script);

    // schema: renamed columns first, then the original ones
    Schema expectedSchema = Utils.getSchemaFromString(
            "aa : int, bb : int, cc : int, a : int, b : int, c : int");
    Schema actualSchema = server.dumpSchema("f");
    assertEquals("Checking expected schema", expectedSchema, actualSchema);

    // data: every row appears twice side by side
    String[] expectedRows = {
            "(10,20,30,10,20,30)",
            "(11,21,31,11,21,31)",
    };
    List<Tuple> expectedTuples = Util.getTuplesFromConstantTupleStrings(expectedRows);
    Iterator<Tuple> actualTuples = server.openIterator("f");
    Util.checkQueryOutputsAfterSort(actualTuples, expectedTuples);
}
 
Example #6
Source File: GroovyEvalFunc.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the output schema. When a schema function is configured, it is
 * invoked with a one-field tuple holding the input schema's string form
 * (outer braces removed) and its String result is parsed into a Schema;
 * otherwise the stored schema is returned.
 *
 * @param input the input schema
 * @return the parsed result of the schema function, or the stored schema
 * @throws RuntimeException wrapping any ParserException or IOException
 */
@Override
public Schema outputSchema(Schema input) {
  if (null == this.schemaFunction) {
    return this.schema;
  }

  try {
    Tuple args = TupleFactory.getInstance().newTuple(1);
    // Strip enclosing '{}' from schema
    String inputSchema = input.toString()
        .replaceAll("^\\{", "")
        .replaceAll("\\}$", "");
    args.set(0, inputSchema);
    String declaredSchema = (String) this.schemaFunction.exec(args);
    return Utils.getSchemaFromString(declaredSchema);
  } catch (ParserException pe) {
    throw new RuntimeException(pe);
  } catch (IOException ioe) {
    throw new RuntimeException(ioe);
  }
}
 
Example #7
Source File: TestUnionOnSchema.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Test UNION ONSCHEMA on two inputs that share column names but declare
 * different numeric types (int/long and double/float), i.e. type promotion.
 *
 * @throws IOException
 * @throws ParserException
 */
@Test
public void testUnionOnSchemaDiffNumType() throws IOException, ParserException {
    PigServer server = new PigServer(ExecType.LOCAL);
    String script =
        "  l1 = load '" + INP_FILE_2NUMS + "' as (i : int, j : double);"
        + "l2 = load '" + INP_FILE_2NUMS + "' as (i : long, j : float);"
        + "u = union onschema l1, l2;";
    Util.registerMultiLineQuery(server, script);
    Iterator<Tuple> actualTuples = server.openIterator("u");

    // both inputs contribute the same two rows, as (long, double)
    String[] expectedRows = {
            "(1L,2.0)",
            "(5L,3.0)",
            "(1L,2.0)",
            "(5L,3.0)"
    };
    List<Tuple> expectedTuples = Util.getTuplesFromConstantTupleStrings(expectedRows);
    Util.checkQueryOutputsAfterSort(actualTuples, expectedTuples);
}
 
Example #8
Source File: TestUnionOnSchema.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Test UNION ONSCHEMA on two inputs whose column names do not overlap:
 * each output row carries nulls for the columns of the other input.
 *
 * @throws IOException
 * @throws ParserException
 */
@Test
public void testUnionOnSchemaNoCommonCols() throws IOException, ParserException {
    PigServer server = new PigServer(ExecType.LOCAL);
    String script =
        "  l1 = load '" + INP_FILE_2NUMS + "' as (i : int, j : int);"
        + "l2 = load '" + INP_FILE_2NUMS + "' as (x : long, y : float);"
        + "u = union onschema l1, l2;";
    Util.registerMultiLineQuery(server, script);
    Iterator<Tuple> actualTuples = server.openIterator("u");

    String[] expectedRows = {
            // rows from l1: (i, j) filled, (x, y) null
            "(1,2,null,null)",
            "(5,3,null,null)",
            // rows from l2: (i, j) null, (x, y) filled
            "(null,null,1L,2.0F)",
            "(null,null,5L,3.0F)"
    };
    List<Tuple> expectedTuples = Util.getTuplesFromConstantTupleStrings(expectedRows);
    Util.checkQueryOutputsAfterSort(actualTuples, expectedTuples);
}
 
Example #9
Source File: TestForEachStar.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Projects {@code *} from a relation loaded without a schema and checks
 * that the first output tuple holds the fields 'one' and 'two' as
 * DataByteArray values.
 */
@Test
public void testForeachStarSchemaUnkown() throws IOException, ParserException{
    PigServer server = new PigServer(ExecType.LOCAL);
    String script =
        "  l1 = load '" + INPUT_FILE + "' ;"
        + "f1 = foreach l1 generate * ;";
    Util.registerMultiLineQuery(server, script);
    server.explain("f1",System.out);
    Iterator<Tuple> actualTuples = server.openIterator("f1");

    // build the expected tuple by wrapping each constant field in a DataByteArray
    Tuple asCharArrays = (Tuple)Util.getPigConstant("('one','two')");
    Tuple asByteArrays = TupleFactory.getInstance().newTuple();
    for (Object value : asCharArrays.getAll()) {
        asByteArrays.append(new DataByteArray(value.toString()));
    }

    assertTrue("has output", actualTuples.hasNext());
    assertEquals(asByteArrays, actualTuples.next());
}
 
Example #10
Source File: TestSchema.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Round-trips several nested schema strings through
 * {@code Utils.getSchemaFromString} and {@code toString()}, and checks that
 * the original string comes back once the first and last characters,
 * whitespace and the {@code bag_0:} / {@code tuple_0:} labels are removed.
 */
@Test
public void testGetInitialSchemaStringFromSchema() throws ParserException {
    final String[] inputs = {
            "my_list:{array:(array_element:(num1:int,num2:int))}",
            "my_list:{array:(array_element:(num1:int,num2:int),c:chararray)}",
            "bag:{mytuple3:(mytuple2:(mytuple:(f1:int)))}",
            "bag:{mytuple:(f1:int)}",
            "{((num1:int,num2:int))}"
    };
    for (int idx = 0; idx < inputs.length; idx++) {
        String original = inputs[idx];
        String printed = Utils.getSchemaFromString(original).toString();
        // drop the enclosing first/last character, then the generated noise
        String unwrapped = printed.substring(1, printed.length() - 1);
        String normalized = unwrapped.replaceAll("\\s|bag_0:|tuple_0:", "");
        assertTrue(original.equals(normalized));
    }
}
 
Example #11
Source File: TestRelationToExprProject.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Runs a filter / group / nested-foreach / group-all pipeline and checks
 * that the final relation holds exactly one row whose first field is 20.
 */
@Test
public void testFilterCount3() throws IOException, ParserException {
    String script = "TESTDATA =  load '"+TEST_FILTER_COUNT3_INPUT+"' using PigStorage() as (timestamp:chararray, testid:chararray, userid: chararray, sessionid:chararray, value:long, flag:int);" +
            "TESTDATA_FILTERED = filter TESTDATA by (timestamp gte '1230800400000' and timestamp lt '1230804000000' and value != 0);" +
            "TESTDATA_GROUP = group TESTDATA_FILTERED by testid;" +
            "TESTDATA_AGG = foreach TESTDATA_GROUP {" +
            "                        A = filter TESTDATA_FILTERED by (userid eq sessionid);" +
            "                        C = distinct A.userid;" +
            "                        generate group as testid, COUNT(TESTDATA_FILTERED) as counttestdata, COUNT(C) as distcount, SUM(TESTDATA_FILTERED.flag) as total_flags;" +
            "                }" +
            "TESTDATA_AGG_1 = group TESTDATA_AGG ALL;" +
            "TESTDATA_AGG_2 = foreach TESTDATA_AGG_1 generate COUNT(TESTDATA_AGG);" ;
    pigServer.registerQuery(script);

    int rowCount = 0;
    for (Iterator<Tuple> rows = pigServer.openIterator("TESTDATA_AGG_2"); rows.hasNext(); rowCount++) {
        Tuple row = rows.next();
        assertEquals(20L, row.get(0));
    }
    // the group-all must collapse everything into a single row
    assertEquals(1, rowCount);
}
 
Example #12
Source File: TestUnionOnSchema.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Test UNION ONSCHEMA where one input has its untyped columns cast to int
 * and the other input is the untyped relation itself.
 *
 * @throws IOException
 * @throws ParserException
 */
@Test
public void testUnionOnSchemaCastOnByteArray() throws IOException, ParserException {
    PigServer server = new PigServer(ExecType.LOCAL);
    String script =
        "  l1 = load '" + INP_FILE_2NUMS + "' as (i, j);"
        + " f1 = foreach l1 generate (int)i, (int)j;"
        + "u = union onschema f1, l1;";
    Util.registerMultiLineQuery(server, script);
    Iterator<Tuple> actualTuples = server.openIterator("u");

    // each input contributes the same two rows
    String[] expectedRows = {
            "(1,2)",
            "(5,3)",
            "(1,2)",
            "(5,3)"
    };
    List<Tuple> expectedTuples = Util.getTuplesFromConstantTupleStrings(expectedRows);
    Util.checkQueryOutputsAfterSort(actualTuples, expectedTuples);
}
 
Example #13
Source File: TestTypedMap.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Loads a typed map ({@code m:[int]}), casts it to {@code [long]} and checks
 * both the reported schema and that the values of the single row are Longs.
 */
@Test
public void testComplexCast2() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL, new Properties());
    String[] input = {
            "[key#1,key2#2]",
    };

    Util.createInputFile(FileSystem.getLocal(new Configuration()), tmpDirName + "/testComplexCast2", input);

    String query = "a = load '" + tmpDirName + "/testComplexCast2' as (m:[int]);" +
        "b = foreach a generate ([long])m;";
    Util.registerMultiLineQuery(pig, query);
    Schema sch = pig.dumpSchema("b");
    // JUnit's argument order is (message, expected, actual)
    assertEquals("Checking expected schema", "{m: map[long]}", sch.toString());
    Iterator<Tuple> it = pig.openIterator("b");

    Assert.assertTrue(it.hasNext());
    Tuple t = it.next();
    Assert.assertEquals(1, t.size());
    // check the type before casting so a wrong type fails as an assertion
    Assert.assertTrue(t.get(0) instanceof Map);
    Map<?, ?> m = (Map<?, ?>) t.get(0);
    Assert.assertTrue(m.containsKey("key"));
    Assert.assertTrue(m.containsKey("key2"));
    Assert.assertTrue(m.get("key") instanceof Long);
    Assert.assertEquals("1", m.get("key").toString());
    Assert.assertTrue(m.get("key2") instanceof Long);
    Assert.assertEquals("2", m.get("key2").toString());

    Assert.assertFalse(it.hasNext());
}
 
Example #14
Source File: TestTypedMap.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Loads two rows with a typed map column ({@code m:map[int]}) and checks the
 * reported schema and that the map values of both rows are Integers.
 */
@Test
public void testSimpleLoad() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL, new Properties());
    String[] input = {
            "[key#1,key2#2]",
            "[key#2]",
    };

    Util.createInputFile(FileSystem.getLocal(new Configuration()), tmpDirName + "/table_testSimpleLoad", input);

    String query =
        "  a = load '" + tmpDirName + "/table_testSimpleLoad' as (m:map[int]);";
    Util.registerMultiLineQuery(pig, query);
    Schema sch = pig.dumpSchema("a");
    // JUnit's argument order is (message, expected, actual)
    assertEquals("Checking expected schema", "{m: map[int]}", sch.toString());
    Iterator<Tuple> it = pig.openIterator("a");

    // first row: [key#1,key2#2]
    Assert.assertTrue(it.hasNext());
    Tuple t = it.next();
    Assert.assertEquals(1, t.size());
    // check the type before casting so a wrong type fails as an assertion
    Assert.assertTrue(t.get(0) instanceof Map);
    Map<?, ?> m = (Map<?, ?>) t.get(0);
    Assert.assertTrue(m.containsKey("key"));
    Assert.assertTrue(m.containsKey("key2"));
    Assert.assertTrue(m.get("key") instanceof Integer);
    Assert.assertEquals("1", m.get("key").toString());
    Assert.assertTrue(m.get("key2") instanceof Integer);
    Assert.assertEquals("2", m.get("key2").toString());

    // second row: [key#2]
    Assert.assertTrue(it.hasNext());
    t = it.next();
    m = (Map<?, ?>) t.get(0);
    Assert.assertTrue(m.containsKey("key"));
    Assert.assertTrue(m.get("key") instanceof Integer);
    Assert.assertEquals("2", m.get("key").toString());

    Assert.assertFalse(it.hasNext());
}
 
Example #15
Source File: TestTypedMap.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Loads a map whose values are declared as bags of (int, int) tuples and
 * checks the reported schema, the bag value, and that keys with no value
 * ({@code 134#}, {@code key2#}) are present and map to null.
 */
@Test
public void testComplexLoad() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL, new Properties());
    String[] input = {
            "[key#{(1,2),(1,3)},134#]",
            "[key2#]",
    };

    Util.createInputFile(FileSystem.getLocal(new Configuration()), tmpDirName + "/testComplexLoad", input);

    String query = "a = load '" + tmpDirName + "/testComplexLoad' as (m:map[bag{(i:int,j:int)}]);";
    Util.registerMultiLineQuery(pig, query);
    Schema sch = pig.dumpSchema("a");
    // JUnit's argument order is (message, expected, actual)
    assertEquals("Checking expected schema", "{m: map[{(i: int,j: int)}]}", sch.toString());
    Iterator<Tuple> it = pig.openIterator("a");

    // first row: one bag value and one key without a value
    Assert.assertTrue(it.hasNext());
    Tuple t = it.next();
    Assert.assertEquals(1, t.size());
    // check the type before casting so a wrong type fails as an assertion
    Assert.assertTrue(t.get(0) instanceof Map);
    Map<?, ?> m = (Map<?, ?>) t.get(0);
    Assert.assertTrue(m.containsKey("key"));
    Assert.assertTrue(m.containsKey("134"));
    Assert.assertTrue(m.get("key") instanceof DataBag);
    Assert.assertEquals("{(1,2),(1,3)}", m.get("key").toString());
    Assert.assertNull(m.get("134"));

    // second row: a single key without a value
    Assert.assertTrue(it.hasNext());
    t = it.next();
    m = (Map<?, ?>) t.get(0);
    Assert.assertTrue(m.containsKey("key2"));
    Assert.assertNull(m.get("key2"));

    Assert.assertFalse(it.hasNext());
}
 
Example #16
Source File: TestTypedMap.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Loads an untyped column, casts it to {@code [int]} and checks the reported
 * schema and that the map values of both rows are Integers.
 */
@Test
public void testSimpleMapCast() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL, new Properties());
    String[] input = {
            "[key#1,key2#2]",
            "[key#2]",
    };

    Util.createInputFile(FileSystem.getLocal(new Configuration()), tmpDirName + "/testSimpleMapCast", input);

    String query =
        "a = load '" + tmpDirName + "/testSimpleMapCast' as (m);" +
        "b = foreach a generate ([int])m;";
    Util.registerMultiLineQuery(pig, query);
    Schema sch = pig.dumpSchema("b");
    // JUnit's argument order is (message, expected, actual)
    assertEquals("Checking expected schema", "{m: map[int]}", sch.toString());
    Iterator<Tuple> it = pig.openIterator("b");

    // first row: [key#1,key2#2]
    Assert.assertTrue(it.hasNext());
    Tuple t = it.next();
    Assert.assertEquals(1, t.size());
    // check the type before casting so a wrong type fails as an assertion
    Assert.assertTrue(t.get(0) instanceof Map);
    Map<?, ?> m = (Map<?, ?>) t.get(0);
    Assert.assertTrue(m.containsKey("key"));
    Assert.assertTrue(m.containsKey("key2"));
    Assert.assertTrue(m.get("key") instanceof Integer);
    Assert.assertEquals("1", m.get("key").toString());
    Assert.assertTrue(m.get("key2") instanceof Integer);
    Assert.assertEquals("2", m.get("key2").toString());

    // second row: [key#2]
    Assert.assertTrue(it.hasNext());
    t = it.next();
    m = (Map<?, ?>) t.get(0);
    Assert.assertTrue(m.containsKey("key"));
    Assert.assertTrue(m.get("key") instanceof Integer);
    Assert.assertEquals("2", m.get("key").toString());

    Assert.assertFalse(it.hasNext());
}
 
Example #17
Source File: TestTypedMap.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Looks up {@code m#'key'} on a typed map column ({@code m:map[int]}) and
 * checks that the result schema is {@code {int}} and that the two rows
 * yield 1 and 2.
 */
@Test
public void testSimpleMapKeyLookup() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL, new Properties());
    String[] input = {
            "[key#1,key2#2]",
            "[key#2]",
    };

    Util.createInputFile(FileSystem.getLocal(new Configuration()), tmpDirName + "/testSimpleMapKeyLookup", input);

    String query =
        "a = load '" + tmpDirName + "/testSimpleMapKeyLookup' as (m:map[int]);" +
        "b = foreach a generate m#'key';";
    Util.registerMultiLineQuery(pig, query);
    Schema sch = pig.dumpSchema("b");
    // JUnit's argument order is (message, expected, actual)
    assertEquals("Checking expected schema", "{int}", sch.toString());
    Iterator<Tuple> it = pig.openIterator("b");

    // assertEquals on the boxed value reports the actual value (and type
    // mismatches) on failure instead of a bare "false" or a ClassCastException
    Assert.assertTrue(it.hasNext());
    Tuple t = it.next();
    Assert.assertEquals(1, t.size());
    Assert.assertEquals(Integer.valueOf(1), t.get(0));

    Assert.assertTrue(it.hasNext());
    t = it.next();
    Assert.assertEquals(1, t.size());
    Assert.assertEquals(Integer.valueOf(2), t.get(0));

    Assert.assertFalse(it.hasNext());
}
 
Example #18
Source File: TestTypedMap.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Loads an untyped column, casts it to a map of bags
 * ({@code [{(i:int,j:int)}]}) and checks the reported schema, the bag value,
 * and that keys with no value ({@code 134#}, {@code key2#}) map to null.
 */
@Test
public void testComplexCast() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL, new Properties());
    String[] input = {
            "[key#{(1,2),(1,3)},134#]",
            "[key2#]",
    };

    Util.createInputFile(FileSystem.getLocal(new Configuration()), tmpDirName + "/testComplexCast", input);

    String query = "a = load '" + tmpDirName + "/testComplexCast' as (m);" +
        "b = foreach a generate ([{(i:int,j:int)}])m;";
    Util.registerMultiLineQuery(pig, query);
    Schema sch = pig.dumpSchema("b");
    // JUnit's argument order is (message, expected, actual)
    assertEquals("Checking expected schema", "{m: map[{(i: int,j: int)}]}", sch.toString());
    Iterator<Tuple> it = pig.openIterator("b");

    // first row: one bag value and one key without a value
    Assert.assertTrue(it.hasNext());
    Tuple t = it.next();
    Assert.assertEquals(1, t.size());
    // check the type before casting so a wrong type fails as an assertion
    Assert.assertTrue(t.get(0) instanceof Map);
    Map<?, ?> m = (Map<?, ?>) t.get(0);
    Assert.assertTrue(m.containsKey("key"));
    Assert.assertTrue(m.containsKey("134"));
    Assert.assertTrue(m.get("key") instanceof DataBag);
    Assert.assertEquals("{(1,2),(1,3)}", m.get("key").toString());
    Assert.assertNull(m.get("134"));

    // second row: a single key without a value
    Assert.assertTrue(it.hasNext());
    t = it.next();
    m = (Map<?, ?>) t.get(0);
    Assert.assertTrue(m.containsKey("key2"));
    Assert.assertNull(m.get("key2"));

    Assert.assertFalse(it.hasNext());
}
 
Example #19
Source File: TestUnionOnSchema.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Test UNION ONSCHEMA where one input declares every column as bytearray
 * and the other declares typed columns (long, chararray, int, bag).
 *
 * @throws IOException
 * @throws ParserException
 */
@Test
public void testUnionOnSchemaByteArrayConversions() throws IOException, ParserException {
    PigServer server = new PigServer(ExecType.LOCAL);
    String script =
        " l1 = load '" + INP_FILE_2NUM_1CHAR_1BAG + "' as "
        + "  (i : bytearray, x : bytearray, j : bytearray "
        +       ", b : bytearray); "
        + "l2 = load '" + INP_FILE_2NUM_1CHAR_1BAG + "' as "
        + "  (i : long, c : chararray, j : int "
        +       ", b : bag { t : tuple (c1 : int, c2 : chararray)} ); "
        + "u = union onSchema l1, l2;";
    Util.registerMultiLineQuery(server, script);
    server.explain("u", System.out);

    Iterator<Tuple> actualTuples = server.openIterator("u");

    String[] expectedRows = {
            "(1L,null,2,{(1,'a'),(1,'b')},'abc')",
            "(1L,'abc',2,{(1,'a'),(1,'b')},null)",
            "(5L,null,3,{(2,'a'),(2,'b')},'def')",
            "(5L,'def',3,{(2,'a'),(2,'b')},null)",
    };
    List<Tuple> expectedTuples = Util.getTuplesFromConstantTupleStrings(expectedRows);

    // the constants above are parsed as chararray; the 2nd field (x) is
    // expected as bytearray, so re-wrap every non-null value
    for (Tuple expected : expectedTuples) {
        Object second = expected.get(1);
        if (second == null) {
            continue;
        }
        expected.set(1, new DataByteArray(second.toString()));
    }
    Util.checkQueryOutputsAfterSort(actualTuples, expectedTuples);
}
 
Example #20
Source File: TestUnionOnSchema.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Test UNION ONSCHEMA on three inputs whose schemas only partially overlap.
 *
 * @throws IOException
 * @throws ParserException
 */
@Test
public void testUnionOnSchema3Inputs() throws IOException, ParserException {
    PigServer server = new PigServer(ExecType.LOCAL);
    String script =
        "  l1 = load '" + INP_FILE_2NUMS + "' as (i : int, j : int); "
        + "l2 = load '" + INP_FILE_2NUMS + "' as (i : double, x : int); "
        + "l3 = load '" + INP_FILE_2NUM_1CHAR_1BAG + "' as "
        + "  (i : long, c : chararray, j : int "
        +       ", b : bag { t : tuple (c1 : int, c2 : chararray)} ); "
        + "u = union onschema l1, l2, l3;";
    Util.registerMultiLineQuery(server, script);
    server.explain("u", System.out);

    Iterator<Tuple> actualTuples = server.openIterator("u");

    // two rows per input; columns an input does not declare are null
    String[] expectedRows = {
            "(1.0,2,null,null,null)",
            "(5.0,3,null,null,null)",
            "(1.0,null,2,null,null)",
            "(5.0,null,3,null,null)",
            "(1.0,2,null,'abc',{(1,'a'),(1,'b')})",
            "(5.0,3,null,'def',{(2,'a'),(2,'b')})",
    };
    List<Tuple> expectedTuples = Util.getTuplesFromConstantTupleStrings(expectedRows);
    Util.checkQueryOutputsAfterSort(actualTuples, expectedTuples);
}
 
Example #21
Source File: TestProjectRange.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Joins two schema-less relations on the positional range {@code $0 .. $3}
 * and checks the (null) schema, the number of join expression plans and the
 * joined rows.
 */
@Test
public void testRangeJoinMixNOSchema() throws IOException, ParserException{
    String script =
        "  l1 = load '" + INP_FILE_5FIELDS + "';" +
        "  l2 = load '" + INP_FILE_5FIELDS + "';" +
        "  j = join l1 by  $0 .. $3,  l2 by $0 .. $3;";

    // no load schema, so the helper is given a null expected schema
    compileAndCompareSchema((Schema)null, script, "j");

    //check number of group expression plans
    LogicalPlan plan = createAndProcessLPlan(script);
    checkNumExpressionPlansForJoin(plan, 4);

    Util.registerMultiLineQuery(pigServer, script);

    String[] expectedRows = {
            "(10,20,30,40,50,10,20,30,40,50)",
            "(11,21,31,41,51,11,21,31,41,51)",
    };
    List<Tuple> expectedTuples = Util.getTuplesFromConstantTupleStrings(expectedRows);
    Iterator<Tuple> actualTuples = pigServer.openIterator("j");
    Util.checkQueryOutputs(actualTuples, expectedTuples);
}
 
Example #22
Source File: TestProjectStarExpander.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Projects {@code *} with an alias list, first covering only the leading
 * fields and then covering all five fields, and checks the resulting
 * schemas and the output of the second query.
 */
@Test
public void testProjectStarForeach() throws IOException, ParserException {
    PigServer server = new PigServer(ExecType.LOCAL);

    //specifying the new aliases only for initial set of fields
    String partialAliasScript =
        "  l1 = load '" + INP_FILE_5FIELDS + "' as (a : int, b : int, c : int, d : int, e : int);"
        + "f = foreach l1 generate * as (aa, bb, cc);";
    Util.registerMultiLineQuery(server, partialAliasScript);

    Schema expectedSchema =
        Utils.getSchemaFromString("aa : int, bb : int, cc : int, d : int, e : int");
    Schema actualSchema = server.dumpSchema("f");
    assertEquals("Checking expected schema", expectedSchema, actualSchema);

    //specifying aliases for all fields
    String fullAliasScript =
        "  l1 = load '" + INP_FILE_5FIELDS + "' as (a : int, b : int, c : int, d : int, e : int);"
        + "f = foreach l1 generate * as (aa, bb, cc, dd, ee);";
    Util.registerMultiLineQuery(server, fullAliasScript);

    expectedSchema =
        Utils.getSchemaFromString("aa : int, bb : int, cc : int, dd : int, ee : int");
    actualSchema = server.dumpSchema("f");
    assertEquals("Checking expected schema", expectedSchema, actualSchema);
    Iterator<Tuple> actualTuples = server.openIterator("f");

    String[] expectedRows = {
            "(10,20,30,40,50)",
            "(11,21,31,41,51)",
    };
    List<Tuple> expectedTuples = Util.getTuplesFromConstantTupleStrings(expectedRows);
    Util.checkQueryOutputsAfterSort(actualTuples, expectedTuples);
}
 
Example #23
Source File: TestProjectRange.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Test several different kinds of range projection (open start, closed,
 * open end) combined in a single foreach.
 *
 * @throws IOException
 * @throws ParserException
 */
@Test
public void testMixRangeForeachWSchema() throws IOException, ParserException {

    // alias list attached to the trailing open-ended range only
    String aliasedTailScript =
        "  l1 = load '" + INP_FILE_5FIELDS + "' as (a : int, b : float, c : int, d : int, e : int);"
        + "f = foreach l1 generate  .. b, c .. d, d .. as (aa, bb);";
    compileAndCompareSchema(
            "a : int, b : float, c : int, d : int, aa : int, bb : int",
            aliasedTailScript,
            "f");

    // single-column ranges renamed, remaining ranges keep the load aliases
    String mixedScript =
        "  l1 = load '" + INP_FILE_5FIELDS + "' as (a : int, b : int, c : int, d : long, e : int);"
        + "f = foreach l1 generate ..$0 as (first), e.. as (last), d ..,  .. b ;";
    compileAndCompareSchema(
            "first : int, last : int, d : long, e : int, a : int, b : int",
            mixedScript,
            "f");
    Iterator<Tuple> actualTuples = pigServer.openIterator("f");

    String[] expectedRows = {
            "(10,50,40L,50,10,20)",
            "(11,51,41L,51,11,21)",
    };
    List<Tuple> expectedTuples = Util.getTuplesFromConstantTupleStrings(expectedRows);
    Util.checkQueryOutputsAfterSort(actualTuples, expectedTuples);
}
 
Example #24
Source File: TestProjectRange.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Parses a schema string and applies some transformations to the result,
 * because the parser does not accept {@code group} as a column name in a
 * schema, and to add the tuple within bag schemas.
 * <p>
 * The first field is renamed to {@code group}; for the second and third
 * fields (when present) the alias of the first inner field is cleared.
 *
 * @param expectedSchStr schema string to parse
 * @return the parsed schema after the adjustments above
 * @throws ParserException
 * @throws FrontendException
 */
private Schema getCleanedGroupSchema(String expectedSchStr) throws ParserException, FrontendException {
    Schema schema = Utils.getSchemaFromString(expectedSchStr);
    schema.getField(0).alias = "group";

    if (schema.size() > 1) {
        schema.getField(1).schema.getField(0).alias = null;
    }
    if (schema.size() > 2) {
        schema.getField(2).schema.getField(0).alias = null;
    }

    return org.apache.pig.newplan.logical.Util.fixSchemaAddTupleInBag(schema);
}
 
Example #25
Source File: VespaQuerySchema.java    From vespa with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a schema string into a Pig schema. Every {@code /} in the input
 * is replaced by {@code _} and the result is wrapped as {@code {(...)}}
 * before parsing.
 *
 * @param schemaString the schema description to convert
 * @return the parsed schema, or {@code null} if parsing fails (the stack
 *         trace of the ParserException is printed in that case)
 */
public static Schema getPigSchema(String schemaString) {
    String bagSchema = "{(" + schemaString.replace("/", "_") + ")}";
    try {
        return Utils.getSchemaFromString(bagSchema);
    } catch (ParserException e) {
        e.printStackTrace();
        return null;
    }
}
 
Example #26
Source File: TestProjectRange.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Orders a relation by a mix of range projections and a single column with
 * differing sort directions, and checks the schema, the number and
 * direction of the sort expression plans, and the output rows.
 */
@Test
public void testRangeOrderByMixWSchema() throws IOException, ParserException{
    String script =
        "  l1 = load '" + INP_FILE_5FIELDS +
                "' as (a : int, b : long, c : int, d : int, e : int);"
        + " o = order l1 by  b .. c, d .. DESC,  a DESC;";
    compileAndCompareSchema("a : int, b : long, c : int, d : int, e : int", script, "o");

    //check number of sort expression plans
    LogicalPlan plan = createAndProcessLPlan(script);
    // b, c ascending; d, e, a descending
    boolean[] ascending = {true,true,false,false,false};
    checkNumExpressionPlansForSort(plan, 5, ascending);

    Util.registerMultiLineQuery(pigServer, script);

    Iterator<Tuple> actualTuples = pigServer.openIterator("o");

    String[] expectedRows = {
            "(10,20,30,40,50)",
            "(11,21,31,41,51)",
    };
    List<Tuple> expectedTuples = Util.getTuplesFromConstantTupleStrings(expectedRows);
    Util.checkQueryOutputs(actualTuples, expectedTuples);
}
 
Example #27
Source File: TestUnionOnSchema.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Test UNION ONSCHEMA where a common column has additional 'namespace' part
 *  in the column name in one of the inputs. The union is checked with the
 *  inputs in both orders.
 * @throws IOException
 * @throws ParserException
 */
@Test
public void testUnionOnSchemaScopedColumnName() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL);
    String query_prefix = 
    "  l1 = load '" + INP_FILE_2NUMS + "' as (i : int, j : int); " 
    + "g = group l1 by i; "
    + "f = foreach g generate flatten(l1); "
    + "l2 = load '" + INP_FILE_2NUMS + "' as (i : int, j : int); ";

    String query = query_prefix + "u = union onschema f, l2; " ; 
    Util.registerMultiLineQuery(pig, query);
    Schema sch = pig.dumpSchema("u");
    Schema expectedSch = Utils.getSchemaFromString("i: int, j: int");
    // JUnit's argument order is (message, expected, actual)
    assertEquals("Checking expected schema", expectedSch, sch);
    Iterator<Tuple> it = pig.openIterator("u");

    List<Tuple> expectedRes = 
        Util.getTuplesFromConstantTupleStrings(
                new String[] {
                        "(1,2)",
                        "(5,3)",
                        "(1,2)",
                        "(5,3)"
                });
    Util.checkQueryOutputsAfterSort(it, expectedRes);
    
    // now try reversing the order of relation
    query = query_prefix + "u = union onschema l2, f; " ; 
    Util.registerMultiLineQuery(pig, query);
    sch = pig.dumpSchema("u");
    expectedSch = Utils.getSchemaFromString("i: int, j: int");
    assertEquals("Checking expected schema", expectedSch, sch);
    it = pig.openIterator("u");
    Util.checkQueryOutputsAfterSort(it, expectedRes);

}
 
Example #28
Source File: TestSchema.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Round-trips a set of schema strings: each one is parsed, printed with
 * {@code Schema#toString}, parsed again as a bag schema string, and the two
 * parsed schemas are compared with {@code Schema.equals}.
 */
@Test
public void testGetStringFromSchema() throws ParserException {
    final String[] inputs = {
        "a:int",
        "a:long",
        "a:chararray",
        "a:double",
        "a:float",
        "a:bytearray",
        "b:bag{tuple(x:int,y:int,z:int)}",
        "b:bag{t:tuple(x:int,y:int,z:int)}",
        "a:int,b:chararray,c:Map[int]",
        "a:double,b:float,t:tuple(x:int,y:double,z:bytearray)",
        "a:double,b:float,t:tuple(x:int,b:bag{t:tuple(a:int,b:float,c:double,x:tuple(z:bag{r:tuple(z:bytearray)}))},z:bytearray)",
        "a,b,t:tuple(x,b:bag{t:tuple(a,b,c,x:tuple(z:bag{r:tuple(z)}))},z)",
        "a:bag{t:tuple(a:bag{t:tuple(a:bag{t:tuple(a:bag{t:tuple(a:bag{t:tuple(a:bag{t:tuple(a:int,b:float)})})})})})}",
        "a:bag{}",
        "b:{null:(a:int)}",
        "int,int,int,int,int,int,int,int,int,int",
        "long,long,long,long,long,long,long,long,long,long",
        "float,float,float,float,float,float,float,float,float,float",
        "double,double,double,double,double,double,double,double,double,double",
        "boolean,boolean,boolean,boolean,boolean,boolean,boolean,boolean,boolean,boolean",
        "datetime,datetime,datetime,datetime,datetime,datetime,datetime,datetime,datetime,datetime",
        "{},{},{},{},{},{},{},{},{},{}",
        "map[],map[],map[],map[],map[],map[],map[],map[],map[],map[]",
        "int,int,long,long,float,float,double,double,boolean,boolean,datetime,datetime,(int,long,float,double,boolean,datetime),{(int,long,float,double,boolean,datetime)},map[(int,long,float,double,boolean,datetime)]"
    };
    for (int idx = 0; idx < inputs.length; idx++) {
        Schema parsed = Utils.getSchemaFromString(inputs[idx]);
        String printed = parsed.toString();
        // removes outer curly-braces added by Schema#toString
        Schema reparsed = Utils.getSchemaFromBagSchemaString(printed);
        assertTrue(Schema.equals(parsed, reparsed, false, true));
    }
}
 
Example #29
Source File: TestUnionOnSchema.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Test UNION ONSCHEMA where a common column has an additional 'namespace'
 * part in the column name in both the inputs.
 *
 * <p>Both inputs are grouped and flattened before the union; the test checks
 * the schema of the union and its sorted output rows.
 *
 * @throws IOException
 * @throws ParserException
 */
@Test
public void testUnionOnSchemaScopedColumnNameBothInp1() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL);
    String query =
        "  l1 = load '" + INP_FILE_2NUMS + "' as (i : int, j : int); "
        + "g1 = group l1 by i; "
        + "f1 = foreach g1 generate group as gkey, flatten(l1); "
        + "l2 = load '" + INP_FILE_2NUMS + "' as (i : int, x : chararray); "
        + "g2 = group l2 by i; "
        + "f2 = foreach g2 generate group as gkey, flatten(l2); "
        + "u = union onschema f1, f2; ";
    Util.registerMultiLineQuery(pig, query);

    Schema expectedSch =
        Utils.getSchemaFromString("gkey: int, l1::i: int, l1::j: int, l2::i: int, l2::x: chararray");
    Schema sch = pig.dumpSchema("u");
    // JUnit convention: expected value first, actual value second.
    assertEquals("Checking expected schema", expectedSch, sch);

    List<Tuple> expectedRes =
        Util.getTuplesFromConstantTupleStrings(
                new String[] {
                        "(1,1,2,null,null)",
                        "(5,5,3,null,null)",
                        "(1,null,null,1,'2')",
                        "(5,null,null,5,'3')"
                });
    Iterator<Tuple> it = pig.openIterator("u");
    Util.checkQueryOutputsAfterSort(it, expectedRes);
}
 
Example #30
Source File: TestUnionOnSchema.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Test UNION ONSCHEMA with input relation having udfs.
 *
 * <p>One input is produced by a foreach that applies {@code CONCAT} and the
 * {@code UDFTupleNullSchema} UDF; the test checks the string form of the
 * union's schema and its sorted output rows.
 *
 * @throws IOException
 * @throws ParserException
 */
@Test
public void testUnionOnSchemaInputUdfs() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL);
    String query =
        "  l1 = load '" + INP_FILE_2NUMS + "' as (i : int, j : chararray);"
        + "l2 = load '" + INP_FILE_2NUMS + "' as (i : int, j : chararray);"
        + "f1 = foreach l1 generate i, CONCAT(j,j) as cj, "
        + "org.apache.pig.test.TestUnionOnSchema\\$UDFTupleNullSchema(i,j) as uo;"
        + "u = union onschema f1, l2;";
    Util.registerMultiLineQuery(pig, query);

    Schema sch = pig.dumpSchema("u");
    String expectedSch = "{i: int,cj: chararray,uo: (),j: chararray}";
    // assertEquals (rather than assertTrue on equals) reports both strings on failure.
    Assert.assertEquals("Checking expected schema", expectedSch, sch.toString());

    List<Tuple> expectedRes =
        Util.getTuplesFromConstantTupleStrings(
                new String[] {
                        "(1,null,null,'2')",
                        "(5,null,null,'3')",
                        "(1,'22',(1,'2'),null)",
                        "(5,'33',(5,'3'),null)"
                });
    Iterator<Tuple> it = pig.openIterator("u");
    Util.checkQueryOutputsAfterSort(it, expectedRes);
}