org.apache.pig.ExecType Java Examples

The following examples show how to use org.apache.pig.ExecType. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: PerfTest.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Loads the data stored at {@code out} through Pig with {@link ParquetLoader},
 * requesting columns {@code a0 .. a(colsToLoad-1)} as chararrays, checks that
 * the number of rows equals {@code ROW_COUNT} and appends the elapsed time to
 * {@code results}.
 *
 * @param out        location passed to the Pig LOAD statement
 * @param colsToLoad number of columns named in the requested schema
 * @throws ExecException propagated from the Pig calls
 * @throws IOException   propagated from the Pig calls
 */
private static void load(String out, int colsToLoad) throws ExecException, IOException {
  final long startMillis = System.currentTimeMillis();

  // Column a0 is always part of the schema, even when colsToLoad is below 1.
  StringBuilder requestedSchema = new StringBuilder("a0: chararray");
  for (int col = 1; col < colsToLoad; col++) {
    requestedSchema.append(", a").append(col).append(": chararray");
  }

  PigServer pigServer = new PigServer(ExecType.LOCAL);
  String loadStatement =
      "B = LOAD '" + out + "' USING " + ParquetLoader.class.getName() + "('" + requestedSchema + "');";
  pigServer.registerQuery(loadStatement);
  pigServer.registerQuery("C = FOREACH (GROUP B ALL) GENERATE COUNT(B);");

  Iterator<Tuple> rows = pigServer.openIterator("C");
  if (!rows.hasNext()) {
    throw new RuntimeException("Job failed: no tuple to read");
  }
  Long rowCount = (Long) rows.next().get(0);
  assertEquals(ROW_COUNT, rowCount.longValue());

  final long endMillis = System.currentTimeMillis();
  results.append((endMillis - startMillis) + " ms to read " + colsToLoad + " columns\n");
}
 
Example #2
Source File: TestPigServerWithMacros.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Defines an inline macro that counts the rows of a relation, applies it to a
 * two-row mock data set and checks that the macro yields 2.
 *
 * @throws Throwable propagated from the Pig calls
 */
@Test
public void testInlineMacro() throws Throwable {
    PigServer pig = new PigServer(ExecType.LOCAL);
    try {
        Storage.Data data = resetData(pig);
        data.set("some_path", "(l:chararray)", tuple("first row"), tuple("second row"));

        pig.registerQuery("DEFINE row_count(X) RETURNS Z { Y = group $X all; $Z = foreach Y generate COUNT($X); };");
        pig.registerQuery("a = load 'some_path' USING mock.Storage();");
        pig.registerQuery("b = row_count(a);");
        Iterator<Tuple> iter = pig.openIterator("b");

        assertEquals(2L, ((Long)iter.next().get(0)).longValue());
    } finally {
        // Shut the server down even when a query or the assertion fails.
        pig.shutdown();
    }
}
 
Example #3
Source File: TestCase.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Verify that FrontendException is thrown when the else expression is missing.
 * @throws Exception
 */
@Test(expected = FrontendException.class)
public void testMissingElseExpression() throws Exception {
    PigServer server = new PigServer(ExecType.LOCAL);
    Data mockData = resetData(server);
    mockData.set("foo", tuple(1), tuple(2), tuple(3), tuple(4), tuple(5));

    // CASE statement whose ELSE branch carries no expression.
    String invalidCase = "B = FOREACH A GENERATE (" +
            "  CASE i % 3" +
            "    WHEN 0 THEN '3n'" +
            "    WHEN 1 THEN '3n+1'" +
            "    ELSE " +
            "  END" +
            ");";

    server.registerQuery("A = LOAD 'foo' USING mock.Storage() AS (i:int);");
    server.registerQuery(invalidCase);
    server.registerQuery("STORE B INTO 'bar' USING mock.Storage();");

    // Reaching this line means no exception was raised by the statements above.
    fail("FrontendException must be thrown for invalid case statement");
}
 
Example #4
Source File: SparqlQueryPigEngineTest.java    From rya with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the {@code engine} fixture: a {@link SparqlQueryPigEngine} running in
 * local exec type, with inference and stats switched off, backed by a
 * transform visitor configured with fixed connection settings.
 *
 * @throws Exception propagated from the parent set-up or from engine initialisation
 */
@Override
public void setUp() throws Exception {
    super.setUp();

    // Visitor carrying the table prefix and connection settings.
    SparqlToPigTransformVisitor transformVisitor = new SparqlToPigTransformVisitor();
    transformVisitor.setTablePrefix("l_");
    transformVisitor.setInstance("stratus");
    transformVisitor.setZk("stratus13:2181");
    transformVisitor.setUser("root");
    transformVisitor.setPassword("password");

    // Engine under test; init() is called last, after every setter.
    engine = new SparqlQueryPigEngine();
    engine.setSparqlToPigTransformVisitor(transformVisitor);
    engine.setExecType(ExecType.LOCAL);
    engine.setInference(false);
    engine.setStats(false);
    engine.init();
}
 
Example #5
Source File: TestPigServer.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Sets {@code pig.location.check.strict} to {@code true} and stores two
 * relations into the same location; the test expects a RuntimeException.
 *
 * @throws ExecException propagated from the Pig calls
 * @throws IOException   propagated from the Pig calls
 */
@Test(expected = RuntimeException.class)
public void testLocationStrictCheck() throws ExecException, IOException {
    Properties strictProps = PropertiesUtil.loadDefaultProperties();
    strictProps.setProperty("pig.location.check.strict", "true");
    PigServer server = new PigServer(ExecType.LOCAL, strictProps);

    Data mockData = resetData(server);
    mockData.set("foo", tuple("a", 1, "b"), tuple("b", 2, "c"), tuple("c", 3, "d"));

    server.registerQuery("A = LOAD 'foo' USING mock.Storage() AS (f1:chararray,f2:int,f3:chararray);");
    server.registerQuery("B = order A by f1,f2,f3 DESC;");
    server.registerQuery("C = order A by f1,f2,f3;");

    // Both relations target 'bar'; the duplicate location should raise a RuntimeException.
    server.registerQuery("STORE B INTO 'bar' USING mock.Storage();");
    server.registerQuery("STORE C INTO 'bar' USING mock.Storage();");

    // Only reached when no exception was thrown above.
    List<Tuple> stored = data(mockData);
    assertEquals(tuple("a", 1, "b"), stored.get(0));
    assertEquals(tuple("b", 2, "c"), stored.get(1));
    assertEquals(tuple("c", 3, "d"), stored.get(2));
}

/** Reads back the tuples stored under 'bar' in the given mock data. */
private static List<Tuple> data(Data mockData) throws IOException {
    return mockData.get("bar");
}
 
Example #6
Source File: TestUDF.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Defines three aliases of {@code UdfWithFuncSpecWithArgs}, each with a
 * different constructor argument, and checks that each alias returns the
 * integer matching its own argument.
 *
 * @throws Exception propagated from the Pig calls
 */
@Test
public void testNormalDefine() throws Exception {
    String inputFile = "udf_test_jira_2430_2.txt";
    Util.createLocalInputFile(inputFile, new String[]{"1"});

    PigServer server = new PigServer(ExecType.LOCAL);
    server.registerQuery("A = LOAD '"+inputFile+"' as (x:int);");
    // udftest1..udftest3, each constructed with its own number as argument.
    for (int n = 1; n <= 3; n++) {
        server.registerQuery(
                "DEFINE udftest" + n + " org.apache.pig.test.TestUDF$UdfWithFuncSpecWithArgs('" + n + "');");
    }
    server.registerQuery("B = FOREACH A GENERATE udftest1(x), udftest2(x), udftest3(x);");

    Tuple firstRow = server.openIterator("B").next();
    for (int n = 1; n <= 3; n++) {
        assertEquals(Integer.valueOf(n), firstRow.get(n - 1));
    }
}
 
Example #7
Source File: TestProjectStarExpander.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Test projecting multiple *
 * @throws IOException
 * @throws ParserException
 */
@Test
public void testProjectStarMulti() throws IOException, ParserException {
    PigServer server = new PigServer(ExecType.LOCAL);
    String script =
        "  l1 = load '" + INP_FILE_5FIELDS + "' as (a : int, b : int, c : int);"
        + "f = foreach l1 generate * as (aa, bb, cc), *;";
    Util.registerMultiLineQuery(server, script);

    // The first * is renamed to aa/bb/cc, the second keeps the loaded names.
    Schema expectedSchema = Utils.getSchemaFromString(
            "aa : int, bb : int, cc : int, a : int, b : int, c : int");
    Schema actualSchema = server.dumpSchema("f");
    assertEquals("Checking expected schema", expectedSchema, actualSchema);

    String[] expectedRows = {
            "(10,20,30,10,20,30)",
            "(11,21,31,11,21,31)",
    };
    List<Tuple> expectedTuples = Util.getTuplesFromConstantTupleStrings(expectedRows);
    Iterator<Tuple> actualTuples = server.openIterator("f");
    Util.checkQueryOutputsAfterSort(actualTuples, expectedTuples);
}
 
Example #8
Source File: TestPigServer.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Points {@code pig.temp.dir} at a directory under {@code tempDir} and checks
 * that the temporary path handed out by {@link FileLocalizer} lives below it
 * and that its parent directory has permission {@code rwx------}.
 *
 * @throws Throwable propagated from the Pig and file-system calls
 */
@Test
public void testPigTempDir() throws Throwable {
    Properties props = PropertiesUtil.loadDefaultProperties();
    String relativeTemp = FILE_SEPARATOR + "tmp" + FILE_SEPARATOR + "test";
    File pigTempDir = new File(tempDir, relativeTemp);
    props.put("pig.temp.dir", pigTempDir.getPath());

    PigContext context = new PigContext(ExecType.LOCAL, props);
    context.connect();
    FileLocalizer.setInitialized(false);

    String tempPath = FileLocalizer.getTemporaryPath(context).toString();
    Path tempLocation = new Path(tempPath);
    String expectedPrefix = pigTempDir.toURI().toString();
    assertTrue(tempPath.startsWith(expectedPrefix));

    FileSystem fileSystem = FileSystem.get(
            tempLocation.toUri(),
            ConfigurationUtil.toConfiguration(context.getProperties()));
    FileStatus parentStatus = fileSystem.getFileStatus(tempLocation.getParent());
    // Temporary root dir should have 700 as permission
    assertEquals("rwx------", parentStatus.getPermission().toString());

    pigTempDir.delete();
    FileLocalizer.setInitialized(false);
}
 
Example #9
Source File: TestHiveColumnarLoader.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Loads a single file with {@code HiveColumnarLoader} without any projection
 * and checks that every row has three chararray-typed leading columns and
 * that the number of rows equals {@code simpleRowCount}.
 *
 * @throws IOException propagated from the Pig calls
 */
@Test
public void testReadingSingleFileNoProjections() throws IOException {
    String funcSpecString = "org.apache.pig.piggybank.storage.HiveColumnarLoader('f1 string,f2 string,f3 string')";

    String singlePartitionedFile = simpleDataFile.getAbsolutePath();

    PigServer server = new PigServer(ExecType.LOCAL);
    server.setBatchOn();
    server.registerFunction("org.apache.pig.piggybank.storage.HiveColumnarLoader",
            new FuncSpec(funcSpecString));

    server.registerQuery("a = LOAD '" + Util.encodeEscape(singlePartitionedFile) + "' using " + funcSpecString
            + ";");

    Iterator<Tuple> result = server.openIterator("a");

    int count = 0;
    // Drive the loop with hasNext(): the Iterator contract does not promise
    // that next() returns null once the rows are exhausted.
    while (result.hasNext()) {
        Tuple t = result.next();
        assertEquals(3, t.size());
        assertEquals(DataType.CHARARRAY, t.getType(0));
        count++;
    }

    Assert.assertEquals(simpleRowCount, count);
}
 
Example #10
Source File: TestMergeJoinOuter.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a merge join whose left input is a grouped relation, checks that the
 * logical plan carries a MERGE join, and expects building the physical plan
 * to fail with a FrontendException carrying error code 1103.
 *
 * @throws Exception propagated from plan construction
 */
@Test
public void testFailure() throws Exception{
    final String collectableLoader = DummyCollectableLoader.class.getName();
    final String indexableLoader = DummyIndexableLoader.class.getName();
    String script = "A = LOAD 'data1' using "+ collectableLoader +"() as (id, name, grade);" +
    "E = group A by id;" +
    "B = LOAD 'data2' using "+ indexableLoader +"() as (id, name, grade);" +
    "C = join E by A.id, B by id using 'merge';" +
    "store C into 'output';";

    LogicalPlan logicalPlan = Util.buildLp(pigServer, script);
    Operator sink = logicalPlan.getSinks().get(0);
    LOJoin mergeJoin = (LOJoin) logicalPlan.getPredecessors(sink).get(0);
    assertEquals(LOJoin.JOINTYPE.MERGE, mergeJoin.getJoinType());

    PigContext context = new PigContext(ExecType.MAPREDUCE, cluster.getProperties());
    context.connect();

    boolean sawFrontendException = false;
    try {
        Util.buildPp(pigServer, script);
    } catch (FrontendException expected) {
        assertEquals(1103, expected.getErrorCode());
        sawFrontendException = true;
    }
    assertTrue(sawFrontendException);
}
 
Example #11
Source File: TestProject.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Loads a two-column file with a three-column schema and checks that the
 * missing third column is returned as null for every row.
 *
 * @throws Exception propagated from the Pig calls
 */
@Test
public void testMissingCols1() throws Exception {
    String inputFileName = "TestProject-testMissingCols1-input.txt";
    String input[] = { "hello\tworld", "good\tbye" };
    Util.createLocalInputFile(inputFileName, input);
    String query = "a = load '" + inputFileName
            + "' as (s1:chararray, s2:chararray, extra:chararray);" +
            "b = foreach a generate s1, s2, extra;";

    PigServer ps = new PigServer(ExecType.LOCAL);
    Util.registerMultiLineQuery(ps, query);
    Iterator<Tuple> it = ps.openIterator("b");
    Tuple[] expectedResults = new Tuple[] {
                    (Tuple)Util.getPigConstant("('hello', 'world', null)"),
                    (Tuple)Util.getPigConstant("('good', 'bye', null)")
    };
    int i = 0;
    while (it.hasNext()) {
        assertEquals(expectedResults[i++], it.next());
    }
    // Without this check an empty or truncated result would pass silently.
    assertEquals(expectedResults.length, i);
}
 
Example #12
Source File: TestAssert.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Verify that ASSERT operator works. Disable fetch for this testcase.
 * <p>
 * NOTE(review): nothing in this method switches fetch off; confirm whether a
 * property should be set here or whether the fixture handles it.
 * @throws Exception
 */
@Test
public void testNegativeWithoutFetch() throws Exception {
    PigServer pigServer = new PigServer(ExecType.LOCAL);
    Data data = resetData(pigServer);

    data.set("foo",
            tuple(1),
            tuple(2),
            tuple(3)
            );

    pigServer.registerQuery("A = LOAD 'foo' USING mock.Storage() AS (i:int);");
    pigServer.registerQuery("ASSERT A BY i > 1 , 'i should be greater than 1';");

    try {
        pigServer.openIterator("A");
        // The row with i = 1 violates the assertion, so execution must not succeed.
        // The AssertionError raised here is not caught by the handler below.
        Assert.fail("Expected the job to fail because of the ASSERT violation");
    } catch (FrontendException fe) {
        Assert.assertTrue(fe.getCause().getMessage().contains(
                "Job terminated with anomalous status FAILED"));
    }
}
 
Example #13
Source File: TestMapSideCogroup.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a three-way merge cogroup, checks that the logical plan carries a
 * MERGE cogroup, and expects building the physical plan to fail with a
 * FrontendException.
 *
 * @throws Exception propagated from plan construction
 */
@Test
public void testFailure2() throws Exception{
    PigServer pigServer = new PigServer(cluster.getExecType(), cluster.getProperties());

    final String collectableLoader = DummyCollectableLoader.class.getName();
    final String indexableLoader = DummyIndexableLoader.class.getName();
    String script = "A = LOAD 'data1' using "+ collectableLoader +"() as (id, name, grade);" +
    "B = LOAD 'data2' using "+ indexableLoader +"() as (id, name, grade);" +
    "D = LOAD 'data2' using "+ indexableLoader +"() as (id, name, grade);" +
    "C = cogroup A by id inner, B by id, D by id inner using 'merge';" +
    "store C into 'output';";

    LogicalPlan logicalPlan = Util.buildLp(pigServer, script);
    Operator sink = logicalPlan.getSinks().get(0);
    LOCogroup mergeCogroup = (LOCogroup) logicalPlan.getPredecessors(sink).get(0);
    assertEquals(LOCogroup.GROUPTYPE.MERGE, mergeCogroup.getGroupType());

    PigContext context = new PigContext(ExecType.MAPREDUCE, cluster.getProperties());
    context.connect();

    boolean sawFrontendException = false;
    try {
        Util.buildPp(pigServer, script);
    } catch (FrontendException expected) {
        sawFrontendException = true;
    }
    assertTrue(sawFrontendException);
}
 
Example #14
Source File: TestUDF.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Feeds a row with empty fields and a row with integer fields to
 * {@code IntToBool} and checks the two results, in order, followed by the end
 * of the output.
 *
 * @throws Exception propagated from the Pig calls
 */
@Test
// See PIG-4184
public void testUDFNullInput() throws Exception {
    PigServer pig = new PigServer(ExecType.LOCAL);
    File inputFile = Util.createInputFile("tmp", "", 
            new String[] {"\t", "2\t3"});
    pig.registerQuery("a = load '"
            + Util.generateURI(inputFile.toString(), pig.getPigContext())
            + "' as (i1:int, i2:int);");
    pig.registerQuery("b = foreach a generate " + IntToBool.class.getName() + "(i1);");

    Iterator<Tuple> iter = pig.openIterator("b");
    // JUnit expects (expected, actual); the reversed order produced misleading failure messages.
    assertEquals("(false)", iter.next().toString());
    assertEquals("(true)", iter.next().toString());
    assertFalse(iter.hasNext());
}
 
Example #15
Source File: TestOrcStoragePushdown.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the same ORC load-and-filter query twice, once on a local server with
 * the "PredicatePushdownOptimizer" rule listed as disabled and once on the
 * shared {@code pigServer}, and checks that both produce the same output.
 *
 * @param filterStmt   filter condition placed after {@code filter ... by}
 * @param expectedRows row count handed to {@code Util.checkQueryOutputs}
 * @throws IOException propagated from serialization or the Pig calls
 */
private void testPredicatePushdownLocal(String filterStmt, int expectedRows) throws IOException {
    // Reference run: PredicatePushdownOptimizer disabled.
    PigServer referenceServer = new PigServer(ExecType.LOCAL);
    HashSet<String> rulesToDisable = new HashSet<String>();
    rulesToDisable.add("PredicatePushdownOptimizer");
    referenceServer.getPigContext().getProperties().setProperty(
            PigImplConstants.PIG_OPTIMIZER_RULES_KEY,
            ObjectSerializer.serialize(rulesToDisable));
    referenceServer.registerQuery("B = load '" + INPUT + "' using OrcStorage();");
    referenceServer.registerQuery("C = filter B by " + filterStmt + ";");

    // Run under test: PredicatePushdownOptimizer enabled.
    pigServer.registerQuery("D = load '" + INPUT + "' using OrcStorage();");
    pigServer.registerQuery("E = filter D by " + filterStmt + ";");

    // Verify that results are same
    Util.checkQueryOutputs(
            referenceServer.openIterator("C"),
            pigServer.openIterator("E"),
            expectedRows);
}
 
Example #16
Source File: TestCase.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Verify that FrontendException is thrown when the case expression is missing
 * and the WHEN branches are not conditional expressions.
 * @throws Exception
 */
@Test(expected = FrontendException.class)
public void testMissingCaseExpression() throws Exception {
    PigServer server = new PigServer(ExecType.LOCAL);
    Data mockData = resetData(server);
    mockData.set("foo", tuple(1), tuple(2), tuple(3), tuple(4), tuple(5));

    // CASE without a case expression; the WHEN operands are plain constants.
    String invalidCase = "B = FOREACH A GENERATE (" +
            "  CASE " +
            "    WHEN 0 THEN '3n'" +
            "    WHEN 1 THEN '3n+1'" +
            "    ELSE        '3n+2'" +
            "  END" +
            ");";

    server.registerQuery("A = LOAD 'foo' USING mock.Storage() AS (i:int);");
    server.registerQuery(invalidCase);
    server.registerQuery("STORE B INTO 'bar' USING mock.Storage();");

    // Reaching this line means no exception was raised by the statements above.
    fail("FrontendException must be thrown for invalid case statement");
}
 
Example #17
Source File: TestNewPlanOperatorPlan.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Connects a local-mode {@link PigContext} built from empty properties and
 * derives the {@code conf} fixture from its file system configuration.
 *
 * @throws Exception propagated from connecting the context
 */
@Before
public void setUp() throws Exception {
    PigContext localContext = new PigContext(ExecType.LOCAL, new Properties());
    localContext.connect();

    // Copy of the configuration converted from the connected context's file system settings.
    conf = new Configuration(
            ConfigurationUtil.toConfiguration(localContext.getFs().getConfiguration()));
}
 
Example #18
Source File: TestUDF.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * For each family of helper UDFs (three implementations followed by a
 * reference expression), runs queries that apply a shrinking suffix of the
 * family to the same grouped input and compares result columns against the
 * first column.
 *
 * NOTE(review): the comparison loop is bounded by the start index of the
 * suffix rather than by the tuple size, so for start indexes 0 and 1 nothing
 * is compared — confirm whether that is intended.
 * NOTE(review): the AccLCWC DEFINE has a space between the class prefix and
 * the class name; it is reproduced unchanged here — confirm it is deliberate.
 *
 * @throws Exception propagated from the Pig calls
 */
@Test
public void testHelperEvalFunc() throws Exception {
    String pref="org.apache.pig.test.utils.HelperEvalFuncUtils$";
    String[][] udfFamilies = {
        {pref + "BasicSUM", pref + "AccSUM", pref + "AlgSUM", "SUM"},
        {pref + "BasicCOUNT", pref + "AccCOUNT", pref + "AlgCOUNT", "COUNT"},
        {"BasLCWC", "AccLCWC", "AlgLCWC", "5*COUNT"}
    };
    String input = "udf_test_helper_eval_func.txt";
    Util.createLocalInputFile(input, new String[]{"1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n11\n12\n13\n14\n15"});

    // Script text that is identical for every generated query.
    final String preamble =
            "DEFINE BasLCWC " + pref + "BasicLongCountWithConstructor('5');"
            + "DEFINE AccLCWC " + pref +" AccLongCountWithConstructor('5');"
            + "DEFINE AlgLCWC " + pref + "AlgLongCountWithConstructor('5');"
            + "A = load '" + input + "' as (x:int);"
            + "B = foreach (group A all) generate ";

    for (String[] family : udfFamilies) {
        final int lastIdx = family.length - 1;
        for (int first = 0; first < lastIdx; first++) {
            // Apply family[first..lastIdx] to A, the last entry closing the statement.
            StringBuilder script = new StringBuilder(preamble);
            for (int k = first; k < lastIdx; k++) {
                script.append(family[k]).append("(A),");
            }
            script.append(family[lastIdx]).append("(A);");

            PigServer pigServer = new PigServer(ExecType.LOCAL);
            pigServer.registerQuery(script.toString());
            Iterator<Tuple> rows = pigServer.openIterator("B");
            while (rows.hasNext()) {
                Tuple row = rows.next();
                Long reference = (Long) row.get(0);
                for (int col = 1; col < first; col++) {
                    assertEquals(reference, row.get(col));
                }
            }
        }
    }
}
 
Example #19
Source File: TestAvroStorage.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the shared local Pig server, points its temp dir below
 * {@code <user.dir>/build/test/tmp/} and clears the "TestAvroStorage" output
 * directory derived from Pig's temporary path.
 *
 * @throws ExecException propagated from the Pig calls
 * @throws IOException   propagated from the Pig calls
 */
@BeforeClass
public static void setup() throws ExecException, IOException {
    pigServerLocal = new PigServer(ExecType.LOCAL);

    String tempRoot = System.getProperty("user.dir") + "/build/test/tmp/";
    pigServerLocal.getPigContext().getProperties()
            .setProperty(PigConfiguration.PIG_TEMP_DIR, tempRoot);

    String temporaryPath = FileLocalizer.getTemporaryPath(pigServerLocal.getPigContext()).toString();
    outbasedir = temporaryPath + "/TestAvroStorage/";
    deleteDirectory(new File(outbasedir));
}
 
Example #20
Source File: TestUnionOnSchema.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Test UNION ONSCHEMA with operations after the union
 * @throws IOException
 * @throws ParserException
 */
@Test
public void testUnionOnSchemaFilter() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL);
    String query =
        "  l1 = load '" + INP_FILE_2NUMS + "' as (i : int, x : int);"
        + "l2 = load '" + INP_FILE_2NUMS + "' as (i : int, j : int);"
        + "u = union onschema l1, l2;"
        + "fil = filter u by i == 5 and (x is null or x != 1);";
    Util.registerMultiLineQuery(pig, query);

    Schema sch = pig.dumpSchema("fil");
    Schema expectedSch = Utils.getSchemaFromString("i: int, x: int, j: int");
    // JUnit order is (message, expected, actual); the reversed order mislabels failures.
    assertEquals("Checking expected schema", expectedSch, sch);

    Iterator<Tuple> it = pig.openIterator("fil");
    List<Tuple> expectedRes = 
        Util.getTuplesFromConstantTupleStrings(
                new String[] {
                        "(5,null,3)",
                        "(5,3,null)"
                });
    Util.checkQueryOutputsAfterSort(it, expectedRes);

}
 
Example #21
Source File: TestOrcStorage.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Prepares each test: creates a local Pig server, obtains the file system for
 * the server's properties, removes earlier test files, creates the output
 * base directory and generates the input files.
 *
 * @throws ExecException propagated from the Pig calls
 * @throws IOException   propagated from the file-system calls
 */
@Before
public void setup() throws ExecException, IOException {
    pigServer = new PigServer(ExecType.LOCAL);
    fs = FileSystem.get(
            ConfigurationUtil.toConfiguration(
                    pigServer.getPigContext().getProperties()));

    // Start from a clean slate before regenerating the fixtures.
    deleteTestFiles();
    pigServer.mkdirs(outbasedir);
    generateInputFiles();
}
 
Example #22
Source File: TestJoin.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the join literal {@code 'default'} yields a HASH join in the
 * logical plan.
 *
 * @throws Exception propagated from set-up or plan construction
 */
@Test
public void testLiteralsForJoinAlgoSpecification5() throws Exception {
    setUp(ExecType.LOCAL);

    String script = "a = load 'A'; " +
                    "b = load 'B'; " +
                    "c = Join a by $0, b by $0 using 'default'; "+
                    "store c into 'output';";
    LogicalPlan plan = Util.buildLp(pigServer, script);

    // The join is the direct predecessor of the plan's only sink (the store).
    Operator sink = plan.getSinks().get(0);
    LOJoin joinOp = (LOJoin) plan.getPredecessors(sink).get(0);
    assertEquals(JOINTYPE.HASH, joinOp.getJoinType());
}
 
Example #23
Source File: Context.java    From incubator-sentry with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@link PigServer} while acting as the given remote user.
 *
 * @param userName name used to create the remote user the server is built as
 * @param exType   exec type passed to the {@link PigServer} constructor
 * @return the server created inside the privileged action
 * @throws Exception propagated from the privileged action
 */
public PigServer getPigServer(String userName, final ExecType exType)
    throws Exception {
  UserGroupInformation remoteUser = UserGroupInformation.createRemoteUser(userName);

  // Typing the action with PigServer makes doAs return the server without a cast.
  PrivilegedExceptionAction<PigServer> createServer =
      new PrivilegedExceptionAction<PigServer>() {
        @Override
        public PigServer run() throws Exception {
          return new PigServer(exType, new HiveConf());
        }
      };
  return remoteUser.doAs(createServer);
}
 
Example #24
Source File: TestLoadDissectorDynamicallyInPig.java    From yauaa with Apache License 2.0 5 votes vote down vote up
/**
 * Loads the log file with the httpd log loader, asking it via a
 * {@code -load:} argument to load the UserAgentDissector, stores the result
 * into mock storage and checks the single resulting tuple.
 *
 * @throws Exception propagated from the Pig calls
 */
@Test
public void dynamicallyLoadedWithoutExtraRules() throws Exception {
    PigServer server = new PigServer(ExecType.LOCAL);

    String loadClicks =
        "Clicks = " +
            "    LOAD '" + logfile + "' " +
            "    USING nl.basjes.pig.input.apachehttpdlog.Loader(" +
            "            '" + LOGFORMAT + "'," +
            "            'IP:connection.client.host'," +
            "            'TIME.STAMP:request.receive.time'," +
            "    '-load:nl.basjes.parse.useragent.dissector.UserAgentDissector:'," +
            "            'HTTP.USERAGENT:request.user-agent'," +
            "            'HTTP.HOST:request.user-agent.agent_information_url.host'" +
            "            )" +
            "         AS (" +
            "            ConnectionClientHost," +
            "            RequestReceiveTime," +
            "            RequestUseragent," +
            "            RequestUseragentUrlHostName" +
            "            );";
    server.registerQuery(loadClicks);

    Storage.Data mockData = resetData(server);
    server.registerQuery("STORE Clicks INTO 'Clicks' USING mock.Storage();");

    List<Tuple> stored = mockData.get("Clicks");
    assertEquals(1, stored.size());

    // Compare as delimited strings so a mismatch shows the full row.
    final String delimiter = "><#><";
    Tuple expectedRow = tuple(
        "172.21.13.88",
        "07/Apr/2013:03:04:49 +0200",
        "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) " +
            "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.96 " +
            "Mobile Safari/537.36" +
            "(https://yauaa.basjes.nl:8080/something.html?aap=noot&mies=wim#zus)",
        "yauaa.basjes.nl"
        );
    assertEquals(
        expectedRow.toDelimitedString(delimiter),
        stored.get(0).toDelimitedString(delimiter));
}
 
Example #25
Source File: TestUnionOnSchema.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Test UNION ONSCHEMA where a common column has additional 'namespace' part
 *  in the column name in one of the inputs
 * @throws IOException
 * @throws ParserException
 */
@Test
public void testUnionOnSchemaScopedColumnName() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL);
    String query_prefix = 
    "  l1 = load '" + INP_FILE_2NUMS + "' as (i : int, j : int); " 
    + "g = group l1 by i; "
    + "f = foreach g generate flatten(l1); "
    + "l2 = load '" + INP_FILE_2NUMS + "' as (i : int, j : int); ";

    String query = query_prefix + "u = union onschema f, l2; " ; 
    Util.registerMultiLineQuery(pig, query);
    Schema sch = pig.dumpSchema("u");
    Schema expectedSch = Utils.getSchemaFromString("i: int, j: int");
    // JUnit order is (message, expected, actual); the reversed order mislabels failures.
    assertEquals("Checking expected schema", expectedSch, sch);
    Iterator<Tuple> it = pig.openIterator("u");

    List<Tuple> expectedRes = 
        Util.getTuplesFromConstantTupleStrings(
                new String[] {
                        "(1,2)",
                        "(5,3)",
                        "(1,2)",
                        "(5,3)"
                });
    Util.checkQueryOutputsAfterSort(it, expectedRes);
    
    // now try reversing the order of relation
    query = query_prefix + "u = union onschema l2, f; " ; 
    Util.registerMultiLineQuery(pig, query);
    sch = pig.dumpSchema("u");
    expectedSch = Utils.getSchemaFromString("i: int, j: int");
    assertEquals("Checking expected schema", expectedSch, sch);
    it = pig.openIterator("u");
    Util.checkQueryOutputsAfterSort(it, expectedRes);

}
 
Example #26
Source File: TestHiveColumnarLoader.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Loads a directory of files with {@code HiveColumnarLoader}, projects column
 * {@code f1} and checks that every row has exactly one chararray-typed column
 * and that the row count equals {@code simpleDirFileCount * simpleRowCount}.
 *
 * @throws IOException propagated from the Pig calls
 */
@Test
public void testReadingMultipleNonPartitionedFiles() throws IOException {
    String funcSpecString = "org.apache.pig.piggybank.storage.HiveColumnarLoader('f1 string,f2 string,f3 string')";

    String singlePartitionedDir = simpleDataDir.getAbsolutePath();

    PigServer server = new PigServer(ExecType.LOCAL);
    server.setBatchOn();
    server.registerFunction("org.apache.pig.piggybank.storage.HiveColumnarLoader",
            new FuncSpec(funcSpecString));

    server.registerQuery("a = LOAD '" + Util.encodeEscape(singlePartitionedDir) + "' using " + funcSpecString
            + ";");

    server.registerQuery("b = foreach a generate f1;");

    Iterator<Tuple> result = server.openIterator("b");

    int count = 0;
    // Drive the loop with hasNext(): the Iterator contract does not promise
    // that next() returns null once the rows are exhausted.
    while (result.hasNext()) {
        Tuple t = result.next();
        assertEquals(1, t.size());
        assertEquals(DataType.CHARARRAY, t.getType(0));
        count++;
    }

    Assert.assertEquals(simpleDirFileCount * simpleRowCount, count);
}
 
Example #27
Source File: TestOrderBy.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Fills {@code DATA[0]} with zero-padded ascending numbers and {@code DATA[1]}
 * with the same numbers in descending order, then creates the MAPREDUCE Pig
 * server from the cluster's properties.
 *
 * @throws Throwable propagated from creating the Pig server
 */
public TestOrderBy() throws Throwable {
    DecimalFormat sevenDigits = new DecimalFormat("0000000");
    for (int idx = 0; idx < DATALEN; idx++) {
        // Row 0 counts up from 0, row 1 counts down from DATALEN - 1.
        int mirrored = DATALEN - idx - 1;
        DATA[0][idx] = sevenDigits.format(idx);
        DATA[1][idx] = sevenDigits.format(mirrored);
    }
    pig = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
}
 
Example #28
Source File: TestJoin.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the join literal {@code 'repl'} yields a REPLICATED join in the
 * logical plan.
 *
 * @throws Exception propagated from set-up or plan construction
 */
@Test
public void testLiteralsForJoinAlgoSpecification3() throws Exception {
    setUp(ExecType.LOCAL);

    String script = "a = load 'A'; " +
                    "b = load 'B'; " +
                    "c = Join a by $0, b by $0 using 'repl'; "+
                    "store c into 'output';";
    LogicalPlan plan = Util.buildLp(pigServer, script);

    // The join is the direct predecessor of the plan's only sink (the store).
    Operator sink = plan.getSinks().get(0);
    LOJoin joinOp = (LOJoin) plan.getPredecessors(sink).get(0);
    assertEquals(JOINTYPE.REPLICATED, joinOp.getJoinType());
}
 
Example #29
Source File: TestFRJoin2.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a replicated join under two values of
 * {@code PigConfiguration.PIG_JOIN_REPLICATED_MAX_BYTES}: with the limit at 16
 * the query is expected to fail with a FrontendException, with the limit at
 * 17 it is expected to run.
 *
 * @throws Exception propagated from the Pig calls
 */
@Test
public void testTooBigReplicatedFile() throws Exception {
    PigServer server = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());

    server.registerQuery("A = LOAD '" + INPUT_DIR + "' as (x:int,y:int);");
    server.registerQuery("B = LOAD '" + INPUT_FILE + "' as (x:int,y:int);");
    server.registerQuery("C = group B all parallel 5;");
    server.registerQuery("C = foreach C generate MAX(B.x) as x;");
    server.registerQuery("D = join A by x, B by x, C by x using 'repl';");

    // When the replicated input sizes=(12 + 5) is bigger than
    // pig.join.replicated.max.bytes=16, we throw exception
    try {
        server.getPigContext().getProperties().setProperty(
                PigConfiguration.PIG_JOIN_REPLICATED_MAX_BYTES,
                String.valueOf(16));
        server.openIterator("D");
        Assert.fail();
    } catch (FrontendException e) {
        // The message of interest sits three causes below the FrontendException.
        Throwable rootCause = e.getCause().getCause().getCause();
        assertEquals("Internal error. Distributed cache could" +
                " not be set up for the replicated files",
                rootCause.getMessage());
    }

    // If we increase the size to 17, it should work
    server.getPigContext().getProperties().setProperty(
            PigConfiguration.PIG_JOIN_REPLICATED_MAX_BYTES,
            String.valueOf(17));
    server.openIterator("D");
}
 
Example #30
Source File: TestJoin.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the join literal {@code 'replicated'} yields a REPLICATED join
 * in the logical plan.
 *
 * @throws Exception propagated from set-up or plan construction
 */
@Test
public void testLiteralsForJoinAlgoSpecification4() throws Exception {
    setUp(ExecType.LOCAL);

    String script = "a = load 'A'; " +
                    "b = load 'B'; " +
                    "c = Join a by $0, b by $0 using 'replicated'; "+
                    "store c into 'output';";
    LogicalPlan plan = Util.buildLp(pigServer, script);

    // The join is the direct predecessor of the plan's only sink (the store).
    Operator sink = plan.getSinks().get(0);
    LOJoin joinOp = (LOJoin) plan.getPredecessors(sink).get(0);
    assertEquals(JOINTYPE.REPLICATED, joinOp.getJoinType());
}