Java Code Examples for org.apache.pig.ExecType

The following examples show how to use org.apache.pig.ExecType. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source Project: spork   Author: sigmoidanalytics   File: TestPigServer.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * With {@code pig.location.check.strict} set to {@code true}, registers two
 * STORE statements that target the same location and expects a
 * RuntimeException to be raised.
 */
@Test(expected = RuntimeException.class)
public void testLocationStrictCheck() throws ExecException, IOException {
    Properties strictProps = PropertiesUtil.loadDefaultProperties();
    strictProps.setProperty("pig.location.check.strict", "true");

    PigServer server = new PigServer(ExecType.LOCAL, strictProps);
    Data mockData = resetData(server);
    mockData.set("foo", tuple("a", 1, "b"), tuple("b", 2, "c"), tuple("c", 3, "d"));

    server.registerQuery("A = LOAD 'foo' USING mock.Storage() AS (f1:chararray,f2:int,f3:chararray);");
    server.registerQuery("B = order A by f1,f2,f3 DESC;");
    server.registerQuery("C = order A by f1,f2,f3;");

    // Both relations are written to 'bar'; this is what should trigger the RuntimeException.
    server.registerQuery("STORE B INTO 'bar' USING mock.Storage();");
    server.registerQuery("STORE C INTO 'bar' USING mock.Storage();");

    List<Tuple> stored = mockData.get("bar");
    assertEquals(tuple("a", 1, "b"), stored.get(0));
    assertEquals(tuple("b", 2, "c"), stored.get(1));
    assertEquals(tuple("c", 3, "d"), stored.get(2));
}
 
Example #2
Source Project: spork   Author: sigmoidanalytics   File: TestPigServerWithMacros.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Defines the {@code row_count} macro inline, applies it to a two-row mock
 * relation and checks that the resulting count is 2.
 *
 * @throws Throwable if the Pig server cannot be created or a query fails
 */
@Test
public void testInlineMacro() throws Throwable {
    PigServer pig = new PigServer(ExecType.LOCAL);
    try {
        Storage.Data data = resetData(pig);
        data.set("some_path", "(l:chararray)", tuple("first row"), tuple("second row"));

        pig.registerQuery("DEFINE row_count(X) RETURNS Z { Y = group $X all; $Z = foreach Y generate COUNT($X); };");
        pig.registerQuery("a = load 'some_path' USING mock.Storage();");
        pig.registerQuery("b = row_count(a);");
        Iterator<Tuple> iter = pig.openIterator("b");

        assertEquals(2L, ((Long)iter.next().get(0)).longValue());
    } finally {
        // Shut the server down even when a query or the assertion above fails.
        pig.shutdown();
    }
}
 
Example #3
Source Project: spork   Author: sigmoidanalytics   File: TestCase.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Verify that FrontendException is thrown when the ELSE branch of a CASE
 * statement has no expression.
 * @throws Exception
 */
@Test(expected = FrontendException.class)
public void testMissingElseExpression() throws Exception {
    PigServer server = new PigServer(ExecType.LOCAL);
    Data mockData = resetData(server);

    mockData.set("foo", tuple(1), tuple(2), tuple(3), tuple(4), tuple(5));

    // The ELSE keyword is deliberately left without an expression.
    String invalidCase = "B = FOREACH A GENERATE (" +
            "  CASE i % 3" +
            "    WHEN 0 THEN '3n'" +
            "    WHEN 1 THEN '3n+1'" +
            "    ELSE " +
            "  END" +
            ");";

    server.registerQuery("A = LOAD 'foo' USING mock.Storage() AS (i:int);");
    server.registerQuery(invalidCase);
    server.registerQuery("STORE B INTO 'bar' USING mock.Storage();");
    fail("FrontendException must be thrown for invalid case statement");
}
 
Example #4
Source Project: rya   Author: apache   File: SparqlQueryPigEngineTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Runs the parent set-up, then creates the SPARQL-to-Pig query engine used by
 * the tests: a configured transform visitor is attached and the engine is
 * initialised in local execution mode with inference and stats turned off.
 */
@Override
public void setUp() throws Exception {
    super.setUp();

    // Connection settings handed to the transform visitor.
    SparqlToPigTransformVisitor transformVisitor = new SparqlToPigTransformVisitor();
    transformVisitor.setTablePrefix("l_");
    transformVisitor.setInstance("stratus");
    transformVisitor.setZk("stratus13:2181");
    transformVisitor.setUser("root");
    transformVisitor.setPassword("password");

    // Engine under test.
    engine = new SparqlQueryPigEngine();
    engine.setSparqlToPigTransformVisitor(transformVisitor);
    engine.setExecType(ExecType.LOCAL);
    engine.setInference(false);
    engine.setStats(false);
    engine.init();
}
 
Example #5
Source Project: parquet-mr   Author: apache   File: PerfTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Loads the first {@code colsToLoad} chararray columns (a0, a1, ...) from the
 * Parquet data at {@code out}, counts the rows, asserts the count equals
 * ROW_COUNT and appends the elapsed time to the shared results buffer.
 *
 * @param out        location of the data to read
 * @param colsToLoad number of columns to include in the requested schema
 */
private static void load(String out, int colsToLoad) throws ExecException, IOException {
  final long startMillis = System.currentTimeMillis();

  // Requested schema: "a0: chararray, a1: chararray, ..."
  StringBuilder schema = new StringBuilder("a0: chararray");
  int col = 1;
  while (col < colsToLoad) {
    schema.append(", a").append(col).append(": chararray");
    col++;
  }

  String loadQuery =
      "B = LOAD '" + out + "' USING " + ParquetLoader.class.getName() + "('" + schema + "');";

  PigServer server = new PigServer(ExecType.LOCAL);
  server.registerQuery(loadQuery);
  server.registerQuery("C = FOREACH (GROUP B ALL) GENERATE COUNT(B);");

  Iterator<Tuple> rows = server.openIterator("C");
  if (!rows.hasNext()) {
    throw new RuntimeException("Job failed: no tuple to read");
  }
  long rowsRead = ((Long) rows.next().get(0)).longValue();
  assertEquals(ROW_COUNT, rowsRead);

  final long endMillis = System.currentTimeMillis();
  results.append((endMillis - startMillis) + " ms to read " + colsToLoad + " columns\n");
}
 
Example #6
Source Project: spork   Author: sigmoidanalytics   File: TestProjectStarExpander.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Test projecting multiple * in a single foreach: once with an alias list
 * and once without. Checks both the resulting schema and the sorted output.
 * @throws IOException
 * @throws ParserException
 */
@Test
public void testProjectStarMulti() throws IOException, ParserException {
    PigServer server = new PigServer(ExecType.LOCAL);

    String script = "  l1 = load '" + INP_FILE_5FIELDS + "' as (a : int, b : int, c : int);"
            + "f = foreach l1 generate * as (aa, bb, cc), *;";
    Util.registerMultiLineQuery(server, script);

    // Schema: aliased columns first, then the original column names.
    Schema wantedSchema = Utils.getSchemaFromString(
            "aa : int, bb : int, cc : int, a : int, b : int, c : int");
    Schema actualSchema = server.dumpSchema("f");
    assertEquals("Checking expected schema", wantedSchema, actualSchema);

    // Data: every input row appears twice side by side.
    String[] wantedRows = new String[] {
            "(10,20,30,10,20,30)",
            "(11,21,31,11,21,31)",
    };
    List<Tuple> wantedTuples = Util.getTuplesFromConstantTupleStrings(wantedRows);
    Iterator<Tuple> actualTuples = server.openIterator("f");
    Util.checkQueryOutputsAfterSort(actualTuples, wantedTuples);
}
 
Example #7
Source Project: spork   Author: sigmoidanalytics   File: TestPigServer.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Points {@code pig.temp.dir} at a directory below the test temp dir and
 * checks that the temporary path handed out by FileLocalizer starts with that
 * directory and that its parent has permission {@code rwx------}.
 */
@Test
public void testPigTempDir() throws Throwable {
    Properties props = PropertiesUtil.loadDefaultProperties();
    File customTempDir = new File(tempDir, FILE_SEPARATOR + "tmp" + FILE_SEPARATOR + "test");
    props.put("pig.temp.dir", customTempDir.getPath());

    PigContext context = new PigContext(ExecType.LOCAL, props);
    context.connect();
    FileLocalizer.setInitialized(false);

    String generated = FileLocalizer.getTemporaryPath(context).toString();
    Path generatedPath = new Path(generated);
    assertTrue(generated.startsWith(customTempDir.toURI().toString()));

    FileSystem fileSystem = FileSystem.get(
            generatedPath.toUri(),
            ConfigurationUtil.toConfiguration(context.getProperties()));
    FileStatus parentStatus = fileSystem.getFileStatus(generatedPath.getParent());

    // Temporary root dir should have 700 as permission
    String parentPermission = parentStatus.getPermission().toString();
    assertEquals("rwx------", parentPermission);

    customTempDir.delete();
    FileLocalizer.setInitialized(false);
}
 
Example #8
Source Project: spork   Author: sigmoidanalytics   File: TestHiveColumnarLoader.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Loads a single data file through HiveColumnarLoader without any projection
 * and checks that every tuple has three fields, that the first field is a
 * chararray, and that the number of rows equals {@code simpleRowCount}.
 *
 * @throws IOException if the query cannot be registered or executed
 */
@Test
public void testReadingSingleFileNoProjections() throws IOException {
    String funcSpecString = "org.apache.pig.piggybank.storage.HiveColumnarLoader('f1 string,f2 string,f3 string')";

    String singlePartitionedFile = simpleDataFile.getAbsolutePath();

    PigServer server = new PigServer(ExecType.LOCAL);
    server.setBatchOn();
    server.registerFunction("org.apache.pig.piggybank.storage.HiveColumnarLoader",
            new FuncSpec(funcSpecString));

    server.registerQuery("a = LOAD '" + Util.encodeEscape(singlePartitionedFile) + "' using " + funcSpecString
            + ";");

    Iterator<Tuple> result = server.openIterator("a");

    int count = 0;
    // Drive the loop with hasNext() rather than relying on next() returning
    // null at the end, which the Iterator contract does not guarantee.
    while (result.hasNext()) {
        Tuple t = result.next();
        assertEquals(3, t.size());
        assertEquals(DataType.CHARARRAY, t.getType(0));
        count++;
    }

    Assert.assertEquals(simpleRowCount, count);
}
 
Example #9
Source Project: spork   Author: sigmoidanalytics   File: TestUDF.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Defines three aliases of the same UDF class with constructor arguments
 * '1', '2' and '3', applies all of them in one FOREACH and checks that the
 * output fields are 1, 2 and 3 respectively.
 */
@Test
public void testNormalDefine() throws Exception {
    final int aliasCount = 3;
    String inputName = "udf_test_jira_2430_2.txt";
    Util.createLocalInputFile(inputName, new String[]{"1"});

    PigServer server = new PigServer(ExecType.LOCAL);
    server.registerQuery("A = LOAD '" + inputName + "' as (x:int);");
    for (int n = 1; n <= aliasCount; n++) {
        server.registerQuery(
                "DEFINE udftest" + n + " org.apache.pig.test.TestUDF$UdfWithFuncSpecWithArgs('" + n + "');");
    }
    server.registerQuery("B = FOREACH A GENERATE udftest1(x), udftest2(x), udftest3(x);");

    Tuple firstRow = server.openIterator("B").next();
    for (int n = 1; n <= aliasCount; n++) {
        // Field n-1 was produced by the alias constructed with argument n.
        assertEquals(Integer.valueOf(n), firstRow.get(n - 1));
    }
}
 
Example #10
Source Project: spork   Author: sigmoidanalytics   File: TestMergeJoinOuter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a logical plan for a merge join whose left input is a grouped
 * relation, checks the join type is MERGE, and then expects physical plan
 * construction to fail with a FrontendException carrying error code 1103.
 */
@Test
public void testFailure() throws Exception{
    String script =
            "A = LOAD 'data1' using " + DummyCollectableLoader.class.getName() + "() as (id, name, grade);"
            + "E = group A by id;"
            + "B = LOAD 'data2' using " + DummyIndexableLoader.class.getName() + "() as (id, name, grade);"
            + "C = join E by A.id, B by id using 'merge';"
            + "store C into 'output';";

    // Logical plan: the predecessor of the single sink is the join itself.
    LogicalPlan logicalPlan = Util.buildLp(pigServer, script);
    Operator sink = logicalPlan.getSinks().get(0);
    LOJoin mergeJoin = (LOJoin) logicalPlan.getPredecessors(sink).get(0);
    assertEquals(LOJoin.JOINTYPE.MERGE, mergeJoin.getJoinType());

    PigContext context = new PigContext(ExecType.MAPREDUCE, cluster.getProperties());
    context.connect();

    boolean sawFrontendException = false;
    try {
        Util.buildPp(pigServer, script);
    } catch (FrontendException expected) {
        assertEquals(1103, expected.getErrorCode());
        sawFrontendException = true;
    }
    assertTrue(sawFrontendException);
}
 
Example #11
Source Project: spork   Author: sigmoidanalytics   File: TestProject.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Loads a two-column input file with a three-column schema and checks that
 * the extra declared column is projected as null for every row.
 *
 * @throws Exception if the input file cannot be created or the query fails
 */
@Test
public void testMissingCols1() throws Exception {
    String inputFileName = "TestProject-testMissingCols1-input.txt";
    String input[] = { "hello\tworld", "good\tbye" };
    Util.createLocalInputFile(inputFileName, input);
    String query = "a = load '" + inputFileName
            + "' as (s1:chararray, s2:chararray, extra:chararray);" +
            "b = foreach a generate s1, s2, extra;";

    PigServer ps = new PigServer(ExecType.LOCAL);
    Util.registerMultiLineQuery(ps, query);
    Iterator<Tuple> it = ps.openIterator("b");
    Tuple[] expectedResults = new Tuple[] {
                    (Tuple)Util.getPigConstant("('hello', 'world', null)"),
                    (Tuple)Util.getPigConstant("('good', 'bye', null)")
    };
    int i = 0;
    while (it.hasNext()) {
        assertEquals(expectedResults[i++], it.next());
    }
    // Without this check the test would pass when the query returns fewer
    // rows than expected (including none at all).
    assertEquals(expectedResults.length, i);
}
 
Example #12
Source Project: spork   Author: sigmoidanalytics   File: TestAssert.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Verify that the ASSERT operator fails the job when a row violates the
 * asserted condition (the input contains i = 1 while the assertion is i > 1).
 * NOTE(review): the original description says fetch is disabled for this
 * testcase, but nothing in this method configures that - confirm where it
 * is set.
 * @throws Exception
 */
@Test
public void testNegativeWithoutFetch() throws Exception {
    PigServer pigServer = new PigServer(ExecType.LOCAL);
    Data data = resetData(pigServer);

    data.set("foo",
            tuple(1),
            tuple(2),
            tuple(3)
            );

    pigServer.registerQuery("A = LOAD 'foo' USING mock.Storage() AS (i:int);");
    pigServer.registerQuery("ASSERT A BY i > 1 , 'i should be greater than 1';");

    try {
        pigServer.openIterator("A");
        // Reaching this line means the violated assertion went unnoticed;
        // without it the test would pass silently. Assert.fail throws an
        // AssertionError, which the catch below does not intercept.
        Assert.fail("FrontendException must be thrown when the ASSERT condition is violated");
    } catch (FrontendException fe) {
        Assert.assertTrue(fe.getCause().getMessage().contains(
                "Job terminated with anomalous status FAILED"));
    }
}
 
Example #13
Source Project: spork   Author: sigmoidanalytics   File: TestMapSideCogroup.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a logical plan for a three-way merge cogroup, checks the group type
 * is MERGE, and then expects physical plan construction to fail with a
 * FrontendException.
 */
@Test
public void testFailure2() throws Exception{
    PigServer server = new PigServer(cluster.getExecType(), cluster.getProperties());

    String script =
            "A = LOAD 'data1' using " + DummyCollectableLoader.class.getName() + "() as (id, name, grade);"
            + "B = LOAD 'data2' using " + DummyIndexableLoader.class.getName() + "() as (id, name, grade);"
            + "D = LOAD 'data2' using " + DummyIndexableLoader.class.getName() + "() as (id, name, grade);"
            + "C = cogroup A by id inner, B by id, D by id inner using 'merge';"
            + "store C into 'output';";

    // Logical plan: the predecessor of the single sink is the cogroup itself.
    LogicalPlan logicalPlan = Util.buildLp(server, script);
    Operator sink = logicalPlan.getSinks().get(0);
    LOCogroup mergeCogroup = (LOCogroup) logicalPlan.getPredecessors(sink).get(0);
    assertEquals(LOCogroup.GROUPTYPE.MERGE, mergeCogroup.getGroupType());

    PigContext context = new PigContext(ExecType.MAPREDUCE, cluster.getProperties());
    context.connect();

    boolean sawFrontendException = false;
    try {
        Util.buildPp(server, script);
    } catch (FrontendException expected) {
        sawFrontendException = true;
    }
    assertTrue(sawFrontendException);
}
 
Example #14
Source Project: spork   Author: sigmoidanalytics   File: TestUDF.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Applies the IntToBool UDF to a column whose first row is empty and whose
 * second row is 2, and checks that the results are (false) then (true) with
 * no further rows.
 *
 * @throws Exception if the input file cannot be created or the query fails
 */
@Test
// See PIG-4184
public void testUDFNullInput() throws Exception {
    PigServer pig = new PigServer(ExecType.LOCAL);
    File inputFile = Util.createInputFile("tmp", "", 
            new String[] {"\t", "2\t3"});
    pig.registerQuery("a = load '"
            + Util.generateURI(inputFile.toString(), pig.getPigContext())
            + "' as (i1:int, i2:int);");
    pig.registerQuery("b = foreach a generate " + IntToBool.class.getName() + "(i1);");

    Iterator<Tuple> iter = pig.openIterator("b");
    // JUnit's assertEquals takes (expected, actual); this order keeps the
    // failure message accurate.
    assertEquals("(false)", iter.next().toString());
    assertEquals("(true)", iter.next().toString());
    assertFalse(iter.hasNext());
}
 
Example #15
Source Project: spork   Author: sigmoidanalytics   File: TestOrcStoragePushdown.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Runs the same filter over the ORC input twice - once on a fresh local
 * server with the PredicatePushdownOptimizer rule disabled and once on the
 * shared {@code pigServer} - and checks that both produce the same output
 * with the expected number of rows.
 *
 * @param filterStmt   the filter condition placed after "filter ... by"
 * @param expectedRows number of rows both queries are expected to return
 */
private void testPredicatePushdownLocal(String filterStmt, int expectedRows) throws IOException {
    // Baseline: a server whose optimizer has the pushdown rule switched off.
    PigServer baselineServer = new PigServer(ExecType.LOCAL);
    HashSet<String> rulesToDisable = new HashSet<String>();
    rulesToDisable.add("PredicatePushdownOptimizer");
    baselineServer.getPigContext().getProperties().setProperty(
            PigImplConstants.PIG_OPTIMIZER_RULES_KEY,
            ObjectSerializer.serialize(rulesToDisable));
    baselineServer.registerQuery("B = load '" + INPUT + "' using OrcStorage();");
    baselineServer.registerQuery("C = filter B by " + filterStmt + ";");

    // Same query on the shared server, where the rule stays enabled.
    pigServer.registerQuery("D = load '" + INPUT + "' using OrcStorage();");
    pigServer.registerQuery("E = filter D by " + filterStmt + ";");

    // Both runs must agree.
    Util.checkQueryOutputs(
            baselineServer.openIterator("C"),
            pigServer.openIterator("E"),
            expectedRows);
}
 
Example #16
Source Project: spork   Author: sigmoidanalytics   File: TestCase.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Verify that FrontendException is thrown when the CASE keyword is not
 * followed by an expression and the WHEN branches are plain values rather
 * than conditional expressions.
 * @throws Exception
 */
@Test(expected = FrontendException.class)
public void testMissingCaseExpression() throws Exception {
    PigServer server = new PigServer(ExecType.LOCAL);
    Data mockData = resetData(server);

    mockData.set("foo", tuple(1), tuple(2), tuple(3), tuple(4), tuple(5));

    // No expression after CASE, and the WHEN operands are not conditions.
    String invalidCase = "B = FOREACH A GENERATE (" +
            "  CASE " +
            "    WHEN 0 THEN '3n'" +
            "    WHEN 1 THEN '3n+1'" +
            "    ELSE        '3n+2'" +
            "  END" +
            ");";

    server.registerQuery("A = LOAD 'foo' USING mock.Storage() AS (i:int);");
    server.registerQuery(invalidCase);
    server.registerQuery("STORE B INTO 'bar' USING mock.Storage();");
    fail("FrontendException must be thrown for invalid case statement");
}
 
Example #17
Source Project: yauaa   Author: nielsbasjes   File: TestLoadDissectorDynamicallyInPig.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Loads the log file with the Apache HTTPD log loader, asking it through a
 * '-load:' argument to add the UserAgentDissector, stores the result in mock
 * storage and checks the single output record field by field.
 */
@Test
public void dynamicallyLoadedWithoutExtraRules() throws Exception {
    final String delimiter = "><#><";

    String loadClicks =
        "Clicks = " +
            "    LOAD '" + logfile + "' " +
            "    USING nl.basjes.pig.input.apachehttpdlog.Loader(" +
            "            '" + LOGFORMAT + "'," +
            "            'IP:connection.client.host'," +
            "            'TIME.STAMP:request.receive.time'," +
            "    '-load:nl.basjes.parse.useragent.dissector.UserAgentDissector:'," +
            "            'HTTP.USERAGENT:request.user-agent'," +
            "            'HTTP.HOST:request.user-agent.agent_information_url.host'" +
            "            )" +
            "         AS (" +
            "            ConnectionClientHost," +
            "            RequestReceiveTime," +
            "            RequestUseragent," +
            "            RequestUseragentUrlHostName" +
            "            );";

    PigServer server = new PigServer(ExecType.LOCAL);
    server.registerQuery(loadClicks);
    Storage.Data mockData = resetData(server);

    server.registerQuery("STORE Clicks INTO 'Clicks' USING mock.Storage();");

    List<Tuple> stored = mockData.get("Clicks");
    assertEquals(1, stored.size());

    String userAgent =
        "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) " +
            "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.96 " +
            "Mobile Safari/537.36" +
            "(https://yauaa.basjes.nl:8080/something.html?aap=noot&mies=wim#zus)";
    String expectedRecord = tuple(
        "172.21.13.88",
        "07/Apr/2013:03:04:49 +0200",
        userAgent,
        "yauaa.basjes.nl"
        ).toDelimitedString(delimiter);

    // Compare the delimited string form of the expected and stored tuples.
    assertEquals(expectedRecord, stored.get(0).toDelimitedString(delimiter));
}
 
Example #18
Source Project: spork   Author: sigmoidanalytics   File: TestUnionOnSchema.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Test UNION ONSCHEMA where a common column has additional 'namespace' part
 *  in the column name in one of the inputs. The union is checked in both
 *  input orders: the schema must be (i: int, j: int) and the sorted output
 *  must contain each input row twice.
 * @throws IOException
 * @throws ParserException
 */
@Test
public void testUnionOnSchemaScopedColumnName() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL);
    String query_prefix = 
    "  l1 = load '" + INP_FILE_2NUMS + "' as (i : int, j : int); " 
    + "g = group l1 by i; "
    + "f = foreach g generate flatten(l1); "
    + "l2 = load '" + INP_FILE_2NUMS + "' as (i : int, j : int); ";

    String query = query_prefix + "u = union onschema f, l2; " ; 
    Util.registerMultiLineQuery(pig, query);
    Schema sch = pig.dumpSchema("u");
    Schema expectedSch = Utils.getSchemaFromString("i: int, j: int");
    // JUnit's assertEquals takes (message, expected, actual).
    assertEquals("Checking expected schema", expectedSch, sch);
    Iterator<Tuple> it = pig.openIterator("u");

    List<Tuple> expectedRes = 
        Util.getTuplesFromConstantTupleStrings(
                new String[] {
                        "(1,2)",
                        "(5,3)",
                        "(1,2)",
                        "(5,3)"
                });
    Util.checkQueryOutputsAfterSort(it, expectedRes);
    
    // now try reversing the order of relation
    query = query_prefix + "u = union onschema l2, f; " ; 
    Util.registerMultiLineQuery(pig, query);
    sch = pig.dumpSchema("u");
    expectedSch = Utils.getSchemaFromString("i: int, j: int");
    assertEquals("Checking expected schema", expectedSch, sch);
    it = pig.openIterator("u");
    Util.checkQueryOutputsAfterSort(it, expectedRes);

}
 
Example #19
Source Project: spork   Author: sigmoidanalytics   File: TestUnion.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Unions two bytearray relations read by loaders with different LoadCasters
 * (the default loader and TextLoader), casts the resulting columns, and
 * checks both the schema of the cast relation and that it yields six records.
 *
 * @throws Exception if the input files cannot be created or a query fails
 */
@Test
public void testCastingAfterUnionWithMultipleLoadersDifferentCasters()
    throws Exception {
    // Note that different caster case only works when each field is still coming
    // from the single Loader.
    // In the case below, 'a' is coming from A(PigStorage)
    // and 'b' is coming from B(TextLoader). No overlaps.
    File f1 = Util.createInputFile("tmp", "i1.txt", new String[] {"1","2","3"});
    File f2 = Util.createInputFile("tmp", "i2.txt", new String[] {"a","b","c"});

    PigServer ps = new PigServer(ExecType.LOCAL, new Properties());
    //PigStorage and TextLoader have different LoadCasters
    ps.registerQuery("A = load '" + Util.encodeEscape(f1.getAbsolutePath()) + "' as (a:bytearray);");
    ps.registerQuery("B = load '" + Util.encodeEscape(f2.getAbsolutePath()) + "' using TextLoader() as (b:bytearray);");
    ps.registerQuery("C = union onschema A,B;");
    ps.registerQuery("D = foreach C generate (int)a as a,(chararray)b as b;");

    Schema dumpSchema = ps.dumpSchema("D");
    Schema expected = new Schema ();
    expected.add(new Schema.FieldSchema("a", DataType.INTEGER));
    expected.add(new Schema.FieldSchema("b", DataType.CHARARRAY));
    assertEquals(expected, dumpSchema);

    Iterator<Tuple> itr = ps.openIterator("D");
    int recordCount = 0;
    // Use hasNext() to detect the end of the data instead of depending on
    // next() returning null, which the Iterator contract does not guarantee.
    while (itr.hasNext()) {
        itr.next();
        ++recordCount;
    }
    assertEquals(6, recordCount);

}
 
Example #20
Source Project: spork   Author: sigmoidanalytics   File: TestFRJoin2.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Runs a replicated join under two values of
 * {@code PIG_JOIN_REPLICATED_MAX_BYTES}: with the limit at 16 the job is
 * expected to fail with a distributed-cache error, with the limit at 17 it is
 * expected to run.
 */
@Test
public void testTooBigReplicatedFile() throws Exception {
    PigServer server = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());

    server.registerQuery("A = LOAD '" + INPUT_DIR + "' as (x:int,y:int);");
    server.registerQuery("B = LOAD '" + INPUT_FILE + "' as (x:int,y:int);");
    server.registerQuery("C = group B all parallel 5;");
    server.registerQuery("C = foreach C generate MAX(B.x) as x;");
    server.registerQuery("D = join A by x, B by x, C by x using 'repl';");

    // When the replicated input sizes=(12 + 5) is bigger than
    // pig.join.replicated.max.bytes=16, we throw exception
    try {
        server.getPigContext().getProperties().setProperty(
                PigConfiguration.PIG_JOIN_REPLICATED_MAX_BYTES,
                String.valueOf(16));
        server.openIterator("D");
        Assert.fail();
    } catch (FrontendException tooBig) {
        String rootMessage = tooBig.getCause().getCause().getCause().getMessage();
        assertEquals("Internal error. Distributed cache could" +
                " not be set up for the replicated files",
                rootMessage);
    }

    // If we increase the size to 17, it should work
    server.getPigContext().getProperties().setProperty(
            PigConfiguration.PIG_JOIN_REPLICATED_MAX_BYTES,
            String.valueOf(17));
    server.openIterator("D");
}
 
Example #21
Source Project: spork   Author: sigmoidanalytics   File: TestJoin.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * For every configured exec type, joins two identical inputs on a
 * two-field tuple key. One row in each input has an empty second field, and
 * only the fully populated row (2, 'aa') is expected in the join output.
 */
@Test
public void testJoinNullTupleFieldKey() throws Exception{
    for (ExecType mode : execTypes) {
        setUp(mode);

        // Row "1\t" has nothing after the tab; row "2\taa" is complete.
        String[] leftRows = { "1\t", "2\taa" };
        String[] rightRows = { "1\t", "2\taa" };

        String leftPath = createInputFile(mode, "a.txt", leftRows);
        String rightPath = createInputFile(mode, "b.txt", rightRows);

        String joinScript =
                "a = load '" + Util.encodeEscape(leftPath) + "' as (a1:int, a2:chararray);"
                + "b = load '" + Util.encodeEscape(rightPath) + "' as (b1:int, b2:chararray);"
                + "c = join a by (a1, a2), b by (b1, b2);";
        Util.registerMultiLineQuery(pigServer, joinScript);
        Iterator<Tuple> joined = pigServer.openIterator("c");

        String[] wantedRows = new String[] { "(2,'aa',2,'aa')" };
        List<Tuple> wantedTuples = Util.getTuplesFromConstantTupleStrings(wantedRows);
        Util.checkQueryOutputs(joined, wantedTuples);

        deleteInputFile(mode, leftPath);
        deleteInputFile(mode, rightPath);
    }
}
 
Example #22
Source Project: parquet-mr   Author: apache   File: TestParquetLoader.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Writes ten rows to Parquet, reads them back with a requested schema that
 * adds two columns (n1, n2) not present in the file, projects
 * (n1, b, n2, i) and checks that every output tuple has four fields with
 * the two added columns null.
 */
@Test
public void testReqestedSchemaColumnPruning() throws Exception {
  final int rowCount = 10;
  final String parquetOut = "target/out";

  PigServer server = new PigServer(ExecType.LOCAL);
  server.setValidateEachStatement(true);
  Data mockData = Storage.resetData(server);

  // Input rows: (i, "a" + i, 2 * i)
  List<Tuple> inputRows = new ArrayList<Tuple>();
  for (int row = 0; row < rowCount; row++) {
    inputRows.add(Storage.tuple(row, "a" + row, row * 2));
  }
  mockData.set("in", "i:int, a:chararray, b:int", inputRows);

  // First batch: copy the mock input into a Parquet file.
  server.setBatchOn();
  server.registerQuery("A = LOAD 'in' USING mock.Storage();");
  server.deleteFile(parquetOut);
  server.registerQuery("Store A into '" + parquetOut + "' using " + ParquetStorer.class.getName() + "();");
  server.executeBatch();

  //Test Null Padding at the end
  server.registerQuery("C = LOAD '" + parquetOut + "' using " + ParquetLoader.class.getName()
      + "('i:int, a:chararray, b:int, n1:int, n2:chararray');");
  server.registerQuery("G = foreach C generate n1,b,n2,i;");
  server.registerQuery("STORE G into 'out' using mock.Storage();");
  server.executeBatch();

  List<Tuple> outputRows = mockData.get("out");
  assertEquals(rowCount, outputRows.size());
  for (Tuple outputRow : outputRows) {
    assertEquals(4, outputRow.size());
    // Positions 0 and 2 hold n1 and n2.
    assertTrue(outputRow.isNull(0));
    assertTrue(outputRow.isNull(2));
  }
}
 
Example #23
Source Project: spork   Author: sigmoidanalytics   File: TestUDFGroovy.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Writes a Groovy class with a non-static {@code mul} method to a temporary
 * script, registers it under the {@code groovyudfs} namespace and checks that
 * calling it on the value 1 produces 42.
 *
 * @throws Exception if the script cannot be written or a query fails
 */
@Test
public void testEvalFunc_NonStatic() throws Exception {
  String[] groovyStatements = {
      "import org.apache.pig.builtin.OutputSchema;",
      "class GroovyUDF {",
      "  private final long multiplicator;",
      "  public GroovyUDF() {",
      "    this.multiplicator = 42L;",
      "  }",
      "  @OutputSchema('x:long')",
      "  long mul(long x) {",
      "    return x*this.multiplicator;",
      "  }",
      "}"
  };

  File tmpScriptFile = File.createTempFile("temp_groovy_udf", ".groovy");
  tmpScriptFile.deleteOnExit();
  FileWriter writer = new FileWriter(tmpScriptFile);
  try {
    for (String line : groovyStatements) {
      writer.write(line + "\n");
    }
  } finally {
    // Release the file handle even if a write fails.
    writer.close();
  }

  PigServer pigServer = new PigServer(ExecType.LOCAL);

  pigServer.registerCode(tmpScriptFile.getCanonicalPath(), "groovy", "groovyudfs");

  Data data = resetData(pigServer);
  data.set("foo1",
      tuple(1)
      );

  pigServer.registerQuery("A = LOAD 'foo1' USING mock.Storage();");
  pigServer.registerQuery("B = FOREACH A GENERATE groovyudfs.mul($0);");
  pigServer.registerQuery("STORE B INTO 'bar1' USING mock.Storage();");

  List<Tuple> out = data.get("bar1");
  assertEquals(tuple(42L), out.get(0));
}
 
Example #24
Source Project: spork   Author: sigmoidanalytics   File: TestGrunt.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Registers a script that loads an input file, stores it, and then removes
 * the input file with a shell command, and checks that the first job of the
 * batch completes.
 *
 * @throws Throwable if the temporary files cannot be written or the batch fails
 */
@Test
public void testShellCommandOrder() throws Throwable {
    PigServer server = new PigServer(ExecType.LOCAL, new Properties());

    String strRemove = "rm";

    if (Util.WINDOWS)
    {
        strRemove = "del";
    }

    File inputFile = File.createTempFile("testInputFile", ".txt");
    PrintWriter pwInput = new PrintWriter(new FileWriter(inputFile));
    pwInput.println("1");
    pwInput.close();

    File inputScript = File.createTempFile("testInputScript", "");
    File outputFile = File.createTempFile("testOutputFile", ".txt");
    // Only the unique name is needed; the store statement creates the output.
    outputFile.delete();
    PrintWriter pwScript = new PrintWriter(new FileWriter(inputScript));
    pwScript.println("a = load '" + Util.encodeEscape(inputFile.getAbsolutePath()) + "';");
    pwScript.println("store a into '" + Util.encodeEscape(outputFile.getAbsolutePath()) + "';");
    pwScript.println("sh " + strRemove + " " + Util.encodeEscape(inputFile.getAbsolutePath()));
    pwScript.close();

    InputStream inputStream = new FileInputStream(inputScript.getAbsoluteFile());
    try {
        server.setBatchOn();
        server.registerScript(inputStream);
        List<ExecJob> execJobs = server.executeBatch();
        assertTrue(execJobs.get(0).getStatus() == JOB_STATUS.COMPLETED);
    } finally {
        // The script stream was previously never closed.
        inputStream.close();
    }
}
 
Example #25
Source Project: spork   Author: sigmoidanalytics   File: TestMacroExpansion.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Runs the dry-run grunt parser over {@code piglatin} and expects it to fail
 * with an exception whose message (or whose cause's message) contains
 * {@code expectedErr} starting at the first occurrence of {@code keyword}.
 * The temporary script file is removed afterwards.
 *
 * @param piglatin    the script text to parse
 * @param expectedErr the error text expected at the keyword position
 * @param keyword     text used to locate the error inside the message
 * @throws Throwable if the script file cannot be written
 */
private void validateDryrunFailure(String piglatin, String expectedErr,
        String keyword) throws Throwable {
    String scriptFile = "myscript.pig";

    try {
        BufferedReader br = new BufferedReader(new StringReader(piglatin));
        DryRunGruntParser parser = new DryRunGruntParser(br, scriptFile,
                new PigContext(ExecType.LOCAL, new Properties()));

        PrintWriter w = new PrintWriter(new FileWriter(scriptFile));
        w.print(piglatin);
        w.close();

        parser.parseStopOnError();

        Assert.fail("Expected exception isn't thrown");
    } catch (Exception e) {
        // Look in the exception's own message first, then in its cause's.
        // Messages may be null, so guard every lookup.
        String msg = e.getMessage();
        int pos = (msg == null) ? -1 : msg.indexOf(keyword);
        if (pos < 0) {
            Throwable cause = e.getCause();
            if (cause != null) {
                msg = cause.getMessage();
                pos = (msg == null) ? -1 : msg.indexOf(keyword);
            }
        }
        if (pos < 0) {
            // Report a readable failure instead of an index error from substring.
            Assert.fail("Keyword '" + keyword + "' not found in exception: " + e);
        }
        // Clamp the end index so a short message yields a comparison failure
        // rather than a StringIndexOutOfBoundsException.
        int end = Math.min(msg.length(), pos + expectedErr.length());
        Assert.assertEquals(expectedErr, msg.substring(pos, end));
    } finally {
        new File(scriptFile).delete();
    }
}
 
Example #26
Source Project: spork   Author: sigmoidanalytics   File: TestGrunt.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Writes {@code piglatin} to a local script file, runs Grunt's script check
 * on it and expects a FrontendException whose message is verified against
 * {@code errMsg}.
 *
 * @param piglatin the script text to check
 * @param errMsg   the message expected in the thrown exception
 */
private void validateGruntCheckFail(String piglatin, String errMsg) throws Throwable{
    final String scriptPath = "myscript.pig";
    try {
        BufferedReader scriptReader = new BufferedReader(new StringReader(piglatin));
        PigContext localContext = new PigContext(ExecType.LOCAL, new Properties());
        Grunt checker = new Grunt(scriptReader, localContext);

        Util.createLocalInputFile(scriptPath, new String[] { piglatin });
        checker.checkScript(scriptPath);

        fail("Expected exception isn't thrown");
    } catch (FrontendException expected) {
        Util.checkMessageInException(expected, errMsg);
    }
}
 
Example #27
Source Project: spork   Author: sigmoidanalytics   File: TestRank1.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a local Pig server with fresh mock storage and fills it with the
 * two data sets used by the tests: {@code test01} (three fields per row) and
 * {@code test02} (eight fields per row).
 */
@Before
public void setUp() throws Exception {
    pigServer = new PigServer(ExecType.LOCAL);
    data = resetData(pigServer);

    // test01: eleven rows, including repeated rows.
    data.set("test01",
            tuple("A", 1, "N"),
            tuple("B", 2, "N"),
            tuple("C", 3, "M"),
            tuple("D", 4, "P"),
            tuple("E", 4, "Q"),
            tuple("E", 4, "Q"),
            tuple("F", 8, "Q"),
            tuple("F", 7, "Q"),
            tuple("F", 8, "T"),
            tuple("F", 8, "Q"),
            tuple("G", 10, "V"));

    // test02: thirteen rows.
    data.set("test02",
            tuple("Michael", "Blythe", 1, 1, 1, 1, 4557045.046, 98027),
            tuple("Linda", "Mitchell", 2, 1, 1, 1, 5200475.231, 98027),
            tuple("Jillian", "Carson", 3, 1, 1, 1, 3857163.633, 98027),
            tuple("Garrett", "Vargas", 4, 1, 1, 1, 1764938.986, 98027),
            tuple("Tsvi", "Reiter", 5, 1, 1, 2, 2811012.715, 98027),
            tuple("Shu", "Ito", 6, 6, 2, 2, 3018725.486, 98055),
            tuple("Jose", "Saraiva", 7, 6, 2, 2, 3189356.247, 98055),
            tuple("David", "Campbell", 8, 6, 2, 3, 3587378.426, 98055),
            tuple("Tete", "Mensa-Annan", 9, 6, 2, 3, 1931620.184, 98055),
            tuple("Lynn", "Tsoflias", 10, 6, 2, 3, 1758385.926, 98055),
            tuple("Rachel", "Valdez", 11, 6, 2, 4, 2241204.042, 98055),
            tuple("Jae", "Pak", 12, 6, 2, 4, 5015682.375, 98055),
            tuple("Ranjit", "Varkey Chudukatil", 13, 6, 2, 4, 3827950.238, 98055));
}
 
Example #28
Source Project: spork   Author: sigmoidanalytics   File: TestBuiltinInvoker.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Prepares the fixture for each test: a random generator with a fixed seed,
 * a local Pig server with fresh mock storage, and the two chararray data
 * sets {@code chardata} and {@code charintdata}.
 */
@Before
public void setUp() throws Exception {
    // Fixed seed, so the generated sequence is the same on every run.
    r = new Random(42L);

    pigServer = new PigServer(ExecType.LOCAL);
    data = resetData(pigServer);

    // Both data sets are registered with a single chararray column named x.
    data.set("chardata",
            Utils.getSchemaFromString("x:chararray"),
            chardata);
    data.set("charintdata",
            Utils.getSchemaFromString("x:chararray"),
            charintdata);
}
 
Example #29
Source Project: spork   Author: sigmoidanalytics   File: TestLocationInPhysicalPlan.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Runs a four-line script (load, group, count, store) in batch mode and
 * checks the original script location recorded on the store operator
 * (line 4, offset 0, alias A) as well as the alias-location string reported
 * by the job statistics.
 */
@Test
public void test() throws Exception {
    File inputFile = File.createTempFile("test", "input");
    inputFile.deleteOnExit();
    File outputFile = File.createTempFile("test", "output");
    // Only the unique name is wanted; the STORE statement writes the output.
    outputFile.delete();

    String[] inputRows = new String[] {
        "1,2,3",
        "1,1,3",
        "1,1,1",
        "3,1,1",
        "1,2,1",
    };
    Util.createLocalInputFile(inputFile.getAbsolutePath(), inputRows);

    // One statement per line, so the STORE sits on line 4 of the script.
    String script =
            "A = LOAD '" + Util.encodeEscape(inputFile.getAbsolutePath()) + "' using PigStorage();\n"
            + "B = GROUP A BY $0;\n"
            + "A = FOREACH B GENERATE COUNT(A);\n"
            + "STORE A INTO '" + Util.encodeEscape(outputFile.getAbsolutePath()) + "';";

    PigServer server = new PigServer(ExecType.LOCAL);
    server.setBatchOn();
    server.registerQuery(script);
    ExecJob firstJob = server.executeBatch().get(0);

    List<OriginalLocation> storeLocations = firstJob.getPOStore().getOriginalLocations();
    Assert.assertEquals(1, storeLocations.size());
    OriginalLocation storeLocation = storeLocations.get(0);
    Assert.assertEquals(4, storeLocation.getLine());
    Assert.assertEquals(0, storeLocation.getOffset());
    Assert.assertEquals("A", storeLocation.getAlias());

    JobStats sinkStats = (JobStats) firstJob.getStatistics().getJobGraph().getSinks().get(0);
    Assert.assertEquals("M: A[1,4],A[3,4],B[2,4] C: A[3,4],B[2,4] R: A[3,4]", sinkStats.getAliasLocation());
}
 
Example #30
Source Project: spork   Author: sigmoidanalytics   File: TestGrunt.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Feeds an {@code illustrate -script} command for {@code illustrate.pig}
 * (resolved under {@code basedir}) to Grunt and executes it. The test has no
 * explicit assertions; it passes when {@code exec()} completes without
 * throwing.
 */
@Test
public void testIllustrateScript() throws Throwable {
    PigServer localServer = new PigServer(ExecType.LOCAL, new Properties());
    PigContext pigContext = localServer.getPigContext();

    String command = "illustrate -script " + basedir + "/illustrate.pig;";

    // Expose the command text to Grunt as a character stream.
    BufferedReader commandReader = new BufferedReader(
            new InputStreamReader(new ByteArrayInputStream(command.getBytes())));

    Grunt shell = new Grunt(commandReader, pigContext);
    shell.exec();
}