Java Code Examples for org.apache.pig.PigServer#openIterator()

The following examples show how to use org.apache.pig.PigServer#openIterator(). Each example is taken from an open-source project; the source file and license are noted above it.
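Before the individual examples, here is a minimal, self-contained sketch of the typical openIterator() pattern, assuming local execution mode and a hypothetical tab-separated input file people.txt: register one or more Pig Latin statements, then call openIterator(alias) to run the plan for that alias and walk the resulting tuples on the client.

import java.util.Iterator;

import org.apache.pig.ExecType;
import org.apache.pig.PigServer;
import org.apache.pig.data.Tuple;

public class OpenIteratorSketch {
    public static void main(String[] args) throws Exception {
        // Local-mode server; the examples below also run against MAPREDUCE/TEZ mini-clusters.
        PigServer pig = new PigServer(ExecType.LOCAL);

        // 'people.txt' is a hypothetical tab-separated input file.
        pig.registerQuery("A = LOAD 'people.txt' AS (name:chararray, age:int);");
        pig.registerQuery("B = FILTER A BY age >= 18;");

        // openIterator() executes whatever is needed to materialize alias B and
        // streams the resulting tuples back to the client.
        Iterator<Tuple> it = pig.openIterator("B");
        while (it.hasNext()) {
            Tuple t = it.next();
            System.out.println(t.get(0) + "\t" + t.get(1));
        }

        pig.shutdown();
    }
}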
Example 1
Source File: TestBinaryExpressionOps.java    From spork with Apache License 2.0
@Test
public void testArithmeticOperators() throws Exception {
    PigServer pig = new PigServer(cluster.getExecType(), properties);

    pig.registerQuery("A = LOAD '" + INPUT_1 + "' AS (id:chararray, val:long);");
    pig.registerQuery("B = LOAD '" + INPUT_2 + "' AS (id:chararray, val:long);");
    pig.registerQuery("C = COGROUP A BY id, B BY id;");
    pig.registerQuery("D = FOREACH C GENERATE group, SUM(B.val), SUM(A.val), "
            + "(SUM(A.val) - SUM(B.val)), (SUM(A.val) + SUM(B.val)), "
            + "(SUM(A.val) * SUM(B.val)), (SUM(A.val) / SUM(B.val)), "
            + "(SUM(A.val) % SUM(B.val)), (SUM(A.val) < 0 ? SUM(A.val) : SUM(B.val));");

    List<Tuple> expectedResults = Util.getTuplesFromConstantTupleStrings(
            new String[] {
                    "('id1',2L,null,null,null,null,null,null,null)",
                    "('id2',2L,10L,8L,12L,20L,5L,0L,2L)" });
    Iterator<Tuple> iter = pig.openIterator("D");
    Util.checkQueryOutputsAfterSort(iter, expectedResults);
}
 
Example 2
Source File: TestScalarAliases.java    From spork with Apache License 2.0
@Test
public void testScalarErrMultipleRowsInInput() throws Exception{
    Assume.assumeTrue("Skip this test for TEZ. See PIG-3994", Util.isMapredExecType(cluster.getExecType()));
    Util.resetStateForExecModeSwitch();
    pigServer = new PigServer(cluster.getExecType(), cluster.getProperties());
    String[] input = {
            "1\t5",
            "2\t10",
            "3\t20"
    };
    String INPUT_FILE = "table_testScalarAliasesMulRows";
    Util.createInputFile(cluster, INPUT_FILE, input);
    pigServer.registerQuery("A = LOAD '" + INPUT_FILE +  "' as (a0: long, a1: double);");
    pigServer.registerQuery("B = LOAD '" + INPUT_FILE +  "' as (b0: long, b1: double);");
    pigServer.registerQuery("C = foreach A generate $0, B.$0;");
    try {
        pigServer.openIterator("C");
        fail("exception expected - scalar input has multiple rows");
    } catch (IOException pe){
        Util.checkStrContainsSubStr(pe.getCause().getMessage(),
                "Scalar has more than one row in the output"
        );
    }
}
 
Example 3
Source File: TestCommonLogLoader.java    From spork with Apache License 2.0
@Test
public void testLoadFromPigServer() throws Exception {
    String filename = TestHelper.createTempFile(data, " ");
    PigServer pig = new PigServer(ExecType.LOCAL);
    filename = filename.replace("\\", "\\\\");
    pig.registerQuery("A = LOAD '" + filename + "' USING org.apache.pig.piggybank.storage.apachelog.CommonLogLoader();");
    Iterator<?> it = pig.openIterator("A");

    int tupleCount = 0;

    while (it.hasNext()) {
        Tuple tuple = (Tuple) it.next();
        if (tuple == null)
            break;
        else {
            TestHelper.examineTuple(EXPECTED, tuple, tupleCount);
            tupleCount++;
        }
    }
    assertEquals(data.size(), tupleCount);
}
 
Example 4
Source File: TestUDF.java    From spork with Apache License 2.0
@Test
public void testUDFReturnMap_MapReduceMode() throws Exception {
    Util.createInputFile(cluster, "a.txt", new String[] { "dummy",
            "dummy" });
    FileLocalizer.deleteTempFiles();
    PigServer pig = new PigServer(cluster.getExecType(), cluster
            .getProperties());
    pig.registerQuery("A = LOAD 'a.txt';");
    pig.registerQuery("B = FOREACH A GENERATE org.apache.pig.test.utils.MyUDFReturnMap();");

    Iterator<Tuple> iterator = pig.openIterator("B");
    while (iterator.hasNext()) {
        Tuple tuple = iterator.next();
        @SuppressWarnings("unchecked")
        Map<Object, Object> result = (Map<Object, Object>) tuple.get(0);
        assertEquals(result, MyUDFReturnMap.map);
    }
}
 
Example 5
Source File: TestProjectStarExpander.java    From spork with Apache License 2.0
/**
 * Test projecting multiple *
 * @throws IOException
 * @throws ParserException
 */
@Test
public void testProjectStarMulti() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL);
    String query =
        "  l1 = load '" + INP_FILE_5FIELDS + "' as (a : int, b : int, c : int);"
        + "f = foreach l1 generate * as (aa, bb, cc), *;"
    ; 

    Util.registerMultiLineQuery(pig, query);
   
    Schema expectedSch = Utils.getSchemaFromString(
            "aa : int, bb : int, cc : int, a : int, b : int, c : int");
    Schema sch = pig.dumpSchema("f");
    assertEquals("Checking expected schema", expectedSch, sch);
    List<Tuple> expectedRes = 
        Util.getTuplesFromConstantTupleStrings(
                new String[] {
                        "(10,20,30,10,20,30)",
                        "(11,21,31,11,21,31)",
                });
    Iterator<Tuple> it = pig.openIterator("f");
    Util.checkQueryOutputsAfterSort(it, expectedRes);
}
 
Example 6
Source File: TestHiveColumnarLoader.java    From spork with Apache License 2.0
@Test
public void testReadingSingleFileNoProjections() throws IOException {
    String funcSpecString = "org.apache.pig.piggybank.storage.HiveColumnarLoader('f1 string,f2 string,f3 string')";

    String singlePartitionedFile = simpleDataFile.getAbsolutePath();

    PigServer server = new PigServer(ExecType.LOCAL);
    server.setBatchOn();
    server.registerFunction("org.apache.pig.piggybank.storage.HiveColumnarLoader",
            new FuncSpec(funcSpecString));

    server.registerQuery("a = LOAD '" + Util.encodeEscape(singlePartitionedFile) + "' using " + funcSpecString
            + ";");

    Iterator<Tuple> result = server.openIterator("a");

    int count = 0;
    Tuple t = null;
    while ((t = result.next()) != null) {
        assertEquals(3, t.size());
        assertEquals(DataType.CHARARRAY, t.getType(0));
        count++;
    }

    Assert.assertEquals(simpleRowCount, count);
}
 
Example 7
Source File: TestXMLLoader.java    From spork with Apache License 2.0
public void testShouldReturn0TupleCountIfEmptyFileIsPassed() throws Exception {
   // modify the data content to avoid end tag for </ignoreProperty>
   ArrayList<String[]> testData = new ArrayList<String[]>();

   String filename = TestHelper.createTempFile(testData, "");
   PigServer pig = new PigServer(LOCAL);
   filename = filename.replace("\\", "\\\\");
   String query = "A = LOAD '" + filename + "' USING org.apache.pig.piggybank.storage.XMLLoader('ignoreProperty') as (doc:chararray);";
   pig.registerQuery(query);
   Iterator<?> it = pig.openIterator("A");
   int tupleCount = 0;
   while (it.hasNext()) {
       Tuple tuple = (Tuple) it.next();
       if (tuple == null)
           break;
       else {
           if (tuple.size() > 0) {
               tupleCount++;
           }
       }
   }
   assertEquals(0, tupleCount);
}
 
Example 8
Source File: TestBuiltin.java    From spork with Apache License 2.0
@Test
public void testUniqueID() throws Exception {
    Util.resetStateForExecModeSwitch();
    String inputFileName = "testUniqueID.txt";
    Util.createInputFile(cluster, inputFileName, new String[]
        {"1\n2\n3\n4\n5\n1\n2\n3\n4\n5\n"});
    PigServer pigServer = new PigServer(cluster.getExecType(), cluster.getProperties());
    pigServer.getPigContext().getProperties().setProperty("mapred.max.split.size", "10");
    pigServer.getPigContext().getProperties().setProperty("pig.noSplitCombination", "true");
    pigServer.registerQuery("A = load '" + inputFileName + "' as (name);");
    pigServer.registerQuery("B = foreach A generate name, UniqueID();");
    Iterator<Tuple> iter = pigServer.openIterator("B");
    iter.next().get(1).equals("0-0");
    iter.next().get(1).equals("0-1");
    iter.next().get(1).equals("0-2");
    iter.next().get(1).equals("0-3");
    iter.next().get(1).equals("0-4");
    iter.next().get(1).equals("1-0");
    iter.next().get(1).equals("1-1");
    iter.next().get(1).equals("1-1");
    iter.next().get(1).equals("1-2");
    iter.next().get(1).equals("1-3");
    iter.next().get(1).equals("1-4");
}
 
Example 9
Source File: TestTypeCheckingValidatorNewLP.java    From spork with Apache License 2.0
@Test
public void testCastEmptyInnerSchema() throws IOException, ParserException{
    final String INP_FILE = "testCastEmptyInnerSchema.txt";
    PrintWriter w = new PrintWriter(new FileWriter(INP_FILE));
    w.println("(1,2)");
    w.println("(2,3)");
    w.close();
    PigServer pigServer = new PigServer(LOCAL);

    String query = "a = load '" + INP_FILE + "' as (t:tuple());" +
    "b = foreach a generate (tuple(int, long))t;" +
    "c = foreach b generate t.$0 + t.$1;";

    Util.registerMultiLineQuery(pigServer, query);

    List<Tuple> expectedRes =
        Util.getTuplesFromConstantTupleStrings(
                new String[] {
                        "(3L)",
                        "(5L)",
                });
    Iterator<Tuple> it = pigServer.openIterator("c");
    Util.checkQueryOutputs(it, expectedRes);
}
 
Example 10
Source File: TestGroupConstParallel.java    From spork with Apache License 2.0
/**
 * Test parallelism for group all
 * @throws Exception
 */
@Test
public void testGroupAllWithParallel() throws Exception {
    PigServer pigServer = new PigServer(cluster.getExecType(), cluster
            .getProperties());
    
    
    pigServer.registerQuery("A = LOAD '" + INPUT_FILE + "' as (x:chararray);");
    pigServer.registerQuery("B = group A all parallel 5;");
    {
        Iterator<Tuple> iter = pigServer.openIterator("B");
        List<Tuple> expectedRes = 
            Util.getTuplesFromConstantTupleStrings(
                    new String[] {
                            "('all',{('one'),('two'),('two')})"
                    });
        Util.checkQueryOutputsAfterSort(iter, expectedRes);
        
        JobGraph jGraph = PigStats.get().getJobGraph();
        checkGroupAllWithParallelGraphResult(jGraph);
    }
}
 
Example 11
Source File: TestUnionOnSchema.java    From spork with Apache License 2.0
/**
 * Test UNION ONSCHEMA with input relation having udfs
 * @throws IOException
 * @throws ParserException
 */
@Test
public void testUnionOnSchemaInputUdfs() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL);
    String query =
        "  l1 = load '" + INP_FILE_2NUMS + "' as (i : int, j : chararray);"
        + "l2 = load '" + INP_FILE_2NUMS + "' as (i : int, j : chararray);"
        + "f1 = foreach l1 generate i, CONCAT(j,j) as cj, " +
        		"org.apache.pig.test.TestUnionOnSchema\\$UDFTupleNullSchema(i,j) as uo;"
        + "u = union onschema f1, l2;"
    ; 
    Util.registerMultiLineQuery(pig, query);

    Schema sch = pig.dumpSchema("u");
    String expectedSch = "{i: int,cj: chararray,uo: (),j: chararray}";
    Assert.assertTrue( expectedSch.equals( sch.toString() ) );
    

    Iterator<Tuple> it = pig.openIterator("u");
    List<Tuple> expectedRes = 
        Util.getTuplesFromConstantTupleStrings(
                new String[] {
                        "(1,null,null,'2')",
                        "(5,null,null,'3')",
                        "(1,'22',(1,'2'),null)",
                        "(5,'33',(5,'3'),null)"
                });
    Util.checkQueryOutputsAfterSort(it, expectedRes);

}
 
Example 12
Source File: VespaQueryTest.java    From vespa with Apache License 2.0
private void runQueryTest(String script, MockQueryHandler queryHandler, int port) throws Exception {
    final String endpoint = "http://localhost:" + port;

    HttpServer server = HttpServer.create(new InetSocketAddress(port), 0);
    server.createContext("/", queryHandler);
    server.start();

    PigServer ps = setup(script, endpoint);

    Iterator<Tuple> recommendations = ps.openIterator("recommendations");
    while (recommendations.hasNext()) {
        Tuple tuple = recommendations.next();

        String userid = (String) tuple.get(0);
        Integer rank = (Integer) tuple.get(1);
        String docid = (String) tuple.get(2);
        Double relevance = (Double) tuple.get(3);
        String fieldId = (String) tuple.get(4);
        String fieldContent = (String) tuple.get(5);

        MockQueryHandler.MockQueryHit hit = queryHandler.getHit(userid, rank);
        assertEquals(docid, hit.id);
        assertEquals(relevance, hit.relevance, 1e-3);
        assertEquals(fieldId, hit.fieldId);
        assertEquals(fieldContent, hit.fieldContent);
    }

    if (server != null) {
        server.stop(0);
    }

}
 
Example 13
Source File: TestPredeployedJar.java    From spork with Apache License 2.0
@Test
public void testPredeployedJar() throws IOException, ClassNotFoundException {
    Logger logger = Logger.getLogger(JobControlCompiler.class);
    logger.removeAllAppenders();
    logger.setLevel(Level.INFO);
    SimpleLayout layout = new SimpleLayout();
    File logFile = File.createTempFile("log", "");
    FileAppender appender = new FileAppender(layout, logFile.toString(), false, false, 0);
    logger.addAppender(appender);

    PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getConfiguration());
    pigServer.getPigContext().getProperties().put(PigConfiguration.PIG_OPT_FETCH, "false");
    String[] inputData = new String[] { "hello", "world" };
    Util.createInputFile(cluster, "a.txt", inputData);
    String jodaTimeJar = JarManager.findContainingJar(org.joda.time.DateTime.class);

    pigServer.registerQuery("a = load 'a.txt' as (line:chararray);");
    Iterator<Tuple> it = pigServer.openIterator("a");

    String content = FileUtils.readFileToString(logFile);
    Assert.assertTrue(content.contains(jodaTimeJar));

    // Point the appender at a fresh log file for the second run.
    logFile = File.createTempFile("log", "");
    logger.removeAllAppenders();
    appender = new FileAppender(layout, logFile.toString(), false, false, 0);
    logger.addAppender(appender);

    // Now let's mark the joda-time jar as predeployed.
    pigServer.getPigContext().markJarAsPredeployed(jodaTimeJar);
    it = pigServer.openIterator("a");

    content = FileUtils.readFileToString(logFile);
    Assert.assertFalse(content.contains(jodaTimeJar));
}
 
Example 14
Source File: TestUnion.java    From spork with Apache License 2.0
@Test
public void testCastingAfterUnionWithMultipleLoadersSameCaster()
    throws Exception {
    // Fields coming from different loaders but
    // having the same LoadCaster.
    File f1 = Util.createInputFile("tmp", "i1.txt", new String[] {"1\ta","2\tb","3\tc"});
    PigServer ps = new PigServer(ExecType.LOCAL, new Properties());
    // PigStorage and PigStorageWithStatistics have the same
    // LoadCaster(== Utf8StorageConverter)
    ps.registerQuery("A = load '" + Util.encodeEscape(f1.getAbsolutePath()) + "' as (a:bytearray, b:bytearray);");
    ps.registerQuery("B = load '" + Util.encodeEscape(f1.getAbsolutePath()) +
      "' using org.apache.pig.test.PigStorageWithStatistics() as (a:bytearray, b:bytearray);");
    ps.registerQuery("C = union onschema A,B;");
    ps.registerQuery("D = foreach C generate (int)a as a,(chararray)b as b;");
    // 'a' is coming from A and 'b' is coming from B; No overlaps.

    Schema dumpSchema = ps.dumpSchema("D");
    Schema expected = new Schema ();
    expected.add(new Schema.FieldSchema("a", DataType.INTEGER));
    expected.add(new Schema.FieldSchema("b", DataType.CHARARRAY));
    assertEquals(expected, dumpSchema);

    Iterator<Tuple> itr = ps.openIterator("D");
    int recordCount = 0;
    while(itr.next() != null)
        ++recordCount;
    assertEquals(6, recordCount);

}
 
Example 15
Source File: TestPigServerWithMacros.java    From spork with Apache License 2.0
@Test
public void testRegisterRemoteMacro() throws Throwable {
    PigServer pig = new PigServer(cluster.getExecType(), cluster.getProperties());

    String macroName = "util.pig";
    File macroFile = File.createTempFile("tmp", "");
    PrintWriter pw = new PrintWriter(new FileWriter(macroFile));
    pw.println("DEFINE row_count(X) RETURNS Z { Y = group $X all; $Z = foreach Y generate COUNT($X); };");
    pw.close();

    FileSystem fs = cluster.getFileSystem();
    fs.copyFromLocalFile(new Path(macroFile.getAbsolutePath()), new Path(macroName));

    // find the absolute path for the directory so that it does not
    // depend on configuration
    String absPath = fs.getFileStatus(new Path(macroName)).getPath().toString();

    Util.createInputFile(cluster, "testRegisterRemoteMacro_input", new String[]{"1", "2"});

    pig.registerQuery("import '" + absPath + "';");
    pig.registerQuery("a = load 'testRegisterRemoteMacro_input';");
    pig.registerQuery("b = row_count(a);");
    Iterator<Tuple> iter = pig.openIterator("b");

    assertEquals(2L, ((Long)iter.next().get(0)).longValue());

    pig.shutdown();
}
 
Example 16
Source File: TestUDFContext.java    From spork with Apache License 2.0
@Test
public void testUDFContext() throws Exception {
    File a = Util.createLocalInputFile("a.txt", new String[] { "dumb" });
    File b = Util.createLocalInputFile("b.txt", new String[] { "dumber" });
    FileLocalizer.deleteTempFiles();
    PigServer pig = new PigServer(ExecType.LOCAL, new Properties());
    String[] statement = { "A = LOAD '" + Util.encodeEscape(a.getAbsolutePath()) +
            "' USING org.apache.pig.test.utils.UDFContextTestLoader('joe');",
        "B = LOAD '" + Util.encodeEscape(b.getAbsolutePath()) +
        "' USING org.apache.pig.test.utils.UDFContextTestLoader('jane');",
        "C = union A, B;",
        "D = FOREACH C GENERATE $0, $1, org.apache.pig.test.utils.UDFContextTestEvalFunc($0), " +
        "org.apache.pig.test.utils.UDFContextTestEvalFunc2($0);" };

    File tmpFile = File.createTempFile("temp_jira_851", ".pig");
    FileWriter writer = new FileWriter(tmpFile);
    for (String line : statement) {
        writer.write(line + "\n");
    }
    writer.close();

    pig.registerScript(tmpFile.getAbsolutePath());
    Iterator<Tuple> iterator = pig.openIterator("D");
    while (iterator.hasNext()) {
        Tuple tuple = iterator.next();
        if ("dumb".equals(tuple.get(0).toString())) {
            assertEquals(tuple.get(1).toString(), "joe");
        } else if ("dumber".equals(tuple.get(0).toString())) {
            assertEquals(tuple.get(1).toString(), "jane");
        }
        assertEquals(Integer.valueOf(tuple.get(2).toString()), Integer.valueOf(5));
        assertEquals(tuple.get(3).toString(), "five");
    }
}
 
Example 17
Source File: TestPigScriptParser.java    From spork with Apache License 2.0
@Test
public void testDefineUDF() throws Exception {
    PigServer ps = new PigServer(ExecType.LOCAL);
    String inputData[] = {
            "dshfdskfwww.xyz.com/sportsjoadfjdslpdshfdskfwww.xyz.com/sportsjoadfjdsl" ,
            "kas;dka;sd" ,
            "jsjsjwww.xyz.com/sports" ,
            "jsdLSJDcom/sports" ,
            "wwwJxyzMcom/sports"
    };
    File f = Util.createFile(inputData);

    String[] queryLines = new String[] {
            // the reason we have 4 backslashes below is we really want to put two backslashes but
            // since this is to be represented in a Java String, we escape each backslash with one more
            // backslash - hence 4. In a pig script in a file, this would be
            // www\\.xyz\\.com
            "define minelogs org.apache.pig.test.RegexGroupCount('www\\\\.xyz\\\\.com/sports');" ,
            "A = load '" + Util.generateURI(f.getAbsolutePath(), ps.getPigContext()) + "'  using PigStorage() as (source : chararray);" ,
            "B = foreach A generate minelogs(source) as sportslogs;" };
    for (String line : queryLines) {
        ps.registerQuery(line);
    }
    Iterator<Tuple> it = ps.openIterator("B");
    int[] expectedResults = new int[] {2,0,1,0,0};
    int i = 0;
    while (it.hasNext()) {
        Tuple t = it.next();
        assertEquals(expectedResults[i++], t.get(0));
    }
}
 
Example 18
Source File: TestHiveColumnarLoader.java    From spork with Apache License 2.0
@Test
public void testYearMonthDayHourPartitionedFilesWithProjection() throws IOException {
    int count = 0;

    String funcSpecString = "org.apache.pig.piggybank.storage.HiveColumnarLoader('f1 string,f2 string,f3 string')";

    PigServer server = new PigServer(ExecType.LOCAL);
    server.setBatchOn();
    server.registerFunction("org.apache.pig.piggybank.storage.HiveColumnarLoader",
            new FuncSpec(funcSpecString));

    server.registerQuery("a = LOAD '" + Util.encodeEscape(yearMonthDayHourPartitionedDir.getAbsolutePath())
            + "' using " + funcSpecString + ";");
    server.registerQuery("f = FILTER a by year=='2010';");
    server.registerQuery("b = foreach f generate f1,f2;");

    Iterator<Tuple> result = server.openIterator("b");

    Tuple t = null;
    while ((t = result.next()) != null) {
        assertEquals(2, t.size());
        assertEquals(DataType.CHARARRAY, t.getType(0));
        count++;
    }

    Assert.assertEquals(240, count);

}
 
Example 19
Source File: TestCombiner.java    From spork with Apache License 2.0
@Test
public void testDistinctAggs1() throws Exception {
    // test the use of combiner for distinct aggs:
    String input[] = {
                    "pig1\t18\t2.1",
                    "pig2\t24\t3.3",
                    "pig5\t45\t2.4",
                    "pig1\t18\t2.1",
                    "pig1\t19\t2.1",
                    "pig2\t24\t4.5",
                    "pig1\t20\t3.1" };

    Util.createInputFile(cluster, "distinctAggs1Input.txt", input);
    PigServer pigServer = new PigServer(cluster.getExecType(), properties);
    pigServer.registerQuery("a = load 'distinctAggs1Input.txt' as (name:chararray, age:int, gpa:double);");
    pigServer.registerQuery("b = group a by name;");
    pigServer.registerQuery("c = foreach b  {" +
            "        x = distinct a.age;" +
            "        y = distinct a.gpa;" +
            "        z = distinct a;" +
            "        generate group, COUNT(x), SUM(x.age), SUM(y.gpa), SUM(a.age), " +
            "                       SUM(a.gpa), COUNT(z.age), COUNT(z), SUM(z.age);};");

    // make sure there is a combine plan in the explain output
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    PrintStream ps = new PrintStream(baos);
    pigServer.explain("c", ps);
    assertTrue(baos.toString().matches("(?si).*combine plan.*"));

    HashMap<String, Object[]> results = new HashMap<String, Object[]>();
    results.put("pig1", new Object[] { "pig1", 3L, 57L, 5.2, 75L, 9.4, 3L, 3L, 57L });
    results.put("pig2", new Object[] { "pig2", 1L, 24L, 7.8, 48L, 7.8, 2L, 2L, 48L });
    results.put("pig5", new Object[] { "pig5", 1L, 45L, 2.4, 45L, 2.4, 1L, 1L, 45L });
    Iterator<Tuple> it = pigServer.openIterator("c");
    while (it.hasNext()) {
        Tuple t = it.next();
        List<Object> fields = t.getAll();
        Object[] expected = results.get(fields.get(0));
        int i = 0;
        for (Object field : fields) {
            assertEquals(expected[i++], field);
        }
    }
    Util.deleteFile(cluster, "distinctAggs1Input.txt");
    pigServer.shutdown();
}
 
Example 20
Source File: TestBZip.java    From spork with Apache License 2.0
/**
 * Tests the end-to-end writing and reading of an empty BZip file.
 */
@Test
public void testEmptyBzipInPig() throws Exception {
    PigServer pig = new PigServer(cluster.getExecType(), properties);

    File in = File.createTempFile("junit", ".tmp");
    in.deleteOnExit();

    File out = File.createTempFile("junit", ".bz2");
    out.delete();
    String clusterOutputFilePath = Util.removeColon(out.getAbsolutePath());

    FileOutputStream fos = new FileOutputStream(in);
    fos.write("55\n".getBytes());
    fos.close();
    System.out.println(in.getAbsolutePath());

    pig.registerQuery("AA = load '"
            + Util.generateURI(in.getAbsolutePath(), pig.getPigContext())
            + "';");
    pig.registerQuery("A=foreach (group (filter AA by $0 < '0') all) generate flatten($1);");
    pig.registerQuery("store A into '" + Util.encodeEscape(clusterOutputFilePath) + "';");
    FileSystem fs = FileSystem.get(ConfigurationUtil.toConfiguration(
            pig.getPigContext().getProperties()));
    FileStatus[] outputFiles = fs.listStatus(new Path(clusterOutputFilePath),
            Util.getSuccessMarkerPathFilter());
    FSDataInputStream is = fs.open(outputFiles[0].getPath());
    CBZip2InputStream cis = new CBZip2InputStream(is, -1, out.length());

    // Just a sanity check, to make sure it was a bzip file; we
    // will do the value verification later
    assertEquals(-1, cis.read(new byte[100]));
    cis.close();

    pig.registerQuery("B = load '" + Util.encodeEscape(clusterOutputFilePath) + "';");
    pig.openIterator("B");

    in.delete();
    Util.deleteFile(cluster, clusterOutputFilePath);

}