org.apache.pig.PigServer Java Examples

The following examples show how to use org.apache.pig.PigServer. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestUnionOnSchema.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Runs UNION ONSCHEMA over a relation whose untyped (bytearray) columns were
 * cast to int and the original untyped relation, then compares the sorted
 * output with the expected tuples.
 *
 * @throws IOException
 * @throws ParserException
 */
@Test
public void testUnionOnSchemaCastOnByteArray() throws IOException, ParserException {
    PigServer server = new PigServer(ExecType.LOCAL);

    // l1 is loaded without types, f1 casts both columns to int, u unions the two.
    StringBuilder script = new StringBuilder();
    script.append("  l1 = load '" + INP_FILE_2NUMS + "' as (i, j);");
    script.append(" f1 = foreach l1 generate (int)i, (int)j;");
    script.append("u = union onschema f1, l1;");
    Util.registerMultiLineQuery(server, script.toString());

    Iterator<Tuple> actualRows = server.openIterator("u");

    // Each input row is expected twice: once from f1 and once from l1.
    String[] expectedRows = {"(1,2)", "(5,3)", "(1,2)", "(5,3)"};
    List<Tuple> expectedTuples = Util.getTuplesFromConstantTupleStrings(expectedRows);

    Util.checkQueryOutputsAfterSort(actualRows, expectedTuples);
}
 
Example #2
Source File: TestBlackAndWhitelistValidator.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Sets the blacklist to "Define" and registers a script containing a DEFINE
 * statement; the registration is expected to fail with a parsing error that
 * says DEFINE is not permitted.
 */
@Test
public void testPreprocessorCommand3() throws Exception {
    try {
        ctx.getProperties().setProperty(PigConfiguration.PIG_BLACKLIST, "Define");
        PigServer server = new PigServer(ctx);
        Data mockData = resetData(server);

        mockData.set("foo",
                tuple("a", 1, "b"),
                tuple("b", 2, "c"),
                tuple("c", 3, "d"));

        StringBuilder pigScript = new StringBuilder();
        pigScript.append("set io.sort.mb 1000;");
        pigScript.append("DEFINE UrlDecode InvokeForString('java.net.URLDecoder.decode', 'String String');  ");
        pigScript.append("A = LOAD 'foo' USING mock.Storage() AS (f1:chararray,f2:int,f3:chararray);");
        pigScript.append("B = order A by f1,f2,f3 DESC;");
        pigScript.append("STORE B INTO 'bar' USING mock.Storage();");

        server.registerScript(IOUtils.toInputStream(pigScript));
        // Reaching this point means the blacklisted command was accepted.
        fail();
    } catch (Exception e) {
        Util.assertExceptionAndMessage(FrontendException.class, e,
                "Error during parsing. DEFINE command is not permitted. ");
    }
}
 
Example #3
Source File: TestSequenceFileLoader.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the PigServer under test and writes every DATA entry, keyed by its
 * index, into a SequenceFile located at a freshly created temp file.
 */
@Override
public void setUp() throws Exception {
  pigServer = new PigServer(LOCAL);

  File seqFile = File.createTempFile("test", ".txt");
  tmpFileName = seqFile.getAbsolutePath();
  System.err.println("fileName: "+tmpFileName);

  Path seqPath = new Path("file:///"+tmpFileName);
  JobConf jobConf = new JobConf();
  FileSystem fileSystem = FileSystem.get(seqPath.toUri(), jobConf);

  // The same key/value holders are reused for every appended record.
  IntWritable indexKey = new IntWritable();
  Text entryValue = new Text();
  SequenceFile.Writer seqWriter = null;
  try {
    seqWriter = SequenceFile.createWriter(fileSystem, jobConf, seqPath,
        indexKey.getClass(), entryValue.getClass());
    int position = 0;
    while (position < DATA.length) {
      indexKey.set(position);
      entryValue.set(DATA[position]);
      seqWriter.append(indexKey, entryValue);
      position++;
    }
  } finally {
    // Close the writer even if creating or appending failed part-way.
    IOUtils.closeStream(seqWriter);
  }
}
 
Example #4
Source File: TestDefaultDateTimeZone.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Configures "pig.datetime.default.tz" with a DST-observing zone and checks
 * that the first two datetimes after 2014-01-01 print offsets -0500 and -0400.
 */
@Test
public void testDST() throws Exception {
    // America/New_York observes DST, so both offsets should appear.
    final String zoneWithDst = "America/New_York";
    Properties props = new Properties();
    props.setProperty("pig.datetime.default.tz", zoneWithDst);

    PigServer server = new PigServer(Util.getLocalTestMode(), props);
    server.registerQuery("a = load '"
            + Util.encodeEscape(Util.generateURI(tmpFile.toString(), server.getPigContext()))
            + "' as (test:datetime);");
    server.registerQuery("b = filter a by test > ToDate('2014-01-01T00:00:00.000');");
    server.registerQuery("c = foreach b generate ToString(test, 'Z') as tz;");

    Iterator<Tuple> offsets = server.openIterator("c");

    Tuple standardTime = offsets.next();
    assertEquals(Util.buildTuple("-0500"), standardTime);

    Tuple daylightTime = offsets.next();
    assertEquals(Util.buildTuple("-0400"), daylightTime);
}
 
Example #5
Source File: Util.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Copies one local file to another local path by running an {@code fs -cp}
 * command through a non-interactive GruntParser backed by a local PigServer.
 * The destination is registered for deletion when the JVM exits.
 *
 * @param fromLocalFileName path of the file to copy
 * @param toLocalFileName path to copy the file to
 * @throws IOException if the server cannot be created or the command fails
 *         to parse (the parser's ParseException is wrapped as the cause)
 */
public static void copyFromLocalToLocal(String fromLocalFileName,
        String toLocalFileName) throws IOException {
    if (Util.WINDOWS) {
        // The command below is given forward-slash paths on Windows.
        fromLocalFileName = fromLocalFileName.replace('\\', '/');
        toLocalFileName = toLocalFileName.replace('\\', '/');
    }

    PigServer localServer = new PigServer(ExecType.LOCAL, new Properties());
    String copyCommand = getMkDirCommandForHadoop2_0(toLocalFileName)
            + "fs -cp " + fromLocalFileName + " " + toLocalFileName;

    File destination = new File(toLocalFileName);
    destination.deleteOnExit();

    GruntParser gruntParser = new GruntParser(new StringReader(copyCommand), localServer);
    gruntParser.setInteractive(false);
    try {
        gruntParser.parseStopOnError();
    } catch (org.apache.pig.tools.pigscript.parser.ParseException parseFailure) {
        throw new IOException(parseFailure);
    }
}
 
Example #6
Source File: TestXMLLoader.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Loads a temp file built from {@code inlineClosedTags} with
 * {@code XMLLoader('event')} and feeds every returned document to an XML
 * parser; a document that is not well-formed makes the parser throw and
 * therefore fails the test.
 *
 * @throws Exception if the file cannot be created or loaded, or if a returned
 *         document fails to parse
 */
public void testXMLLoaderShouldReturnValidXML() throws Exception {
  String filename = TestHelper.createTempFile(inlineClosedTags, "");
  PigServer pig = new PigServer(LOCAL);
  // Backslashes are doubled before the path is embedded in the quoted Pig
  // string (presumably for Windows paths -- confirm).
  filename = filename.replace("\\", "\\\\");
  String query = "A = LOAD '" + filename + "' USING org.apache.pig.piggybank.storage.XMLLoader('event') as (doc:chararray);";
  pig.registerQuery(query);

  // One builder is enough; it can parse any number of documents.
  DocumentBuilder docBuilder =
          DocumentBuilderFactory.newInstance().newDocumentBuilder();

  Iterator<?> it = pig.openIterator("A");
  while (it.hasNext()) {
      Tuple tuple = (Tuple) it.next();
      if (tuple == null) {
          break;
      }
      // Encode explicitly as UTF-8 instead of the platform default charset so
      // the bytes handed to the parser do not depend on the machine's locale.
      byte[] xmlBytes = ((String) tuple.get(0)).getBytes("UTF-8");
      docBuilder.parse(new ByteArrayInputStream(xmlBytes));
  }
}
 
Example #7
Source File: TestBlackAndWhitelistValidator.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Sets the blacklist to "dEfaUlt" (mixed case) and registers a script that
 * uses the %Default preprocessor command; the registration is expected to
 * fail with a message saying DEFAULT is not permitted.
 */
@Test
public void testPreprocessorCommands2() throws Exception {
    try {
        ctx.getProperties().setProperty(PigConfiguration.PIG_BLACKLIST, "dEfaUlt");
        PigServer server = new PigServer(ctx);
        Data mockData = resetData(server);

        mockData.set("foo",
                tuple("a", 1, "b"),
                tuple("b", 2, "c"),
                tuple("c", 3, "d"));

        StringBuilder pigScript = new StringBuilder();
        pigScript.append("set io.sort.mb 1000;");
        pigScript.append("%Default input 'foo';");
        pigScript.append("A = LOAD '$input' USING mock.Storage() AS (f1:chararray,f2:int,f3:chararray);");
        pigScript.append("B = order A by f1,f2,f3 DESC;");
        pigScript.append("STORE B INTO 'bar' USING mock.Storage();");

        server.registerScript(IOUtils.toInputStream(pigScript));
        // Reaching this point means the blacklisted command was accepted.
        fail();
    } catch (Exception e) {
        // RuntimeException is checked here rather than FrontendException
        // because Pig wraps the preprocessor's error in a RuntimeException.
        Util.assertExceptionAndMessage(RuntimeException.class, e,
                "DEFAULT command is not permitted. ");
    }
}
 
Example #8
Source File: TestHiveColumnarLoader.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Loads the date-partitioned directory through HiveColumnarLoader restricted
 * to the {@code startingDate:endingDate} range and checks that the number of
 * returned tuples equals {@code datePartitionedRowCount}.
 *
 * @throws IOException if the query cannot be registered or executed
 */
@Test
public void testDatePartitionedFiles() throws IOException {
    String funcSpecString = "org.apache.pig.piggybank.storage.HiveColumnarLoader('f1 string,f2 string,f3 string'"
            + ", '" + startingDate + ":" + endingDate + "')";

    System.out.println(funcSpecString);

    PigServer server = new PigServer(ExecType.LOCAL);
    server.setBatchOn();
    server.registerFunction("org.apache.pig.piggybank.storage.HiveColumnarLoader",
            new FuncSpec(funcSpecString));

    server.registerQuery("a = LOAD '" + Util.encodeEscape(datePartitionedDir.getAbsolutePath()) + "' using "
            + funcSpecString + ";");
    Iterator<Tuple> result = server.openIterator("a");

    // Guard next() with hasNext() instead of relying on next() returning null
    // at the end, which the Iterator contract does not promise. The null check
    // is kept so an iterator that still signals the end with null stops here.
    int count = 0;
    while (result.hasNext()) {
        if (result.next() == null) {
            break;
        }
        count++;
    }

    Assert.assertEquals(datePartitionedRowCount, count);
}
 
Example #9
Source File: TestCombinedLogLoader.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Loads a temp file built from {@code data} with CombinedLogLoader, examines
 * every returned tuple against {@code EXPECTED}, and checks that as many
 * tuples were read as {@code data} has entries.
 */
@Test
public void testLoadFromPigServer() throws Exception {
    String logFile = TestHelper.createTempFile(data, " ");
    PigServer server = new PigServer(ExecType.LOCAL);
    // Backslashes are doubled before the path is embedded in the Pig string.
    logFile = logFile.replace("\\", "\\\\");
    server.registerQuery("A = LOAD '" + logFile + "' USING org.apache.pig.piggybank.storage.apachelog.CombinedLogLoader();");

    int seen = 0;
    Iterator<?> rows = server.openIterator("A");
    while (rows.hasNext()) {
        Tuple current = (Tuple) rows.next();
        if (current == null) {
            // A null tuple ends the iteration early.
            break;
        }
        TestHelper.examineTuple(EXPECTED, current, seen);
        seen++;
    }

    assertEquals(data.size(), seen);
}
 
Example #10
Source File: TestProjectStarExpander.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Projects {@code *} twice in one foreach (once renamed to aa, bb, cc) and
 * checks both the resulting schema and the sorted output.
 *
 * @throws IOException
 * @throws ParserException
 */
@Test
public void testProjectStarMulti() throws IOException, ParserException {
    PigServer server = new PigServer(ExecType.LOCAL);

    StringBuilder script = new StringBuilder();
    script.append("  l1 = load '" + INP_FILE_5FIELDS + "' as (a : int, b : int, c : int);");
    script.append("f = foreach l1 generate * as (aa, bb, cc), *;");
    Util.registerMultiLineQuery(server, script.toString());

    // The renamed projection comes first, followed by the original columns.
    Schema expectedSchema = Utils.getSchemaFromString(
            "aa : int, bb : int, cc : int, a : int, b : int, c : int");
    Schema actualSchema = server.dumpSchema("f");
    assertEquals("Checking expected schema", expectedSchema, actualSchema);

    String[] expectedRows = {
        "(10,20,30,10,20,30)",
        "(11,21,31,11,21,31)",
    };
    List<Tuple> expectedTuples = Util.getTuplesFromConstantTupleStrings(expectedRows);

    Iterator<Tuple> actualRows = server.openIterator("f");
    Util.checkQueryOutputsAfterSort(actualRows, expectedTuples);
}
 
Example #11
Source File: TestBuiltInBagToTupleOrString.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Stores the result of BagToString applied to an empty bag and checks that
 * the stored value is the empty string.
 */
@Test
public void testPigScriptEmptyBagForBagToStringUDF() throws Exception {
    PigServer server = new PigServer(ExecType.LOCAL);
    Data mockData = resetData(server);

    // A single input row whose only field is an empty bag.
    mockData.set("foo", "myBag:bag{t:(l:chararray)}", tuple(bag()));

    server.registerQuery("A = LOAD 'foo' USING mock.Storage();");
    server.registerQuery("B = FOREACH A GENERATE BagToString(myBag) as myBag;");
    server.registerQuery("STORE B INTO 'bar' USING mock.Storage();");

    List<Tuple> stored = mockData.get("bar");
    // An empty bag is rendered as an empty string.
    assertEquals(tuple(""), stored.get(0));
}
 
Example #12
Source File: TestGrunt.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Writes a small Python UDF script to a local file, registers it through
 * Grunt under the "pig" namespace using jython, and checks that the alias
 * "pig.square" resolves to a function spec.
 */
@Test
public void testRegisterScripts() throws Throwable {
    String[] pythonLines = {
            "#!/usr/bin/python",
            "@outputSchema(\"x:{t:(num:long)}\")",
            "def square(number):" ,
            "\treturn (number * number)"
    };
    Util.createLocalInputFile( "testRegisterScripts.py", pythonLines);

    PigServer pigServer = new PigServer(cluster.getExecType(), cluster.getProperties());
    PigContext pigContext = pigServer.getPigContext();

    // Feed the register command to Grunt as if it had been read from a stream.
    String registerCommand = "register testRegisterScripts.py using jython as pig\n";
    BufferedReader commandReader = new BufferedReader(
            new InputStreamReader(new ByteArrayInputStream(registerCommand.getBytes())));

    Grunt grunt = new Grunt(commandReader, pigContext);
    grunt.exec();

    assertTrue(pigContext.getFuncSpecFromAlias("pig.square") != null);
}
 
Example #13
Source File: TestHiveColumnarLoader.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Loads a single data file through HiveColumnarLoader without projections and
 * checks that every tuple has three fields, that the first field is a
 * chararray, and that the number of tuples equals {@code simpleRowCount}.
 *
 * @throws IOException if the query cannot be registered or executed
 */
@Test
public void testReadingSingleFileNoProjections() throws IOException {
    String funcSpecString = "org.apache.pig.piggybank.storage.HiveColumnarLoader('f1 string,f2 string,f3 string')";

    String singlePartitionedFile = simpleDataFile.getAbsolutePath();

    PigServer server = new PigServer(ExecType.LOCAL);
    server.setBatchOn();
    server.registerFunction("org.apache.pig.piggybank.storage.HiveColumnarLoader",
            new FuncSpec(funcSpecString));

    server.registerQuery("a = LOAD '" + Util.encodeEscape(singlePartitionedFile) + "' using " + funcSpecString
            + ";");

    Iterator<Tuple> result = server.openIterator("a");

    // Guard next() with hasNext() instead of relying on next() returning null
    // at the end, which the Iterator contract does not promise. The null check
    // is kept so an iterator that still signals the end with null stops here.
    int count = 0;
    while (result.hasNext()) {
        Tuple t = result.next();
        if (t == null) {
            break;
        }
        assertEquals(3, t.size());
        assertEquals(DataType.CHARARRAY, t.getType(0));
        count++;
    }

    Assert.assertEquals(simpleRowCount, count);
}
 
Example #14
Source File: TestBlackAndWhitelistValidator.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Tests the blacklist filter: "set" is blacklisted and a script that starts
 * with a SET command is registered. The registration is expected to fail
 * with a {@link FrontendException}.
 *
 * @throws Exception
 */
@Test
public void testBlacklist() throws Exception {
    try {
        ctx.getProperties().setProperty(PigConfiguration.PIG_BLACKLIST, "set");
        PigServer server = new PigServer(ctx);
        Data mockData = resetData(server);

        mockData.set("foo",
                tuple("a", 1, "b"),
                tuple("b", 2, "c"),
                tuple("c", 3, "d"));

        StringBuilder pigScript = new StringBuilder();
        pigScript.append("set io.sort.mb 1000;");
        pigScript.append("A = LOAD 'foo' USING mock.Storage() AS (f1:chararray,f2:int,f3:chararray);");
        pigScript.append("B = order A by f1,f2,f3 DESC;");
        pigScript.append("STORE B INTO 'bar' USING mock.Storage();");

        server.registerScript(IOUtils.toInputStream(pigScript));
        // Reaching this point means the blacklisted command was accepted.
        fail();
    } catch (Exception e) {
        Util.assertExceptionAndMessage(FrontendException.class, e,
                "SET command is not permitted. ");
    }
}
 
Example #15
Source File: TestXMLLoader.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Loads a temp file created from an empty data list with
 * {@code XMLLoader('ignoreProperty')} and checks that no non-empty tuple is
 * returned.
 */
public void testShouldReturn0TupleCountIfEmptyFileIsPassed() throws Exception {
   // No rows are added, so the generated input file carries no data.
   ArrayList<String[]> noRows = new ArrayList<String[]>();

   String emptyFile = TestHelper.createTempFile(noRows, "");
   PigServer server = new PigServer(LOCAL);
   // Backslashes are doubled before the path is embedded in the Pig string.
   emptyFile = emptyFile.replace("\\", "\\\\");
   String loadStatement = "A = LOAD '" + emptyFile + "' USING org.apache.pig.piggybank.storage.XMLLoader('ignoreProperty') as (doc:chararray);";
   server.registerQuery(loadStatement);

   int nonEmptyTuples = 0;
   Iterator<?> rows = server.openIterator("A");
   while (rows.hasNext()) {
       Tuple current = (Tuple) rows.next();
       if (current == null) {
           break;
       }
       // Only tuples that actually carry fields are counted.
       if (current.size() > 0) {
           nonEmptyTuples++;
       }
   }
   assertEquals(0, nonEmptyTuples);
}
 
Example #16
Source File: TestBuiltInBagToTupleOrString.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Stores the result of BagToString applied to a bag that contains a tuple
 * holding a nested bag, and checks the stored string.
 */
@Test
public void testPigScriptNestedTupleForBagToStringUDF() throws Exception {
    PigServer server = new PigServer(ExecType.LOCAL);
    Data mockData = resetData(server);

    // The third element of the outer bag is a tuple wrapping a bag of (c),(d).
    Tuple tupleWithInnerBag = tuple(bag(tuple("c"), tuple("d")));
    mockData.set("foo", "myBag:bag{t:(l:chararray)}",
            tuple(bag(tuple("a"), tuple("b"), tupleWithInnerBag, tuple("e"))));

    server.registerQuery("A = LOAD 'foo' USING mock.Storage();");
    server.registerQuery("B = FOREACH A GENERATE BagToString(myBag) as myBag;");
    server.registerQuery("STORE B INTO 'bar' USING mock.Storage();");

    List<Tuple> stored = mockData.get("bar");
    assertEquals(tuple("a_b_{(c),(d)}_e"), stored.get(0));
}
 
Example #17
Source File: TestSummary.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the Summary UDF over 1002 generated tuples grouped together, prints
 * the stored JSON, parses it back into a TupleSummaryData and prints that.
 * The test makes no assertions; it only fails if a step throws.
 */
@Test
public void testPigScript() throws Exception {
  PigServer server = new PigServer(ExecType.LOCAL);
  Data mockData = Storage.resetData(server);

  List<Tuple> rows = new ArrayList<Tuple>();
  for (int row = 0; row < 1002; row++) {
    rows.add(t("a", "b" + row, 1L, b(t("a", m("foo", "bar")))));
  }
  mockData.set("in", "a:chararray, a1:chararray, b:int, c:{t:(a2:chararray, b2:[])}", rows);

  server.registerQuery("A = LOAD 'in' USING mock.Storage();");
  server.registerQuery("B = FOREACH (GROUP A ALL) GENERATE "+Summary.class.getName()+"(A);");
  server.registerQuery("STORE B INTO 'out' USING mock.Storage();");

  // The first field of the first stored tuple holds the summary as JSON.
  Object summaryJson = mockData.get("out").get(0).get(0);
  System.out.println(summaryJson);

  TupleSummaryData summary = SummaryData.fromJSON((String) summaryJson, TupleSummaryData.class);
  System.out.println(summary);
}
 
Example #18
Source File: TestHadoopJobHistoryLoader.java    From spork with Apache License 2.0 6 votes vote down vote up
/**
 * Loads INPUT_DIR with HadoopJobHistoryLoader and checks a set of entries in
 * the job map (first field) of the first returned tuple.
 */
@Test
public void testHadoopJHLoader() throws Exception {
    PigServer server = new PigServer(ExecType.LOCAL);
    server.registerQuery("a = load '" + INPUT_DIR
            + "' using org.apache.pig.piggybank.storage.HadoopJobHistoryLoader() "
            + "as (j:map[], m:map[], r:map[]);");

    Iterator<Tuple> rows = server.openIterator("a");
    assertTrue(rows.hasNext());
    Tuple first = rows.next();

    // Field 0 is the job map "j" declared in the load schema above.
    @SuppressWarnings("unchecked")
    Map<String, Object> jobInfo = (Map<String, Object>) first.get(0);

    assertEquals("3eb62180-5473-4301-aa22-467bd685d466", (String) jobInfo.get("PIG_SCRIPT_ID"));
    assertEquals("job_201004271216_9998", (String) jobInfo.get("JOBID"));
    assertEquals("job_201004271216_9995", (String) jobInfo.get("PIG_JOB_PARENTS"));
    assertEquals("0.8.0-dev", (String) jobInfo.get("PIG_VERSION"));
    assertEquals("0.20.2", (String) jobInfo.get("HADOOP_VERSION"));
    assertEquals("d", (String) jobInfo.get("PIG_JOB_ALIAS"));
    assertEquals("PigLatin:Test.pig", jobInfo.get("JOBNAME"));
    assertEquals("ORDER_BY", (String) jobInfo.get("PIG_JOB_FEATURE"));
    assertEquals("1", (String) jobInfo.get("TOTAL_MAPS"));
    assertEquals("1", (String) jobInfo.get("TOTAL_REDUCES"));
}
 
Example #19
Source File: TestBuiltin.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * End-to-end check of the CONCAT() builtin with nested and vararg argument
 * lists, including the case where one argument is null.
 *
 * @throws Exception
 */
@Test
public void testComplexMultiCONCAT() throws Exception {
    String input = "vararg_concat_test_jira_3444.txt";
    // One dummy row is enough: every query below concatenates constants only.
    Util.createLocalInputFile(input, new String[]{"dummy"});
    PigServer server = new PigServer(ExecType.LOCAL);
    server.registerQuery("A = LOAD '"+input+"' as (x:chararray);");

    // Fully nested two-argument calls.
    server.registerQuery("B = foreach A generate CONCAT('a', CONCAT('b',CONCAT('c','d')));");
    Tuple nested = server.openIterator("B").next();
    assertEquals("abcd", nested.get(0));

    // A single call with four arguments.
    server.registerQuery("B = foreach A generate CONCAT('a', 'b', 'c', 'd');");
    Tuple flat = server.openIterator("B").next();
    assertEquals("abcd", flat.get(0));

    // A vararg call with a nested call in the middle.
    server.registerQuery("B = foreach A generate CONCAT('a', CONCAT('b','c'), 'd');");
    Tuple mixed = server.openIterator("B").next();
    assertEquals("abcd", mixed.get(0));

    // Concat on a null value returns null
    server.registerQuery("B = foreach A generate CONCAT('a', CONCAT('b',Null), 'd');");
    Tuple withNull = server.openIterator("B").next();
    assertNull(withNull.get(0));
}
 
Example #20
Source File: TestExampleGenerator.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Requests example data for a foreach that sums two int columns and checks
 * that a result map is returned.
 */
@Test
public void testForeach() throws ExecException, IOException {
    PigServer server = new PigServer(pigContext);

    String loadStatement = "A = load " + A
            + " using PigStorage() as (x : int, y : int);";
    server.registerQuery(loadStatement);
    server.registerQuery("B = foreach A generate x + y as sum;");

    Map<Operator, DataBag> examples = server.getExamples("B");
    assertNotNull(examples);
}
 
Example #21
Source File: TestFRJoin2.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the optimized MR plan for a script with scalar (soft-link)
 * references and fails if any operator in the plan looks like a file
 * concatenation job.
 */
@Test
public void testSoftLinkDoesNotCreateUnnecessaryConcatJob()
              throws Exception {
    PigServer server = new PigServer(ExecType.MAPREDUCE,
                                  cluster.getProperties());
    server.setBatchOn();

    Properties pigProps = server.getPigContext().getProperties();
    pigProps.setProperty(
              MRCompiler.FILE_CONCATENATION_THRESHOLD, String.valueOf(FILE_MERGE_THRESHOLD));
    server.getPigContext().getProperties().setProperty("pig.noSplitCombination", "false");

    StringBuilder script = new StringBuilder();
    script.append("A = LOAD '" + INPUT_FILE + "' as (x:int,y:int);");
    script.append("B = group A all;");
    script.append("C = LOAD '" + INPUT_FILE + "' as (x:int,y:int);");
    script.append("D = group C by x;");
    script.append("E = group D all;");
    script.append("F = FOREACH E generate B.$0;");
    script.append("Z = LOAD '" + INPUT_FILE + "' as (x:int,y:int);");
    script.append("Y = FOREACH E generate F.$0;");
    script.append("STORE Y into '/tmp/output2';");

    MROperPlan plan = Util.buildMRPlanWithOptimizer(
            Util.buildPp(server, script.toString()), server.getPigContext());

    // look for concat job
    for (MapReduceOper oper : plan) {
        // concat job == map plan with only load and store && empty reduce plan
        boolean mapIsLoadStoreOnly = oper.mapPlan.size() == 2;
        if (mapIsLoadStoreOnly && oper.reducePlan.isEmpty()) {
            fail("Somehow concatjob was created even though there is no large or multiple inputs.");
        }
    }
}
 
Example #22
Source File: TestBlackAndWhitelistValidator.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that {@code deleteFile} fails with a {@link FrontendException}
 * when "rm" is disallowed via the blacklist.
 */
@Test(expected = FrontendException.class)
public void testBlacklistRemoveWithPigServer() throws Exception {
    ctx.getProperties().setProperty(PigConfiguration.PIG_BLACKLIST, "rm");

    PigServer server = new PigServer(ctx);
    server.deleteFile("foo");
}
 
Example #23
Source File: TestExampleGenerator.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Requests example data for a filter with two AND-ed conditions and checks
 * that a result map is returned.
 */
@Test
public void testFilter2() throws Exception {
    PigServer server = new PigServer(pigContext);

    String loadStatement = "A = load " + A
            + " using PigStorage() as (x : int, y : int);\n";
    server.registerQuery(loadStatement);

    String filterStatement = "B = filter A by x > 5 AND y < 6;";
    server.registerQuery(filterStatement);

    Map<Operator, DataBag> examples = server.getExamples("B");
    assertNotNull(examples);
}
 
Example #24
Source File: TestTypedMap.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Loads two typed maps ({@code map[int]}), looks up the key {@code 'key'} in
 * each, and checks that the projected schema is {@code {int}} and that the
 * values 1 and 2 are returned in that order with nothing after them.
 *
 * @throws IOException if the input file cannot be created or the query fails
 * @throws ParserException declared by the original test signature
 */
@Test
public void testSimpleMapKeyLookup() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL, new Properties());
    String[] input = {
            "[key#1,key2#2]",
            "[key#2]",
    };

    Util.createInputFile(FileSystem.getLocal(new Configuration()), tmpDirName + "/testSimpleMapKeyLookup", input);

    String query =
        "a = load '" + tmpDirName + "/testSimpleMapKeyLookup' as (m:map[int]);" +
        "b = foreach a generate m#'key';";
    Util.registerMultiLineQuery(pig, query);
    Schema sch = pig.dumpSchema("b");
    // Expected value goes first so a failure message reads the right way round.
    assertEquals("Checking expected schema", "{int}", sch.toString());
    Iterator<Tuple> it = pig.openIterator("b");

    // assertEquals reports the actual value on failure; assertTrue(a == b) does not.
    Assert.assertTrue(it.hasNext());
    Tuple t = it.next();
    Assert.assertEquals(1, t.size());
    Assert.assertEquals(1, ((Integer) t.get(0)).intValue());

    Assert.assertTrue(it.hasNext());
    t = it.next();
    Assert.assertEquals(1, t.size());
    Assert.assertEquals(2, ((Integer) t.get(0)).intValue());

    Assert.assertFalse(it.hasNext());
}
 
Example #25
Source File: TestPigServer.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the schema dumped for a filtered relation equals the schema
 * declared on the load statement it filters.
 */
@Test
public void testDescribeFilter() throws Throwable {
    PigServer server = new PigServer(cluster.getExecType(), properties);
    server.registerQuery("a = load 'a' as (field1: int, field2: float, field3: chararray );");
    server.registerQuery("b = filter a by field1 > 10;");

    Schema actual = server.dumpSchema("b");
    Schema expected = Utils.getSchemaFromString("field1: int,field2: float,field3: chararray");

    assertEquals(expected, actual);
}
 
Example #26
Source File: TestUnionOnSchema.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Test UNION ONSCHEMA with an input relation whose columns are produced by
 * UDFs (CONCAT and UDFTupleNullSchema). Checks the schema of the union and
 * its sorted output.
 *
 * @throws IOException
 * @throws ParserException
 */
@Test
public void testUnionOnSchemaInputUdfs() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL);
    String query =
        "  l1 = load '" + INP_FILE_2NUMS + "' as (i : int, j : chararray);"
        + "l2 = load '" + INP_FILE_2NUMS + "' as (i : int, j : chararray);"
        + "f1 = foreach l1 generate i, CONCAT(j,j) as cj, "
        + "org.apache.pig.test.TestUnionOnSchema\\$UDFTupleNullSchema(i,j) as uo;"
        + "u = union onschema f1, l2;"
    ;
    Util.registerMultiLineQuery(pig, query);

    Schema sch = pig.dumpSchema("u");
    String expectedSch = "{i: int,cj: chararray,uo: (),j: chararray}";
    // assertEquals reports both schemas on a mismatch; assertTrue(equals) only
    // reported that the check failed.
    Assert.assertEquals(expectedSch, sch.toString());

    Iterator<Tuple> it = pig.openIterator("u");
    // Rows from l2 have no cj/uo, rows from f1 have no j; the missing columns are null.
    List<Tuple> expectedRes =
        Util.getTuplesFromConstantTupleStrings(
                new String[] {
                        "(1,null,null,'2')",
                        "(5,null,null,'3')",
                        "(1,'22',(1,'2'),null)",
                        "(5,'33',(5,'3'),null)"
                });
    Util.checkQueryOutputsAfterSort(it, expectedRes);
}
 
Example #27
Source File: TestExampleGenerator.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Requests example data for a nested foreach block (filter, distinct, count)
 * and checks that a result map is returned.
 */
@Test
public void testForEachNestedBlock2() throws Exception {
    PigServer server = new PigServer(pigContext);

    String loadStatement = "A = load " + A.toString() + " as (x:int, y:int);";
    server.registerQuery(loadStatement);
    server.registerQuery("B = group A by x;");
    server.registerQuery("C = foreach B { FA = filter A by y == 6; DA = DISTINCT FA; generate group, COUNT(DA);};");

    Map<Operator, DataBag> examples = server.getExamples("C");
    assertNotNull(examples);
}
 
Example #28
Source File: TestHiveColumnarStorage.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Stores a relation holding a string and a bag with HiveColumnarStorage,
 * reads the first row back, and checks that the bag was written as a
 * LazyArray of LazyString values.
 */
@Test
public void testShouldStoreBagAsHiveArray() throws IOException, InterruptedException, SerDeException {
    String loaderSpec = "org.apache.pig.piggybank.storage.HiveColumnarLoader('f1 string,f2 string,f3 string')";
    String storerSpec = "org.apache.pig.piggybank.storage.HiveColumnarStorage()";

    String inputFile = simpleDataFile.getAbsolutePath();
    File outputDir = new File("testhiveColumnarStore");

    PigServer server = new PigServer(ExecType.LOCAL);
    server.setBatchOn();
    server.registerQuery("a = LOAD '" + Util.encodeEscape(inputFile) + "' using " + loaderSpec
            + ";");
    server.registerQuery("b = FOREACH a GENERATE f1, TOBAG(f2,f3);");

    //when
    server.store("b", outputDir.getAbsolutePath(), storerSpec);

    //then
    Path partFile = new Path(outputDir.getAbsolutePath()+"/part-m-00000.rc");
    ColumnarStruct row = readRow(outputDir, partFile, "f1 string,f2 array<string>");

    assertEquals(2, row.getFieldsAsList().size());

    Object firstField = row.getField(0);
    assertEquals(LazyString.class, firstField.getClass());

    Object secondField = row.getField(1);
    assertEquals(LazyArray.class, secondField.getClass());

    // Every element of the stored array must be a string with the sample value.
    List<Object> elements = ((LazyArray) secondField).getList();
    for (Object element : elements) {
        assertEquals(LazyString.class, element.getClass());
        String text = ((LazyString) element).getWritableObject().toString();
        assertEquals("Sample value", text);
    }
}
 
Example #29
Source File: TestRank1.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a local PigServer, resets its mock storage, and fills the
 * "test01" and "test02" inputs used by the tests.
 */
@Before
public void setUp() throws Exception {
    pigServer = new PigServer(ExecType.LOCAL);
    data = resetData(pigServer);

    // test01: three-field rows that include repeated rows.
    data.set("test01",
            tuple("A", 1, "N"),
            tuple("B", 2, "N"),
            tuple("C", 3, "M"),
            tuple("D", 4, "P"),
            tuple("E", 4, "Q"),
            tuple("E", 4, "Q"),
            tuple("F", 8, "Q"),
            tuple("F", 7, "Q"),
            tuple("F", 8, "T"),
            tuple("F", 8, "Q"),
            tuple("G", 10, "V"));

    // test02: eight-field rows (two names, four ints, a double, an int).
    data.set("test02",
            tuple("Michael", "Blythe", 1, 1, 1, 1, 4557045.046, 98027),
            tuple("Linda", "Mitchell", 2, 1, 1, 1, 5200475.231, 98027),
            tuple("Jillian", "Carson", 3, 1, 1, 1, 3857163.633, 98027),
            tuple("Garrett", "Vargas", 4, 1, 1, 1, 1764938.986, 98027),
            tuple("Tsvi", "Reiter", 5, 1, 1, 2, 2811012.715, 98027),
            tuple("Shu", "Ito", 6, 6, 2, 2, 3018725.486, 98055),
            tuple("Jose", "Saraiva", 7, 6, 2, 2, 3189356.247, 98055),
            tuple("David", "Campbell", 8, 6, 2, 3, 3587378.426, 98055),
            tuple("Tete", "Mensa-Annan", 9, 6, 2, 3, 1931620.184, 98055),
            tuple("Lynn", "Tsoflias", 10, 6, 2, 3, 1758385.926, 98055),
            tuple("Rachel", "Valdez", 11, 6, 2, 4, 2241204.042, 98055),
            tuple("Jae", "Pak", 12, 6, 2, 4, 5015682.375, 98055),
            tuple("Ranjit", "Varkey Chudukatil", 13, 6, 2, 4, 3827950.238, 98055));
}
 
Example #30
Source File: BoundScript.java    From spork with Apache License 2.0 5 votes vote down vote up
/**
 * Explains this pipeline by registering the first bound query with a new
 * PigServer and printing the explain output to stdout. When no query is
 * bound, an info message is logged and nothing is printed.
 *
 * @throws IOException if explain fails.
 */
public void explain() throws IOException {
    if (!queries.isEmpty()) {
        PigServer server = new PigServer(scriptContext.getPigContext(), false);
        registerQuery(server, queries.get(0));
        server.explain(null, System.out);
    } else {
        LOG.info("No bound query to explain");
    }
}