Java Code Examples for org.apache.pig.PigServer

The following examples show how to use org.apache.pig.PigServer. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source Project: spork   Author: sigmoidanalytics   File: TestSequenceFileLoader.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Prepares the fixture: creates a {@code PigServer} with {@code LOCAL} and writes
 * every element of {@code DATA} into a newly created temporary sequence file,
 * using the element's array index as the record key.
 *
 * @throws Exception if the temporary file or the sequence file cannot be created or written
 */
@Override
public void setUp() throws Exception {
  pigServer = new PigServer(LOCAL);

  File scratchFile = File.createTempFile("test", ".txt");
  tmpFileName = scratchFile.getAbsolutePath();
  System.err.println("fileName: "+tmpFileName);

  Path target = new Path("file:///"+tmpFileName);
  JobConf jobConf = new JobConf();
  FileSystem fileSystem = FileSystem.get(target.toUri(), jobConf);

  // The same key/value holders are reused for every appended record.
  IntWritable recordKey = new IntWritable();
  Text recordValue = new Text();
  SequenceFile.Writer out = null;
  try {
    out = SequenceFile.createWriter(fileSystem, jobConf, target,
                                    recordKey.getClass(), recordValue.getClass());
    int position = 0;
    while (position < DATA.length) {
      recordKey.set(position);
      recordValue.set(DATA[position]);
      out.append(recordKey, recordValue);
      position++;
    }
  } finally {
    // Runs whether or not creating or writing the file succeeded.
    IOUtils.closeStream(out);
  }
}
 
Example #2
Source Project: spork   Author: sigmoidanalytics   File: TestDefaultDateTimeZone.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Sets {@code pig.datetime.default.tz} to {@code America/New_York}, loads
 * {@code tmpFile} as datetime values, keeps those later than 2014-01-01 and
 * expects the first two results to format their zone offset as {@code -0500}
 * and {@code -0400}, in that order.
 */
@Test
public void testDST() throws Exception {
    Properties settings = new Properties();
    // America/New_York is a time zone that uses DST.
    settings.setProperty("pig.datetime.default.tz", "America/New_York");

    PigServer server = new PigServer(Util.getLocalTestMode(), settings);
    String loadStatement = "a = load '"
            + Util.encodeEscape(Util.generateURI(tmpFile.toString(), server.getPigContext()))
            + "' as (test:datetime);";
    server.registerQuery(loadStatement);
    server.registerQuery("b = filter a by test > ToDate('2014-01-01T00:00:00.000');");
    server.registerQuery("c = foreach b generate ToString(test, 'Z') as tz;");

    Iterator<Tuple> offsets = server.openIterator("c");

    Tuple standardTime = offsets.next();
    assertEquals(Util.buildTuple("-0500"), standardTime);

    Tuple daylightTime = offsets.next();
    assertEquals(Util.buildTuple("-0400"), daylightTime);
}
 
Example #3
Source Project: spork   Author: sigmoidanalytics   File: TestBlackAndWhitelistValidator.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Puts {@code dEfaUlt} on the blacklist and expects registering a script that
 * contains a {@code %Default} statement to fail with the
 * "DEFAULT command is not permitted. " message.
 */
@Test
public void testPreprocessorCommands2() throws Exception {
    try {
        ctx.getProperties().setProperty(PigConfiguration.PIG_BLACKLIST, "dEfaUlt");
        PigServer server = new PigServer(ctx);
        Data mockData = resetData(server);

        mockData.set("foo",
                tuple("a", 1, "b"),
                tuple("b", 2, "c"),
                tuple("c", 3, "d"));

        StringBuilder pigScript = new StringBuilder();
        pigScript.append("set io.sort.mb 1000;");
        pigScript.append("%Default input 'foo';");
        pigScript.append("A = LOAD '$input' USING mock.Storage() AS (f1:chararray,f2:int,f3:chararray);");
        pigScript.append("B = order A by f1,f2,f3 DESC;");
        pigScript.append("STORE B INTO 'bar' USING mock.Storage();");

        server.registerScript(IOUtils.toInputStream(pigScript));
        // Reaching this point means the script was accepted, which is a failure.
        fail();
    } catch (Exception e) {
        // RuntimeException is checked here rather than FrontendException because
        // Pig wraps the preprocessor's error within a RuntimeException.
        Util.assertExceptionAndMessage(RuntimeException.class, e,
                "DEFAULT command is not permitted. ");
    }
}
 
Example #4
Source Project: spork   Author: sigmoidanalytics   File: TestGrunt.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Writes a small Python script defining {@code square}, runs a grunt
 * {@code register ... using jython as pig} command against it and checks that
 * the alias {@code pig.square} then resolves to a function spec.
 */
@Test
public void testRegisterScripts() throws Throwable {
    String[] pythonLines = {
            "#!/usr/bin/python",
            "@outputSchema(\"x:{t:(num:long)}\")",
            "def square(number):" ,
            "\treturn (number * number)"
    };
    Util.createLocalInputFile( "testRegisterScripts.py", pythonLines);

    PigServer pigServer = new PigServer(cluster.getExecType(), cluster.getProperties());
    PigContext pigContext = pigServer.getPigContext();

    // Feed the register command to Grunt as if it had been read from a stream.
    String command = "register testRegisterScripts.py using jython as pig\n";
    BufferedReader commandReader = new BufferedReader(
            new InputStreamReader(new ByteArrayInputStream(command.getBytes())));

    Grunt grunt = new Grunt(commandReader, pigContext);
    grunt.exec();

    assertTrue(pigContext.getFuncSpecFromAlias("pig.square") != null);
}
 
Example #5
Source Project: spork   Author: sigmoidanalytics   File: TestBlackAndWhitelistValidator.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Verifies the blacklist filter: with "set" blacklisted, registering a script
 * that starts with a {@code set} statement must fail with a
 * {@link FrontendException} carrying "SET command is not permitted. ".
 *
 * @throws Exception if the exception assertion itself fails with an exception
 */
@Test
public void testBlacklist() throws Exception {
    try {
        ctx.getProperties().setProperty(PigConfiguration.PIG_BLACKLIST, "set");
        PigServer server = new PigServer(ctx);
        Data mockData = resetData(server);

        mockData.set("foo",
                tuple("a", 1, "b"),
                tuple("b", 2, "c"),
                tuple("c", 3, "d"));

        StringBuilder pigScript = new StringBuilder();
        pigScript.append("set io.sort.mb 1000;");
        pigScript.append("A = LOAD 'foo' USING mock.Storage() AS (f1:chararray,f2:int,f3:chararray);");
        pigScript.append("B = order A by f1,f2,f3 DESC;");
        pigScript.append("STORE B INTO 'bar' USING mock.Storage();");

        server.registerScript(IOUtils.toInputStream(pigScript));
        // Reaching this point means the blacklisted command was accepted.
        fail();
    } catch (Exception e) {
        Util.assertExceptionAndMessage(FrontendException.class, e,
                "SET command is not permitted. ");
    }
}
 
Example #6
Source Project: spork   Author: sigmoidanalytics   File: TestXMLLoader.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Loads a temp file created from an empty data list through
 * {@code XMLLoader('ignoreProperty')} and expects that no non-empty tuple is returned.
 */
public void testShouldReturn0TupleCountIfEmptyFileIsPassed() throws Exception {
   // No rows are added: the temp file is created from an empty data list.
   ArrayList<String[]> noRows = new ArrayList<String[]>();
   String path = TestHelper.createTempFile(noRows, "");

   PigServer server = new PigServer(LOCAL);
   // Double every backslash so the path survives being embedded in the query literal.
   path = path.replace("\\", "\\\\");
   server.registerQuery("A = LOAD '" + path + "' USING org.apache.pig.piggybank.storage.XMLLoader('ignoreProperty') as (doc:chararray);");

   int nonEmptyTuples = 0;
   Iterator<?> rows = server.openIterator("A");
   while (rows.hasNext()) {
       Tuple row = (Tuple) rows.next();
       if (row == null) {
           break;
       }
       if (row.size() > 0) {
           nonEmptyTuples++;
       }
   }
   assertEquals(0, nonEmptyTuples);
}
 
Example #7
Source Project: parquet-mr   Author: apache   File: TestSummary.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Stores 1002 generated tuples in mock storage under 'in', runs the
 * {@code Summary} function over all of them grouped together, and prints both
 * the raw first output field and the {@code TupleSummaryData} parsed from it.
 */
@Test
public void testPigScript() throws Exception {
  PigServer server = new PigServer(ExecType.LOCAL);
  Data mockData = Storage.resetData(server);

  List<Tuple> rows = new ArrayList<Tuple>();
  int remaining = 1002;
  for (int n = 0; n < remaining; n++) {
    rows.add(t("a", "b" + n, 1L, b(t("a", m("foo", "bar")))));
  }
  mockData.set("in", "a:chararray, a1:chararray, b:int, c:{t:(a2:chararray, b2:[])}", rows);

  String summarize = "B = FOREACH (GROUP A ALL) GENERATE "+Summary.class.getName()+"(A);";
  server.registerQuery("A = LOAD 'in' USING mock.Storage();");
  server.registerQuery(summarize);
  server.registerQuery("STORE B INTO 'out' USING mock.Storage();");

  // Print the raw value first, then the summary object parsed from that JSON.
  System.out.println(mockData.get("out").get(0).get(0));
  String json = (String)mockData.get("out").get(0).get(0);
  TupleSummaryData summary = SummaryData.fromJSON(json, TupleSummaryData.class);
  System.out.println(summary);
}
 
Example #8
Source Project: spork   Author: sigmoidanalytics   File: TestHadoopJobHistoryLoader.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Loads {@code INPUT_DIR} with {@code HadoopJobHistoryLoader} and checks the
 * entries of the job map (first field) of the first returned tuple.
 */
@SuppressWarnings("unchecked")
@Test
public void testHadoopJHLoader() throws Exception {
    PigServer server = new PigServer(ExecType.LOCAL);
    String loadStatement = "a = load '" + INPUT_DIR
            + "' using org.apache.pig.piggybank.storage.HadoopJobHistoryLoader() "
            + "as (j:map[], m:map[], r:map[]);";
    server.registerQuery(loadStatement);

    Iterator<Tuple> records = server.openIterator("a");
    assertTrue(records.hasNext());

    Tuple firstRecord = records.next();
    // Field 0 is the map declared as 'j' in the load statement's schema.
    Map<String, Object> jobInfo = (Map<String, Object>)firstRecord.get(0);

    assertEquals("3eb62180-5473-4301-aa22-467bd685d466", (String)jobInfo.get("PIG_SCRIPT_ID"));
    assertEquals("job_201004271216_9998", (String)jobInfo.get("JOBID"));
    assertEquals("job_201004271216_9995", (String)jobInfo.get("PIG_JOB_PARENTS"));
    assertEquals("0.8.0-dev", (String)jobInfo.get("PIG_VERSION"));
    assertEquals("0.20.2", (String)jobInfo.get("HADOOP_VERSION"));
    assertEquals("d", (String)jobInfo.get("PIG_JOB_ALIAS"));
    assertEquals("PigLatin:Test.pig", jobInfo.get("JOBNAME"));
    assertEquals("ORDER_BY", (String)jobInfo.get("PIG_JOB_FEATURE"));
    assertEquals("1", (String)jobInfo.get("TOTAL_MAPS"));
    assertEquals("1", (String)jobInfo.get("TOTAL_REDUCES"));
}
 
Example #9
Source Project: spork   Author: sigmoidanalytics   File: TestHiveColumnarLoader.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Reads {@code simpleDataFile} through {@code HiveColumnarLoader} with three
 * string columns and checks that each tuple has three fields, that the first
 * field is a chararray, and that {@code simpleRowCount} tuples are returned.
 *
 * @throws IOException if the Pig server or the query fails
 */
@Test
public void testReadingSingleFileNoProjections() throws IOException {
    String loaderSpec = "org.apache.pig.piggybank.storage.HiveColumnarLoader('f1 string,f2 string,f3 string')";
    String dataPath = simpleDataFile.getAbsolutePath();

    PigServer pig = new PigServer(ExecType.LOCAL);
    pig.setBatchOn();
    pig.registerFunction("org.apache.pig.piggybank.storage.HiveColumnarLoader",
            new FuncSpec(loaderSpec));
    pig.registerQuery("a = LOAD '" + Util.encodeEscape(dataPath) + "' using " + loaderSpec
            + ";");

    Iterator<Tuple> rows = pig.openIterator("a");

    // The loop ends on the first null returned by next().
    int seen = 0;
    for (Tuple row = rows.next(); row != null; row = rows.next()) {
        assertEquals(3, row.size());
        assertEquals(DataType.CHARARRAY, row.getType(0));
        seen++;
    }

    Assert.assertEquals(simpleRowCount, seen);
}
 
Example #10
Source Project: spork   Author: sigmoidanalytics   File: TestBuiltInBagToTupleOrString.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Runs {@code BagToString} over a single tuple holding an empty bag and
 * expects the stored result to be a tuple containing the empty string.
 */
@Test
public void testPigScriptEmptyBagForBagToStringUDF() throws Exception {
    PigServer server = new PigServer(ExecType.LOCAL);
    Data mockData = resetData(server);

    mockData.set("foo", "myBag:bag{t:(l:chararray)}", tuple(bag()));

    server.registerQuery("A = LOAD 'foo' USING mock.Storage();");
    server.registerQuery("B = FOREACH A GENERATE BagToString(myBag) as myBag;");
    server.registerQuery("STORE B INTO 'bar' USING mock.Storage();");

    // An empty bag is expected to come out as an empty string.
    List<Tuple> stored = mockData.get("bar");
    assertEquals(tuple(""), stored.get(0));
}
 
Example #11
Source Project: spork   Author: sigmoidanalytics   File: TestProjectStarExpander.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Tests projecting {@code *} more than once in a single foreach: first with
 * new aliases, then unaliased. Checks both the resulting schema and the rows.
 *
 * @throws IOException if the Pig server or the query fails
 * @throws ParserException declared by the signature; presumably raised when a query or schema string cannot be parsed
 */
@Test
public void testProjectStarMulti() throws IOException, ParserException {
    PigServer server = new PigServer(ExecType.LOCAL);

    String loadPart = "  l1 = load '" + INP_FILE_5FIELDS + "' as (a : int, b : int, c : int);";
    String foreachPart = "f = foreach l1 generate * as (aa, bb, cc), *;";
    Util.registerMultiLineQuery(server, loadPart + foreachPart);

    // The aliased projection comes first, followed by the original column names.
    Schema wantedSchema = Utils.getSchemaFromString(
            "aa : int, bb : int, cc : int, a : int, b : int, c : int");
    Schema actualSchema = server.dumpSchema("f");
    assertEquals("Checking expected schema", wantedSchema, actualSchema);

    String[] wantedRows = {
            "(10,20,30,10,20,30)",
            "(11,21,31,11,21,31)",
    };
    List<Tuple> wantedTuples = Util.getTuplesFromConstantTupleStrings(wantedRows);
    Iterator<Tuple> actualRows = server.openIterator("f");
    Util.checkQueryOutputsAfterSort(actualRows, wantedTuples);
}
 
Example #12
Source Project: spork   Author: sigmoidanalytics   File: TestBlackAndWhitelistValidator.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Puts {@code Define} on the blacklist and expects registering a script that
 * contains a {@code DEFINE} statement to fail with a {@code FrontendException}
 * carrying "Error during parsing. DEFINE command is not permitted. ".
 */
@Test
public void testPreprocessorCommand3() throws Exception {
    try {
        ctx.getProperties().setProperty(PigConfiguration.PIG_BLACKLIST, "Define");
        PigServer server = new PigServer(ctx);
        Data mockData = resetData(server);

        mockData.set("foo",
                tuple("a", 1, "b"),
                tuple("b", 2, "c"),
                tuple("c", 3, "d"));

        StringBuilder pigScript = new StringBuilder();
        pigScript.append("set io.sort.mb 1000;");
        pigScript.append("DEFINE UrlDecode InvokeForString('java.net.URLDecoder.decode', 'String String');  ");
        pigScript.append("A = LOAD 'foo' USING mock.Storage() AS (f1:chararray,f2:int,f3:chararray);");
        pigScript.append("B = order A by f1,f2,f3 DESC;");
        pigScript.append("STORE B INTO 'bar' USING mock.Storage();");

        server.registerScript(IOUtils.toInputStream(pigScript));
        // Reaching this point means the blacklisted command was accepted.
        fail();
    } catch (Exception e) {
        Util.assertExceptionAndMessage(FrontendException.class, e,
                "Error during parsing. DEFINE command is not permitted. ");
    }
}
 
Example #13
Source Project: spork   Author: sigmoidanalytics   File: TestXMLLoader.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Loads the temp file built from {@code inlineClosedTags} through
 * {@code XMLLoader('event')} and parses the first field of every returned tuple
 * with a {@code DocumentBuilder}; an exception from parsing propagates and fails the test.
 */
public void testXMLLoaderShouldReturnValidXML() throws Exception {
  String path = TestHelper.createTempFile(inlineClosedTags, "");
  PigServer server = new PigServer(LOCAL);
  // Double every backslash so the path survives being embedded in the query literal.
  path = path.replace("\\", "\\\\");
  server.registerQuery("A = LOAD '" + path + "' USING org.apache.pig.piggybank.storage.XMLLoader('event') as (doc:chararray);");

  Iterator<?> rows = server.openIterator("A");
  while (rows.hasNext()) {
      Tuple row = (Tuple) rows.next();
      if (row == null) {
          break;
      }
      // A fresh builder per row, as in the original; parse() throws on malformed XML.
      DocumentBuilder xmlParser =
              DocumentBuilderFactory.newInstance().newDocumentBuilder();
      byte[] xmlBytes = ((String)row.get(0)).getBytes();
      xmlParser.parse(new ByteArrayInputStream(xmlBytes));
  }
}
 
Example #14
Source Project: spork   Author: sigmoidanalytics   File: TestCombinedLogLoader.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Loads the temp file built from {@code data} through {@code CombinedLogLoader},
 * examines every returned tuple against {@code EXPECTED}, and checks that the
 * number of tuples equals {@code data.size()}.
 */
@Test
public void testLoadFromPigServer() throws Exception {
    String path = TestHelper.createTempFile(data, " ");
    PigServer server = new PigServer(ExecType.LOCAL);
    // Double every backslash so the path survives being embedded in the query literal.
    path = path.replace("\\", "\\\\");
    server.registerQuery("A = LOAD '" + path + "' USING org.apache.pig.piggybank.storage.apachelog.CombinedLogLoader();");

    int rowIndex = 0;
    Iterator<?> rows = server.openIterator("A");
    while (rows.hasNext()) {
        Tuple row = (Tuple) rows.next();
        if (row == null) {
            break;
        }
        TestHelper.examineTuple(EXPECTED, row, rowIndex);
        rowIndex++;
    }
    assertEquals(data.size(), rowIndex);
}
 
Example #15
Source Project: spork   Author: sigmoidanalytics   File: TestBuiltInBagToTupleOrString.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Runs {@code BagToString} over a bag whose third element is a tuple that
 * itself holds a bag, and expects the stored result to be {@code a_b_{(c),(d)}_e}.
 */
@Test
public void testPigScriptNestedTupleForBagToStringUDF() throws Exception {
    PigServer server = new PigServer(ExecType.LOCAL);
    Data mockData = resetData(server);

    // The inner tuple wraps a bag of two single-field tuples.
    Tuple inner = tuple(bag(tuple("c"), tuple("d")));
    Tuple inputRow = tuple(bag(tuple("a"), tuple("b"), inner, tuple("e")));
    mockData.set("foo", "myBag:bag{t:(l:chararray)}", inputRow);

    server.registerQuery("A = LOAD 'foo' USING mock.Storage();");
    server.registerQuery("B = FOREACH A GENERATE BagToString(myBag) as myBag;");
    server.registerQuery("STORE B INTO 'bar' USING mock.Storage();");

    List<Tuple> stored = mockData.get("bar");
    assertEquals(tuple("a_b_{(c),(d)}_e"), stored.get(0));
}
 
Example #16
Source Project: spork   Author: sigmoidanalytics   File: TestHiveColumnarLoader.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Loads {@code datePartitionedDir} through {@code HiveColumnarLoader}
 * configured with the {@code startingDate:endingDate} range and checks that
 * {@code datePartitionedRowCount} tuples are returned.
 *
 * @throws IOException if the Pig server or the query fails
 */
@Test
public void testDatePartitionedFiles() throws IOException {
    String loaderSpec = "org.apache.pig.piggybank.storage.HiveColumnarLoader('f1 string,f2 string,f3 string'"
            + ", '" + startingDate + ":" + endingDate + "')";
    System.out.println(loaderSpec);

    PigServer pig = new PigServer(ExecType.LOCAL);
    pig.setBatchOn();
    pig.registerFunction("org.apache.pig.piggybank.storage.HiveColumnarLoader",
            new FuncSpec(loaderSpec));

    String partitionedPath = Util.encodeEscape(datePartitionedDir.getAbsolutePath());
    pig.registerQuery("a = LOAD '" + partitionedPath + "' using "
            + loaderSpec + ";");

    // Count rows until next() returns null.
    Iterator<Tuple> rows = pig.openIterator("a");
    int seen = 0;
    for (Tuple row = rows.next(); row != null; row = rows.next()) {
        seen++;
    }

    Assert.assertEquals(datePartitionedRowCount, seen);
}
 
Example #17
Source Project: spork   Author: sigmoidanalytics   File: TestUnionOnSchema.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Tests UNION ONSCHEMA where one input has its untyped columns cast to int
 * and the other input is the untyped relation itself.
 *
 * @throws IOException if the Pig server or the query fails
 * @throws ParserException declared by the signature; presumably raised when the query cannot be parsed
 */
@Test
public void testUnionOnSchemaCastOnByteArray() throws IOException, ParserException {
    PigServer server = new PigServer(ExecType.LOCAL);

    String loadPart = "  l1 = load '" + INP_FILE_2NUMS + "' as (i, j);";
    String castPart = " f1 = foreach l1 generate (int)i, (int)j;";
    String unionPart = "u = union onschema f1, l1;";
    Util.registerMultiLineQuery(server, loadPart + castPart + unionPart);

    Iterator<Tuple> actualRows = server.openIterator("u");

    // Each input row is expected twice: once from f1 and once from l1.
    String[] wantedRows = {
            "(1,2)",
            "(5,3)",
            "(1,2)",
            "(5,3)"
    };
    List<Tuple> wantedTuples = Util.getTuplesFromConstantTupleStrings(wantedRows);
    Util.checkQueryOutputsAfterSort(actualRows, wantedTuples);
}
 
Example #18
Source Project: spork   Author: sigmoidanalytics   File: Util.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Copies one local file to another location by running an {@code fs -cp}
 * command through a {@code GruntParser} backed by an {@code ExecType.LOCAL}
 * {@code PigServer}. The destination is registered for deletion on JVM exit.
 *
 * @param fromLocalFileName path of the file to copy
 * @param toLocalFileName   path of the copy to create
 * @throws IOException if the command fails to parse (the parse error is wrapped as the cause)
 *                     or the Pig server cannot be created
 */
public static void copyFromLocalToLocal(String fromLocalFileName,
        String toLocalFileName) throws IOException {
    if (Util.WINDOWS) {
        // On Windows, backslashes are replaced with forward slashes before building the command.
        fromLocalFileName = fromLocalFileName.replace('\\','/');
        toLocalFileName = toLocalFileName.replace('\\','/');
    }

    PigServer pigServer = new PigServer(ExecType.LOCAL, new Properties());
    String command = getMkDirCommandForHadoop2_0(toLocalFileName)
            + "fs -cp " + fromLocalFileName + " " + toLocalFileName;

    File destination = new File(toLocalFileName);
    destination.deleteOnExit();

    GruntParser gruntParser = new GruntParser(new StringReader(command), pigServer);
    gruntParser.setInteractive(false);
    try {
        gruntParser.parseStopOnError();
    } catch (org.apache.pig.tools.pigscript.parser.ParseException e) {
        throw new IOException(e);
    }
}
 
Example #19
Source Project: yauaa   Author: nielsbasjes   File: TestParseUserAgent.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Runs the {@code ParseUserAgent} UDF over {@code testUserAgent} stored in
 * mock storage and hands the storage to {@code verifyStorageData} for checking.
 */
@Test
public void testParseUserAgentPigUDF_allFields() throws Exception {
    PigServer server = new PigServer(ExecType.LOCAL);
    Storage.Data mockData = resetData(server);
    mockData.set("agents", "agent:chararray", tuple(testUserAgent));

    String[] statements = {
        "define ParseUserAgent nl.basjes.parse.useragent.pig.ParseUserAgent();",
        "A = LOAD 'agents' USING mock.Storage();",
        "B = FOREACH A GENERATE ParseUserAgent(agent);",
        "STORE B INTO 'parsedAgents' USING mock.Storage();",
    };
    for (String statement : statements) {
        server.registerQuery(statement);
    }

    verifyStorageData(mockData);
}
 
Example #20
Source Project: spork   Author: sigmoidanalytics   File: TestGrunt.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Runs a grunt script whose column name {@code fast} contains the letters "as"
 * and which uses it inside a foreach without a nested block; the test passes
 * as long as {@code Grunt.exec()} does not throw.
 */
@Test
public void testParsingWordWithAsInForeachWithOutBlock() throws Throwable {
    PigServer pigServer = new PigServer(cluster.getExecType(), cluster.getProperties());
    PigContext pigContext = pigServer.getPigContext();

    String command = "a = load 'foo' as (foo, fast); "
            + "b = group a by foo; c = foreach b generate SUM(a.fast);\n";

    // Feed the script to Grunt as if it had been read from a stream.
    BufferedReader commandReader = new BufferedReader(
            new InputStreamReader(new ByteArrayInputStream(command.getBytes())));

    Grunt grunt = new Grunt(commandReader, pigContext);
    grunt.exec();
}
 
Example #21
Source Project: spork   Author: sigmoidanalytics   File: TestStore.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates the Pig server for the cluster, caches its context, and picks
 * randomly suffixed input and output file names under {@code TESTDIR}.
 */
@Before
public void setUp() throws Exception {
    pig = new PigServer(cluster.getExecType(), cluster.getProperties());
    pc = pig.getPigContext();

    // Each name gets its own random long suffix.
    long inputSuffix = new Random().nextLong();
    inputFileName = TESTDIR + "/TestStore-" + inputSuffix + ".txt";

    long outputSuffix = new Random().nextLong();
    outputFileName = TESTDIR + "/TestStore-output-" + outputSuffix + ".txt";
}
 
Example #22
Source Project: spork   Author: sigmoidanalytics   File: TestExampleGenerator.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Registers a load of {@code A} followed by a group-by on {@code x} and checks
 * that {@code getExamples("B")} returns a non-null map.
 */
@Test
public void testGroup() throws Exception {
    PigServer server = new PigServer(pigContext);

    String loadStatement = "A = load " + A.toString() + " as (x, y);";
    server.registerQuery(loadStatement);
    server.registerQuery("B = group A by x;");

    Map<Operator, DataBag> examples = server.getExamples("B");
    assertNotNull(examples);
}
 
Example #23
Source Project: spork   Author: sigmoidanalytics   File: TestPigScriptParser.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Defines {@code minelogs} as {@code RegexGroupCount} with a fixed pattern,
 * applies it to five input lines and checks the value produced for each line.
 *
 * <p>Besides the per-row values, the number of rows is checked; without that
 * check an empty or truncated result would pass unnoticed.
 */
@Test
public void testDefineUDF() throws Exception {
    PigServer ps = new PigServer(ExecType.LOCAL);
    String[] inputData = {
            "dshfdskfwww.xyz.com/sportsjoadfjdslpdshfdskfwww.xyz.com/sportsjoadfjdsl" ,
            "kas;dka;sd" ,
            "jsjsjwww.xyz.com/sports" ,
            "jsdLSJDcom/sports" ,
            "wwwJxyzMcom/sports"
    };
    File f = Util.createFile(inputData);

    String[] queryLines = new String[] {
            // the reason we have 4 backslashes below is we really want to put two backslashes but
            // since this is to be represented in a Java String, we escape each backslash with one more
            // backslash - hence 4. In a pig script in a file, this would be
            // www\\.xyz\\.com
            "define minelogs org.apache.pig.test.RegexGroupCount('www\\\\.xyz\\\\.com/sports');" ,
            "A = load '" + Util.generateURI(f.getAbsolutePath(), ps.getPigContext()) + "'  using PigStorage() as (source : chararray);" ,
            "B = foreach A generate minelogs(source) as sportslogs;" };
    for (String line : queryLines) {
        ps.registerQuery(line);
    }

    // One expected value per input line, in input order.
    int[] expectedResults = new int[] {2,0,1,0,0};
    Iterator<Tuple> it = ps.openIterator("B");
    int i = 0;
    while (it.hasNext()) {
        Tuple t = it.next();
        assertEquals(expectedResults[i++], t.get(0));
    }
    // Make sure every expected value was actually compared against a row.
    assertEquals(expectedResults.length, i);
}
 
Example #24
Source Project: spork   Author: sigmoidanalytics   File: TestCompressedFiles.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Cogroups {@code gzFile} and {@code datFile} on their second column, applies
 * {@code DIFF} to the grouped second columns, and expects the flattened result
 * to be empty.
 */
@Test
public void testCompressed1() throws Throwable {
    pig = new PigServer(cluster.getExecType(), properties);

    String diffQuery = "A = foreach (cogroup (load '"
            + Util.generateURI(gzFile.toString(), pig.getPigContext())
            + "') by $1, (load '"
            + Util.generateURI(datFile.toString(), pig.getPigContext())
            + "') by $1) generate flatten( " + DIFF.class.getName()
            + "($1.$1,$2.$1)) ;";
    pig.registerQuery(diffQuery);

    Iterator<Tuple> differences = pig.openIterator("A");
    assertFalse(differences.hasNext());
}
 
Example #25
Source Project: spork   Author: sigmoidanalytics   File: TestPigServer.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * With per-statement validation switched on, projecting column {@code $2} from
 * a two-column relation must fail at registration with an out-of-bound message.
 */
@Test // PIG-2059
public void test1() throws Throwable {
    PigServer server = new PigServer(cluster.getExecType(), properties);
    server.setValidateEachStatement(true);
    server.registerQuery("A = load 'x' as (u, v);") ;
    try {
        server.registerQuery("B = foreach A generate $2;") ;
        fail("Query is supposed to fail.");
    } catch(FrontendException ex) {
        Util.checkMessageInException(ex,
                "Out of bound access. " +
                "Trying to access non-existent column: 2");
    }
}
 
Example #26
Source Project: spork   Author: sigmoidanalytics   File: TestPigServer.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Registers aliases {@code a} and {@code b} only, then expects
 * {@code dumpSchema("c")} to fail with a message containing
 * "Unable to describe schema for alias c".
 */
@Test
public void testDescribeForeachFail() throws Throwable {
    PigServer server = new PigServer(cluster.getExecType(), properties);
    server.registerQuery("a = load 'a' as (field1: int, field2: float, field3: chararray );") ;
    server.registerQuery("b = foreach a generate field1 + 10;") ;
    try {
        server.dumpSchema("c") ;
        fail("Error expected");
    } catch (Exception e) {
        String message = e.getMessage();
        assertTrue(message.contains("Unable to describe schema for alias c"));
    }
}
 
Example #27
Source Project: spork   Author: sigmoidanalytics   File: TestBZip.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Stores the same relation into 'output.bz2' and 'output' within one batch and
 * checks that the first listed file of each output location is non-empty.
 */
@Test
public void testBzipStoreInMultiQuery() throws Exception {
    String inputFileName = "input.txt";
    String[] inputLines = { "1\t2\r3\t4" };
    Util.createInputFile(cluster, inputFileName, inputLines);

    PigServer server = new PigServer(cluster.getExecType(), properties);

    // Both stores are queued and run together by executeBatch().
    server.setBatchOn();
    server.registerQuery("a = load '" +  inputFileName + "';");
    server.registerQuery("store a into 'output.bz2';");
    server.registerQuery("store a into 'output';");
    server.executeBatch();

    FileSystem fileSystem = FileSystem.get(ConfigurationUtil.toConfiguration(
            server.getPigContext().getProperties()));

    FileStatus[] plainFiles = fileSystem.listStatus(new Path("output"),
            Util.getSuccessMarkerPathFilter());
    assertTrue(plainFiles[0].getLen() > 0);

    FileStatus[] bzipFiles = fileSystem.listStatus(new Path("output.bz2"),
            Util.getSuccessMarkerPathFilter());
    assertTrue(bzipFiles[0].getLen() > 0);
}
 
Example #28
Source Project: spork   Author: sigmoidanalytics   File: TestMapSideCogroup.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Merge-cogroups {@code INPUT_FILE1} with {@code EMPTY_FILE} and expects three
 * groups, each with the rows from the first input and an empty bag for the second.
 *
 * <p>The row comparison passes the expected string first and the actual value
 * second, matching the parameter order of {@code assertEquals}, so a failure
 * message reports the two values the right way round.
 */
@Test
public void testEmptyDeltaFile() throws Exception{

    PigServer pigServer = new PigServer(cluster.getExecType(), cluster.getProperties());
    pigServer.registerQuery("A = LOAD '" + INPUT_FILE1 + "' using "+ DummyCollectableLoader.class.getName() +"() as (c1:chararray,c2:int);");
    pigServer.registerQuery("B = LOAD '" + EMPTY_FILE + "' using "+ DummyIndexableLoader.class.getName()   +"() as (c1:chararray,c2:int);");

    DataBag dbMergeCogrp = BagFactory.getInstance().newDefaultBag();

    pigServer.registerQuery("C = cogroup A by c1, B by c1 using 'merge';");
    Iterator<Tuple> iter = pigServer.openIterator("C");

    // Collect every result tuple into a bag before comparing.
    while(iter.hasNext()) {
        dbMergeCogrp.add(iter.next());
    }

    String[] results = new String[]{
            "(1,{(1,1),(1,2),(1,3)},{})",
            "(2,{(2,1),(2,2),(2,3)},{})",
            "(3,{(3,1),(3,2),(3,3)},{})"
    };

    assertEquals(3, dbMergeCogrp.size());
    Iterator<Tuple> itr = dbMergeCogrp.iterator();
    for (String expected : results) {
        assertEquals(expected, itr.next().toString());
    }
    assertFalse(itr.hasNext());
}
 
Example #29
Source Project: spork   Author: sigmoidanalytics   File: TestStreamingUDF.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Registers a streaming Python UDF that returns its datetime argument
 * unchanged and expects two {@code DateTime} tuples to come back equal to the inputs.
 */
@Test
public void testPythonUDF__withDateTime() throws Exception {
    pigServerLocal = new PigServer(ExecType.LOCAL);

    String[] udfSource = {
            "from pig_util import outputSchema",
            "@outputSchema(\'d:datetime\')",
            "def py_func(dt):",
            "   return dt"
    };
    Util.createLocalInputFile( "pyfile_dt.py", udfSource);

    Data mockData = resetData(pigServerLocal);
    Tuple first = tf.newTuple(new DateTime());
    Tuple second = tf.newTuple(new DateTime());
    mockData.set("testDateTuples", "d:datetime", first, second);

    String[] statements = {
            "REGISTER 'pyfile_dt.py' USING streaming_python AS pf;",
            "A = LOAD 'testDateTuples' USING mock.Storage();",
            "B = FOREACH A generate pf.py_func(d);",
            "STORE B INTO 'date_out' USING mock.Storage();"
    };
    for (String statement : statements) {
        pigServerLocal.registerQuery(statement);
    }

    List<Tuple> stored = mockData.get("date_out");
    assertEquals(first, stored.get(0));
    assertEquals(second, stored.get(1));
}
 
Example #30
Source Project: spork   Author: sigmoidanalytics   File: TestDBStorage.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds the test fixture: a mini cluster with a MAPREDUCE-mode Pig server
 * and a started database {@code Server} named "batchtest" whose files live
 * under {@code <user.dir>/build/test/}. Progress is reported on standard output.
 *
 * @throws ExecException declared by the signature; presumably raised by the Pig server setup
 * @throws IOException declared by the signature; presumably raised by the cluster or server setup
 */
public TestDBStorage() throws ExecException, IOException {
    // Initialise Pig server
    cluster = MiniCluster.buildCluster();
    pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
    // Set the map and reduce max-attempts properties to "1".
    pigServer.getPigContext().getProperties()
            .setProperty(MRConfiguration.MAP_MAX_ATTEMPTS, "1");
    pigServer.getPigContext().getProperties()
            .setProperty(MRConfiguration.REDUCE_MAX_ATTEMPTS, "1");
    System.out.println("Pig server initialized successfully");
    // Database files are placed under the build/test directory of the working directory.
    TMP_DIR = System.getProperty("user.dir") + "/build/test/";
    dblocation = TMP_DIR + "batchtest";
    url = "jdbc:hsqldb:file:" + dblocation
           + ";hsqldb.default_table_type=cached;hsqldb.cache_rows=100";
    // Initialise DBServer
    dbServer = new Server();
    dbServer.setDatabaseName(0, "batchtest");
    // dbServer.setDatabasePath(0, "mem:test;sql.enforce_strict_size=true");
    dbServer.setDatabasePath(0,
                        "file:" + TMP_DIR + "batchtest;sql.enforce_strict_size=true");
    // Null log and error writers are passed, presumably to silence server output.
    dbServer.setLogWriter(null);
    dbServer.setErrWriter(null);
    dbServer.start();
    // NOTE(review): this prints dbUrl, not the url field assigned above; dbUrl is
    // defined outside this block, so confirm this is the intended value.
    System.out.println("Database URL: " + dbUrl);
    try {
        // Load the JDBC driver class named by the driver field.
        Class.forName(driver);
    } catch (Exception e) {
        // A driver loading failure is only reported; construction continues.
        e.printStackTrace();
        System.out.println(this + ".setUp() error: " + e.getMessage());
    }
    System.out.println("Database server started on port: " + dbServer.getPort());
}