org.apache.pig.PigServer Java Examples
The following examples show how to use
org.apache.pig.PigServer.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestUnionOnSchema.java From spork with Apache License 2.0 | 6 votes |
/**
 * Checks UNION ONSCHEMA when one input has had its untyped (bytearray)
 * columns cast to int and the other input is the untyped load itself.
 *
 * @throws IOException
 * @throws ParserException
 */
@Test
public void testUnionOnSchemaCastOnByteArray() throws IOException, ParserException {
    PigServer pigServer = new PigServer(ExecType.LOCAL);

    // l1 is loaded without column types; f1 casts both columns to int.
    String loadStmt = " l1 = load '" + INP_FILE_2NUMS + "' as (i, j);";
    String castStmt = " f1 = foreach l1 generate (int)i, (int)j;";
    String unionStmt = "u = union onschema f1, l1;";
    Util.registerMultiLineQuery(pigServer, loadStmt + castStmt + unionStmt);

    Iterator<Tuple> actualRows = pigServer.openIterator("u");

    // Every input row is expected twice: once from f1 and once from l1.
    String[] expectedRowStrings = { "(1,2)", "(5,3)", "(1,2)", "(5,3)" };
    List<Tuple> expectedRows = Util.getTuplesFromConstantTupleStrings(expectedRowStrings);
    Util.checkQueryOutputsAfterSort(actualRows, expectedRows);
}
Example #2
Source File: TestBlackAndWhitelistValidator.java From spork with Apache License 2.0 | 6 votes |
/**
 * Blacklists the "Define" command and expects registering a script that
 * contains a DEFINE statement to fail with a {@link FrontendException}.
 */
@Test
public void testPreprocessorCommand3() throws Exception {
    try {
        // The blacklist entry is mixed case; the expected message names the command in upper case.
        ctx.getProperties().setProperty(PigConfiguration.PIG_BLACKLIST, "Define");
        PigServer pigServer = new PigServer(ctx);

        Data data = resetData(pigServer);
        data.set("foo",
                tuple("a", 1, "b"),
                tuple("b", 2, "c"),
                tuple("c", 3, "d"));

        StringBuilder script = new StringBuilder();
        script.append("set io.sort.mb 1000;");
        script.append("DEFINE UrlDecode InvokeForString('java.net.URLDecoder.decode', 'String String'); ");
        script.append("A = LOAD 'foo' USING mock.Storage() AS (f1:chararray,f2:int,f3:chararray);");
        script.append("B = order A by f1,f2,f3 DESC;");
        script.append("STORE B INTO 'bar' USING mock.Storage();");

        pigServer.registerScript(IOUtils.toInputStream(script));

        // Reaching this line means the blacklisted command was accepted.
        fail();
    } catch (Exception e) {
        Util.assertExceptionAndMessage(FrontendException.class, e,
                "Error during parsing. DEFINE command is not permitted. ");
    }
}
Example #3
Source File: TestSequenceFileLoader.java From spork with Apache License 2.0 | 6 votes |
/**
 * Creates a local-mode PigServer and writes the DATA fixture to a temporary
 * SequenceFile, using each record's array index as its IntWritable key.
 */
@Override
public void setUp() throws Exception {
    pigServer = new PigServer(LOCAL);

    File tempFile = File.createTempFile("test", ".txt");
    tmpFileName = tempFile.getAbsolutePath();
    System.err.println("fileName: " + tmpFileName);

    Path seqFilePath = new Path("file:///" + tmpFileName);
    JobConf jobConf = new JobConf();
    FileSystem fileSystem = FileSystem.get(seqFilePath.toUri(), jobConf);

    IntWritable recordKey = new IntWritable();
    Text recordValue = new Text();
    SequenceFile.Writer seqWriter = null;
    try {
        seqWriter = SequenceFile.createWriter(
                fileSystem, jobConf, seqFilePath, recordKey.getClass(), recordValue.getClass());
        int index = 0;
        while (index < DATA.length) {
            recordKey.set(index);
            recordValue.set(DATA[index]);
            seqWriter.append(recordKey, recordValue);
            index++;
        }
    } finally {
        // closeStream is called even if the writer was never created (null).
        IOUtils.closeStream(seqWriter);
    }
}
Example #4
Source File: TestDefaultDateTimeZone.java From spork with Apache License 2.0 | 6 votes |
@Test public void testDST() throws Exception { String defaultDTZ = "America/New_York"; // a timezone that uses DST Properties config = new Properties(); config.setProperty("pig.datetime.default.tz", defaultDTZ); PigServer pig = new PigServer(Util.getLocalTestMode(), config); pig.registerQuery("a = load '" + Util.encodeEscape(Util.generateURI(tmpFile.toString(), pig.getPigContext())) + "' as (test:datetime);"); pig.registerQuery("b = filter a by test > ToDate('2014-01-01T00:00:00.000');"); pig.registerQuery("c = foreach b generate ToString(test, 'Z') as tz;"); Iterator<Tuple> actualItr = pig.openIterator("c"); Tuple est = actualItr.next(); assertEquals(Util.buildTuple("-0500"), est); Tuple edt = actualItr.next(); assertEquals(Util.buildTuple("-0400"), edt); }
Example #5
Source File: Util.java From spork with Apache License 2.0 | 6 votes |
/**
 * Copies one local file to another local path by running an {@code fs -cp}
 * command through a non-interactive {@link GruntParser} on a local-mode
 * PigServer. The destination is registered for deletion on JVM exit.
 *
 * @param fromLocalFileName path of the file to copy
 * @param toLocalFileName   path to copy the file to
 * @throws IOException if the grunt script fails to parse; the parser's
 *                     ParseException is wrapped as the cause
 */
public static void copyFromLocalToLocal(String fromLocalFileName,
        String toLocalFileName) throws IOException {
    String source = fromLocalFileName;
    String target = toLocalFileName;
    if (Util.WINDOWS) {
        // Use forward slashes in both paths on Windows.
        source = source.replace('\\', '/');
        target = target.replace('\\', '/');
    }

    PigServer localServer = new PigServer(ExecType.LOCAL, new Properties());
    String copyScript = getMkDirCommandForHadoop2_0(target)
            + "fs -cp " + source + " " + target;
    new File(target).deleteOnExit();

    GruntParser gruntParser = new GruntParser(new StringReader(copyScript), localServer);
    gruntParser.setInteractive(false);
    try {
        gruntParser.parseStopOnError();
    } catch (org.apache.pig.tools.pigscript.parser.ParseException parseFailure) {
        throw new IOException(parseFailure);
    }
}
Example #6
Source File: TestXMLLoader.java From spork with Apache License 2.0 | 6 votes |
public void testXMLLoaderShouldReturnValidXML() throws Exception { String filename = TestHelper.createTempFile(inlineClosedTags, ""); PigServer pig = new PigServer(LOCAL); filename = filename.replace("\\", "\\\\"); String query = "A = LOAD '" + filename + "' USING org.apache.pig.piggybank.storage.XMLLoader('event') as (doc:chararray);"; pig.registerQuery(query); Iterator<?> it = pig.openIterator("A"); while (it.hasNext()) { Tuple tuple = (Tuple) it.next(); if (tuple == null) break; else { // Test it returns a valid XML DocumentBuilder docBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); docBuilder.parse(new ByteArrayInputStream(((String)tuple.get(0)).getBytes())); } } }
Example #7
Source File: TestBlackAndWhitelistValidator.java From spork with Apache License 2.0 | 6 votes |
@Test public void testPreprocessorCommands2() throws Exception { try { ctx.getProperties().setProperty(PigConfiguration.PIG_BLACKLIST, "dEfaUlt"); PigServer pigServer = new PigServer(ctx); Data data = resetData(pigServer); data.set("foo", tuple("a", 1, "b"), tuple("b", 2, "c"), tuple("c", 3, "d")); StringBuilder script = new StringBuilder(); script.append("set io.sort.mb 1000;") .append("%Default input 'foo';") .append("A = LOAD '$input' USING mock.Storage() AS (f1:chararray,f2:int,f3:chararray);") .append("B = order A by f1,f2,f3 DESC;") .append("STORE B INTO 'bar' USING mock.Storage();"); pigServer.registerScript(IOUtils.toInputStream(script)); fail(); } catch (Exception e) { // We check RuntimeException here and not FrontendException as Pig wraps the error from Preprocessor // within RuntimeException Util.assertExceptionAndMessage(RuntimeException.class, e, "DEFAULT command is not permitted. "); } }
Example #8
Source File: TestHiveColumnarLoader.java From spork with Apache License 2.0 | 6 votes |
/**
 * Loads the date-partitioned directory through HiveColumnarLoader restricted
 * to the startingDate:endingDate range and checks the number of rows read.
 */
@Test
public void testDatePartitionedFiles() throws IOException {
    int count = 0;

    String funcSpecString =
            "org.apache.pig.piggybank.storage.HiveColumnarLoader('f1 string,f2 string,f3 string'"
            + ", '" + startingDate + ":" + endingDate + "')";
    System.out.println(funcSpecString);

    PigServer server = new PigServer(ExecType.LOCAL);
    server.setBatchOn();
    server.registerFunction("org.apache.pig.piggybank.storage.HiveColumnarLoader",
            new FuncSpec(funcSpecString));

    String inputDir = Util.encodeEscape(datePartitionedDir.getAbsolutePath());
    server.registerQuery("a = LOAD '" + inputDir + "' using " + funcSpecString + ";");

    Iterator<Tuple> result = server.openIterator("a");

    // The loop ends when next() returns null; hasNext() is not consulted.
    for (Tuple row = result.next(); row != null; row = result.next()) {
        count++;
    }

    Assert.assertEquals(datePartitionedRowCount, count);
}
Example #9
Source File: TestCombinedLogLoader.java From spork with Apache License 2.0 | 6 votes |
/**
 * Loads the data fixture through CombinedLogLoader on a local PigServer,
 * examines every returned tuple against EXPECTED and checks the tuple count.
 */
@Test
public void testLoadFromPigServer() throws Exception {
    String filename = TestHelper.createTempFile(data, " ");
    PigServer pig = new PigServer(ExecType.LOCAL);

    // Double every backslash before embedding the path in the query string.
    filename = filename.replace("\\", "\\\\");
    pig.registerQuery("A = LOAD '" + filename
            + "' USING org.apache.pig.piggybank.storage.apachelog.CombinedLogLoader();");

    Iterator<?> rows = pig.openIterator("A");
    int tupleCount = 0;
    while (rows.hasNext()) {
        Tuple row = (Tuple) rows.next();
        if (row == null) {
            break;
        }
        TestHelper.examineTuple(EXPECTED, row, tupleCount);
        tupleCount++;
    }

    assertEquals(data.size(), tupleCount);
}
Example #10
Source File: TestProjectStarExpander.java From spork with Apache License 2.0 | 6 votes |
/**
 * Tests a foreach that projects {@code *} twice, the first time renamed to
 * (aa, bb, cc), and checks both the resulting schema and the rows.
 *
 * @throws IOException
 * @throws ParserException
 */
@Test
public void testProjectStarMulti() throws IOException, ParserException {
    PigServer pigServer = new PigServer(ExecType.LOCAL);

    String loadStmt = " l1 = load '" + INP_FILE_5FIELDS + "' as (a : int, b : int, c : int);";
    String projectStmt = "f = foreach l1 generate * as (aa, bb, cc), *;";
    Util.registerMultiLineQuery(pigServer, loadStmt + projectStmt);

    // Schema: the renamed columns first, then the original ones.
    Schema expectedSchema = Utils.getSchemaFromString(
            "aa : int, bb : int, cc : int, a : int, b : int, c : int");
    Schema actualSchema = pigServer.dumpSchema("f");
    assertEquals("Checking expected schema", expectedSchema, actualSchema);

    // Rows: each input row appears twice side by side.
    String[] expectedRowStrings = {
        "(10,20,30,10,20,30)",
        "(11,21,31,11,21,31)",
    };
    List<Tuple> expectedRows = Util.getTuplesFromConstantTupleStrings(expectedRowStrings);
    Iterator<Tuple> actualRows = pigServer.openIterator("f");
    Util.checkQueryOutputsAfterSort(actualRows, expectedRows);
}
Example #11
Source File: TestBuiltInBagToTupleOrString.java From spork with Apache License 2.0 | 6 votes |
@Test public void testPigScriptEmptyBagForBagToStringUDF() throws Exception { PigServer pigServer = new PigServer(ExecType.LOCAL); Data data = resetData(pigServer); data.set("foo", "myBag:bag{t:(l:chararray)}", tuple(bag())); pigServer.registerQuery("A = LOAD 'foo' USING mock.Storage();"); pigServer.registerQuery("B = FOREACH A GENERATE BagToString(myBag) as myBag;"); pigServer.registerQuery("STORE B INTO 'bar' USING mock.Storage();"); List<Tuple> out = data.get("bar"); // empty bag will generate empty string assertEquals(tuple(""), out.get(0)); }
Example #12
Source File: TestGrunt.java From spork with Apache License 2.0 | 6 votes |
/**
 * Writes a small Python UDF file, registers it through Grunt with the
 * "pig" namespace and checks that the alias "pig.square" resolves.
 */
@Test
public void testRegisterScripts() throws Throwable {
    String[] pythonSource = {
        "#!/usr/bin/python",
        "@outputSchema(\"x:{t:(num:long)}\")",
        "def square(number):",
        "\treturn (number * number)"
    };
    Util.createLocalInputFile("testRegisterScripts.py", pythonSource);

    PigServer server = new PigServer(cluster.getExecType(), cluster.getProperties());
    PigContext context = server.getPigContext();

    // Feed the register command to Grunt through an in-memory reader.
    String registerCommand = "register testRegisterScripts.py using jython as pig\n";
    ByteArrayInputStream commandBytes = new ByteArrayInputStream(registerCommand.getBytes());
    BufferedReader commandReader = new BufferedReader(new InputStreamReader(commandBytes));

    Grunt grunt = new Grunt(commandReader, context);
    grunt.exec();

    assertTrue(context.getFuncSpecFromAlias("pig.square") != null);
}
Example #13
Source File: TestHiveColumnarLoader.java From spork with Apache License 2.0 | 6 votes |
/**
 * Loads the simple data file through HiveColumnarLoader without any
 * projection and checks that every row has three fields, that the first
 * field is a chararray, and that the row count matches simpleRowCount.
 */
@Test
public void testReadingSingleFileNoProjections() throws IOException {
    String funcSpecString =
            "org.apache.pig.piggybank.storage.HiveColumnarLoader('f1 string,f2 string,f3 string')";
    String singlePartitionedFile = simpleDataFile.getAbsolutePath();

    PigServer server = new PigServer(ExecType.LOCAL);
    server.setBatchOn();
    server.registerFunction("org.apache.pig.piggybank.storage.HiveColumnarLoader",
            new FuncSpec(funcSpecString));
    server.registerQuery("a = LOAD '" + Util.encodeEscape(singlePartitionedFile)
            + "' using " + funcSpecString + ";");

    Iterator<Tuple> result = server.openIterator("a");

    int count = 0;
    // The loop ends when next() returns null; hasNext() is not consulted.
    for (Tuple row = result.next(); row != null; row = result.next()) {
        assertEquals(3, row.size());
        assertEquals(DataType.CHARARRAY, row.getType(0));
        count++;
    }

    Assert.assertEquals(simpleRowCount, count);
}
Example #14
Source File: TestBlackAndWhitelistValidator.java From spork with Apache License 2.0 | 6 votes |
/**
 * Tests the blacklist filter: "set" is blacklisted, so registering a script
 * that starts with a set statement must fail with a
 * {@link FrontendException}.
 *
 * @throws Exception
 */
@Test
public void testBlacklist() throws Exception {
    try {
        ctx.getProperties().setProperty(PigConfiguration.PIG_BLACKLIST, "set");
        PigServer pigServer = new PigServer(ctx);

        Data data = resetData(pigServer);
        data.set("foo",
                tuple("a", 1, "b"),
                tuple("b", 2, "c"),
                tuple("c", 3, "d"));

        StringBuilder script = new StringBuilder();
        script.append("set io.sort.mb 1000;");
        script.append("A = LOAD 'foo' USING mock.Storage() AS (f1:chararray,f2:int,f3:chararray);");
        script.append("B = order A by f1,f2,f3 DESC;");
        script.append("STORE B INTO 'bar' USING mock.Storage();");

        pigServer.registerScript(IOUtils.toInputStream(script));

        // Reaching this line means the blacklisted command was accepted.
        fail();
    } catch (Exception e) {
        Util.assertExceptionAndMessage(FrontendException.class, e,
                "SET command is not permitted. ");
    }
}
Example #15
Source File: TestXMLLoader.java From spork with Apache License 2.0 | 6 votes |
public void testShouldReturn0TupleCountIfEmptyFileIsPassed() throws Exception { // modify the data content to avoid end tag for </ignoreProperty> ArrayList<String[]> testData = new ArrayList<String[]>(); String filename = TestHelper.createTempFile(testData, ""); PigServer pig = new PigServer(LOCAL); filename = filename.replace("\\", "\\\\"); String query = "A = LOAD '" + filename + "' USING org.apache.pig.piggybank.storage.XMLLoader('ignoreProperty') as (doc:chararray);"; pig.registerQuery(query); Iterator<?> it = pig.openIterator("A"); int tupleCount = 0; while (it.hasNext()) { Tuple tuple = (Tuple) it.next(); if (tuple == null) break; else { if (tuple.size() > 0) { tupleCount++; } } } assertEquals(0, tupleCount); }
Example #16
Source File: TestBuiltInBagToTupleOrString.java From spork with Apache License 2.0 | 6 votes |
/**
 * Runs BagToString over a bag in which one element is a tuple wrapping
 * another bag, and checks the string stored for that row.
 */
@Test
public void testPigScriptNestedTupleForBagToStringUDF() throws Exception {
    PigServer pigServer = new PigServer(ExecType.LOCAL);
    Data mockData = resetData(pigServer);

    // Third element of the outer bag: a tuple holding the bag {(c),(d)}.
    Tuple nestedTuple = tuple(bag(tuple("c"), tuple("d")));
    Tuple inputRow = tuple(bag(tuple("a"), tuple("b"), nestedTuple, tuple("e")));
    mockData.set("foo", "myBag:bag{t:(l:chararray)}", inputRow);

    pigServer.registerQuery("A = LOAD 'foo' USING mock.Storage();");
    pigServer.registerQuery("B = FOREACH A GENERATE BagToString(myBag) as myBag;");
    pigServer.registerQuery("STORE B INTO 'bar' USING mock.Storage();");

    List<Tuple> stored = mockData.get("bar");
    assertEquals(tuple("a_b_{(c),(d)}_e"), stored.get(0));
}
Example #17
Source File: TestSummary.java From parquet-mr with Apache License 2.0 | 6 votes |
/**
 * Runs the Summary UDF over 1002 generated rows grouped together, prints the
 * raw stored value, parses it as JSON into a TupleSummaryData and prints
 * that as well.
 */
@Test
public void testPigScript() throws Exception {
    PigServer pigServer = new PigServer(ExecType.LOCAL);
    Data data = Storage.resetData(pigServer);

    // Build the input: the second field differs per row, the rest are constant.
    List<Tuple> rows = new ArrayList<Tuple>();
    int rowIndex = 0;
    while (rowIndex < 1002) {
        rows.add(t("a", "b" + rowIndex, 1L, b(t("a", m("foo", "bar")))));
        rowIndex++;
    }
    data.set("in", "a:chararray, a1:chararray, b:int, c:{t:(a2:chararray, b2:[])}", rows);

    pigServer.registerQuery("A = LOAD 'in' USING mock.Storage();");
    pigServer.registerQuery(
            "B = FOREACH (GROUP A ALL) GENERATE " + Summary.class.getName() + "(A);");
    pigServer.registerQuery("STORE B INTO 'out' USING mock.Storage();");

    System.out.println(data.get("out").get(0).get(0));

    String summaryJson = (String) data.get("out").get(0).get(0);
    TupleSummaryData s = SummaryData.fromJSON(summaryJson, TupleSummaryData.class);
    System.out.println(s);
}
Example #18
Source File: TestHadoopJobHistoryLoader.java From spork with Apache License 2.0 | 6 votes |
/**
 * Loads INPUT_DIR with HadoopJobHistoryLoader and checks selected entries
 * of the job map (first field) of the first returned tuple.
 */
@SuppressWarnings("unchecked")
@Test
public void testHadoopJHLoader() throws Exception {
    PigServer pig = new PigServer(ExecType.LOCAL);
    pig.registerQuery("a = load '" + INPUT_DIR
            + "' using org.apache.pig.piggybank.storage.HadoopJobHistoryLoader() "
            + "as (j:map[], m:map[], r:map[]);");

    Iterator<Tuple> iter = pig.openIterator("a");
    assertTrue(iter.hasNext());

    Tuple firstRow = iter.next();
    // Field 0 is the "j" map declared in the load schema.
    Map<String, Object> job = (Map<String, Object>) firstRow.get(0);

    assertEquals("3eb62180-5473-4301-aa22-467bd685d466", (String) job.get("PIG_SCRIPT_ID"));
    assertEquals("job_201004271216_9998", (String) job.get("JOBID"));
    assertEquals("job_201004271216_9995", (String) job.get("PIG_JOB_PARENTS"));
    assertEquals("0.8.0-dev", (String) job.get("PIG_VERSION"));
    assertEquals("0.20.2", (String) job.get("HADOOP_VERSION"));
    assertEquals("d", (String) job.get("PIG_JOB_ALIAS"));
    assertEquals("PigLatin:Test.pig", job.get("JOBNAME"));
    assertEquals("ORDER_BY", (String) job.get("PIG_JOB_FEATURE"));
    assertEquals("1", (String) job.get("TOTAL_MAPS"));
    assertEquals("1", (String) job.get("TOTAL_REDUCES"));
}
Example #19
Source File: TestBuiltin.java From spork with Apache License 2.0 | 5 votes |
/** * End-to-end testing of the CONCAT() builtin function for vararg parameters * @throws Exception */ @Test public void testComplexMultiCONCAT() throws Exception { String input = "vararg_concat_test_jira_3444.txt"; Util.createLocalInputFile(input, new String[]{"dummy"}); PigServer pigServer = new PigServer(ExecType.LOCAL); pigServer.registerQuery("A = LOAD '"+input+"' as (x:chararray);"); pigServer.registerQuery("B = foreach A generate CONCAT('a', CONCAT('b',CONCAT('c','d')));"); Iterator<Tuple> its = pigServer.openIterator("B"); Tuple t = its.next(); assertEquals("abcd",t.get(0)); pigServer.registerQuery("B = foreach A generate CONCAT('a', 'b', 'c', 'd');"); its = pigServer.openIterator("B"); t = its.next(); assertEquals("abcd",t.get(0)); pigServer.registerQuery("B = foreach A generate CONCAT('a', CONCAT('b','c'), 'd');"); its = pigServer.openIterator("B"); t = its.next(); assertEquals("abcd",t.get(0)); // Concat on a null value returns null pigServer.registerQuery("B = foreach A generate CONCAT('a', CONCAT('b',Null), 'd');"); its = pigServer.openIterator("B"); t = its.next(); assertNull(t.get(0)); }
Example #20
Source File: TestExampleGenerator.java From spork with Apache License 2.0 | 5 votes |
/**
 * Requests generated examples for a foreach that adds two int columns and
 * checks that a result map is returned.
 */
@Test
public void testForeach() throws ExecException, IOException {
    PigServer server = new PigServer(pigContext);

    String loadStmt = "A = load " + A + " using PigStorage() as (x : int, y : int);";
    server.registerQuery(loadStmt);
    server.registerQuery("B = foreach A generate x + y as sum;");

    Map<Operator, DataBag> examples = server.getExamples("B");
    assertNotNull(examples);
}
Example #21
Source File: TestFRJoin2.java From spork with Apache License 2.0 | 5 votes |
@Test public void testSoftLinkDoesNotCreateUnnecessaryConcatJob() throws Exception { PigServer pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties()); pigServer.setBatchOn(); pigServer.getPigContext().getProperties().setProperty( MRCompiler.FILE_CONCATENATION_THRESHOLD, String.valueOf(FILE_MERGE_THRESHOLD)); pigServer.getPigContext().getProperties().setProperty("pig.noSplitCombination", "false"); String query = "A = LOAD '" + INPUT_FILE + "' as (x:int,y:int);" + "B = group A all;" + "C = LOAD '" + INPUT_FILE + "' as (x:int,y:int);" + "D = group C by x;" + "E = group D all;" + "F = FOREACH E generate B.$0;" + "Z = LOAD '" + INPUT_FILE + "' as (x:int,y:int);" + "Y = FOREACH E generate F.$0;" + "STORE Y into '/tmp/output2';"; MROperPlan mrplan = Util.buildMRPlanWithOptimizer(Util.buildPp(pigServer, query),pigServer.getPigContext()); // look for concat job for(MapReduceOper mrOp: mrplan) { //concatjob == map-plan load-store && reudce-plan empty if( mrOp.mapPlan.size() == 2 && mrOp.reducePlan.isEmpty() ) { fail("Somehow concatjob was created even though there is no large or multiple inputs."); } } }
Example #22
Source File: TestBlackAndWhitelistValidator.java From spork with Apache License 2.0 | 5 votes |
/**
 * Blacklists "rm" and expects {@code PigServer.deleteFile} to be rejected
 * with a {@link FrontendException}.
 */
@Test(expected = FrontendException.class)
public void testBlacklistRemoveWithPigServer() throws Exception {
    ctx.getProperties().setProperty(PigConfiguration.PIG_BLACKLIST, "rm");

    PigServer server = new PigServer(ctx);
    server.deleteFile("foo");
}
Example #23
Source File: TestExampleGenerator.java From spork with Apache License 2.0 | 5 votes |
/**
 * Requests generated examples for a filter with two ANDed conditions and
 * checks that a result map is returned.
 */
@Test
public void testFilter2() throws Exception {
    PigServer server = new PigServer(pigContext);

    String loadStmt = "A = load " + A + " using PigStorage() as (x : int, y : int);\n";
    server.registerQuery(loadStmt);

    String filterStmt = "B = filter A by x > 5 AND y < 6;";
    server.registerQuery(filterStmt);

    Map<Operator, DataBag> examples = server.getExamples("B");
    assertNotNull(examples);
}
Example #24
Source File: TestTypedMap.java From spork with Apache License 2.0 | 5 votes |
/**
 * Loads a column declared as {@code map[int]} and checks that looking up
 * {@code 'key'} yields a single int column holding 1 and then 2.
 *
 * @throws IOException
 * @throws ParserException
 */
@Test
public void testSimpleMapKeyLookup() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL, new Properties());
    String[] input = {
        "[key#1,key2#2]",
        "[key#2]",
    };

    Util.createInputFile(FileSystem.getLocal(new Configuration()),
            tmpDirName + "/testSimpleMapKeyLookup", input);

    String query = "a = load '" + tmpDirName + "/testSimpleMapKeyLookup' as (m:map[int]);"
            + "b = foreach a generate m#'key';";
    Util.registerMultiLineQuery(pig, query);

    Schema sch = pig.dumpSchema("b");
    // JUnit's argument order is (message, expected, actual); the expected
    // literal goes first so a failure report labels the values correctly.
    assertEquals("Checking expected schema", "{int}", sch.toString());

    Iterator<Tuple> it = pig.openIterator("b");

    // assertEquals is used instead of assertTrue(x == y) so a failure
    // shows the value that was actually read.
    Assert.assertTrue(it.hasNext());
    Tuple t = it.next();
    Assert.assertEquals(1, t.size());
    Assert.assertEquals(Integer.valueOf(1), t.get(0));

    Assert.assertTrue(it.hasNext());
    t = it.next();
    Assert.assertEquals(1, t.size());
    Assert.assertEquals(Integer.valueOf(2), t.get(0));

    Assert.assertFalse(it.hasNext());
}
Example #25
Source File: TestPigServer.java From spork with Apache License 2.0 | 5 votes |
/**
 * Checks that the schema reported for a filtered relation is the same as
 * the schema declared on the load it filters.
 */
@Test
public void testDescribeFilter() throws Throwable {
    PigServer pigServer = new PigServer(cluster.getExecType(), properties);

    pigServer.registerQuery(
            "a = load 'a' as (field1: int, field2: float, field3: chararray );");
    pigServer.registerQuery("b = filter a by field1 > 10;");

    Schema actual = pigServer.dumpSchema("b");
    Schema expected =
            Utils.getSchemaFromString("field1: int,field2: float,field3: chararray");

    assertEquals(expected, actual);
}
Example #26
Source File: TestUnionOnSchema.java From spork with Apache License 2.0 | 5 votes |
/**
 * Tests UNION ONSCHEMA where one input relation is produced with UDF calls
 * (CONCAT and UDFTupleNullSchema), checking both the merged schema and the
 * resulting rows.
 *
 * @throws IOException
 * @throws ParserException
 */
@Test
public void testUnionOnSchemaInputUdfs() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL);
    String query =
            " l1 = load '" + INP_FILE_2NUMS + "' as (i : int, j : chararray);"
            + "l2 = load '" + INP_FILE_2NUMS + "' as (i : int, j : chararray);"
            + "f1 = foreach l1 generate i, CONCAT(j,j) as cj, "
            + "org.apache.pig.test.TestUnionOnSchema\\$UDFTupleNullSchema(i,j) as uo;"
            + "u = union onschema f1, l2;";
    Util.registerMultiLineQuery(pig, query);

    Schema sch = pig.dumpSchema("u");
    String expectedSch = "{i: int,cj: chararray,uo: (),j: chararray}";
    // assertEquals reports both strings on failure, which
    // assertTrue(expected.equals(actual)) does not.
    Assert.assertEquals(expectedSch, sch.toString());

    Iterator<Tuple> it = pig.openIterator("u");
    List<Tuple> expectedRes = Util.getTuplesFromConstantTupleStrings(
            new String[] {
                "(1,null,null,'2')",
                "(5,null,null,'3')",
                "(1,'22',(1,'2'),null)",
                "(5,'33',(5,'3'),null)"
            });
    Util.checkQueryOutputsAfterSort(it, expectedRes);
}
Example #27
Source File: TestExampleGenerator.java From spork with Apache License 2.0 | 5 votes |
/**
 * Requests generated examples for a foreach with a nested block (filter
 * followed by DISTINCT) and checks that a result map is returned.
 */
@Test
public void testForEachNestedBlock2() throws Exception {
    PigServer server = new PigServer(pigContext);

    String loadStmt = "A = load " + A.toString() + " as (x:int, y:int);";
    server.registerQuery(loadStmt);
    server.registerQuery("B = group A by x;");
    server.registerQuery(
            "C = foreach B { FA = filter A by y == 6; DA = DISTINCT FA; generate group, COUNT(DA);};");

    Map<Operator, DataBag> examples = server.getExamples("C");
    assertNotNull(examples);
}
Example #28
Source File: TestHiveColumnarStorage.java From spork with Apache License 2.0 | 5 votes |
@Test public void testShouldStoreBagAsHiveArray() throws IOException, InterruptedException, SerDeException { String loadString = "org.apache.pig.piggybank.storage.HiveColumnarLoader('f1 string,f2 string,f3 string')"; String storeString = "org.apache.pig.piggybank.storage.HiveColumnarStorage()"; String singlePartitionedFile = simpleDataFile.getAbsolutePath(); File outputFile = new File("testhiveColumnarStore"); PigServer server = new PigServer(ExecType.LOCAL); server.setBatchOn(); server.registerQuery("a = LOAD '" + Util.encodeEscape(singlePartitionedFile) + "' using " + loadString + ";"); server.registerQuery("b = FOREACH a GENERATE f1, TOBAG(f2,f3);"); //when server.store("b", outputFile.getAbsolutePath(), storeString); //then Path outputPath = new Path(outputFile.getAbsolutePath()+"/part-m-00000.rc"); ColumnarStruct struct = readRow(outputFile, outputPath, "f1 string,f2 array<string>"); assertEquals(2, struct.getFieldsAsList().size()); Object o = struct.getField(0); assertEquals(LazyString.class, o.getClass()); o = struct.getField(1); assertEquals(LazyArray.class, o.getClass()); LazyArray arr = (LazyArray)o; List<Object> values = arr.getList(); for(Object value : values) { assertEquals(LazyString.class, value.getClass()); String valueStr =((LazyString) value).getWritableObject().toString(); assertEquals("Sample value", valueStr); } }
Example #29
Source File: TestRank1.java From spork with Apache License 2.0 | 5 votes |
/**
 * Creates a local-mode PigServer with fresh mock storage and populates the
 * two input relations "test01" and "test02" used by the tests.
 */
@Before
public void setUp() throws Exception {
    pigServer = new PigServer(ExecType.LOCAL);
    data = resetData(pigServer);

    // test01: three-field rows; several rows are exact duplicates.
    data.set("test01",
            tuple("A", 1, "N"),
            tuple("B", 2, "N"),
            tuple("C", 3, "M"),
            tuple("D", 4, "P"),
            tuple("E", 4, "Q"),
            tuple("E", 4, "Q"),
            tuple("F", 8, "Q"),
            tuple("F", 7, "Q"),
            tuple("F", 8, "T"),
            tuple("F", 8, "Q"),
            tuple("G", 10, "V"));

    // test02: eight-field rows, one per person.
    data.set("test02",
            tuple("Michael", "Blythe", 1, 1, 1, 1, 4557045.046, 98027),
            tuple("Linda", "Mitchell", 2, 1, 1, 1, 5200475.231, 98027),
            tuple("Jillian", "Carson", 3, 1, 1, 1, 3857163.633, 98027),
            tuple("Garrett", "Vargas", 4, 1, 1, 1, 1764938.986, 98027),
            tuple("Tsvi", "Reiter", 5, 1, 1, 2, 2811012.715, 98027),
            tuple("Shu", "Ito", 6, 6, 2, 2, 3018725.486, 98055),
            tuple("Jose", "Saraiva", 7, 6, 2, 2, 3189356.247, 98055),
            tuple("David", "Campbell", 8, 6, 2, 3, 3587378.426, 98055),
            tuple("Tete", "Mensa-Annan", 9, 6, 2, 3, 1931620.184, 98055),
            tuple("Lynn", "Tsoflias", 10, 6, 2, 3, 1758385.926, 98055),
            tuple("Rachel", "Valdez", 11, 6, 2, 4, 2241204.042, 98055),
            tuple("Jae", "Pak", 12, 6, 2, 4, 5015682.375, 98055),
            tuple("Ranjit", "Varkey Chudukatil", 13, 6, 2, 4, 3827950.238, 98055));
}
Example #30
Source File: BoundScript.java From spork with Apache License 2.0 | 5 votes |
/**
 * Explains this pipeline, printing the result to stdout. Only the first
 * bound query is registered and explained; when there is no bound query an
 * info message is logged and nothing is printed.
 *
 * @throws IOException if explain fails.
 */
public void explain() throws IOException {
    if (!queries.isEmpty()) {
        PigServer explainServer = new PigServer(scriptContext.getPigContext(), false);
        registerQuery(explainServer, queries.get(0));
        explainServer.explain(null, System.out);
    } else {
        LOG.info("No bound query to explain");
    }
}