Java Code Examples for org.apache.pig.PigServer
The following examples show how to use
org.apache.pig.PigServer.
These examples are extracted from open source projects.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source Project: spork Author: sigmoidanalytics File: TestSequenceFileLoader.java License: Apache License 2.0 | 6 votes |
/**
 * Creates a local-mode {@code PigServer} and writes every element of {@code DATA}
 * into a temporary SequenceFile, keyed by the element's index.
 */
@Override
public void setUp() throws Exception {
    pigServer = new PigServer(LOCAL);

    File tmpFile = File.createTempFile("test", ".txt");
    tmpFileName = tmpFile.getAbsolutePath();
    System.err.println("fileName: " + tmpFileName);

    Path seqFilePath = new Path("file:///" + tmpFileName);
    JobConf jobConf = new JobConf();
    FileSystem fileSystem = FileSystem.get(seqFilePath.toUri(), jobConf);

    // The same key/value holders are reused for every appended record.
    IntWritable indexKey = new IntWritable();
    Text textValue = new Text();
    SequenceFile.Writer seqWriter = null;
    try {
        seqWriter = SequenceFile.createWriter(
                fileSystem, jobConf, seqFilePath, indexKey.getClass(), textValue.getClass());
        int index = 0;
        while (index < DATA.length) {
            indexKey.set(index);
            textValue.set(DATA[index]);
            seqWriter.append(indexKey, textValue);
            index++;
        }
    } finally {
        // Runs even when createWriter or append throws.
        IOUtils.closeStream(seqWriter);
    }
}
Example #2
Source Project: spork Author: sigmoidanalytics File: TestDefaultDateTimeZone.java License: Apache License 2.0 | 6 votes |
@Test public void testDST() throws Exception { String defaultDTZ = "America/New_York"; // a timezone that uses DST Properties config = new Properties(); config.setProperty("pig.datetime.default.tz", defaultDTZ); PigServer pig = new PigServer(Util.getLocalTestMode(), config); pig.registerQuery("a = load '" + Util.encodeEscape(Util.generateURI(tmpFile.toString(), pig.getPigContext())) + "' as (test:datetime);"); pig.registerQuery("b = filter a by test > ToDate('2014-01-01T00:00:00.000');"); pig.registerQuery("c = foreach b generate ToString(test, 'Z') as tz;"); Iterator<Tuple> actualItr = pig.openIterator("c"); Tuple est = actualItr.next(); assertEquals(Util.buildTuple("-0500"), est); Tuple edt = actualItr.next(); assertEquals(Util.buildTuple("-0400"), edt); }
Example #3
Source Project: spork Author: sigmoidanalytics File: TestBlackAndWhitelistValidator.java License: Apache License 2.0 | 6 votes |
@Test public void testPreprocessorCommands2() throws Exception { try { ctx.getProperties().setProperty(PigConfiguration.PIG_BLACKLIST, "dEfaUlt"); PigServer pigServer = new PigServer(ctx); Data data = resetData(pigServer); data.set("foo", tuple("a", 1, "b"), tuple("b", 2, "c"), tuple("c", 3, "d")); StringBuilder script = new StringBuilder(); script.append("set io.sort.mb 1000;") .append("%Default input 'foo';") .append("A = LOAD '$input' USING mock.Storage() AS (f1:chararray,f2:int,f3:chararray);") .append("B = order A by f1,f2,f3 DESC;") .append("STORE B INTO 'bar' USING mock.Storage();"); pigServer.registerScript(IOUtils.toInputStream(script)); fail(); } catch (Exception e) { // We check RuntimeException here and not FrontendException as Pig wraps the error from Preprocessor // within RuntimeException Util.assertExceptionAndMessage(RuntimeException.class, e, "DEFAULT command is not permitted. "); } }
Example #4
Source Project: spork Author: sigmoidanalytics File: TestGrunt.java License: Apache License 2.0 | 6 votes |
/**
 * Writes a small Python script to a local file, registers it through Grunt
 * "using jython as pig", and checks that the alias "pig.square" resolves to a
 * function spec.
 */
@Test
public void testRegisterScripts() throws Throwable {
    String[] pythonLines = {
        "#!/usr/bin/python",
        "@outputSchema(\"x:{t:(num:long)}\")",
        "def square(number):",
        "\treturn (number * number)"
    };
    Util.createLocalInputFile("testRegisterScripts.py", pythonLines);

    PigServer server = new PigServer(cluster.getExecType(), cluster.getProperties());
    PigContext context = server.getPigContext();

    String registerCommand = "register testRegisterScripts.py using jython as pig\n";
    BufferedReader commandReader = new BufferedReader(
            new InputStreamReader(new ByteArrayInputStream(registerCommand.getBytes())));

    Grunt grunt = new Grunt(commandReader, context);
    grunt.exec();

    assertTrue(context.getFuncSpecFromAlias("pig.square") != null);
}
Example #5
Source Project: spork Author: sigmoidanalytics File: TestBlackAndWhitelistValidator.java License: Apache License 2.0 | 6 votes |
/**
 * Tests the blacklist filter: "set" is blacklisted, and registering a script
 * that starts with a {@code set} statement is expected to fail with a
 * {@link FrontendException} carrying "SET command is not permitted. ".
 *
 * @throws Exception
 */
@Test
public void testBlacklist() throws Exception {
    try {
        ctx.getProperties().setProperty(PigConfiguration.PIG_BLACKLIST, "set");
        PigServer server = new PigServer(ctx);

        Data mockData = resetData(server);
        mockData.set("foo", tuple("a", 1, "b"), tuple("b", 2, "c"), tuple("c", 3, "d"));

        StringBuilder pigScript = new StringBuilder();
        pigScript.append("set io.sort.mb 1000;");
        pigScript.append("A = LOAD 'foo' USING mock.Storage() AS (f1:chararray,f2:int,f3:chararray);");
        pigScript.append("B = order A by f1,f2,f3 DESC;");
        pigScript.append("STORE B INTO 'bar' USING mock.Storage();");

        server.registerScript(IOUtils.toInputStream(pigScript));
        fail();
    } catch (Exception e) {
        Util.assertExceptionAndMessage(FrontendException.class, e, "SET command is not permitted. ");
    }
}
Example #6
Source Project: spork Author: sigmoidanalytics File: TestXMLLoader.java License: Apache License 2.0 | 6 votes |
public void testShouldReturn0TupleCountIfEmptyFileIsPassed() throws Exception { // modify the data content to avoid end tag for </ignoreProperty> ArrayList<String[]> testData = new ArrayList<String[]>(); String filename = TestHelper.createTempFile(testData, ""); PigServer pig = new PigServer(LOCAL); filename = filename.replace("\\", "\\\\"); String query = "A = LOAD '" + filename + "' USING org.apache.pig.piggybank.storage.XMLLoader('ignoreProperty') as (doc:chararray);"; pig.registerQuery(query); Iterator<?> it = pig.openIterator("A"); int tupleCount = 0; while (it.hasNext()) { Tuple tuple = (Tuple) it.next(); if (tuple == null) break; else { if (tuple.size() > 0) { tupleCount++; } } } assertEquals(0, tupleCount); }
Example #7
Source Project: parquet-mr Author: apache File: TestSummary.java License: Apache License 2.0 | 6 votes |
/**
 * Feeds 1002 mock tuples through the {@code Summary} function in a local Pig
 * script, prints the stored result, and parses it back into a
 * {@code TupleSummaryData} which is printed as well.
 */
@Test
public void testPigScript() throws Exception {
    PigServer pigServer = new PigServer(ExecType.LOCAL);
    Data data = Storage.resetData(pigServer);

    List<Tuple> inputTuples = new ArrayList<Tuple>();
    int row = 0;
    while (row < 1002) {
        inputTuples.add(t("a", "b" + row, 1L, b(t("a", m("foo", "bar")))));
        row++;
    }
    data.set("in", "a:chararray, a1:chararray, b:int, c:{t:(a2:chararray, b2:[])}", inputTuples);

    String summaryUdf = Summary.class.getName();
    pigServer.registerQuery("A = LOAD 'in' USING mock.Storage();");
    pigServer.registerQuery("B = FOREACH (GROUP A ALL) GENERATE " + summaryUdf + "(A);");
    pigServer.registerQuery("STORE B INTO 'out' USING mock.Storage();");

    System.out.println(data.get("out").get(0).get(0));
    TupleSummaryData s =
            SummaryData.fromJSON((String) data.get("out").get(0).get(0), TupleSummaryData.class);
    System.out.println(s);
}
Example #8
Source Project: spork Author: sigmoidanalytics File: TestHadoopJobHistoryLoader.java License: Apache License 2.0 | 6 votes |
/**
 * Loads {@code INPUT_DIR} with HadoopJobHistoryLoader and checks selected
 * entries of the job map held in the first field of the first tuple.
 */
@SuppressWarnings("unchecked")
@Test
public void testHadoopJHLoader() throws Exception {
    PigServer server = new PigServer(ExecType.LOCAL);
    server.registerQuery(
            "a = load '" + INPUT_DIR
                    + "' using org.apache.pig.piggybank.storage.HadoopJobHistoryLoader() "
                    + "as (j:map[], m:map[], r:map[]);");

    Iterator<Tuple> rows = server.openIterator("a");
    assertTrue(rows.hasNext());

    Tuple firstRow = rows.next();
    Map<String, Object> jobInfo = (Map<String, Object>) firstRow.get(0);

    assertEquals("3eb62180-5473-4301-aa22-467bd685d466", (String) jobInfo.get("PIG_SCRIPT_ID"));
    assertEquals("job_201004271216_9998", (String) jobInfo.get("JOBID"));
    assertEquals("job_201004271216_9995", (String) jobInfo.get("PIG_JOB_PARENTS"));
    assertEquals("0.8.0-dev", (String) jobInfo.get("PIG_VERSION"));
    assertEquals("0.20.2", (String) jobInfo.get("HADOOP_VERSION"));
    assertEquals("d", (String) jobInfo.get("PIG_JOB_ALIAS"));
    assertEquals("PigLatin:Test.pig", jobInfo.get("JOBNAME"));
    assertEquals("ORDER_BY", (String) jobInfo.get("PIG_JOB_FEATURE"));
    assertEquals("1", (String) jobInfo.get("TOTAL_MAPS"));
    assertEquals("1", (String) jobInfo.get("TOTAL_REDUCES"));
}
Example #9
Source Project: spork Author: sigmoidanalytics File: TestHiveColumnarLoader.java License: Apache License 2.0 | 6 votes |
/**
 * Loads a single data file with HiveColumnarLoader (three string columns, no
 * projection) and checks that every tuple has three chararray-typed fields and
 * that the number of tuples equals {@code simpleRowCount}.
 *
 * @throws IOException declared by the Pig calls used here
 */
@Test
public void testReadingSingleFileNoProjections() throws IOException {
    String funcSpecString =
            "org.apache.pig.piggybank.storage.HiveColumnarLoader('f1 string,f2 string,f3 string')";
    String singlePartitionedFile = simpleDataFile.getAbsolutePath();

    PigServer server = new PigServer(ExecType.LOCAL);
    server.setBatchOn();
    server.registerFunction(
            "org.apache.pig.piggybank.storage.HiveColumnarLoader", new FuncSpec(funcSpecString));
    server.registerQuery(
            "a = LOAD '" + Util.encodeEscape(singlePartitionedFile) + "' using " + funcSpecString + ";");

    Iterator<Tuple> result = server.openIterator("a");
    int count = 0;
    // Ask hasNext() before next(): the java.util.Iterator contract does not promise a
    // null return at exhaustion. The null check is kept so a null tuple still ends
    // the loop, as it did before.
    while (result.hasNext()) {
        Tuple t = result.next();
        if (t == null) {
            break;
        }
        assertEquals(3, t.size());
        assertEquals(DataType.CHARARRAY, t.getType(0));
        count++;
    }

    Assert.assertEquals(simpleRowCount, count);
}
Example #10
Source Project: spork Author: sigmoidanalytics File: TestBuiltInBagToTupleOrString.java License: Apache License 2.0 | 6 votes |
@Test public void testPigScriptEmptyBagForBagToStringUDF() throws Exception { PigServer pigServer = new PigServer(ExecType.LOCAL); Data data = resetData(pigServer); data.set("foo", "myBag:bag{t:(l:chararray)}", tuple(bag())); pigServer.registerQuery("A = LOAD 'foo' USING mock.Storage();"); pigServer.registerQuery("B = FOREACH A GENERATE BagToString(myBag) as myBag;"); pigServer.registerQuery("STORE B INTO 'bar' USING mock.Storage();"); List<Tuple> out = data.get("bar"); // empty bag will generate empty string assertEquals(tuple(""), out.get(0)); }
Example #11
Source Project: spork Author: sigmoidanalytics File: TestProjectStarExpander.java License: Apache License 2.0 | 6 votes |
/**
 * Tests projecting {@code *} more than once in the same foreach: once with
 * aliases (aa, bb, cc) and once without. Checks both the dumped schema and the
 * sorted query output.
 *
 * @throws IOException
 * @throws ParserException
 */
@Test
public void testProjectStarMulti() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL);

    String loadStatement = " l1 = load '" + INP_FILE_5FIELDS + "' as (a : int, b : int, c : int);";
    String foreachStatement = "f = foreach l1 generate * as (aa, bb, cc), *;";
    Util.registerMultiLineQuery(pig, loadStatement + foreachStatement);

    Schema expectedSch = Utils.getSchemaFromString(
            "aa : int, bb : int, cc : int, a : int, b : int, c : int");
    Schema actualSch = pig.dumpSchema("f");
    assertEquals("Checking expected schema", expectedSch, actualSch);

    String[] expectedRows = {
        "(10,20,30,10,20,30)",
        "(11,21,31,11,21,31)",
    };
    List<Tuple> expectedRes = Util.getTuplesFromConstantTupleStrings(expectedRows);

    Iterator<Tuple> actualRows = pig.openIterator("f");
    Util.checkQueryOutputsAfterSort(actualRows, expectedRes);
}
Example #12
Source Project: spork Author: sigmoidanalytics File: TestBlackAndWhitelistValidator.java License: Apache License 2.0 | 6 votes |
/**
 * Blacklists "Define" and expects registering a script that contains a
 * {@code DEFINE} statement to fail with a FrontendException whose message is
 * "Error during parsing. DEFINE command is not permitted. ".
 */
@Test
public void testPreprocessorCommand3() throws Exception {
    try {
        ctx.getProperties().setProperty(PigConfiguration.PIG_BLACKLIST, "Define");
        PigServer server = new PigServer(ctx);

        Data mockData = resetData(server);
        mockData.set("foo", tuple("a", 1, "b"), tuple("b", 2, "c"), tuple("c", 3, "d"));

        StringBuilder pigScript = new StringBuilder();
        pigScript.append("set io.sort.mb 1000;");
        pigScript.append("DEFINE UrlDecode InvokeForString('java.net.URLDecoder.decode', 'String String'); ");
        pigScript.append("A = LOAD 'foo' USING mock.Storage() AS (f1:chararray,f2:int,f3:chararray);");
        pigScript.append("B = order A by f1,f2,f3 DESC;");
        pigScript.append("STORE B INTO 'bar' USING mock.Storage();");

        server.registerScript(IOUtils.toInputStream(pigScript));
        fail();
    } catch (Exception e) {
        Util.assertExceptionAndMessage(
                FrontendException.class, e, "Error during parsing. DEFINE command is not permitted. ");
    }
}
Example #13
Source Project: spork Author: sigmoidanalytics File: TestXMLLoader.java License: Apache License 2.0 | 6 votes |
public void testXMLLoaderShouldReturnValidXML() throws Exception { String filename = TestHelper.createTempFile(inlineClosedTags, ""); PigServer pig = new PigServer(LOCAL); filename = filename.replace("\\", "\\\\"); String query = "A = LOAD '" + filename + "' USING org.apache.pig.piggybank.storage.XMLLoader('event') as (doc:chararray);"; pig.registerQuery(query); Iterator<?> it = pig.openIterator("A"); while (it.hasNext()) { Tuple tuple = (Tuple) it.next(); if (tuple == null) break; else { // Test it returns a valid XML DocumentBuilder docBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder(); docBuilder.parse(new ByteArrayInputStream(((String)tuple.get(0)).getBytes())); } } }
Example #14
Source Project: spork Author: sigmoidanalytics File: TestCombinedLogLoader.java License: Apache License 2.0 | 6 votes |
/**
 * Loads a temp file built from {@code data} with CombinedLogLoader, examines
 * every returned tuple against {@code EXPECTED}, and checks that the number of
 * tuples equals {@code data.size()}.
 */
@Test
public void testLoadFromPigServer() throws Exception {
    String filename = TestHelper.createTempFile(data, " ");
    PigServer pig = new PigServer(ExecType.LOCAL);
    // Double every backslash before embedding the path in the query string.
    filename = filename.replace("\\", "\\\\");
    pig.registerQuery(
            "A = LOAD '" + filename
                    + "' USING org.apache.pig.piggybank.storage.apachelog.CombinedLogLoader();");

    int tupleCount = 0;
    Iterator<?> rows = pig.openIterator("A");
    while (rows.hasNext()) {
        Tuple row = (Tuple) rows.next();
        if (row == null) {
            break;
        }
        TestHelper.examineTuple(EXPECTED, row, tupleCount);
        tupleCount++;
    }

    assertEquals(data.size(), tupleCount);
}
Example #15
Source Project: spork Author: sigmoidanalytics File: TestBuiltInBagToTupleOrString.java License: Apache License 2.0 | 6 votes |
/**
 * Applies BagToString to a bag whose third element is a tuple wrapping another
 * bag, and expects the stored result "a_b_{(c),(d)}_e".
 */
@Test
public void testPigScriptNestedTupleForBagToStringUDF() throws Exception {
    PigServer server = new PigServer(ExecType.LOCAL);
    Data mockData = resetData(server);

    Tuple nestedTuple = tuple(bag(tuple("c"), tuple("d")));
    Tuple inputRow = tuple(bag(tuple("a"), tuple("b"), nestedTuple, tuple("e")));
    mockData.set("foo", "myBag:bag{t:(l:chararray)}", inputRow);

    server.registerQuery("A = LOAD 'foo' USING mock.Storage();");
    server.registerQuery("B = FOREACH A GENERATE BagToString(myBag) as myBag;");
    server.registerQuery("STORE B INTO 'bar' USING mock.Storage();");

    List<Tuple> stored = mockData.get("bar");
    assertEquals(tuple("a_b_{(c),(d)}_e"), stored.get(0));
}
Example #16
Source Project: spork Author: sigmoidanalytics File: TestHiveColumnarLoader.java License: Apache License 2.0 | 6 votes |
/**
 * Loads the date-partitioned directory with HiveColumnarLoader restricted to
 * the {@code startingDate:endingDate} range and checks that the number of
 * returned tuples equals {@code datePartitionedRowCount}.
 *
 * @throws IOException declared by the Pig calls used here
 */
@Test
public void testDatePartitionedFiles() throws IOException {
    int count = 0;

    String funcSpecString =
            "org.apache.pig.piggybank.storage.HiveColumnarLoader('f1 string,f2 string,f3 string'"
                    + ", '" + startingDate + ":" + endingDate + "')";
    System.out.println(funcSpecString);

    PigServer server = new PigServer(ExecType.LOCAL);
    server.setBatchOn();
    server.registerFunction(
            "org.apache.pig.piggybank.storage.HiveColumnarLoader", new FuncSpec(funcSpecString));
    server.registerQuery(
            "a = LOAD '" + Util.encodeEscape(datePartitionedDir.getAbsolutePath())
                    + "' using " + funcSpecString + ";");

    Iterator<Tuple> result = server.openIterator("a");
    // Ask hasNext() before next(): the java.util.Iterator contract does not promise a
    // null return at exhaustion. The null check is kept so a null tuple still ends
    // the loop, as it did before.
    while (result.hasNext()) {
        if (result.next() == null) {
            break;
        }
        count++;
    }

    Assert.assertEquals(datePartitionedRowCount, count);
}
Example #17
Source Project: spork Author: sigmoidanalytics File: TestUnionOnSchema.java License: Apache License 2.0 | 6 votes |
/**
 * Tests UNION ONSCHEMA where one input has its two fields cast to int and the
 * other input is the same relation with untyped fields.
 *
 * @throws IOException
 * @throws ParserException
 */
@Test
public void testUnionOnSchemaCastOnByteArray() throws IOException, ParserException {
    PigServer pig = new PigServer(ExecType.LOCAL);

    String loadStatement = " l1 = load '" + INP_FILE_2NUMS + "' as (i, j);";
    String castStatement = " f1 = foreach l1 generate (int)i, (int)j;";
    String unionStatement = "u = union onschema f1, l1;";
    Util.registerMultiLineQuery(pig, loadStatement + castStatement + unionStatement);

    Iterator<Tuple> actualRows = pig.openIterator("u");

    String[] expectedRows = {
        "(1,2)",
        "(5,3)",
        "(1,2)",
        "(5,3)"
    };
    List<Tuple> expectedRes = Util.getTuplesFromConstantTupleStrings(expectedRows);
    Util.checkQueryOutputsAfterSort(actualRows, expectedRes);
}
Example #18
Source Project: spork Author: sigmoidanalytics File: Util.java License: Apache License 2.0 | 6 votes |
/**
 * Copies a local file to another local path by running an "fs -cp" command
 * (prefixed with the output of {@code getMkDirCommandForHadoop2_0}) through a
 * non-interactive GruntParser on a local-mode PigServer. The destination file
 * is registered for deletion on JVM exit.
 *
 * @param fromLocalFileName path of the file to copy
 * @param toLocalFileName   path of the copy
 * @throws IOException if the command fails; a parser ParseException is wrapped
 */
static public void copyFromLocalToLocal(String fromLocalFileName, String toLocalFileName)
        throws IOException {
    if (Util.WINDOWS) {
        // Replace backslashes with forward slashes on Windows.
        fromLocalFileName = fromLocalFileName.replace('\\', '/');
        toLocalFileName = toLocalFileName.replace('\\', '/');
    }

    PigServer localServer = new PigServer(ExecType.LOCAL, new Properties());
    String copyScript = getMkDirCommandForHadoop2_0(toLocalFileName)
            + "fs -cp " + fromLocalFileName + " " + toLocalFileName;

    new File(toLocalFileName).deleteOnExit();

    GruntParser gruntParser = new GruntParser(new StringReader(copyScript), localServer);
    gruntParser.setInteractive(false);
    try {
        gruntParser.parseStopOnError();
    } catch (org.apache.pig.tools.pigscript.parser.ParseException parseFailure) {
        throw new IOException(parseFailure);
    }
}
Example #19
Source Project: yauaa Author: nielsbasjes File: TestParseUserAgent.java License: Apache License 2.0 | 5 votes |
/**
 * Runs the ParseUserAgent UDF over {@code testUserAgent} using mock storage and
 * hands the storage to {@code verifyStorageData} for checking.
 */
@Test
public void testParseUserAgentPigUDF_allFields() throws Exception {
    PigServer server = new PigServer(ExecType.LOCAL);
    Storage.Data mockStorage = resetData(server);
    mockStorage.set("agents", "agent:chararray", tuple(testUserAgent));

    String[] statements = {
        "define ParseUserAgent nl.basjes.parse.useragent.pig.ParseUserAgent();",
        "A = LOAD 'agents' USING mock.Storage();",
        "B = FOREACH A GENERATE ParseUserAgent(agent);",
        "STORE B INTO 'parsedAgents' USING mock.Storage();"
    };
    for (String statement : statements) {
        server.registerQuery(statement);
    }

    verifyStorageData(mockStorage);
}
Example #20
Source Project: spork Author: sigmoidanalytics File: TestGrunt.java License: Apache License 2.0 | 5 votes |
/**
 * Runs a load/group/foreach script through Grunt in which the field name
 * "fast" contains the letters "as". The test has no assertions; it passes when
 * {@code grunt.exec()} does not throw.
 */
@Test
public void testParsingWordWithAsInForeachWithOutBlock() throws Throwable {
    PigServer server = new PigServer(cluster.getExecType(), cluster.getProperties());
    PigContext context = server.getPigContext();

    String script = "a = load 'foo' as (foo, fast); "
            + "b = group a by foo; c = foreach b generate SUM(a.fast);\n";
    BufferedReader scriptReader = new BufferedReader(
            new InputStreamReader(new ByteArrayInputStream(script.getBytes())));

    Grunt grunt = new Grunt(scriptReader, context);
    grunt.exec();
}
Example #21
Source Project: spork Author: sigmoidanalytics File: TestStore.java License: Apache License 2.0 | 5 votes |
/**
 * Creates the PigServer for the test cluster and picks input and output file
 * names under {@code TESTDIR}, each with its own random numeric suffix.
 */
@Before
public void setUp() throws Exception {
    pig = new PigServer(cluster.getExecType(), cluster.getProperties());
    pc = pig.getPigContext();

    long inputSuffix = new Random().nextLong();
    inputFileName = TESTDIR + "/TestStore-" + inputSuffix + ".txt";

    long outputSuffix = new Random().nextLong();
    outputFileName = TESTDIR + "/TestStore-output-" + outputSuffix + ".txt";
}
Example #22
Source Project: spork Author: sigmoidanalytics File: TestExampleGenerator.java License: Apache License 2.0 | 5 votes |
/**
 * Requests example data for a GROUP BY alias and checks that a non-null map is
 * returned.
 */
@Test
public void testGroup() throws Exception {
    PigServer server = new PigServer(pigContext);

    String loadStatement = "A = load " + A.toString() + " as (x, y);";
    server.registerQuery(loadStatement);
    server.registerQuery("B = group A by x;");

    Map<Operator, DataBag> examples = server.getExamples("B");
    assertNotNull(examples);
}
Example #23
Source Project: spork Author: sigmoidanalytics File: TestPigScriptParser.java License: Apache License 2.0 | 5 votes |
@Test public void testDefineUDF() throws Exception { PigServer ps = new PigServer(ExecType.LOCAL); String inputData[] = { "dshfdskfwww.xyz.com/sportsjoadfjdslpdshfdskfwww.xyz.com/sportsjoadfjdsl" , "kas;dka;sd" , "jsjsjwww.xyz.com/sports" , "jsdLSJDcom/sports" , "wwwJxyzMcom/sports" }; File f = Util.createFile(inputData); String[] queryLines = new String[] { // the reason we have 4 backslashes below is we really want to put two backslashes but // since this is to be represented in a Java String, we escape each backslash with one more // backslash - hence 4. In a pig script in a file, this would be // www\\.xyz\\.com "define minelogs org.apache.pig.test.RegexGroupCount('www\\\\.xyz\\\\.com/sports');" , "A = load '" + Util.generateURI(f.getAbsolutePath(), ps.getPigContext()) + "' using PigStorage() as (source : chararray);" , "B = foreach A generate minelogs(source) as sportslogs;" }; for (String line : queryLines) { ps.registerQuery(line); } Iterator<Tuple> it = ps.openIterator("B"); int[] expectedResults = new int[] {2,0,1,0,0}; int i = 0; while (it.hasNext()) { Tuple t = it.next(); assertEquals(expectedResults[i++], t.get(0)); } }
Example #24
Source Project: spork Author: sigmoidanalytics File: TestCompressedFiles.java License: Apache License 2.0 | 5 votes |
/**
 * Cogroups the data loaded from {@code gzFile} with the data loaded from
 * {@code datFile} on column 1, applies DIFF to the grouped columns, and expects
 * an empty result.
 */
@Test
public void testCompressed1() throws Throwable {
    pig = new PigServer(cluster.getExecType(), properties);

    String gzUri = Util.generateURI(gzFile.toString(), pig.getPigContext());
    String datUri = Util.generateURI(datFile.toString(), pig.getPigContext());
    String diffUdf = DIFF.class.getName();

    pig.registerQuery(
            "A = foreach (cogroup (load '" + gzUri + "') by $1, (load '" + datUri
                    + "') by $1) generate flatten( " + diffUdf + "($1.$1,$2.$1)) ;");

    Iterator<Tuple> differences = pig.openIterator("A");
    assertFalse(differences.hasNext());
}
Example #25
Source Project: spork Author: sigmoidanalytics File: TestPigServer.java License: Apache License 2.0 | 5 votes |
@Test // PIG-2059 public void test1() throws Throwable { PigServer pig = new PigServer(cluster.getExecType(), properties); pig.setValidateEachStatement(true); pig.registerQuery("A = load 'x' as (u, v);") ; try { pig.registerQuery("B = foreach A generate $2;") ; fail("Query is supposed to fail."); } catch(FrontendException ex) { String msg = "Out of bound access. " + "Trying to access non-existent column: 2"; Util.checkMessageInException(ex, msg); } }
Example #26
Source Project: spork Author: sigmoidanalytics File: TestPigServer.java License: Apache License 2.0 | 5 votes |
/**
 * Dumps the schema of alias "c", which is never defined, and expects an
 * exception whose message contains "Unable to describe schema for alias c".
 */
@Test
public void testDescribeForeachFail() throws Throwable {
    PigServer server = new PigServer(cluster.getExecType(), properties);
    server.registerQuery("a = load 'a' as (field1: int, field2: float, field3: chararray );");
    server.registerQuery("b = foreach a generate field1 + 10;");

    try {
        server.dumpSchema("c");
        fail("Error expected");
    } catch (Exception e) {
        String message = e.getMessage();
        assertTrue(message.contains("Unable to describe schema for alias c"));
    }
}
Example #27
Source Project: spork Author: sigmoidanalytics File: TestBZip.java License: Apache License 2.0 | 5 votes |
/**
 * Stores the same relation into 'output.bz2' and 'output' within one batch and
 * checks that the first file listed under each location is non-empty.
 */
@Test
public void testBzipStoreInMultiQuery() throws Exception {
    String inputFileName = "input.txt";
    String[] inputData = new String[] {"1\t2\r3\t4"};
    Util.createInputFile(cluster, inputFileName, inputData);

    PigServer pig = new PigServer(cluster.getExecType(), properties);
    pig.setBatchOn();
    pig.registerQuery("a = load '" + inputFileName + "';");
    pig.registerQuery("store a into 'output.bz2';");
    pig.registerQuery("store a into 'output';");
    pig.executeBatch();

    FileSystem fs = FileSystem.get(
            ConfigurationUtil.toConfiguration(pig.getPigContext().getProperties()));

    FileStatus[] plainOutput = fs.listStatus(new Path("output"), Util.getSuccessMarkerPathFilter());
    assertTrue(plainOutput[0].getLen() > 0);

    FileStatus[] bzipOutput = fs.listStatus(new Path("output.bz2"), Util.getSuccessMarkerPathFilter());
    assertTrue(bzipOutput[0].getLen() > 0);
}
Example #28
Source Project: spork Author: sigmoidanalytics File: TestMapSideCogroup.java License: Apache License 2.0 | 5 votes |
/**
 * Merge-cogroups {@code INPUT_FILE1} with {@code EMPTY_FILE} on c1 and checks
 * that exactly three groups come back, each with an empty second bag.
 *
 * @throws Exception if the query cannot be registered or executed
 */
@Test
public void testEmptyDeltaFile() throws Exception {
    PigServer pigServer = new PigServer(cluster.getExecType(), cluster.getProperties());

    pigServer.registerQuery("A = LOAD '" + INPUT_FILE1 + "' using "
            + DummyCollectableLoader.class.getName() + "() as (c1:chararray,c2:int);");
    pigServer.registerQuery("B = LOAD '" + EMPTY_FILE + "' using "
            + DummyIndexableLoader.class.getName() + "() as (c1:chararray,c2:int);");

    DataBag dbMergeCogrp = BagFactory.getInstance().newDefaultBag();

    pigServer.registerQuery("C = cogroup A by c1, B by c1 using 'merge';");
    Iterator<Tuple> iter = pigServer.openIterator("C");
    while (iter.hasNext()) {
        dbMergeCogrp.add(iter.next());
    }

    String[] results = new String[] {
        "(1,{(1,1),(1,2),(1,3)},{})",
        "(2,{(2,1),(2,2),(2,3)},{})",
        "(3,{(3,1),(3,2),(3,3)},{})"
    };

    assertEquals(3, dbMergeCogrp.size());
    Iterator<Tuple> itr = dbMergeCogrp.iterator();
    for (int i = 0; i < 3; i++) {
        // Expected value first, actual second, so a failure message reads correctly.
        assertEquals(results[i], itr.next().toString());
    }
    assertFalse(itr.hasNext());
}
Example #29
Source Project: spork Author: sigmoidanalytics File: TestStreamingUDF.java License: Apache License 2.0 | 5 votes |
/**
 * Registers a streaming_python UDF that returns its datetime argument
 * unchanged and checks that two DateTime tuples come back equal to the inputs.
 */
@Test
public void testPythonUDF__withDateTime() throws Exception {
    pigServerLocal = new PigServer(ExecType.LOCAL);

    String[] pythonScript = {
        "from pig_util import outputSchema",
        "@outputSchema(\'d:datetime\')",
        "def py_func(dt):",
        " return dt"
    };
    Util.createLocalInputFile("pyfile_dt.py", pythonScript);

    Data mockData = resetData(pigServerLocal);
    Tuple firstDate = tf.newTuple(new DateTime());
    Tuple secondDate = tf.newTuple(new DateTime());
    mockData.set("testDateTuples", "d:datetime", firstDate, secondDate);

    pigServerLocal.registerQuery("REGISTER 'pyfile_dt.py' USING streaming_python AS pf;");
    pigServerLocal.registerQuery("A = LOAD 'testDateTuples' USING mock.Storage();");
    pigServerLocal.registerQuery("B = FOREACH A generate pf.py_func(d);");
    pigServerLocal.registerQuery("STORE B INTO 'date_out' USING mock.Storage();");

    List<Tuple> stored = mockData.get("date_out");
    assertEquals(firstDate, stored.get(0));
    assertEquals(secondDate, stored.get(1));
}
Example #30
Source Project: spork Author: sigmoidanalytics File: TestDBStorage.java License: Apache License 2.0 | 5 votes |
public TestDBStorage() throws ExecException, IOException { // Initialise Pig server cluster = MiniCluster.buildCluster(); pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties()); pigServer.getPigContext().getProperties() .setProperty(MRConfiguration.MAP_MAX_ATTEMPTS, "1"); pigServer.getPigContext().getProperties() .setProperty(MRConfiguration.REDUCE_MAX_ATTEMPTS, "1"); System.out.println("Pig server initialized successfully"); TMP_DIR = System.getProperty("user.dir") + "/build/test/"; dblocation = TMP_DIR + "batchtest"; url = "jdbc:hsqldb:file:" + dblocation + ";hsqldb.default_table_type=cached;hsqldb.cache_rows=100"; // Initialise DBServer dbServer = new Server(); dbServer.setDatabaseName(0, "batchtest"); // dbServer.setDatabasePath(0, "mem:test;sql.enforce_strict_size=true"); dbServer.setDatabasePath(0, "file:" + TMP_DIR + "batchtest;sql.enforce_strict_size=true"); dbServer.setLogWriter(null); dbServer.setErrWriter(null); dbServer.start(); System.out.println("Database URL: " + dbUrl); try { Class.forName(driver); } catch (Exception e) { e.printStackTrace(); System.out.println(this + ".setUp() error: " + e.getMessage()); } System.out.println("Database server started on port: " + dbServer.getPort()); }