org.apache.pig.impl.PigContext Java Examples
The following examples show how to use
org.apache.pig.impl.PigContext.
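Before the individual examples, here is a minimal local-mode sketch of how a PigContext is typically created and used. The wrapper class, its main method, and the sample property and FuncSpec values are illustrative assumptions; the PigContext, PigServer and FuncSpec calls themselves mirror the examples below.

import java.util.Properties;

import org.apache.pig.ExecType;
import org.apache.pig.FuncSpec;
import org.apache.pig.LoadFunc;
import org.apache.pig.PigServer;
import org.apache.pig.impl.PigContext;

public class PigContextQuickStart {
    public static void main(String[] args) throws Exception {
        // Local-mode PigContext, mirroring the test setUp() methods below
        // (e.g. Examples #13, #18 and #26).
        PigContext pc = new PigContext(ExecType.LOCAL, new Properties());
        pc.connect();

        // A PigServer wraps the context and exposes it again via getPigContext().
        PigServer pigServer = new PigServer(pc);
        pigServer.getPigContext().getProperties()
                .setProperty("output.compression.enabled", "false");

        // Static helper used in several examples: instantiate a loader from a FuncSpec.
        LoadFunc loader = (LoadFunc) PigContext.instantiateFuncFromSpec(
                new FuncSpec("org.apache.pig.builtin.PigStorage()"));
        System.out.println("Instantiated loader: " + loader.getClass().getName());
    }
}

Note that in MapReduce mode, connect() additionally requires the Hadoop configuration to be present on the classpath, as Example #13 demonstrates.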
Example #1
Source File: JobControlCompiler.java From spork with Apache License 2.0

public static void setOutputFormat(org.apache.hadoop.mapreduce.Job job) {
    // the OutputFormat we report to Hadoop is always PigOutputFormat which
    // can be wrapped with LazyOutputFormat provided if it is supported by
    // the Hadoop version and PigConfiguration.PIG_OUTPUT_LAZY is set
    if ("true".equalsIgnoreCase(job.getConfiguration().get(PigConfiguration.PIG_OUTPUT_LAZY))) {
        try {
            Class<?> clazz = PigContext
                    .resolveClassName("org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat");
            Method method = clazz.getMethod("setOutputFormatClass",
                    org.apache.hadoop.mapreduce.Job.class, Class.class);
            method.invoke(null, job, PigOutputFormat.class);
        } catch (Exception e) {
            job.setOutputFormatClass(PigOutputFormat.class);
            log.warn(PigConfiguration.PIG_OUTPUT_LAZY
                    + " is set but LazyOutputFormat couldn't be loaded. Default PigOutputFormat will be used");
        }
    } else {
        job.setOutputFormatClass(PigOutputFormat.class);
    }
}
Example #2
Source File: LineageTrimmingVisitor.java From spork with Apache License 2.0

public LineageTrimmingVisitor(LogicalPlan plan,
        Map<LOLoad, DataBag> baseData,
        ExampleGenerator eg,
        Map<Operator, PhysicalOperator> LogToPhyMap,
        PhysicalPlan physPlan, PigContext pc) throws IOException, InterruptedException {
    super(plan, new PreOrderDepthFirstWalker(plan));
    // this.baseData.putAll(baseData);
    this.baseData = baseData;
    this.plan = plan;
    this.LogToPhyMap = LogToPhyMap;
    this.pc = pc;
    this.physPlan = physPlan;
    this.eg = eg;
    this.inputToDataMap = new HashMap<FileSpec, DataBag>();
    init();
}
Example #3
Source File: TestGrunt.java From spork with Apache License 2.0

@Test
public void testBagConstantWithSchema() throws Throwable {
    PigServer server = new PigServer(cluster.getExecType(), cluster.getProperties());
    PigContext context = server.getPigContext();

    String strCmd = "a = load 'input1'; b = foreach a generate "
            + "{(1, '1', 0.4f),(2, '2', 0.45)} as "
            + "b: bag{t:(i: int, c:chararray, d: double)};\n";

    ByteArrayInputStream cmd = new ByteArrayInputStream(strCmd.getBytes());
    InputStreamReader reader = new InputStreamReader(cmd);

    Grunt grunt = new Grunt(new BufferedReader(reader), context);

    grunt.exec();
}
Example #4
Source File: POCast.java From spork with Apache License 2.0

private void instantiateFunc() throws IOException {
    if (caster != null) return;

    if (funcSpec != null) {
        Object obj = PigContext.instantiateFuncFromSpec(funcSpec);
        if (obj instanceof LoadFunc) {
            caster = ((LoadFunc)obj).getLoadCaster();
        } else if (obj instanceof StreamToPig) {
            caster = ((StreamToPig)obj).getLoadCaster();
        } else {
            throw new IOException("Invalid class type " + funcSpec.getClassName());
        }
    }
}
Example #5
Source File: FetchOptimizer.java From spork with Apache License 2.0

/**
 * Checks whether the plan fulfills the prerequisites needed for fetching.
 *
 * @param pc PigContext
 * @param pp the physical plan to be examined
 * @return
 */
private static boolean isEligible(PigContext pc, PhysicalPlan pp) {
    if (!isFetchEnabled(pc)) {
        return false;
    }

    List<PhysicalOperator> roots = pp.getRoots();
    for (PhysicalOperator po : roots) {
        if (!(po instanceof POLoad)) {
            String msg = "Expected physical operator at root is POLoad. Found : "
                    + po.getClass().getCanonicalName() + ". Fetch optimizer will be disabled.";
            LOG.debug(msg);
            return false;
        }
    }

    //consider single leaf jobs only
    int leafSize = pp.getLeaves().size();
    if (pp.getLeaves().size() != 1) {
        LOG.debug("Expected physical plan should have one leaf. Found " + leafSize);
        return false;
    }

    return true;
}
Example #6
Source File: TestGroupConstParallelTez.java From spork with Apache License 2.0

@Override
public void checkGroupNonConstWithParallelResult(PhysicalPlan pp, PigContext pc) throws Exception {
    TezOperPlan tezPlan = buildTezPlan(pp, pc);

    LoaderProcessor loaderStorer = new LoaderProcessor(tezPlan, pc);
    loaderStorer.visit();

    ParallelismSetter parallelismSetter = new ParallelismSetter(tezPlan, pc);
    parallelismSetter.visit();

    DAG tezDag = getTezDAG(tezPlan, pc);
    TezDagBuilder dagBuilder = new TezDagBuilder(pc, tezPlan, tezDag, null);
    dagBuilder.visit();

    for (Vertex v : tezDag.getVertices()) {
        if (!v.getInputVertices().isEmpty()) {
            assertEquals(v.getParallelism(), 100);
        }
    }
}
Example #7
Source File: FetchOptimizer.java From spork with Apache License 2.0

/**
 * Visits the plan with {@link FetchablePlanVisitor} and checks whether the
 * plan is fetchable.
 *
 * @param pc PigContext
 * @param pp the physical plan to be examined
 * @return true if the plan is fetchable
 * @throws VisitorException
 */
public static boolean isPlanFetchable(PigContext pc, PhysicalPlan pp) throws VisitorException {
    if (isEligible(pc, pp)) {
        FetchablePlanVisitor fpv = new FetchablePlanVisitor(pc, pp);
        fpv.visit();
        // Plan is fetchable only if FetchablePlanVisitor returns true AND
        // limit is present in the plan, i.e: limit is pushed up to the loader.
        // Limit is a safeguard. If the input is large, and there is no limit,
        // fetch optimizer will fetch the entire input to the client. That can be dangerous.
        if (!fpv.isPlanFetchable()) {
            return false;
        }
        for (POLoad load : PlanHelper.getPhysicalOperators(pp, POLoad.class)) {
            if (load.getLimit() == -1) {
                return false;
            }
        }
        pc.getProperties().setProperty(PigImplConstants.CONVERTED_TO_FETCH, "true");
        init(pp);
        return true;
    }
    return false;
}
Example #8
Source File: EmptyPigStats.java From spork with Apache License 2.0

public EmptyPigStats(PigContext pigContext, POStore poStore) {
    super.pigContext = pigContext;
    super.startTime = super.endTime = System.currentTimeMillis();
    super.userId = System.getProperty("user.name");

    Configuration conf = ConfigurationUtil.toConfiguration(pigContext.getProperties());

    // initialize empty stats
    OutputStats os = new OutputStats(null, -1, -1, true);
    os.setConf(conf);
    os.setPOStore(poStore);
    this.outputStatsList = Collections.unmodifiableList(Arrays.asList(os));

    InputStats is = new InputStats(null, -1, -1, true);
    is.setConf(conf);
    this.inputStatsList = Collections.unmodifiableList(Arrays.asList(is));
}
Example #9
Source File: PigTest.java From spork with Apache License 2.0

/**
 * Registers a pig script with its variables substituted.
 *
 * @throws IOException If a temp file containing the pig script could not be created.
 * @throws ParseException The pig script could not have all its variables substituted.
 */
protected void registerScript() throws IOException, ParseException {
    getCluster();

    BufferedReader reader = new BufferedReader(new StringReader(this.originalTextPigScript));
    PigContext context = getPigServer().getPigContext();

    String substitutedPig = context.doParamSubstitution(reader,
            args == null ? null : Arrays.asList(args),
            argFiles == null ? null : Arrays.asList(argFiles));
    LOG.info(substitutedPig);

    File f = File.createTempFile("tmp", "pigunit");
    PrintWriter pw = new PrintWriter(f);
    pw.println(substitutedPig);
    pw.close();

    String pigSubstitutedFile = f.getCanonicalPath();

    getPigServer().registerScript(pigSubstitutedFile, aliasOverrides);
}
Example #10
Source File: TestGrunt.java From spork with Apache License 2.0

@Test
public void testIllustrateScript6() throws Throwable {
    // empty line/field test
    PigServer server = new PigServer(ExecType.LOCAL, new Properties());
    PigContext context = server.getPigContext();

    String strCmd = "illustrate -script " + basedir + "/illustrate6.pig;";

    ByteArrayInputStream cmd = new ByteArrayInputStream(strCmd.getBytes());
    InputStreamReader reader = new InputStreamReader(cmd);

    Grunt grunt = new Grunt(new BufferedReader(reader), context);

    grunt.exec();
}
Example #11
Source File: GruntParser.java From spork with Apache License 2.0

private String runPreprocessor(String scriptPath, List<String> params,
                               List<String> paramFiles)
        throws IOException, ParseException {

    PigContext context = mPigServer.getPigContext();
    BufferedReader reader = new BufferedReader(new FileReader(scriptPath));
    String result = context.doParamSubstitution(reader, params, paramFiles);
    reader.close();
    return result;
}
Example #12
Source File: PythonScriptEngine.java From spork with Apache License 2.0

@Override
protected Map<String, List<PigStats>> main(PigContext context, String scriptFile)
        throws IOException {
    log.warn("ScriptFile: " + scriptFile);
    registerFunctions(scriptFile, null, context);
    return getPigStatsMap();
}
Example #13
Source File: TestMRExecutionEngine.java From spork with Apache License 2.0

@Test(expected = ExecException.class)
public void testJobConfGeneration() throws ExecException {
    Configuration conf = new Configuration(false);
    conf.set("foo", "bar");
    PigContext pigContext = new PigContext(ExecType.MAPREDUCE, conf);
    // This should fail as pig expects Hadoop configs are present in classpath.
    pigContext.connect();
}
Example #14
Source File: TestGrunt.java From spork with Apache License 2.0

@Test
public void testBagSchema() throws Throwable {
    PigServer server = new PigServer(cluster.getExecType(), cluster.getProperties());
    PigContext context = server.getPigContext();

    String strCmd = "a = load 'input1' as (b: bag{t:(i: int, c:chararray, f: float)});\n";

    ByteArrayInputStream cmd = new ByteArrayInputStream(strCmd.getBytes());
    InputStreamReader reader = new InputStreamReader(cmd);

    Grunt grunt = new Grunt(new BufferedReader(reader), context);

    grunt.exec();
}
Example #15
Source File: TestGrunt.java From spork with Apache License 2.0

@Test
public void testFsCommand() throws Throwable {
    PigServer server = new PigServer(cluster.getExecType(), cluster.getProperties());
    PigContext context = server.getPigContext();

    String strCmd =
            "fs -ls /;"
            + "fs -mkdir /fstmp;"
            + "fs -mkdir /fstmp/foo;"
            + "cd /fstmp;"
            + "fs -copyFromLocal test/org/apache/pig/test/data/passwd bar;"
            + "a = load 'bar';"
            + "cd foo;"
            + "store a into 'baz';"
            + "cd /;"
            + "fs -ls .;"
            + "fs -rmr /fstmp/foo/baz;"
            + "cd";

    ByteArrayInputStream cmd = new ByteArrayInputStream(strCmd.getBytes());
    InputStreamReader reader = new InputStreamReader(cmd);

    Grunt grunt = new Grunt(new BufferedReader(reader), context);

    grunt.exec();
}
Example #16
Source File: FetchLauncher.java From spork with Apache License 2.0

/**
 * Creates an empty MR plan
 *
 * @param pp - Physical plan
 * @param pc - PigContext
 * @param ps - PrintStream to write the plan to
 * @param format format of the output plan
 * @throws PlanException
 * @throws VisitorException
 * @throws IOException
 */
public void explain(PhysicalPlan pp, PigContext pc, PrintStream ps, String format)
        throws PlanException, VisitorException, IOException {
    if ("xml".equals(format)) {
        ps.println("<mapReducePlan>No MR jobs. Fetch only</mapReducePlan>");
    } else {
        ps.println("#--------------------------------------------------");
        ps.println("# Map Reduce Plan                                   ");
        ps.println("#--------------------------------------------------");
        ps.println("No MR jobs. Fetch only.");
    }
    return;
}
Example #17
Source File: TestGrunt.java From spork with Apache License 2.0

@Test
public void testFileCmds() throws Throwable {
    PigServer server = new PigServer(cluster.getExecType(), cluster.getProperties());
    PigContext context = server.getPigContext();

    String strCmd =
            "rmf bar; rmf baz;"
            + "a = load '"
            + Util.generateURI("file:test/org/apache/pig/test/data/passwd", context) + "';"
            + "store a into 'bar';"
            + "cp bar baz;"
            + "rm bar; rm baz;"
            + "store a into 'baz';"
            + "store a into 'bar';"
            + "rm baz; rm bar;"
            + "store a into 'baz';"
            + "mv baz bar;"
            + "b = load 'bar';"
            + "store b into 'baz';"
            + "cat baz;"
            + "rm baz;"
            + "rm bar;\n";

    ByteArrayInputStream cmd = new ByteArrayInputStream(strCmd.getBytes());
    InputStreamReader reader = new InputStreamReader(cmd);

    Grunt grunt = new Grunt(new BufferedReader(reader), context);

    grunt.exec();
}
Example #18
Source File: TestMultiQueryLocal.java From spork with Apache License 2.0

@Before
public void setUp() throws Exception {
    PigContext context = new PigContext(ExecType.LOCAL, new Properties());
    context.getProperties().setProperty(PigConfiguration.PIG_OPT_MULTIQUERY, "" + true);

    myPig = new PigServer(context);
    myPig.getPigContext().getProperties().setProperty("pig.usenewlogicalplan", "false");
    myPig.getPigContext().getProperties().setProperty(PigConfiguration.PIG_TEMP_DIR, "build/test/tmp/");

    TMP_DIR = FileLocalizer.getTemporaryPath(myPig.getPigContext()).toUri().getPath();

    deleteOutputFiles();
}
Example #19
Source File: ForEachConverter.java From spork with Apache License 2.0

void initializeJobConf() {
    if (this.jobConf == null) {
        this.jobConf = KryoSerializer.deserializeJobConf(this.confBytes);
        PigMapReduce.sJobConfInternal.set(jobConf);
        try {
            MapRedUtil.setupUDFContext(jobConf);
            PigContext pc = (PigContext) ObjectSerializer.deserialize(jobConf.get("pig.pigContext"));
            SchemaTupleBackend.initialize(jobConf, pc);
        } catch (IOException ioe) {
            String msg = "Problem while configuring UDFContext from ForEachConverter.";
            throw new RuntimeException(msg, ioe);
        }
    }
}
Example #20
Source File: Main.java From spork with Apache License 2.0

protected static PigProgressNotificationListener makeListener(Properties properties) {
    try {
        return PigContext.instantiateObjectFromParams(
                ConfigurationUtil.toConfiguration(properties),
                PROGRESS_NOTIFICATION_LISTENER_KEY,
                PROGRESS_NOTIFICATION_LISTENER_ARG_KEY,
                PigProgressNotificationListener.class);
    } catch (ExecException e) {
        throw new RuntimeException(e);
    }
}
Example #21
Source File: Grunt.java From spork with Apache License 2.0

public Grunt(BufferedReader in, PigContext pigContext) throws ExecException {
    this.in = in;
    this.pig = new PigServer(pigContext);

    if (in != null) {
        parser = new GruntParser(in, pig);
    }
}
Example #22
Source File: EvalFunc.java From spork with Apache License 2.0

private Type getReturnTypeFromSpec(FuncSpec funcSpec) {
    try {
        return ((EvalFunc<?>) PigContext.instantiateFuncFromSpec(funcSpec)).getReturnType();
    } catch (ClassCastException e) {
        throw new RuntimeException(funcSpec + " does not specify an eval func", e);
    }
}
Example #23
Source File: TestFRJoin.java From spork with Apache License 2.0

private void setUpHashTable() throws IOException {
    FileSpec replFile = new FileSpec(repl, new FuncSpec(PigStorage.class.getName() + "()"));
    POLoad ld = new POLoad(new OperatorKey("Repl File Loader", 1L), replFile);
    PigContext pc = new PigContext(ExecType.MAPREDUCE, PigMapReduce.sJobConfInternal.get());
    pc.connect();

    ld.setPc(pc);
    for (Result res = ld.getNextTuple(); res.returnStatus != POStatus.STATUS_EOP; res = ld.getNextTuple()) {
        Tuple tup = (Tuple) res.result;
        LoadFunc lf = ((LoadFunc) PigContext.instantiateFuncFromSpec(ld.getLFile().getFuncSpec()));
        String key = lf.getLoadCaster().bytesToCharArray(((DataByteArray) tup.get(keyField)).get());

        Tuple csttup = TupleFactory.getInstance().newTuple(2);
        csttup.set(0, key);
        csttup.set(1, lf.getLoadCaster().bytesToInteger(((DataByteArray) tup.get(1)).get()));

        DataBag vals = null;
        if (replTbl.containsKey(key)) {
            vals = replTbl.get(key);
        } else {
            vals = BagFactory.getInstance().newDefaultBag();
            replTbl.put(key, vals);
        }
        vals.add(csttup);
    }
}
Example #24
Source File: InvokerGenerator.java From spork with Apache License 2.0

private Class<?>[] getArgumentClassArray(String[] argumentTypes) {
    Class<?>[] arguments = new Class<?>[argumentTypes.length];
    for (int i = 0; i < argumentTypes.length; i++) {
        try {
            arguments[i] = nameToClassObjectMap.get(argumentTypes[i]);
            if (arguments[i] == null) {
                arguments[i] = PigContext.resolveClassName(argumentTypes[i]);
            }
        } catch (IOException e) {
            throw new RuntimeException("Unable to find class in PigContext: " + argumentTypes[i], e);
        }
    }
    return arguments;
}
Example #25
Source File: TezResourceManager.java From spork with Apache License 2.0

public void init(PigContext pigContext, Configuration conf) throws IOException {
    if (!inited) {
        this.stagingDir = FileLocalizer.getTemporaryResourcePath(pigContext);
        this.remoteFs = FileSystem.get(conf);
        this.conf = conf;
        this.pigContext = pigContext;
        this.inited = true;
    }
}
Example #26
Source File: TestSecondarySort.java From spork with Apache License 2.0

@Before
public void setUp() throws Exception {
    if (cluster == null) {
        cluster = getCluster();
        pc = new PigContext(cluster.getExecType(), cluster.getProperties());
        try {
            pc.connect();
        } catch (ExecException e) {
            throw new RuntimeException(e);
        }
    }
    pigServer = new PigServer(pc);
}
Example #27
Source File: TestBZip.java From spork with Apache License 2.0

@Test
public void testBzipStoreInMultiQuery2() throws Exception {
    String[] inputData = new String[] {
            "1\t2\r3\t4"
    };

    String inputFileName = "input2.txt";
    Util.createInputFile(cluster, inputFileName, inputData);

    PigServer pig = new PigServer(cluster.getExecType(), properties);
    PigContext pigContext = pig.getPigContext();
    pigContext.getProperties().setProperty("output.compression.enabled", "true");
    pigContext.getProperties().setProperty("output.compression.codec",
            "org.apache.hadoop.io.compress.BZip2Codec");

    pig.setBatchOn();
    pig.registerQuery("a = load '" + inputFileName + "';");
    pig.registerQuery("store a into 'output2.bz2';");
    pig.registerQuery("store a into 'output2';");
    pig.executeBatch();

    FileSystem fs = FileSystem.get(ConfigurationUtil.toConfiguration(
            pig.getPigContext().getProperties()));
    FileStatus[] outputFiles = fs.listStatus(new Path("output2"),
            Util.getSuccessMarkerPathFilter());
    assertTrue(outputFiles[0].getLen() > 0);

    outputFiles = fs.listStatus(new Path("output2.bz2"),
            Util.getSuccessMarkerPathFilter());
    assertTrue(outputFiles[0].getLen() > 0);
}
Example #28
Source File: ReadToEndLoader.java From spork with Apache License 2.0

public ReadToEndLoader(LoadFunc wrappedLoadFunc, Configuration conf,
        String inputLocation, int splitIndex, PigContext pigContext) throws IOException {
    this.wrappedLoadFunc = wrappedLoadFunc;
    this.inputLocation = inputLocation;
    this.conf = conf;
    this.curSplitIndex = splitIndex;
    this.pigContext = pigContext;
    init();
}
Example #29
Source File: QueryParserUtils.java From spork with Apache License 2.0

static void setHdfsServers(String absolutePath, PigContext pigContext) throws URISyntaxException {
    // Get native host
    String defaultFS = (String) pigContext.getProperties().get("fs.default.name");
    if (defaultFS == null)
        defaultFS = (String) pigContext.getProperties().get("fs.defaultFS");

    URI defaultFSURI = new URI(defaultFS);

    Configuration conf = new Configuration(true);
    ConfigurationUtil.mergeConf(conf, ConfigurationUtil.toConfiguration(pigContext.getProperties()));

    Set<String> remoteHosts = getRemoteHosts(absolutePath, defaultFSURI, conf);

    String hdfsServersString = (String) pigContext.getProperties().get(MRConfiguration.JOB_HDFS_SERVERS);
    if (hdfsServersString == null) hdfsServersString = "";
    String hdfsServers[] = hdfsServersString.split(",");

    for (String remoteHost : remoteHosts) {
        boolean existing = false;
        for (String hdfsServer : hdfsServers) {
            if (hdfsServer.equals(remoteHost)) {
                existing = true;
            }
        }
        if (!existing) {
            if (!hdfsServersString.isEmpty()) {
                hdfsServersString = hdfsServersString + ",";
            }
            hdfsServersString = hdfsServersString + remoteHost;
        }
    }

    if (!hdfsServersString.isEmpty()) {
        pigContext.getProperties().setProperty(MRConfiguration.JOB_HDFS_SERVERS, hdfsServersString);
    }
}
Example #30
Source File: HJob.java From spork with Apache License 2.0

public HJob(JOB_STATUS status, PigContext pigContext, POStore store, String alias) {
    this.status = status;
    this.pigContext = pigContext;
    this.poStore = store;
    this.outFileSpec = poStore.getSFile();
    this.alias = alias;
}