Java Code Examples for org.apache.pig.impl.PigContext#setPackageImportList()
The following examples show how to use org.apache.pig.impl.PigContext#setPackageImportList(). The source file and originating project for each example are listed above its code.
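For context: every snippet below reads back the "udf.import.list" entry that Pig's front end writes into the job configuration. The following is a minimal, self-contained sketch of that round trip using only a plain Hadoop Configuration; the com.example.udfs. entry is a made-up illustration, and the ImportListRoundTrip class is ours, not Pig's.

import java.util.ArrayList;
import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.pig.impl.PigContext;
import org.apache.pig.impl.util.ObjectSerializer;

public class ImportListRoundTrip {
    @SuppressWarnings("unchecked")
    public static void main(String[] args) throws Exception {
        // Front end: serialize the import list into the job configuration.
        // Pig's own launcher performs an equivalent step when building the job.
        // "com.example.udfs." is a hypothetical user package, for illustration only.
        ArrayList<String> imports = new ArrayList<String>(Arrays.asList(
                "", "java.lang.", "org.apache.pig.builtin.", "com.example.udfs."));
        Configuration conf = new Configuration();
        conf.set("udf.import.list", ObjectSerializer.serialize(imports));

        // Back end (the pattern shown in the examples below): restore the list
        // before deserializing plans, so short UDF class names can be resolved.
        PigContext.setPackageImportList((ArrayList<String>) ObjectSerializer
                .deserialize(conf.get("udf.import.list")));

        System.out.println(PigContext.getPackageImportList());
    }
}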
Example 1
Source File: PigProcessor.java From spork with Apache License 2.0
@SuppressWarnings("unchecked") @Override public void initialize() throws Exception { // Reset any static variables to avoid conflict in container-reuse. sampleVertex = null; sampleMap = null; // Reset static variables cleared for avoiding OOM. new JVMReuseImpl().cleanupStaticData(); UserPayload payload = getContext().getUserPayload(); conf = TezUtils.createConfFromUserPayload(payload); PigContext.setPackageImportList((ArrayList<String>) ObjectSerializer .deserialize(conf.get("udf.import.list"))); PigContext pc = (PigContext) ObjectSerializer.deserialize(conf.get("pig.pigContext")); // To determine front-end in UDFContext conf.set(MRConfiguration.JOB_APPLICATION_ATTEMPT_ID, getContext().getUniqueIdentifier()); conf.set(PigConstants.TASK_INDEX, Integer.toString(getContext().getTaskIndex())); UDFContext.getUDFContext().addJobConf(conf); UDFContext.getUDFContext().deserialize(); String execPlanString = conf.get(PLAN); execPlan = (PhysicalPlan) ObjectSerializer.deserialize(execPlanString); SchemaTupleBackend.initialize(conf, pc); PigMapReduce.sJobContext = HadoopShims.createJobContext(conf, new org.apache.hadoop.mapreduce.JobID()); // Set the job conf as a thread-local member of PigMapReduce // for backwards compatibility with the existing code base. PigMapReduce.sJobConfInternal.set(conf); Utils.setDefaultTimeZone(conf); boolean aggregateWarning = "true".equalsIgnoreCase(pc.getProperties().getProperty("aggregate.warning")); PigStatusReporter pigStatusReporter = PigStatusReporter.getInstance(); pigStatusReporter.setContext(new TezTaskContext(getContext())); pigHadoopLogger = PigHadoopLogger.getInstance(); pigHadoopLogger.setReporter(pigStatusReporter); pigHadoopLogger.setAggregate(aggregateWarning); PhysicalOperator.setPigLogger(pigHadoopLogger); LinkedList<TezTaskConfigurable> tezTCs = PlanHelper.getPhysicalOperators(execPlan, TezTaskConfigurable.class); for (TezTaskConfigurable tezTC : tezTCs){ tezTC.initialize(getContext()); } }
Example 2
Source File: PigInputFormat.java From spork with Apache License 2.0
@SuppressWarnings({ "rawtypes", "unchecked" }) @Override public org.apache.hadoop.mapreduce.RecordReader<Text, Tuple> createRecordReader( org.apache.hadoop.mapreduce.InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException { // We need to create a TaskAttemptContext based on the Configuration which // was used in the getSplits() to produce the split supplied here. For // this, let's find out the input of the script which produced the split // supplied here and then get the corresponding Configuration and setup // TaskAttemptContext based on it and then call the real InputFormat's // createRecordReader() method PigSplit pigSplit = (PigSplit)split; activeSplit = pigSplit; // XXX hadoop 20 new API integration: get around a hadoop 20 bug by // passing total # of splits to each split so it can be retrieved // here and set it to the configuration object. This number is needed // by PoissonSampleLoader to compute the number of samples int n = pigSplit.getTotalSplits(); context.getConfiguration().setInt("pig.mapsplits.count", n); Configuration conf = context.getConfiguration(); PigContext.setPackageImportList((ArrayList<String>) ObjectSerializer .deserialize(conf.get("udf.import.list"))); MapRedUtil.setupUDFContext(conf); LoadFunc loadFunc = getLoadFunc(pigSplit.getInputIndex(), conf); // Pass loader signature to LoadFunc and to InputFormat through // the conf passLoadSignature(loadFunc, pigSplit.getInputIndex(), conf); // merge entries from split specific conf into the conf we got PigInputFormat.mergeSplitSpecificConf(loadFunc, pigSplit, conf); // for backward compatibility PigInputFormat.sJob = conf; InputFormat inputFormat = loadFunc.getInputFormat(); List<Long> inpLimitLists = (ArrayList<Long>)ObjectSerializer.deserialize( conf.get("pig.inpLimits")); return new PigRecordReader(inputFormat, pigSplit, loadFunc, context, inpLimitLists.get(pigSplit.getInputIndex())); }
Example 3
Source File: PigGenericMapReduce.java From spork with Apache License 2.0
/**
 * Configures the Reduce plan, the POPackage operator
 * and the reporter thread
 */
@SuppressWarnings("unchecked")
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    inIllustrator = inIllustrator(context);
    if (inIllustrator)
        pack = getPack(context);
    Configuration jConf = context.getConfiguration();
    SpillableMemoryManager.configure(ConfigurationUtil.toProperties(jConf));
    context.getConfiguration().set(PigConstants.TASK_INDEX,
            Integer.toString(context.getTaskAttemptID().getTaskID().getId()));
    sJobContext = context;
    sJobConfInternal.set(context.getConfiguration());
    sJobConf = context.getConfiguration();
    try {
        PigContext.setPackageImportList((ArrayList<String>) ObjectSerializer
                .deserialize(jConf.get("udf.import.list")));
        pigContext = (PigContext) ObjectSerializer.deserialize(jConf.get("pig.pigContext"));

        // This attempts to fetch all of the generated code from the distributed cache, and resolve it
        SchemaTupleBackend.initialize(jConf, pigContext);

        if (rp == null)
            rp = (PhysicalPlan) ObjectSerializer.deserialize(jConf.get("pig.reducePlan"));
        stores = PlanHelper.getPhysicalOperators(rp, POStore.class);

        if (!inIllustrator)
            pack = (POPackage) ObjectSerializer.deserialize(jConf.get("pig.reduce.package"));
        // To be removed
        if (rp.isEmpty())
            log.debug("Reduce Plan empty!");
        else {
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            rp.explain(baos);
            log.debug(baos.toString());
        }
        pigReporter = new ProgressableReporter();
        if (!(rp.isEmpty())) {
            roots = rp.getRoots().toArray(new PhysicalOperator[1]);
            leaf = rp.getLeaves().get(0);
        }

        // Get the UDF specific context
        MapRedUtil.setupUDFContext(jConf);
    } catch (IOException ioe) {
        String msg = "Problem while configuring reduce plan.";
        throw new RuntimeException(msg, ioe);
    }

    log.info("Aliases being processed per job phase (AliasName[line,offset]): "
            + jConf.get("pig.alias.location"));

    Utils.setDefaultTimeZone(PigMapReduce.sJobConfInternal.get());
}
Example 4
Source File: PigCombiner.java From spork with Apache License 2.0
/**
 * Configures the combine plan, the POPackage operator
 * and the reporter thread
 */
@SuppressWarnings("unchecked")
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    Configuration jConf = context.getConfiguration();
    try {
        PigContext.setPackageImportList((ArrayList<String>) ObjectSerializer
                .deserialize(jConf.get("udf.import.list")));
        pigContext = (PigContext) ObjectSerializer.deserialize(jConf.get("pig.pigContext"));

        if (pigContext.getLog4jProperties() != null)
            PropertyConfigurator.configure(pigContext.getLog4jProperties());

        cp = (PhysicalPlan) ObjectSerializer.deserialize(jConf.get("pig.combinePlan"));
        pack = (POPackage) ObjectSerializer.deserialize(jConf.get("pig.combine.package"));
        // To be removed
        if (cp.isEmpty())
            log.debug("Combine Plan empty!");
        else {
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            cp.explain(baos);
            log.debug(baos.toString());
        }
        keyType = ((byte[]) ObjectSerializer.deserialize(jConf.get("pig.map.keytype")))[0];
        // till here

        pigReporter = new ProgressableReporter();
        if (!(cp.isEmpty())) {
            roots = cp.getRoots().toArray(new PhysicalOperator[1]);
            leaf = cp.getLeaves().get(0);
        }
    } catch (IOException ioe) {
        String msg = "Problem while configuring combiner's reduce plan.";
        throw new RuntimeException(msg, ioe);
    }

    // Avoid log spamming
    if (firstTime) {
        log.info("Aliases being processed per job phase (AliasName[line,offset]): "
                + jConf.get("pig.alias.location"));
        firstTime = false;
    }
}
Example 5
Source File: PigGenericMapBase.java From spork with Apache License 2.0
/**
 * Configures the mapper with the map plan and the
 * reporter thread
 */
@SuppressWarnings("unchecked")
@Override
public void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);

    Configuration job = context.getConfiguration();
    SpillableMemoryManager.configure(ConfigurationUtil.toProperties(job));
    context.getConfiguration().set(PigConstants.TASK_INDEX,
            Integer.toString(context.getTaskAttemptID().getTaskID().getId()));
    PigMapReduce.sJobContext = context;
    PigMapReduce.sJobConfInternal.set(context.getConfiguration());
    PigMapReduce.sJobConf = context.getConfiguration();
    inIllustrator = inIllustrator(context);

    PigContext.setPackageImportList((ArrayList<String>) ObjectSerializer
            .deserialize(job.get("udf.import.list")));
    pigContext = (PigContext) ObjectSerializer.deserialize(job.get("pig.pigContext"));

    // This attempts to fetch all of the generated code from the distributed cache, and resolve it
    SchemaTupleBackend.initialize(job, pigContext);

    if (pigContext.getLog4jProperties() != null)
        PropertyConfigurator.configure(pigContext.getLog4jProperties());

    if (mp == null)
        mp = (PhysicalPlan) ObjectSerializer.deserialize(job.get("pig.mapPlan"));
    stores = PlanHelper.getPhysicalOperators(mp, POStore.class);

    // To be removed
    if (mp.isEmpty())
        log.debug("Map Plan empty!");
    else {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        mp.explain(baos);
        log.debug(baos.toString());
    }
    keyType = ((byte[]) ObjectSerializer.deserialize(job.get("pig.map.keytype")))[0];
    // till here

    pigReporter = new ProgressableReporter();
    // Get the UDF specific context
    MapRedUtil.setupUDFContext(job);

    if (!(mp.isEmpty())) {
        PigSplit split = (PigSplit) context.getInputSplit();
        List<OperatorKey> targetOpKeys = split.getTargetOps();

        ArrayList<PhysicalOperator> targetOpsAsList = new ArrayList<PhysicalOperator>();
        for (OperatorKey targetKey : targetOpKeys) {
            targetOpsAsList.add(mp.getOperator(targetKey));
        }
        roots = targetOpsAsList.toArray(new PhysicalOperator[1]);
        leaf = mp.getLeaves().get(0);
    }

    PigStatusReporter pigStatusReporter = PigStatusReporter.getInstance();
    pigStatusReporter.setContext(new MRTaskContext(context));

    log.info("Aliases being processed per job phase (AliasName[line,offset]): "
            + job.get("pig.alias.location"));

    Utils.setDefaultTimeZone(PigMapReduce.sJobConfInternal.get());
}
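The reason the import list matters on the back end is name resolution: PigContext.resolveClassName() tries each entry on the list as a package prefix when turning a short class name into a Class. A minimal sketch of that effect, assuming the default builtin prefixes (TOKENIZE is used purely as a convenient builtin to resolve; the ResolveExample class is ours, not Pig's):

import java.util.ArrayList;
import java.util.Arrays;

import org.apache.pig.impl.PigContext;

public class ResolveExample {
    public static void main(String[] args) throws Exception {
        // Restore (or set) the import list, as the setup() methods above do.
        PigContext.setPackageImportList(new ArrayList<String>(
                Arrays.asList("", "java.lang.", "org.apache.pig.builtin.")));

        // "TOKENIZE" resolves to org.apache.pig.builtin.TOKENIZE because
        // "org.apache.pig.builtin." is on the import list.
        Class<?> c = PigContext.resolveClassName("TOKENIZE");
        System.out.println(c.getName());
    }
}

Without the restore calls shown in the examples, a back-end task would see only the default prefixes, so any import entries registered on the front end would be missing when UDF names are resolved.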