Java Code Examples for org.apache.pig.impl.PigContext#setPackageImportList()
The following examples show how to use org.apache.pig.impl.PigContext#setPackageImportList(). The source file and originating project for each example are listed above its code.
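For context: every snippet below reads back the "udf.import.list" entry that Pig's front end writes into the job configuration. The following is a minimal, self-contained sketch of that round trip using only a plain Hadoop Configuration; the com.example.udfs. entry is a made-up illustration, and the ImportListRoundTrip class is ours, not Pig's.

import java.util.ArrayList;
import java.util.Arrays;

import org.apache.hadoop.conf.Configuration;
import org.apache.pig.impl.PigContext;
import org.apache.pig.impl.util.ObjectSerializer;

public class ImportListRoundTrip {
    @SuppressWarnings("unchecked")
    public static void main(String[] args) throws Exception {
        // Front end: serialize the import list into the job configuration.
        // Pig's own launcher performs an equivalent step when building the job.
        // "com.example.udfs." is a hypothetical user package, for illustration only.
        ArrayList<String> imports = new ArrayList<String>(Arrays.asList(
                "", "java.lang.", "org.apache.pig.builtin.", "com.example.udfs."));
        Configuration conf = new Configuration();
        conf.set("udf.import.list", ObjectSerializer.serialize(imports));

        // Back end (the pattern shown in the examples below): restore the list
        // before deserializing plans, so short UDF class names can be resolved.
        PigContext.setPackageImportList((ArrayList<String>) ObjectSerializer
                .deserialize(conf.get("udf.import.list")));

        System.out.println(PigContext.getPackageImportList());
    }
}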
Example 1
Source File: PigProcessor.java From spork with Apache License 2.0
@SuppressWarnings("unchecked") @Override public void initialize() throws Exception { // Reset any static variables to avoid conflict in container-reuse. sampleVertex = null; sampleMap = null; // Reset static variables cleared for avoiding OOM. new JVMReuseImpl().cleanupStaticData(); UserPayload payload = getContext().getUserPayload(); conf = TezUtils.createConfFromUserPayload(payload); PigContext.setPackageImportList((ArrayList<String>) ObjectSerializer .deserialize(conf.get("udf.import.list"))); PigContext pc = (PigContext) ObjectSerializer.deserialize(conf.get("pig.pigContext")); // To determine front-end in UDFContext conf.set(MRConfiguration.JOB_APPLICATION_ATTEMPT_ID, getContext().getUniqueIdentifier()); conf.set(PigConstants.TASK_INDEX, Integer.toString(getContext().getTaskIndex())); UDFContext.getUDFContext().addJobConf(conf); UDFContext.getUDFContext().deserialize(); String execPlanString = conf.get(PLAN); execPlan = (PhysicalPlan) ObjectSerializer.deserialize(execPlanString); SchemaTupleBackend.initialize(conf, pc); PigMapReduce.sJobContext = HadoopShims.createJobContext(conf, new org.apache.hadoop.mapreduce.JobID()); // Set the job conf as a thread-local member of PigMapReduce // for backwards compatibility with the existing code base. PigMapReduce.sJobConfInternal.set(conf); Utils.setDefaultTimeZone(conf); boolean aggregateWarning = "true".equalsIgnoreCase(pc.getProperties().getProperty("aggregate.warning")); PigStatusReporter pigStatusReporter = PigStatusReporter.getInstance(); pigStatusReporter.setContext(new TezTaskContext(getContext())); pigHadoopLogger = PigHadoopLogger.getInstance(); pigHadoopLogger.setReporter(pigStatusReporter); pigHadoopLogger.setAggregate(aggregateWarning); PhysicalOperator.setPigLogger(pigHadoopLogger); LinkedList<TezTaskConfigurable> tezTCs = PlanHelper.getPhysicalOperators(execPlan, TezTaskConfigurable.class); for (TezTaskConfigurable tezTC : tezTCs){ tezTC.initialize(getContext()); } }
Example 2
Source File: PigInputFormat.java From spork with Apache License 2.0
@SuppressWarnings({ "rawtypes", "unchecked" }) @Override public org.apache.hadoop.mapreduce.RecordReader<Text, Tuple> createRecordReader( org.apache.hadoop.mapreduce.InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException { // We need to create a TaskAttemptContext based on the Configuration which // was used in the getSplits() to produce the split supplied here. For // this, let's find out the input of the script which produced the split // supplied here and then get the corresponding Configuration and setup // TaskAttemptContext based on it and then call the real InputFormat's // createRecordReader() method PigSplit pigSplit = (PigSplit)split; activeSplit = pigSplit; // XXX hadoop 20 new API integration: get around a hadoop 20 bug by // passing total # of splits to each split so it can be retrieved // here and set it to the configuration object. This number is needed // by PoissonSampleLoader to compute the number of samples int n = pigSplit.getTotalSplits(); context.getConfiguration().setInt("pig.mapsplits.count", n); Configuration conf = context.getConfiguration(); PigContext.setPackageImportList((ArrayList<String>) ObjectSerializer .deserialize(conf.get("udf.import.list"))); MapRedUtil.setupUDFContext(conf); LoadFunc loadFunc = getLoadFunc(pigSplit.getInputIndex(), conf); // Pass loader signature to LoadFunc and to InputFormat through // the conf passLoadSignature(loadFunc, pigSplit.getInputIndex(), conf); // merge entries from split specific conf into the conf we got PigInputFormat.mergeSplitSpecificConf(loadFunc, pigSplit, conf); // for backward compatibility PigInputFormat.sJob = conf; InputFormat inputFormat = loadFunc.getInputFormat(); List<Long> inpLimitLists = (ArrayList<Long>)ObjectSerializer.deserialize( conf.get("pig.inpLimits")); return new PigRecordReader(inputFormat, pigSplit, loadFunc, context, inpLimitLists.get(pigSplit.getInputIndex())); }
Example 3
Source File: PigGenericMapReduce.java From spork with Apache License 2.0
/**
 * Configures the Reduce plan, the POPackage operator
 * and the reporter thread
 */
@SuppressWarnings("unchecked")
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    inIllustrator = inIllustrator(context);
    if (inIllustrator)
        pack = getPack(context);
    Configuration jConf = context.getConfiguration();
    SpillableMemoryManager.configure(ConfigurationUtil.toProperties(jConf));
    context.getConfiguration().set(PigConstants.TASK_INDEX,
            Integer.toString(context.getTaskAttemptID().getTaskID().getId()));
    sJobContext = context;
    sJobConfInternal.set(context.getConfiguration());
    sJobConf = context.getConfiguration();
    try {
        PigContext.setPackageImportList((ArrayList<String>) ObjectSerializer
                .deserialize(jConf.get("udf.import.list")));
        pigContext = (PigContext) ObjectSerializer.deserialize(jConf.get("pig.pigContext"));

        // This attempts to fetch all of the generated code from the distributed cache, and resolve it
        SchemaTupleBackend.initialize(jConf, pigContext);

        if (rp == null)
            rp = (PhysicalPlan) ObjectSerializer.deserialize(jConf.get("pig.reducePlan"));
        stores = PlanHelper.getPhysicalOperators(rp, POStore.class);

        if (!inIllustrator)
            pack = (POPackage) ObjectSerializer.deserialize(jConf.get("pig.reduce.package"));
        // To be removed
        if (rp.isEmpty())
            log.debug("Reduce Plan empty!");
        else {
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            rp.explain(baos);
            log.debug(baos.toString());
        }
        pigReporter = new ProgressableReporter();
        if (!(rp.isEmpty())) {
            roots = rp.getRoots().toArray(new PhysicalOperator[1]);
            leaf = rp.getLeaves().get(0);
        }

        // Get the UDF specific context
        MapRedUtil.setupUDFContext(jConf);
    } catch (IOException ioe) {
        String msg = "Problem while configuring reduce plan.";
        throw new RuntimeException(msg, ioe);
    }

    log.info("Aliases being processed per job phase (AliasName[line,offset]): "
            + jConf.get("pig.alias.location"));

    Utils.setDefaultTimeZone(PigMapReduce.sJobConfInternal.get());
}
Example 4
Source File: PigCombiner.java From spork with Apache License 2.0
/**
 * Configures the combine plan, the POPackage operator
 * and the reporter thread
 */
@SuppressWarnings("unchecked")
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    Configuration jConf = context.getConfiguration();
    try {
        PigContext.setPackageImportList((ArrayList<String>) ObjectSerializer
                .deserialize(jConf.get("udf.import.list")));
        pigContext = (PigContext) ObjectSerializer.deserialize(jConf.get("pig.pigContext"));

        if (pigContext.getLog4jProperties() != null)
            PropertyConfigurator.configure(pigContext.getLog4jProperties());

        cp = (PhysicalPlan) ObjectSerializer.deserialize(jConf.get("pig.combinePlan"));
        pack = (POPackage) ObjectSerializer.deserialize(jConf.get("pig.combine.package"));
        // To be removed
        if (cp.isEmpty())
            log.debug("Combine Plan empty!");
        else {
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            cp.explain(baos);
            log.debug(baos.toString());
        }
        keyType = ((byte[]) ObjectSerializer.deserialize(jConf.get("pig.map.keytype")))[0];
        // till here

        pigReporter = new ProgressableReporter();
        if (!(cp.isEmpty())) {
            roots = cp.getRoots().toArray(new PhysicalOperator[1]);
            leaf = cp.getLeaves().get(0);
        }
    } catch (IOException ioe) {
        String msg = "Problem while configuring combiner's reduce plan.";
        throw new RuntimeException(msg, ioe);
    }

    // Avoid log spamming
    if (firstTime) {
        log.info("Aliases being processed per job phase (AliasName[line,offset]): "
                + jConf.get("pig.alias.location"));
        firstTime = false;
    }
}
Example 5
Source File: PigGenericMapBase.java From spork with Apache License 2.0
/**
 * Configures the mapper with the map plan and the
 * reporter thread
 */
@SuppressWarnings("unchecked")
@Override
public void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);

    Configuration job = context.getConfiguration();
    SpillableMemoryManager.configure(ConfigurationUtil.toProperties(job));
    context.getConfiguration().set(PigConstants.TASK_INDEX,
            Integer.toString(context.getTaskAttemptID().getTaskID().getId()));
    PigMapReduce.sJobContext = context;
    PigMapReduce.sJobConfInternal.set(context.getConfiguration());
    PigMapReduce.sJobConf = context.getConfiguration();
    inIllustrator = inIllustrator(context);

    PigContext.setPackageImportList((ArrayList<String>) ObjectSerializer
            .deserialize(job.get("udf.import.list")));
    pigContext = (PigContext) ObjectSerializer.deserialize(job.get("pig.pigContext"));

    // This attempts to fetch all of the generated code from the distributed cache, and resolve it
    SchemaTupleBackend.initialize(job, pigContext);

    if (pigContext.getLog4jProperties() != null)
        PropertyConfigurator.configure(pigContext.getLog4jProperties());

    if (mp == null)
        mp = (PhysicalPlan) ObjectSerializer.deserialize(job.get("pig.mapPlan"));
    stores = PlanHelper.getPhysicalOperators(mp, POStore.class);

    // To be removed
    if (mp.isEmpty())
        log.debug("Map Plan empty!");
    else {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        mp.explain(baos);
        log.debug(baos.toString());
    }
    keyType = ((byte[]) ObjectSerializer.deserialize(job.get("pig.map.keytype")))[0];
    // till here

    pigReporter = new ProgressableReporter();
    // Get the UDF specific context
    MapRedUtil.setupUDFContext(job);

    if (!(mp.isEmpty())) {
        PigSplit split = (PigSplit) context.getInputSplit();
        List<OperatorKey> targetOpKeys = split.getTargetOps();

        ArrayList<PhysicalOperator> targetOpsAsList = new ArrayList<PhysicalOperator>();
        for (OperatorKey targetKey : targetOpKeys) {
            targetOpsAsList.add(mp.getOperator(targetKey));
        }
        roots = targetOpsAsList.toArray(new PhysicalOperator[1]);
        leaf = mp.getLeaves().get(0);
    }

    PigStatusReporter pigStatusReporter = PigStatusReporter.getInstance();
    pigStatusReporter.setContext(new MRTaskContext(context));

    log.info("Aliases being processed per job phase (AliasName[line,offset]): "
            + job.get("pig.alias.location"));

    Utils.setDefaultTimeZone(PigMapReduce.sJobConfInternal.get());
}
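The reason the import list matters on the back end is name resolution: PigContext.resolveClassName() tries each entry on the list as a package prefix when turning a short class name into a Class. A minimal sketch of that effect, assuming the default builtin prefixes (TOKENIZE is used purely as a convenient builtin to resolve; the ResolveExample class is ours, not Pig's):

import java.util.ArrayList;
import java.util.Arrays;

import org.apache.pig.impl.PigContext;

public class ResolveExample {
    public static void main(String[] args) throws Exception {
        // Restore (or set) the import list, as the setup() methods above do.
        PigContext.setPackageImportList(new ArrayList<String>(
                Arrays.asList("", "java.lang.", "org.apache.pig.builtin.")));

        // "TOKENIZE" resolves to org.apache.pig.builtin.TOKENIZE because
        // "org.apache.pig.builtin." is on the import list.
        Class<?> c = PigContext.resolveClassName("TOKENIZE");
        System.out.println(c.getName());
    }
}

Without the restore calls shown in the examples, a back-end task would see only the default prefixes, so any import entries registered on the front end would be missing when UDF names are resolved.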