Java Code Examples for org.apache.hadoop.mapred.JobConf#getUseNewMapper()

The following examples show how to use org.apache.hadoop.mapred.JobConf#getUseNewMapper(). They are taken from open-source projects; the source file and license for each snippet are noted in its header.
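getUseNewMapper() reports whether the job is configured to use the new-API (org.apache.hadoop.mapreduce) Mapper; it is backed by the mapred.mapper.new-api property mentioned in the javadoc of Examples 6 and 8. Before the project examples, here is a minimal, hypothetical sketch (not taken from any of the projects below) of how the flag is typically set and read:

JobConf jobConf = new JobConf();

// Opt in to the new-API mapper explicitly...
jobConf.setUseNewMapper(true);

// ...or set the backing property directly (equivalent effect).
// jobConf.setBoolean("mapred.mapper.new-api", true);

if (jobConf.getUseNewMapper()) {
  // Wire up an org.apache.hadoop.mapreduce.Mapper based job.
} else {
  // Wire up an org.apache.hadoop.mapred.Mapper based job.
}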
Example 1
Source File: JobSubmitter.java    From hadoop with Apache License 2.0
private void checkSpecs(Job job) throws ClassNotFoundException, 
    InterruptedException, IOException {
  JobConf jConf = (JobConf)job.getConfiguration();
  // Check the output specification. For a map-only job the new-API mapper flag
  // decides which OutputFormat API to validate; otherwise the reducer flag does.
  if (jConf.getNumReduceTasks() == 0 ? 
      jConf.getUseNewMapper() : jConf.getUseNewReducer()) {
    org.apache.hadoop.mapreduce.OutputFormat<?, ?> output =
      ReflectionUtils.newInstance(job.getOutputFormatClass(),
        job.getConfiguration());
    output.checkOutputSpecs(job);
  } else {
    jConf.getOutputFormat().checkOutputSpecs(jtFs, jConf);
  }
}
 
Example 2
Source File: JobSubmitter.java    From hadoop with Apache License 2.0
private int writeSplits(org.apache.hadoop.mapreduce.JobContext job,
    Path jobSubmitDir) throws IOException,
    InterruptedException, ClassNotFoundException {
  JobConf jConf = (JobConf)job.getConfiguration();
  int maps;
  if (jConf.getUseNewMapper()) {
    maps = writeNewSplits(job, jobSubmitDir);
  } else {
    maps = writeOldSplits(jConf, jobSubmitDir);
  }
  return maps;
}
 
Example 3
Source File: JobSubmitter.java    From big-c with Apache License 2.0
private void checkSpecs(Job job) throws ClassNotFoundException, 
    InterruptedException, IOException {
  JobConf jConf = (JobConf)job.getConfiguration();
  // Check the output specification
  if (jConf.getNumReduceTasks() == 0 ? 
      jConf.getUseNewMapper() : jConf.getUseNewReducer()) {
    org.apache.hadoop.mapreduce.OutputFormat<?, ?> output =
      ReflectionUtils.newInstance(job.getOutputFormatClass(),
        job.getConfiguration());
    output.checkOutputSpecs(job);
  } else {
    jConf.getOutputFormat().checkOutputSpecs(jtFs, jConf);
  }
}
 
Example 4
Source File: JobSubmitter.java    From big-c with Apache License 2.0
private int writeSplits(org.apache.hadoop.mapreduce.JobContext job,
    Path jobSubmitDir) throws IOException,
    InterruptedException, ClassNotFoundException {
  JobConf jConf = (JobConf)job.getConfiguration();
  int maps;
  if (jConf.getUseNewMapper()) {
    maps = writeNewSplits(job, jobSubmitDir);
  } else {
    maps = writeOldSplits(jConf, jobSubmitDir);
  }
  return maps;
}
 
Example 5
Source File: HadoopV2TaskContext.java    From ignite with Apache License 2.0
/**
 * @param taskInfo Task info.
 * @param job Job.
 * @param jobId Job ID.
 * @param locNodeId Local node ID.
 * @param jobConfDataInput DataInput from which the JobConf is read.
 */
public HadoopV2TaskContext(HadoopTaskInfo taskInfo, HadoopJobEx job, HadoopJobId jobId,
    @Nullable UUID locNodeId, DataInput jobConfDataInput) throws IgniteCheckedException {
    super(taskInfo, job);
    this.locNodeId = locNodeId;

    // Before creating the JobConf instance we need to set the new context class loader.
    ClassLoader oldLdr = HadoopCommonUtils.setContextClassLoader(getClass().getClassLoader());

    try {
        JobConf jobConf = new JobConf();

        try {
            jobConf.readFields(jobConfDataInput);
        }
        catch (IOException e) {
            throw new IgniteCheckedException(e);
        }

        // For map-reduce jobs prefer local writes.
        jobConf.setBooleanIfUnset(PARAM_IGFS_PREFER_LOCAL_WRITES, true);

        initializePartiallyRawComparator(jobConf);

        jobCtx = new JobContextImpl(jobConf, new JobID(jobId.globalId().toString(), jobId.localId()));

        useNewMapper = jobConf.getUseNewMapper();
        useNewReducer = jobConf.getUseNewReducer();
        useNewCombiner = jobConf.getCombinerClass() == null;
    }
    finally {
        HadoopCommonUtils.restoreContextClassLoader(oldLdr);
    }
}
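
Example 5 restores the JobConf from a DataInput before querying getUseNewMapper(). As a rough sketch of the other side of that exchange (an assumption for illustration, using plain java.io streams rather than Ignite's transport), a JobConf can be written and read back because Configuration implements Writable:

static boolean roundTripUseNewMapper() throws IOException {
  JobConf original = new JobConf();
  original.setUseNewMapper(true);

  // Serialize the configuration (Writable.write).
  ByteArrayOutputStream bytes = new ByteArrayOutputStream();
  original.write(new DataOutputStream(bytes));

  // Restore it on the receiving side, as the constructor above does with readFields().
  JobConf restored = new JobConf();
  restored.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));

  return restored.getUseNewMapper(); // true
}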
 
Example 6
Source File: MRHelpers.java    From incubator-tez with Apache License 2.0
/**
 * Helper API to generate splits
 * @param conf Configuration with all necessary information set to generate
 * splits. The following are required at a minimum:
 *
 *   - mapred.mapper.new-api: determines whether mapred.InputFormat or
 *     mapreduce.InputFormat is to be used
 *   - mapred.input.format.class or mapreduce.job.inputformat.class:
 *     determines the InputFormat class to be used
 *
 * In addition to this, all the configs needed by the InputFormat class also
 * have to be set. For example, FileInputFormat needs the input directory
 * paths to be set in the config.
 *
 * @param inputSplitsDir Directory in which the splits file and meta info file
 * will be generated. job.split and job.splitmetainfo files in this directory
 * will be overwritten. Should be a fully-qualified path.
 *
 * @return InputSplitInfo containing the split files' information and the
 * number of splits generated, to be used to determine the parallelism of
 * the map stage.
 *
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public static InputSplitInfoDisk generateInputSplits(Configuration conf,
    Path inputSplitsDir) throws IOException, InterruptedException,
    ClassNotFoundException {
  Job job = Job.getInstance(conf);
  JobConf jobConf = new JobConf(conf);
  conf.setBoolean(MRJobConfig.MR_TEZ_SPLITS_VIA_EVENTS, false);
  if (jobConf.getUseNewMapper()) {
    LOG.info("Generating new input splits"
        + ", splitsDir=" + inputSplitsDir.toString());
    return writeNewSplits(job, inputSplitsDir);
  } else {
    LOG.info("Generating old input splits"
        + ", splitsDir=" + inputSplitsDir.toString());
    return writeOldSplits(jobConf, inputSplitsDir);
  }
}
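
The javadoc above lists the minimum configuration generateInputSplits() expects. Below is a hedged sketch of that setup for the new-API path; the property names come from the javadoc, while the InputFormat and input path are placeholders chosen for illustration:

Configuration conf = new Configuration();

// Select the new-API (mapreduce) InputFormat code path.
conf.setBoolean("mapred.mapper.new-api", true);

// InputFormat to instantiate; TextInputFormat is only an illustrative choice.
conf.setClass("mapreduce.job.inputformat.class",
    org.apache.hadoop.mapreduce.lib.input.TextInputFormat.class,
    org.apache.hadoop.mapreduce.InputFormat.class);

// FileInputFormat additionally needs its input directory (placeholder path).
conf.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR, "/data/input");

// With the configuration complete, splits can be written to a fully-qualified directory:
// InputSplitInfoDisk splitInfo = MRHelpers.generateInputSplits(conf, new Path("hdfs:///tmp/splits"));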
 
Example 7
Source File: MRInputSplitDistributor.java    From incubator-tez with Apache License 2.0
@Override
public List<Event> initialize(TezRootInputInitializerContext rootInputContext)
    throws IOException {
  Stopwatch sw = null;
  if (LOG.isDebugEnabled()) {
    sw = new Stopwatch().start();
  }
  MRInputUserPayloadProto userPayloadProto = MRHelpers.parseMRInputPayload(rootInputContext.getUserPayload());
  if (LOG.isDebugEnabled()) {
    sw.stop();
    LOG.debug("Time to parse MRInput payload into prot: "
        + sw.elapsedMillis());  
  }
  Configuration conf = MRHelpers.createConfFromByteString(userPayloadProto
      .getConfigurationBytes());
  JobConf jobConf = new JobConf(conf);
  boolean useNewApi = jobConf.getUseNewMapper();
  sendSerializedEvents = conf.getBoolean(
      MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD,
      MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD_DEFAULT);
  LOG.info("Emitting serialized splits: " + sendSerializedEvents);

  this.splitsProto = userPayloadProto.getSplits();
  
  MRInputUserPayloadProto.Builder updatedPayloadBuilder = MRInputUserPayloadProto.newBuilder(userPayloadProto);
  updatedPayloadBuilder.clearSplits();

  List<Event> events = Lists.newArrayListWithCapacity(this.splitsProto.getSplitsCount() + 1);
  RootInputUpdatePayloadEvent updatePayloadEvent = new RootInputUpdatePayloadEvent(
      updatedPayloadBuilder.build().toByteArray());

  events.add(updatePayloadEvent);
  int count = 0;

  for (MRSplitProto mrSplit : this.splitsProto.getSplitsList()) {

    RootInputDataInformationEvent diEvent;

    if (sendSerializedEvents) {
      // Unnecessary array copy, can be avoided by using ByteBuffer instead of
      // a raw array.
      diEvent = new RootInputDataInformationEvent(count++, mrSplit.toByteArray());
    } else {
      if (useNewApi) {
        org.apache.hadoop.mapreduce.InputSplit newInputSplit = MRInputUtils
            .getNewSplitDetailsFromEvent(mrSplit, conf);
        diEvent = new RootInputDataInformationEvent(count++, newInputSplit);
      } else {
        org.apache.hadoop.mapred.InputSplit oldInputSplit = MRInputUtils
            .getOldSplitDetailsFromEvent(mrSplit, conf);
        diEvent = new RootInputDataInformationEvent(count++, oldInputSplit);
      }
    }
    events.add(diEvent);
  }

  return events;
}
 
Example 8
Source File: MRInputHelpers.java    From tez with Apache License 2.0
/**
 * Helper API to generate splits
 * @param conf Configuration with all necessary information set to generate
 * splits. The following are required at a minimum:
 *
 *   - mapred.mapper.new-api: determines whether mapred.InputFormat or
 *     mapreduce.InputFormat is to be used
 *   - mapred.input.format.class or mapreduce.job.inputformat.class:
 *     determines the InputFormat class to be used
 *
 * In addition to this, all the configs needed by the InputFormat class also
 * have to be set. For example, FileInputFormat needs the input directory
 * paths to be set in the config.
 *
 * @param inputSplitsDir Directory in which the splits file and meta info file
 * will be generated. job.split and job.splitmetainfo files in this directory
 * will be overwritten. Should be a fully-qualified path.
 *
 * @return InputSplitInfo containing the split files' information and the
 * number of splits generated, to be used to determine the parallelism of
 * the map stage.
 *
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
private static InputSplitInfoDisk generateInputSplits(Configuration conf,
                                                      Path inputSplitsDir) throws IOException, InterruptedException,
    ClassNotFoundException {
  Job job = Job.getInstance(conf);
  JobConf jobConf = new JobConf(conf);
  conf.setBoolean(MRJobConfig.MR_TEZ_SPLITS_VIA_EVENTS, false);
  if (jobConf.getUseNewMapper()) {
    LOG.info("Generating new input splits"
        + ", splitsDir=" + inputSplitsDir.toString());
    return writeNewSplits(job, inputSplitsDir);
  } else {
    LOG.info("Generating old input splits"
        + ", splitsDir=" + inputSplitsDir.toString());
    return writeOldSplits(jobConf, inputSplitsDir);
  }
}
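
For the old-API path, the equivalent setup (again only a sketch; the key names come from the javadoc above, and the format class and path are illustrative placeholders) leaves the new-API flag off and configures mapred.input.format.class instead:

JobConf jobConf = new JobConf();

// Leave the new-API flag off (or disable it explicitly) so the mapred.InputFormat path is taken.
jobConf.setUseNewMapper(false);

// Old-API InputFormat, which corresponds to the mapred.input.format.class property named in the javadoc.
jobConf.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class);

// FileInputFormat still needs an input directory (placeholder path).
org.apache.hadoop.mapred.FileInputFormat.setInputPaths(jobConf, new Path("/data/input"));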
 
Example 9
Source File: MRInputSplitDistributor.java    From tez with Apache License 2.0
@Override
public List<Event> initialize() throws IOException {
  StopWatch sw = new StopWatch().start();
  MRInputUserPayloadProto userPayloadProto = MRInputHelpers
      .parseMRInputPayload(getContext().getInputUserPayload());
  sw.stop();
  if (LOG.isDebugEnabled()) {
    LOG.debug("Time to parse MRInput payload into prot: "
        + sw.now(TimeUnit.MILLISECONDS));
  }
  Configuration conf = TezUtils.createConfFromByteString(userPayloadProto
      .getConfigurationBytes());
  JobConf jobConf = new JobConf(conf);
  boolean useNewApi = jobConf.getUseNewMapper();
  sendSerializedEvents = conf.getBoolean(
      MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD,
      MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD_DEFAULT);
  LOG.info("Emitting serialized splits: " + sendSerializedEvents);

  this.splitsProto = userPayloadProto.getSplits();
  
  MRInputUserPayloadProto.Builder updatedPayloadBuilder = MRInputUserPayloadProto.newBuilder(userPayloadProto);
  updatedPayloadBuilder.clearSplits();

  List<Event> events = Lists.newArrayListWithCapacity(this.splitsProto.getSplitsCount() + 1);
  InputUpdatePayloadEvent updatePayloadEvent = InputUpdatePayloadEvent.create(
      updatedPayloadBuilder.build().toByteString().asReadOnlyByteBuffer());

  events.add(updatePayloadEvent);
  int count = 0;

  for (MRSplitProto mrSplit : this.splitsProto.getSplitsList()) {

    InputDataInformationEvent diEvent;

    if (sendSerializedEvents) {
      // Unnecessary array copy, can be avoided by using ByteBuffer instead of
      // a raw array.
      diEvent = InputDataInformationEvent.createWithSerializedPayload(count++,
          mrSplit.toByteString().asReadOnlyByteBuffer());
    } else {
      if (useNewApi) {
        org.apache.hadoop.mapreduce.InputSplit newInputSplit = MRInputUtils
            .getNewSplitDetailsFromEvent(mrSplit, conf);
        diEvent = InputDataInformationEvent.createWithObjectPayload(count++, newInputSplit);
      } else {
        org.apache.hadoop.mapred.InputSplit oldInputSplit = MRInputUtils
            .getOldSplitDetailsFromEvent(mrSplit, conf);
        diEvent = InputDataInformationEvent.createWithObjectPayload(count++, oldInputSplit);
      }
    }
    events.add(diEvent);
  }

  return events;
}