Java Code Examples for org.apache.hadoop.mapred.JobConf#getUseNewMapper()

The following examples show how to use org.apache.hadoop.mapred.JobConf#getUseNewMapper(). They are taken from open-source projects; the source file and license for each snippet are noted in its header.
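getUseNewMapper() reports whether the job is configured to use the new-API (org.apache.hadoop.mapreduce) Mapper; it is backed by the mapred.mapper.new-api property mentioned in the javadoc of Examples 6 and 8. Before the project examples, here is a minimal, hypothetical sketch (not taken from any of the projects below) of how the flag is typically set and read:

JobConf jobConf = new JobConf();

// Opt in to the new-API mapper explicitly...
jobConf.setUseNewMapper(true);

// ...or set the backing property directly (equivalent effect).
// jobConf.setBoolean("mapred.mapper.new-api", true);

if (jobConf.getUseNewMapper()) {
  // Wire up an org.apache.hadoop.mapreduce.Mapper based job.
} else {
  // Wire up an org.apache.hadoop.mapred.Mapper based job.
}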
Example 1
Source File: JobSubmitter.java    From hadoop with Apache License 2.0
private void checkSpecs(Job job) throws ClassNotFoundException, 
    InterruptedException, IOException {
  JobConf jConf = (JobConf)job.getConfiguration();
  // Check the output specification. For a map-only job the new-API mapper flag
  // decides which OutputFormat API to validate; otherwise the reducer flag does.
  if (jConf.getNumReduceTasks() == 0 ? 
      jConf.getUseNewMapper() : jConf.getUseNewReducer()) {
    org.apache.hadoop.mapreduce.OutputFormat<?, ?> output =
      ReflectionUtils.newInstance(job.getOutputFormatClass(),
        job.getConfiguration());
    output.checkOutputSpecs(job);
  } else {
    jConf.getOutputFormat().checkOutputSpecs(jtFs, jConf);
  }
}
 
Example 2
Source File: JobSubmitter.java    From hadoop with Apache License 2.0
private int writeSplits(org.apache.hadoop.mapreduce.JobContext job,
    Path jobSubmitDir) throws IOException,
    InterruptedException, ClassNotFoundException {
  JobConf jConf = (JobConf)job.getConfiguration();
  int maps;
  if (jConf.getUseNewMapper()) {
    maps = writeNewSplits(job, jobSubmitDir);
  } else {
    maps = writeOldSplits(jConf, jobSubmitDir);
  }
  return maps;
}
 
Example 3
Source File: JobSubmitter.java    From big-c with Apache License 2.0
private void checkSpecs(Job job) throws ClassNotFoundException, 
    InterruptedException, IOException {
  JobConf jConf = (JobConf)job.getConfiguration();
  // Check the output specification
  if (jConf.getNumReduceTasks() == 0 ? 
      jConf.getUseNewMapper() : jConf.getUseNewReducer()) {
    org.apache.hadoop.mapreduce.OutputFormat<?, ?> output =
      ReflectionUtils.newInstance(job.getOutputFormatClass(),
        job.getConfiguration());
    output.checkOutputSpecs(job);
  } else {
    jConf.getOutputFormat().checkOutputSpecs(jtFs, jConf);
  }
}
 
Example 4
Source File: JobSubmitter.java    From big-c with Apache License 2.0
private int writeSplits(org.apache.hadoop.mapreduce.JobContext job,
    Path jobSubmitDir) throws IOException,
    InterruptedException, ClassNotFoundException {
  JobConf jConf = (JobConf)job.getConfiguration();
  int maps;
  if (jConf.getUseNewMapper()) {
    maps = writeNewSplits(job, jobSubmitDir);
  } else {
    maps = writeOldSplits(jConf, jobSubmitDir);
  }
  return maps;
}
 
Example 5
Source File: HadoopV2TaskContext.java    From ignite with Apache License 2.0
/**
 * @param taskInfo Task info.
 * @param job Job.
 * @param jobId Job ID.
 * @param locNodeId Local node ID.
 * @param jobConfDataInput DataInput from which the JobConf is read.
 */
public HadoopV2TaskContext(HadoopTaskInfo taskInfo, HadoopJobEx job, HadoopJobId jobId,
    @Nullable UUID locNodeId, DataInput jobConfDataInput) throws IgniteCheckedException {
    super(taskInfo, job);
    this.locNodeId = locNodeId;

    // Before creating the JobConf instance we need to set the new context class loader.
    ClassLoader oldLdr = HadoopCommonUtils.setContextClassLoader(getClass().getClassLoader());

    try {
        JobConf jobConf = new JobConf();

        try {
            jobConf.readFields(jobConfDataInput);
        }
        catch (IOException e) {
            throw new IgniteCheckedException(e);
        }

        // For map-reduce jobs prefer local writes.
        jobConf.setBooleanIfUnset(PARAM_IGFS_PREFER_LOCAL_WRITES, true);

        initializePartiallyRawComparator(jobConf);

        jobCtx = new JobContextImpl(jobConf, new JobID(jobId.globalId().toString(), jobId.localId()));

        useNewMapper = jobConf.getUseNewMapper();
        useNewReducer = jobConf.getUseNewReducer();
        useNewCombiner = jobConf.getCombinerClass() == null;
    }
    finally {
        HadoopCommonUtils.restoreContextClassLoader(oldLdr);
    }
}
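
Example 5 restores the JobConf from a DataInput before querying getUseNewMapper(). As a rough sketch of the other side of that exchange (an assumption for illustration, using plain java.io streams rather than Ignite's transport), a JobConf can be written and read back because Configuration implements Writable:

static boolean roundTripUseNewMapper() throws IOException {
  JobConf original = new JobConf();
  original.setUseNewMapper(true);

  // Serialize the configuration (Writable.write).
  ByteArrayOutputStream bytes = new ByteArrayOutputStream();
  original.write(new DataOutputStream(bytes));

  // Restore it on the receiving side, as the constructor above does with readFields().
  JobConf restored = new JobConf();
  restored.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));

  return restored.getUseNewMapper(); // true
}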
 
Example 6
Source File: MRHelpers.java    From incubator-tez with Apache License 2.0
/**
 * Helper API to generate splits
 * @param conf Configuration with all necessary information set to generate
 * splits. The following are required at a minimum:
 *
 *   - mapred.mapper.new-api: determines whether mapred.InputFormat or
 *     mapreduce.InputFormat is to be used
 *   - mapred.input.format.class or mapreduce.job.inputformat.class:
 *     determines the InputFormat class to be used
 *
 * In addition to this, all the configs needed by the InputFormat class also
 * have to be set. For example, FileInputFormat needs the input directory
 * paths to be set in the config.
 *
 * @param inputSplitsDir Directory in which the splits file and meta info file
 * will be generated. job.split and job.splitmetainfo files in this directory
 * will be overwritten. Should be a fully-qualified path.
 *
 * @return InputSplitInfo containing the split files' information and the
 * number of splits generated, to be used to determine the parallelism of
 * the map stage.
 *
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public static InputSplitInfoDisk generateInputSplits(Configuration conf,
    Path inputSplitsDir) throws IOException, InterruptedException,
    ClassNotFoundException {
  Job job = Job.getInstance(conf);
  JobConf jobConf = new JobConf(conf);
  conf.setBoolean(MRJobConfig.MR_TEZ_SPLITS_VIA_EVENTS, false);
  if (jobConf.getUseNewMapper()) {
    LOG.info("Generating new input splits"
        + ", splitsDir=" + inputSplitsDir.toString());
    return writeNewSplits(job, inputSplitsDir);
  } else {
    LOG.info("Generating old input splits"
        + ", splitsDir=" + inputSplitsDir.toString());
    return writeOldSplits(jobConf, inputSplitsDir);
  }
}
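
The javadoc above lists the minimum configuration generateInputSplits() expects. Below is a hedged sketch of that setup for the new-API path; the property names come from the javadoc, while the InputFormat and input path are placeholders chosen for illustration:

Configuration conf = new Configuration();

// Select the new-API (mapreduce) InputFormat code path.
conf.setBoolean("mapred.mapper.new-api", true);

// InputFormat to instantiate; TextInputFormat is only an illustrative choice.
conf.setClass("mapreduce.job.inputformat.class",
    org.apache.hadoop.mapreduce.lib.input.TextInputFormat.class,
    org.apache.hadoop.mapreduce.InputFormat.class);

// FileInputFormat additionally needs its input directory (placeholder path).
conf.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR, "/data/input");

// With the configuration complete, splits can be written to a fully-qualified directory:
// InputSplitInfoDisk splitInfo = MRHelpers.generateInputSplits(conf, new Path("hdfs:///tmp/splits"));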
 
Example 7
Source File: MRInputSplitDistributor.java    From incubator-tez with Apache License 2.0
@Override
public List<Event> initialize(TezRootInputInitializerContext rootInputContext)
    throws IOException {
  Stopwatch sw = null;
  if (LOG.isDebugEnabled()) {
    sw = new Stopwatch().start();
  }
  MRInputUserPayloadProto userPayloadProto = MRHelpers.parseMRInputPayload(rootInputContext.getUserPayload());
  if (LOG.isDebugEnabled()) {
    sw.stop();
    LOG.debug("Time to parse MRInput payload into prot: "
        + sw.elapsedMillis());  
  }
  Configuration conf = MRHelpers.createConfFromByteString(userPayloadProto
      .getConfigurationBytes());
  JobConf jobConf = new JobConf(conf);
  boolean useNewApi = jobConf.getUseNewMapper();
  sendSerializedEvents = conf.getBoolean(
      MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD,
      MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD_DEFAULT);
  LOG.info("Emitting serialized splits: " + sendSerializedEvents);

  this.splitsProto = userPayloadProto.getSplits();
  
  MRInputUserPayloadProto.Builder updatedPayloadBuilder = MRInputUserPayloadProto.newBuilder(userPayloadProto);
  updatedPayloadBuilder.clearSplits();

  List<Event> events = Lists.newArrayListWithCapacity(this.splitsProto.getSplitsCount() + 1);
  RootInputUpdatePayloadEvent updatePayloadEvent = new RootInputUpdatePayloadEvent(
      updatedPayloadBuilder.build().toByteArray());

  events.add(updatePayloadEvent);
  int count = 0;

  for (MRSplitProto mrSplit : this.splitsProto.getSplitsList()) {

    RootInputDataInformationEvent diEvent;

    if (sendSerializedEvents) {
      // Unnecessary array copy, can be avoided by using ByteBuffer instead of
      // a raw array.
      diEvent = new RootInputDataInformationEvent(count++, mrSplit.toByteArray());
    } else {
      if (useNewApi) {
        org.apache.hadoop.mapreduce.InputSplit newInputSplit = MRInputUtils
            .getNewSplitDetailsFromEvent(mrSplit, conf);
        diEvent = new RootInputDataInformationEvent(count++, newInputSplit);
      } else {
        org.apache.hadoop.mapred.InputSplit oldInputSplit = MRInputUtils
            .getOldSplitDetailsFromEvent(mrSplit, conf);
        diEvent = new RootInputDataInformationEvent(count++, oldInputSplit);
      }
    }
    events.add(diEvent);
  }

  return events;
}
 
Example 8
Source File: MRInputHelpers.java    From tez with Apache License 2.0
/**
 * Helper API to generate splits
 * @param conf Configuration with all necessary information set to generate
 * splits. The following are required at a minimum:
 *
 *   - mapred.mapper.new-api: determines whether mapred.InputFormat or
 *     mapreduce.InputFormat is to be used
 *   - mapred.input.format.class or mapreduce.job.inputformat.class:
 *     determines the InputFormat class to be used
 *
 * In addition to this, all the configs needed by the InputFormat class also
 * have to be set. For example, FileInputFormat needs the input directory
 * paths to be set in the config.
 *
 * @param inputSplitsDir Directory in which the splits file and meta info file
 * will be generated. job.split and job.splitmetainfo files in this directory
 * will be overwritten. Should be a fully-qualified path.
 *
 * @return InputSplitInfo containing the split files' information and the
 * number of splits generated, to be used to determine the parallelism of
 * the map stage.
 *
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
private static InputSplitInfoDisk generateInputSplits(Configuration conf,
                                                      Path inputSplitsDir) throws IOException, InterruptedException,
    ClassNotFoundException {
  Job job = Job.getInstance(conf);
  JobConf jobConf = new JobConf(conf);
  conf.setBoolean(MRJobConfig.MR_TEZ_SPLITS_VIA_EVENTS, false);
  if (jobConf.getUseNewMapper()) {
    LOG.info("Generating new input splits"
        + ", splitsDir=" + inputSplitsDir.toString());
    return writeNewSplits(job, inputSplitsDir);
  } else {
    LOG.info("Generating old input splits"
        + ", splitsDir=" + inputSplitsDir.toString());
    return writeOldSplits(jobConf, inputSplitsDir);
  }
}
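
For the old-API path, the equivalent setup (again only a sketch; the key names come from the javadoc above, and the format class and path are illustrative placeholders) leaves the new-API flag off and configures mapred.input.format.class instead:

JobConf jobConf = new JobConf();

// Leave the new-API flag off (or disable it explicitly) so the mapred.InputFormat path is taken.
jobConf.setUseNewMapper(false);

// Old-API InputFormat, which corresponds to the mapred.input.format.class property named in the javadoc.
jobConf.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class);

// FileInputFormat still needs an input directory (placeholder path).
org.apache.hadoop.mapred.FileInputFormat.setInputPaths(jobConf, new Path("/data/input"));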
 
Example 9
Source File: MRInputSplitDistributor.java    From tez with Apache License 2.0
@Override
public List<Event> initialize() throws IOException {
  StopWatch sw = new StopWatch().start();
  MRInputUserPayloadProto userPayloadProto = MRInputHelpers
      .parseMRInputPayload(getContext().getInputUserPayload());
  sw.stop();
  if (LOG.isDebugEnabled()) {
    LOG.debug("Time to parse MRInput payload into prot: "
        + sw.now(TimeUnit.MILLISECONDS));
  }
  Configuration conf = TezUtils.createConfFromByteString(userPayloadProto
      .getConfigurationBytes());
  JobConf jobConf = new JobConf(conf);
  boolean useNewApi = jobConf.getUseNewMapper();
  sendSerializedEvents = conf.getBoolean(
      MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD,
      MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD_DEFAULT);
  LOG.info("Emitting serialized splits: " + sendSerializedEvents);

  this.splitsProto = userPayloadProto.getSplits();
  
  MRInputUserPayloadProto.Builder updatedPayloadBuilder = MRInputUserPayloadProto.newBuilder(userPayloadProto);
  updatedPayloadBuilder.clearSplits();

  List<Event> events = Lists.newArrayListWithCapacity(this.splitsProto.getSplitsCount() + 1);
  InputUpdatePayloadEvent updatePayloadEvent = InputUpdatePayloadEvent.create(
      updatedPayloadBuilder.build().toByteString().asReadOnlyByteBuffer());

  events.add(updatePayloadEvent);
  int count = 0;

  for (MRSplitProto mrSplit : this.splitsProto.getSplitsList()) {

    InputDataInformationEvent diEvent;

    if (sendSerializedEvents) {
      // Unnecessary array copy, can be avoided by using ByteBuffer instead of
      // a raw array.
      diEvent = InputDataInformationEvent.createWithSerializedPayload(count++,
          mrSplit.toByteString().asReadOnlyByteBuffer());
    } else {
      if (useNewApi) {
        org.apache.hadoop.mapreduce.InputSplit newInputSplit = MRInputUtils
            .getNewSplitDetailsFromEvent(mrSplit, conf);
        diEvent = InputDataInformationEvent.createWithObjectPayload(count++, newInputSplit);
      } else {
        org.apache.hadoop.mapred.InputSplit oldInputSplit = MRInputUtils
            .getOldSplitDetailsFromEvent(mrSplit, conf);
        diEvent = InputDataInformationEvent.createWithObjectPayload(count++, oldInputSplit);
      }
    }
    events.add(diEvent);
  }

  return events;
}