Java Code Examples for org.apache.tez.mapreduce.hadoop.MRHelpers#translateVertexConfToTez()

The following examples show how to use org.apache.tez.mapreduce.hadoop.MRHelpers#translateVertexConfToTez(). Each example is taken from an open-source project and names its original source file.
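Before the project examples, here is a minimal, self-contained sketch of the typical call order: build an MR-style JobConf, let MRHelpers.translateVertexConfToTez() rewrite its MapReduce settings into Tez runtime settings in place, and then serialize the translated configuration into a user payload. This sketch is not taken from incubator-tez; the "io.sort.mb" key is purely illustrative, and the org.apache.tez.common.TezUtils import is an assumption that may differ by Tez version.

import java.io.IOException;

import org.apache.hadoop.mapred.JobConf;
import org.apache.tez.common.TezUtils;
import org.apache.tez.mapreduce.hadoop.MRHelpers;

public class TranslateVertexConfSketch {
  public static byte[] buildPayload() throws IOException {
    JobConf conf = new JobConf();
    conf.setInt("io.sort.mb", 256);            // an ordinary MapReduce-style setting (illustrative)
    MRHelpers.translateVertexConfToTez(conf);  // rewrites MR keys into Tez runtime keys, in place
    MRHelpers.doJobClientMagic(conf);          // same follow-up call used in the examples below
    return TezUtils.createUserPayloadFromConf(conf); // serialize the translated configuration
  }
}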
Example 1
Source File: MRInput.java    From incubator-tez with Apache License 2.0
/**
 * Helper API to generate the user payload for the MRInput and
 * MRInputAMSplitGenerator (if used). The InputFormat will be invoked by Tez
 * at DAG runtime to generate the input splits.
 * 
 * @param conf
 *          Configuration for the InputFormat
 * @param inputFormatClassName
 *          Name of the class of the InputFormat
 * @param useNewApi
 *          use new mapreduce API or old mapred API
 * @param groupSplitsInAM
 *          do grouping of splits in the AM. If true then splits generated by
 *          the InputFormat will be grouped in the AM based on available
 *          resources, locality etc. This option may be set to true only when
 *          using MRInputAMSplitGenerator as the initializer class in
 *          {@link Vertex#addInput(String, org.apache.tez.dag.api.InputDescriptor, Class)}
 * @return the user payload to be set on the InputDescriptor of MRInput
 * @throws IOException
 */
public static byte[] createUserPayload(Configuration conf,
    String inputFormatClassName, boolean useNewApi, boolean groupSplitsInAM)
    throws IOException {
  Configuration inputConf = new JobConf(conf);
  String wrappedInputFormatClassName = null;
  String configInputFormatClassName = null;
  if (groupSplitsInAM) {
    wrappedInputFormatClassName = inputFormatClassName;
    configInputFormatClassName = TezGroupedSplitsInputFormat.class.getName();
  } else {
    wrappedInputFormatClassName = null;
    configInputFormatClassName = inputFormatClassName;
  }
  inputConf.set(MRJobConfig.INPUT_FORMAT_CLASS_ATTR,
      configInputFormatClassName);
  inputConf.setBoolean("mapred.mapper.new-api", useNewApi);
  MRHelpers.translateVertexConfToTez(inputConf);
  MRHelpers.doJobClientMagic(inputConf);
  if (groupSplitsInAM) {
    return MRHelpers.createMRInputPayloadWithGrouping(inputConf,
        wrappedInputFormatClassName);
  } else {
    return MRHelpers.createMRInputPayload(inputConf, null);
  }
}
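As a follow-up, this short sketch shows how the payload produced by the helper above might be attached to an InputDescriptor; it is not part of MRInput.java. The InputDescriptor/setUserPayload pattern mirrors Example 3 below, while TextInputFormat and the exact vertex wiring are only assumptions.

// Hedged usage sketch: wire the generated payload onto an InputDescriptor for MRInput.
Configuration conf = new Configuration();
byte[] payload = MRInput.createUserPayload(conf,
    org.apache.hadoop.mapreduce.lib.input.TextInputFormat.class.getName(),
    true /* useNewApi */, true /* groupSplitsInAM */);
InputDescriptor inputDescriptor =
    new InputDescriptor(MRInput.class.getName()).setUserPayload(payload);
// Because grouping was requested, MRInputAMSplitGenerator would be used as the
// initializer when adding this input to the map vertex via Vertex#addInput(...).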
 
Example 2
Source File: MROutput.java    From incubator-tez with Apache License 2.0
/**
 * Creates the user payload to be set on the OutputDescriptor for MROutput
 * @param conf Configuration for the OutputFormat
 * @param outputFormatName Name of the class of the OutputFormat
 * @param useNewApi Use new mapreduce API or old mapred API
 * @return the user payload to be set on the OutputDescriptor for MROutput
 * @throws IOException
 */
public static byte[] createUserPayload(Configuration conf, 
    String outputFormatName, boolean useNewApi) throws IOException {
  Configuration outputConf = new JobConf(conf);
  outputConf.set(MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, outputFormatName);
  outputConf.setBoolean("mapred.mapper.new-api", useNewApi);
  MRHelpers.translateVertexConfToTez(outputConf);
  MRHelpers.doJobClientMagic(outputConf);
  return TezUtils.createUserPayloadFromConf(outputConf);
}
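A matching sketch for the output side, not part of MROutput.java: the payload returned here could be set on an OutputDescriptor, using the same setUserPayload() pattern that Example 3 applies to its OutputSpec. TextOutputFormat is only an illustrative output format.

// Hedged usage sketch: wire the generated payload onto an OutputDescriptor for MROutput.
Configuration conf = new Configuration();
byte[] payload = MROutput.createUserPayload(conf,
    org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.class.getName(),
    true /* useNewApi */);
OutputDescriptor outputDescriptor =
    new OutputDescriptor(MROutput.class.getName()).setUserPayload(payload);
// outputDescriptor would then be attached to the vertex that writes the final job output.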
 
Example 3
Source File: TestMapProcessor.java    From incubator-tez with Apache License 2.0
  @Test
  public void testMapProcessor() throws Exception {
    String dagName = "mrdag0";
    String vertexName = MultiStageMRConfigUtil.getInitialMapVertexName();
    JobConf jobConf = new JobConf(defaultConf);
    setUpJobConf(jobConf);

    MRHelpers.translateVertexConfToTez(jobConf);
    jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0);

    jobConf.setBoolean(MRJobConfig.MR_TEZ_SPLITS_VIA_EVENTS, false);

    jobConf.set(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR, new Path(workDir,
        "localized-resources").toUri().toString());
    
    Path mapInput = new Path(workDir, "map0");
    
    
    MapUtils.generateInputSplit(localFs, workDir, jobConf, mapInput);
    
    InputSpec mapInputSpec = new InputSpec("NullSrcVertex",
        new InputDescriptor(MRInputLegacy.class.getName())
            .setUserPayload(MRHelpers.createMRInputPayload(jobConf, null)),
        1);
    OutputSpec mapOutputSpec = new OutputSpec("NullDestVertex", 
        new OutputDescriptor(LocalOnFileSorterOutput.class.getName())
            .setUserPayload(TezUtils.createUserPayloadFromConf(jobConf)), 1);

    LogicalIOProcessorRuntimeTask task = MapUtils.createLogicalTask(localFs, workDir, jobConf, 0,
        new Path(workDir, "map0"), new TestUmbilical(), dagName, vertexName,
        Collections.singletonList(mapInputSpec),
        Collections.singletonList(mapOutputSpec));
    
    task.initialize();
    task.run();
    task.close();
    
    TezInputContext inputContext = task.getInputContexts().iterator().next();
    TezTaskOutput mapOutputs = new TezLocalTaskOutputFiles(jobConf, inputContext.getUniqueIdentifier());
    
    
    // TODO NEWTEZ FIXME OutputCommitter verification
//    MRTask mrTask = (MRTask)t.getProcessor();
//    Assert.assertEquals(TezNullOutputCommitter.class.getName(), mrTask
//        .getCommitter().getClass().getName());
//    t.close();

    Path mapOutputFile = mapOutputs.getInputFile(new InputAttemptIdentifier(0, 0));
    LOG.info("mapOutputFile = " + mapOutputFile);
    IFile.Reader reader =
        new IFile.Reader(localFs, mapOutputFile, null, null, null, false, 0, -1);
    LongWritable key = new LongWritable();
    Text value = new Text();
    DataInputBuffer keyBuf = new DataInputBuffer();
    DataInputBuffer valueBuf = new DataInputBuffer();
    long prev = Long.MIN_VALUE;
    while (reader.nextRawKey(keyBuf)) {
      reader.nextRawValue(valueBuf);
      key.readFields(keyBuf);
      value.readFields(valueBuf);
      if (prev != Long.MIN_VALUE) {
        assert(prev <= key.get());
        prev = key.get();
      }
      LOG.info("key = " + key.get() + "; value = " + value);
    }
    reader.close();
  }