Java Code Examples for org.apache.tez.mapreduce.hadoop.MRHelpers#translateMRConfToTez()

The following examples show how to use org.apache.tez.mapreduce.hadoop.MRHelpers#translateMRConfToTez() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: MROutput.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the user payload to be set on the OutputDescriptor for MROutput
 */
private UserPayload createUserPayload() {
  // set which api is being used always
  conf.setBoolean(MRJobConfig.NEW_API_REDUCER_CONFIG, useNewApi);
  conf.setBoolean(MRJobConfig.NEW_API_MAPPER_CONFIG, useNewApi);
  if (outputFormatProvided) {
    if (useNewApi) {
      conf.set(MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, outputFormat.getName());
    } else {
      conf.set("mapred.output.format.class", outputFormat.getName());
    }
  }
  MRHelpers.translateMRConfToTez(conf);
  try {
    return TezUtils.createUserPayloadFromConf(conf);
  } catch (IOException e) {
    throw new TezUncheckedException(e);
  }
}
 
Example 2
Source File: MRInput.java    From tez with Apache License 2.0 6 votes vote down vote up
private DataSourceDescriptor createCustomDataSource() throws IOException {
  setupBasicConf(conf);

  MRHelpers.translateMRConfToTez(conf);

  Collection<URI> uris = maybeGetURIsForCredentials();

  UserPayload payload = MRInputHelpersInternal.createMRInputPayload(
      conf, groupSplitsInAM, sortSplitsInAM);

  DataSourceDescriptor ds = DataSourceDescriptor
      .create(InputDescriptor.create(inputClassName).setUserPayload(payload),
          customInitializerDescriptor, null);

  if (conf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT,
      TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT_DEFAULT)) {
    ds.getInputDescriptor().setHistoryText(TezUtils.convertToHistoryText(conf));
  }

  if (uris != null) {
    ds.addURIsForCredentials(uris);
  }
  return ds;
}
 
Example 3
Source File: MRInput.java    From tez with Apache License 2.0 6 votes vote down vote up
private DataSourceDescriptor createGeneratorDataSource() throws IOException {
  setupBasicConf(conf);
  MRHelpers.translateMRConfToTez(conf);
  
  Collection<URI> uris = maybeGetURIsForCredentials();

  UserPayload payload = MRInputHelpersInternal.createMRInputPayload(
      conf, groupSplitsInAM, sortSplitsInAM);

  DataSourceDescriptor ds = DataSourceDescriptor.create(
      InputDescriptor.create(inputClassName).setUserPayload(payload),
      InputInitializerDescriptor.create(MRInputAMSplitGenerator.class.getName()), null);

  if (conf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT,
      TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT_DEFAULT)) {
    ds.getInputDescriptor().setHistoryText(TezUtils.convertToHistoryText(conf));
  }

  if (uris != null) {
    ds.addURIsForCredentials(uris);
  }
  return ds;
}
 
Example 4
Source File: MRInput.java    From tez with Apache License 2.0 5 votes vote down vote up
private DataSourceDescriptor createDistributorDataSource() throws IOException {
  InputSplitInfo inputSplitInfo;
  setupBasicConf(conf);
  try {
    inputSplitInfo = MRInputHelpers.generateInputSplitsToMem(conf, false, true, 0);
  } catch (Exception e) {
    throw new TezUncheckedException(e);
  }
  MRHelpers.translateMRConfToTez(conf);

  UserPayload payload = MRInputHelpersInternal.createMRInputPayload(conf,
      inputSplitInfo.getSplitsProto());
  Credentials credentials = null;
  if (getCredentialsForSourceFilesystem && inputSplitInfo.getCredentials() != null) {
    credentials = inputSplitInfo.getCredentials();
  }
  DataSourceDescriptor ds = DataSourceDescriptor.create(
      InputDescriptor.create(inputClassName).setUserPayload(payload),
      InputInitializerDescriptor.create(MRInputSplitDistributor.class.getName()),
      inputSplitInfo.getNumTasks(), credentials,
      VertexLocationHint.create(inputSplitInfo.getTaskLocationHints()), null);
  if (conf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT,
      TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT_DEFAULT)) {
    ds.getInputDescriptor().setHistoryText(TezUtils.convertToHistoryText(conf));
  }

  return ds;
}
 
Example 5
Source File: TestMapProcessor.java    From tez with Apache License 2.0 4 votes vote down vote up
@Test(timeout = 5000)
  public void testMapProcessor() throws Exception {
    String dagName = "mrdag0";
    String vertexName = MultiStageMRConfigUtil.getInitialMapVertexName();
    JobConf jobConf = new JobConf(defaultConf);
    setUpJobConf(jobConf);

    MRHelpers.translateMRConfToTez(jobConf);
    jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0);

    jobConf.setBoolean(MRJobConfig.MR_TEZ_SPLITS_VIA_EVENTS, false);

    jobConf.set(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR, new Path(workDir,
        "localized-resources").toUri().toString());
    
    Path mapInput = new Path(workDir, "map0");
    
    
    MapUtils.generateInputSplit(localFs, workDir, jobConf, mapInput, 10);

    InputSpec mapInputSpec = new InputSpec("NullSrcVertex",
        InputDescriptor.create(MRInputLegacy.class.getName())
            .setUserPayload(UserPayload.create(ByteBuffer.wrap(
                MRRuntimeProtos.MRInputUserPayloadProto.newBuilder()
                    .setConfigurationBytes(TezUtils.createByteStringFromConf(jobConf)).build()
                    .toByteArray()))),
        1);
    OutputSpec mapOutputSpec = new OutputSpec("NullDestVertex", 
        OutputDescriptor.create(OrderedPartitionedKVOutput.class.getName())
            .setUserPayload(TezUtils.createUserPayloadFromConf(jobConf)), 1);

    TezSharedExecutor sharedExecutor = new TezSharedExecutor(jobConf);
    LogicalIOProcessorRuntimeTask task = MapUtils.createLogicalTask(localFs, workDir, jobConf, 0,
        new Path(workDir, "map0"), new TestUmbilical(), dagName, vertexName,
        Collections.singletonList(mapInputSpec), Collections.singletonList(mapOutputSpec),
        sharedExecutor);

    task.initialize();
    task.run();
    task.close();
    sharedExecutor.shutdownNow();

    OutputContext outputContext = task.getOutputContexts().iterator().next();
    TezTaskOutput mapOutputs = new TezTaskOutputFiles(
        jobConf, outputContext.getUniqueIdentifier(),
        outputContext.getDagIdentifier());
    
    
    // TODO NEWTEZ FIXME OutputCommitter verification
//    MRTask mrTask = (MRTask)t.getProcessor();
//    Assert.assertEquals(TezNullOutputCommitter.class.getName(), mrTask
//        .getCommitter().getClass().getName());
//    t.close();

    Path mapOutputFile = getMapOutputFile(jobConf, outputContext);
    LOG.info("mapOutputFile = " + mapOutputFile);
    IFile.Reader reader =
        new IFile.Reader(localFs, mapOutputFile, null, null, null, false, 0, -1);
    LongWritable key = new LongWritable();
    Text value = new Text();
    DataInputBuffer keyBuf = new DataInputBuffer();
    DataInputBuffer valueBuf = new DataInputBuffer();
    long prev = Long.MIN_VALUE;
    while (reader.nextRawKey(keyBuf)) {
      reader.nextRawValue(valueBuf);
      key.readFields(keyBuf);
      value.readFields(valueBuf);
      if (prev != Long.MIN_VALUE) {
        assert(prev <= key.get());
        prev = key.get();
      }
      LOG.info("key = " + key.get() + "; value = " + value);
    }
    reader.close();
  }
 
Example 6
Source File: TestMapProcessor.java    From tez with Apache License 2.0 4 votes vote down vote up
@Test(timeout = 30000)
public void testMapProcessorProgress() throws Exception {
  String dagName = "mrdag0";
  String vertexName = MultiStageMRConfigUtil.getInitialMapVertexName();
  JobConf jobConf = new JobConf(defaultConf);
  setUpJobConf(jobConf);

  MRHelpers.translateMRConfToTez(jobConf);
  jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0);

  jobConf.setBoolean(MRJobConfig.MR_TEZ_SPLITS_VIA_EVENTS, false);

  jobConf.set(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR, new Path(workDir,
      "localized-resources").toUri().toString());

  Path mapInput = new Path(workDir, "map0");


  MapUtils.generateInputSplit(localFs, workDir, jobConf, mapInput, 100000);

  InputSpec mapInputSpec = new InputSpec("NullSrcVertex",
      InputDescriptor.create(MRInputLegacy.class.getName())
          .setUserPayload(UserPayload.create(ByteBuffer.wrap(
              MRRuntimeProtos.MRInputUserPayloadProto.newBuilder()
                  .setConfigurationBytes(TezUtils.createByteStringFromConf
                      (jobConf)).build()
                  .toByteArray()))),
      1);
  OutputSpec mapOutputSpec = new OutputSpec("NullDestVertex",
      OutputDescriptor.create(OrderedPartitionedKVOutput.class.getName())
          .setUserPayload(TezUtils.createUserPayloadFromConf(jobConf)), 1);

  TezSharedExecutor sharedExecutor = new TezSharedExecutor(jobConf);
  final LogicalIOProcessorRuntimeTask task = MapUtils.createLogicalTask
      (localFs, workDir, jobConf, 0,
          new Path(workDir, "map0"), new TestUmbilical(), dagName, vertexName,
          Collections.singletonList(mapInputSpec),
          Collections.singletonList(mapOutputSpec), sharedExecutor);

  ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(1);
  Thread monitorProgress = new Thread(new Runnable() {
    @Override
    public void run() {
      float prog = task.getProgress();
      if(prog > 0.0f && prog < 1.0f)
        progressUpdate = prog;
    }
  });

  task.initialize();
  scheduler.scheduleAtFixedRate(monitorProgress, 0, 1,
      TimeUnit.MILLISECONDS);
  task.run();
  Assert.assertTrue("Progress Updates should be captured!",
      progressUpdate > 0.0f && progressUpdate < 1.0f);
  task.close();
  sharedExecutor.shutdownNow();
}