org.apache.tez.mapreduce.input.MRInput Java Examples
The following examples show how to use
org.apache.tez.mapreduce.input.MRInput.
Each example comes from an open-source project; the source file and its license are noted above each snippet.
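Before the examples, here is a minimal sketch of the basic read pattern most of them share: obtain the MRInput from the processor's logical inputs, ask it for a KeyValueReader, and iterate over the records. The processor class name below is illustrative and the sketch assumes the newer Tez API in which a processor extends SimpleProcessor; it is distilled from the WordCount examples later on this page rather than taken from the Tez sources.

import org.apache.tez.mapreduce.input.MRInput;
import org.apache.tez.runtime.api.ProcessorContext;
import org.apache.tez.runtime.library.api.KeyValueReader;
import org.apache.tez.runtime.library.processor.SimpleProcessor;

public class RecordPrintingProcessor extends SimpleProcessor {

  public RecordPrintingProcessor(ProcessorContext context) {
    super(context);
  }

  @Override
  public void run() throws Exception {
    // A vertex configured with a single MRInput data source exposes it as its only logical input.
    MRInput input = (MRInput) getInputs().values().iterator().next();
    KeyValueReader kvReader = input.getReader();
    while (kvReader.next()) {
      // With TextInputFormat the key is the byte offset and the value is the line of text.
      System.out.println(kvReader.getCurrentKey() + " -> " + kvReader.getCurrentValue());
    }
  }
}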
Example #1
Source File: POSimpleTezLoad.java From spork with Apache License 2.0
@Override
public void attachInputs(Map<String, LogicalInput> inputs, Configuration conf)
    throws ExecException {
  this.conf = conf;
  LogicalInput logInput = inputs.get(inputKey);
  if (logInput == null || !(logInput instanceof MRInput)) {
    throw new ExecException("POSimpleTezLoad only accepts MRInputs");
  }
  input = (MRInput) logInput;
  try {
    reader = input.getReader();
    // Set the split index; MergeCoGroup needs it, and this input is the only input of the
    // MergeCoGroup vertex.
    if (reader instanceof MRReader) {
      int splitIndex = ((PigSplit) ((MRReader) reader).getSplit()).getSplitIndex();
      PigMapReduce.sJobContext.getConfiguration().setInt(PigImplConstants.PIG_SPLIT_INDEX, splitIndex);
    }
  } catch (IOException e) {
    throw new ExecException(e);
  }
}
Example #2
Source File: WordCount.java From incubator-tez with Apache License 2.0
@Override
public void run() throws Exception {
  Preconditions.checkArgument(getInputs().size() == 1);
  Preconditions.checkArgument(getOutputs().size() == 1);
  MRInput input = (MRInput) getInputs().values().iterator().next();
  KeyValueReader kvReader = input.getReader();
  Output output = getOutputs().values().iterator().next();
  KeyValueWriter kvWriter = (KeyValueWriter) output.getWriter();
  while (kvReader.next()) {
    StringTokenizer itr = new StringTokenizer(kvReader.getCurrentValue().toString());
    while (itr.hasMoreTokens()) {
      word.set(itr.nextToken());
      kvWriter.write(word, one);
    }
  }
}
Example #3
Source File: YARNRunner.java From tez with Apache License 2.0
@Private
private static DataSourceDescriptor configureMRInputWithLegacySplitsGenerated(
    Configuration conf, boolean useLegacyInput) {
  InputDescriptor inputDescriptor;

  try {
    inputDescriptor = InputDescriptor.create(useLegacyInput ? MRInputLegacy.class
        .getName() : MRInput.class.getName())
        .setUserPayload(MRInputHelpersInternal.createMRInputPayload(conf, null));
  } catch (IOException e) {
    throw new TezUncheckedException(e);
  }

  DataSourceDescriptor dsd = DataSourceDescriptor.create(inputDescriptor, null, null);
  if (conf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT,
      TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT_DEFAULT)) {
    dsd.getInputDescriptor().setHistoryText(TezUtils.convertToHistoryText(conf));
  }

  return dsd;
}
Example #4
Source File: WordCount.java From incubator-tez with Apache License 2.0
private DAG createDAG(FileSystem fs, TezConfiguration tezConf,
    Map<String, LocalResource> localResources, Path stagingDir,
    String inputPath, String outputPath) throws IOException {

  Configuration inputConf = new Configuration(tezConf);
  inputConf.set(FileInputFormat.INPUT_DIR, inputPath);
  InputDescriptor id = new InputDescriptor(MRInput.class.getName())
      .setUserPayload(MRInput.createUserPayload(inputConf,
          TextInputFormat.class.getName(), true, true));

  Configuration outputConf = new Configuration(tezConf);
  outputConf.set(FileOutputFormat.OUTDIR, outputPath);
  OutputDescriptor od = new OutputDescriptor(MROutput.class.getName())
      .setUserPayload(MROutput.createUserPayload(
          outputConf, TextOutputFormat.class.getName(), true));

  Vertex tokenizerVertex = new Vertex("tokenizer", new ProcessorDescriptor(
      TokenProcessor.class.getName()), -1, MRHelpers.getMapResource(tezConf));
  tokenizerVertex.addInput("MRInput", id, MRInputAMSplitGenerator.class);

  Vertex summerVertex = new Vertex("summer", new ProcessorDescriptor(
      SumProcessor.class.getName()), 1, MRHelpers.getReduceResource(tezConf));
  summerVertex.addOutput("MROutput", od, MROutputCommitter.class);

  OrderedPartitionedKVEdgeConfigurer edgeConf = OrderedPartitionedKVEdgeConfigurer
      .newBuilder(Text.class.getName(), IntWritable.class.getName(),
          HashPartitioner.class.getName(), null).build();

  DAG dag = new DAG("WordCount");
  dag.addVertex(tokenizerVertex)
      .addVertex(summerVertex)
      .addEdge(
          new Edge(tokenizerVertex, summerVertex, edgeConf.createDefaultEdgeProperty()));
  return dag;
}
Example #5
Source File: TestHistoryParser.java From tez with Apache License 2.0
private String runWordCount(String tokenizerProcessor, String summationProcessor,
    String dagName, boolean withTimeline) throws Exception {
  // HDFS path
  Path outputLoc = new Path("/tmp/outPath_" + System.currentTimeMillis());

  DataSourceDescriptor dataSource =
      MRInput.createConfigBuilder(conf, TextInputFormat.class, inputLoc.toString()).build();
  DataSinkDescriptor dataSink =
      MROutput.createConfigBuilder(conf, TextOutputFormat.class, outputLoc.toString()).build();

  Vertex tokenizerVertex = Vertex.create(TOKENIZER, ProcessorDescriptor.create(
      tokenizerProcessor)).addDataSource(INPUT, dataSource);

  OrderedPartitionedKVEdgeConfig edgeConf = OrderedPartitionedKVEdgeConfig
      .newBuilder(Text.class.getName(), IntWritable.class.getName(),
          HashPartitioner.class.getName()).build();

  Vertex summationVertex = Vertex.create(SUMMATION,
      ProcessorDescriptor.create(summationProcessor), 1).addDataSink(OUTPUT, dataSink);

  // Create DAG and add the vertices. Connect the producer and consumer vertices via the edge
  DAG dag = DAG.create(dagName);
  dag.addVertex(tokenizerVertex).addVertex(summationVertex).addEdge(
      Edge.create(tokenizerVertex, summationVertex, edgeConf.createDefaultEdgeProperty()));

  TezClient tezClient = getTezClient(withTimeline);

  // Update Caller Context
  CallerContext callerContext = CallerContext.create("TezExamples", "Tez WordCount Example Job");
  ApplicationId appId = tezClient.getAppMasterApplicationId();
  if (appId == null) {
    appId = ApplicationId.newInstance(1001l, 1);
  }
  callerContext.setCallerIdAndType(appId.toString(), "TezApplication");
  dag.setCallerContext(callerContext);

  DAGClient client = tezClient.submitDAG(dag);
  client.waitForCompletionWithStatusUpdates(Sets.newHashSet(StatusGetOpts.GET_COUNTERS));
  TezDAGID tezDAGID = TezDAGID.getInstance(tezClient.getAppMasterApplicationId(), 1);

  if (tezClient != null) {
    tezClient.stop();
  }
  return tezDAGID.toString();
}
Example #6
Source File: TopK.java From sequenceiq-samples with Apache License 2.0
private DAG createDAG(TezConfiguration tezConf, String inputPath, String outputPath,
    String columnIndex, String top, String numPartitions) throws IOException {

  DataSourceDescriptor dataSource = MRInput.createConfigBuilder(new Configuration(tezConf),
      TextInputFormat.class, inputPath).build();

  DataSinkDescriptor dataSink = MROutput.createConfigBuilder(new Configuration(tezConf),
      TextOutputFormat.class, outputPath).build();

  Vertex tokenizerVertex = Vertex.create(TOKENIZER,
      ProcessorDescriptor.create(TokenProcessor.class.getName())
          .setUserPayload(createPayload(Integer.valueOf(columnIndex))))
      .addDataSource(INPUT, dataSource);

  int topK = Integer.valueOf(top);
  Vertex sumVertex = Vertex.create(SUM,
      ProcessorDescriptor.create(SumProcessor.class.getName())
          .setUserPayload(createPayload(topK)), Integer.valueOf(numPartitions));

  // Parallelism must be set to 1 as the writer needs to see the global picture of the data set.
  // Multiple writer tasks would each emit their own top-K list, computed only over their
  // partition of the data.
  Vertex writerVertex = Vertex.create(WRITER,
      ProcessorDescriptor.create(Writer.class.getName())
          .setUserPayload(createPayload(topK)), 1)
      .addDataSink(OUTPUT, dataSink);

  OrderedPartitionedKVEdgeConfig tokenSumEdge = OrderedPartitionedKVEdgeConfig
      .newBuilder(Text.class.getName(), IntWritable.class.getName(),
          HashPartitioner.class.getName()).build();

  UnorderedKVEdgeConfig sumWriterEdge = UnorderedKVEdgeConfig
      .newBuilder(IntWritable.class.getName(), Text.class.getName()).build();

  DAG dag = DAG.create("topk");
  return dag
      .addVertex(tokenizerVertex)
      .addVertex(sumVertex)
      .addVertex(writerVertex)
      .addEdge(Edge.create(tokenizerVertex, sumVertex, tokenSumEdge.createDefaultEdgeProperty()))
      .addEdge(Edge.create(sumVertex, writerVertex, sumWriterEdge.createDefaultBroadcastEdgeProperty()));
}
Example #7
Source File: TestMRInputAMSplitGenerator.java From tez with Apache License 2.0
private void testGroupSplitsAndSortSplits(boolean groupSplitsEnabled,
    boolean sortSplitsEnabled) throws Exception {
  Configuration conf = new Configuration();
  String[] splitLengths = new String[50];
  for (int i = 0; i < splitLengths.length; i++) {
    splitLengths[i] = Integer.toString(1000 * (i + 1));
  }
  conf.setStrings(SPLITS_LENGTHS, splitLengths);
  DataSourceDescriptor dataSource = MRInput.createConfigBuilder(
      conf, InputFormatForTest.class)
      .groupSplits(groupSplitsEnabled).sortSplits(sortSplitsEnabled).build();
  UserPayload userPayload = dataSource.getInputDescriptor().getUserPayload();

  InputInitializerContext context =
      new TezTestUtils.TezRootInputInitializerContextForTest(userPayload, new Configuration(false));
  MRInputAMSplitGenerator splitGenerator = new MRInputAMSplitGenerator(context);

  List<Event> events = splitGenerator.initialize();

  assertTrue(events.get(0) instanceof InputConfigureVertexTasksEvent);
  boolean shuffled = false;
  InputSplit previousIs = null;
  int numRawInputSplits = 0;
  for (int i = 1; i < events.size(); i++) {
    assertTrue(events.get(i) instanceof InputDataInformationEvent);
    InputDataInformationEvent diEvent = (InputDataInformationEvent) (events.get(i));
    assertNull(diEvent.getDeserializedUserPayload());
    assertNotNull(diEvent.getUserPayload());
    MRSplitProto eventProto = MRSplitProto.parseFrom(ByteString.copyFrom(
        diEvent.getUserPayload()));
    InputSplit is = MRInputUtils.getNewSplitDetailsFromEvent(
        eventProto, new Configuration());
    if (groupSplitsEnabled) {
      numRawInputSplits += ((TezGroupedSplit) is).getGroupedSplits().size();
      for (InputSplit inputSplit : ((TezGroupedSplit) is).getGroupedSplits()) {
        assertTrue(inputSplit instanceof InputSplitForTest);
      }
      assertTrue(((TezGroupedSplit) is).getGroupedSplits().get(0) instanceof InputSplitForTest);
    } else {
      numRawInputSplits++;
      assertTrue(is instanceof InputSplitForTest);
    }
    // The splits in the list returned from InputFormat has ascending
    // size in order.
    // If sortSplitsEnabled is true, MRInputAMSplitGenerator will sort the
    // splits in descending order.
    // If sortSplitsEnabled is false, MRInputAMSplitGenerator will shuffle
    // the splits.
    if (previousIs != null) {
      if (sortSplitsEnabled) {
        assertTrue(is.getLength() <= previousIs.getLength());
      } else {
        shuffled |= (is.getLength() > previousIs.getLength());
      }
    }
    previousIs = is;
  }
  assertEquals(splitLengths.length, numRawInputSplits);
  if (!sortSplitsEnabled) {
    assertTrue(shuffled);
  }
}
Example #8
Source File: MRInputBase.java From tez with Apache License 2.0
public List<Event> initialize() throws IOException {
  getContext().requestInitialMemory(0l, null); // mandatory call
  MRRuntimeProtos.MRInputUserPayloadProto mrUserPayload =
      MRInputHelpers.parseMRInputPayload(getContext().getUserPayload());
  boolean isGrouped = mrUserPayload.getGroupingEnabled();
  Preconditions.checkArgument(mrUserPayload.hasSplits() == false,
      "Split information not expected in " + this.getClass().getName());

  Configuration conf = new JobConf(getContext().getContainerConfiguration());
  TezUtils.addToConfFromByteString(conf, mrUserPayload.getConfigurationBytes());
  this.jobConf = new JobConf(conf);
  useNewApi = this.jobConf.getUseNewMapper();
  if (isGrouped) {
    if (useNewApi) {
      jobConf.set(MRJobConfig.INPUT_FORMAT_CLASS_ATTR,
          org.apache.hadoop.mapreduce.split.TezGroupedSplitsInputFormat.class.getName());
    } else {
      jobConf.set("mapred.input.format.class",
          org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat.class.getName());
    }
  }

  // Add tokens to the jobConf - in case they are accessed within the RR / IF
  jobConf.getCredentials().mergeAll(UserGroupInformation.getCurrentUser().getCredentials());

  TaskAttemptID taskAttemptId = new TaskAttemptID(
      new TaskID(
          Long.toString(getContext().getApplicationId().getClusterTimestamp()),
          getContext().getApplicationId().getId(), TaskType.MAP,
          getContext().getTaskIndex()),
      getContext().getTaskAttemptNumber());

  jobConf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttemptId.toString());
  jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, getContext().getDAGAttemptNumber());
  jobConf.setInt(MRInput.TEZ_MAPREDUCE_DAG_INDEX, getContext().getDagIdentifier());
  jobConf.setInt(MRInput.TEZ_MAPREDUCE_VERTEX_INDEX, getContext().getTaskVertexIndex());
  jobConf.setInt(MRInput.TEZ_MAPREDUCE_TASK_INDEX, getContext().getTaskIndex());
  jobConf.setInt(MRInput.TEZ_MAPREDUCE_TASK_ATTEMPT_INDEX, getContext().getTaskAttemptNumber());
  jobConf.set(MRInput.TEZ_MAPREDUCE_DAG_NAME, getContext().getDAGName());
  jobConf.set(MRInput.TEZ_MAPREDUCE_VERTEX_NAME, getContext().getTaskVertexName());
  jobConf.setInt(MRInput.TEZ_MAPREDUCE_INPUT_INDEX, getContext().getInputIndex());
  jobConf.set(MRInput.TEZ_MAPREDUCE_INPUT_NAME, getContext().getSourceVertexName());
  jobConf.set(MRInput.TEZ_MAPREDUCE_APPLICATION_ID, getContext().getApplicationId().toString());
  jobConf.set(MRInput.TEZ_MAPREDUCE_UNIQUE_IDENTIFIER, getContext().getUniqueIdentifier());
  jobConf.setInt(MRInput.TEZ_MAPREDUCE_DAG_ATTEMPT_NUMBER, getContext().getDAGAttemptNumber());

  TezDAGID tezDAGID = TezDAGID.getInstance(getContext().getApplicationId(),
      getContext().getDagIdentifier());
  TezVertexID tezVertexID = TezVertexID.getInstance(tezDAGID, getContext().getTaskVertexIndex());
  TezTaskID tezTaskID = TezTaskID.getInstance(tezVertexID, getContext().getTaskIndex());
  TezTaskAttemptID tezTaskAttemptID = TezTaskAttemptID.getInstance(tezTaskID,
      getContext().getTaskAttemptNumber());
  jobConf.set(MRInput.TEZ_MAPREDUCE_DAG_ID, tezDAGID.toString());
  jobConf.set(MRInput.TEZ_MAPREDUCE_VERTEX_ID, tezVertexID.toString());
  jobConf.set(MRInput.TEZ_MAPREDUCE_TASK_ID, tezTaskID.toString());
  jobConf.set(MRInput.TEZ_MAPREDUCE_TASK_ATTEMPT_ID, tezTaskAttemptID.toString());

  this.inputRecordCounter = getContext().getCounters().findCounter(
      TaskCounter.INPUT_RECORDS_PROCESSED);

  return null;
}
Example #9
Source File: MapProcessor.java From tez with Apache License 2.0
private NewRecordReader(MRInput in) throws IOException {
  this.in = in;
  this.reader = in.getReader();
}
Example #10
Source File: UnionExample.java From tez with Apache License 2.0
private DAG createDAG(FileSystem fs, TezConfiguration tezConf,
    Map<String, LocalResource> localResources, Path stagingDir,
    String inputPath, String outputPath) throws IOException {
  DAG dag = DAG.create("UnionExample");

  int numMaps = -1;
  Configuration inputConf = new Configuration(tezConf);
  inputConf.setBoolean("mapred.mapper.new-api", false);
  inputConf.set("mapred.input.format.class", TextInputFormat.class.getName());
  inputConf.set(FileInputFormat.INPUT_DIR, inputPath);
  MRInput.MRInputConfigBuilder configurer = MRInput.createConfigBuilder(inputConf, null);
  DataSourceDescriptor dataSource = configurer.generateSplitsInAM(false).build();

  Vertex mapVertex1 = Vertex.create("map1", ProcessorDescriptor.create(
      TokenProcessor.class.getName()), numMaps).addDataSource("MRInput", dataSource);

  Vertex mapVertex2 = Vertex.create("map2", ProcessorDescriptor.create(
      TokenProcessor.class.getName()), numMaps).addDataSource("MRInput", dataSource);

  Vertex mapVertex3 = Vertex.create("map3", ProcessorDescriptor.create(
      TokenProcessor.class.getName()), numMaps).addDataSource("MRInput", dataSource);

  Vertex checkerVertex = Vertex.create("checker", ProcessorDescriptor.create(
      UnionProcessor.class.getName()), 1);

  Configuration outputConf = new Configuration(tezConf);
  outputConf.setBoolean("mapred.reducer.new-api", false);
  outputConf.set("mapred.output.format.class", TextOutputFormat.class.getName());
  outputConf.set(FileOutputFormat.OUTDIR, outputPath);
  DataSinkDescriptor od = MROutput.createConfigBuilder(outputConf, null).build();
  checkerVertex.addDataSink("union", od);

  Configuration allPartsConf = new Configuration(tezConf);
  DataSinkDescriptor od2 = MROutput.createConfigBuilder(allPartsConf,
      TextOutputFormat.class, outputPath + "-all-parts").build();
  checkerVertex.addDataSink("all-parts", od2);

  Configuration partsConf = new Configuration(tezConf);
  DataSinkDescriptor od1 = MROutput.createConfigBuilder(partsConf,
      TextOutputFormat.class, outputPath + "-parts").build();
  VertexGroup unionVertex = dag.createVertexGroup("union", mapVertex1, mapVertex2);
  unionVertex.addDataSink("parts", od1);

  OrderedPartitionedKVEdgeConfig edgeConf = OrderedPartitionedKVEdgeConfig
      .newBuilder(Text.class.getName(), IntWritable.class.getName(),
          HashPartitioner.class.getName()).build();

  dag.addVertex(mapVertex1)
      .addVertex(mapVertex2)
      .addVertex(mapVertex3)
      .addVertex(checkerVertex)
      .addEdge(
          Edge.create(mapVertex3, checkerVertex, edgeConf.createDefaultEdgeProperty()))
      .addEdge(
          GroupInputEdge.create(unionVertex, checkerVertex, edgeConf.createDefaultEdgeProperty(),
              InputDescriptor.create(
                  ConcatenatedMergedKeyValuesInput.class.getName())));
  return dag;
}
Example #11
Source File: FilterByWordInputProcessor.java From tez with Apache License 2.0
@Override
public void run(Map<String, LogicalInput> _inputs,
    Map<String, LogicalOutput> _outputs) throws Exception {
  this.inputs = _inputs;
  this.outputs = _outputs;
  this.progressHelper = new ProgressHelper(this.inputs, getContext(), this.getClass().getSimpleName());
  if (_inputs.size() != 1) {
    throw new IllegalStateException("FilterByWordInputProcessor processor can only work with a single input");
  }

  if (_outputs.size() != 1) {
    throw new IllegalStateException("FilterByWordInputProcessor processor can only work with a single output");
  }

  for (LogicalInput input : _inputs.values()) {
    input.start();
  }
  for (LogicalOutput output : _outputs.values()) {
    output.start();
  }

  LogicalInput li = _inputs.values().iterator().next();
  if (! (li instanceof MRInput)) {
    throw new IllegalStateException("FilterByWordInputProcessor processor can only work with MRInput");
  }

  LogicalOutput lo = _outputs.values().iterator().next();
  if (! (lo instanceof UnorderedKVOutput)) {
    throw new IllegalStateException("FilterByWordInputProcessor processor can only work with OnFileUnorderedKVOutput");
  }
  progressHelper.scheduleProgressTaskService(0, 100);
  MRInputLegacy mrInput = (MRInputLegacy) li;
  mrInput.init();
  UnorderedKVOutput kvOutput = (UnorderedKVOutput) lo;

  Configuration updatedConf = mrInput.getConfigUpdates();
  Text srcFile = new Text();
  srcFile.set("UNKNOWN_FILENAME_IN_PROCESSOR");
  if (updatedConf != null) {
    String fileName = updatedConf.get(MRJobConfig.MAP_INPUT_FILE);
    if (fileName != null) {
      LOG.info("Processing file: " + fileName);
      srcFile.set(fileName);
    }
  }

  KeyValueReader kvReader = mrInput.getReader();
  KeyValueWriter kvWriter = kvOutput.getWriter();
  while (kvReader.next()) {
    Object key = kvReader.getCurrentKey();
    Object val = kvReader.getCurrentValue();

    Text valText = (Text) val;
    String readVal = valText.toString();
    if (readVal.contains(filterWord)) {
      LongWritable lineNum = (LongWritable) key;
      TextLongPair outVal = new TextLongPair(srcFile, lineNum);
      kvWriter.write(valText, outVal);
    }
  }
}
Example #12
Source File: OrderedWordCount.java From tez with Apache License 2.0
public static DAG createDAG(TezConfiguration tezConf, String inputPath, String outputPath,
    int numPartitions, boolean disableSplitGrouping, boolean isGenerateSplitInClient,
    String dagName) throws IOException {
  DataSourceDescriptor dataSource = MRInput.createConfigBuilder(new Configuration(tezConf),
      TextInputFormat.class, inputPath).groupSplits(!disableSplitGrouping)
      .generateSplitsInAM(!isGenerateSplitInClient).build();

  DataSinkDescriptor dataSink = MROutput.createConfigBuilder(new Configuration(tezConf),
      TextOutputFormat.class, outputPath).build();

  Vertex tokenizerVertex = Vertex.create(TOKENIZER, ProcessorDescriptor.create(
      TokenProcessor.class.getName()));
  tokenizerVertex.addDataSource(INPUT, dataSource);

  // Use Text key and IntWritable value to bring counts for each word in the same partition
  // The setFromConfiguration call is optional and allows overriding the config options with
  // command line parameters.
  OrderedPartitionedKVEdgeConfig summationEdgeConf = OrderedPartitionedKVEdgeConfig
      .newBuilder(Text.class.getName(), IntWritable.class.getName(),
          HashPartitioner.class.getName())
      .setFromConfiguration(tezConf)
      .build();

  // This vertex will be reading intermediate data via an input edge and writing intermediate data
  // via an output edge.
  Vertex summationVertex = Vertex.create(SUMMATION, ProcessorDescriptor.create(
      SumProcessor.class.getName()), numPartitions);

  // Use IntWritable key and Text value to bring all words with the same count in the same
  // partition. The data will be ordered by count and words grouped by count. The
  // setFromConfiguration call is optional and allows overriding the config options with
  // command line parameters.
  OrderedPartitionedKVEdgeConfig sorterEdgeConf = OrderedPartitionedKVEdgeConfig
      .newBuilder(IntWritable.class.getName(), Text.class.getName(),
          HashPartitioner.class.getName())
      .setFromConfiguration(tezConf)
      .build();

  // Use 1 task to bring all the data in one place for global sorted order. Essentially the number
  // of partitions is 1. So the NoOpSorter can be used to produce the globally ordered output
  Vertex sorterVertex = Vertex.create(SORTER, ProcessorDescriptor.create(
      NoOpSorter.class.getName()), 1);
  sorterVertex.addDataSink(OUTPUT, dataSink);

  // No need to add jar containing this class as assumed to be part of the tez jars.

  DAG dag = DAG.create(dagName);
  dag.addVertex(tokenizerVertex)
      .addVertex(summationVertex)
      .addVertex(sorterVertex)
      .addEdge(
          Edge.create(tokenizerVertex, summationVertex,
              summationEdgeConf.createDefaultEdgeProperty()))
      .addEdge(
          Edge.create(summationVertex, sorterVertex,
              sorterEdgeConf.createDefaultEdgeProperty()));
  return dag;
}
Example #13
Source File: JoinValidate.java From tez with Apache License 2.0
@VisibleForTesting
DAG createDag(TezConfiguration tezConf, Path lhs, Path rhs, int numPartitions)
    throws IOException {
  DAG dag = DAG.create(getDagName());
  if (getDefaultExecutionContext() != null) {
    dag.setExecutionContext(getDefaultExecutionContext());
  }

  // Configuration for intermediate output - shared by Vertex1 and Vertex2
  // This should only be setting selective keys from the underlying conf. Fix after there's a
  // better mechanism to configure the IOs. The setFromConfiguration call is optional and allows
  // overriding the config options with command line parameters.
  OrderedPartitionedKVEdgeConfig edgeConf = OrderedPartitionedKVEdgeConfig
      .newBuilder(Text.class.getName(), NullWritable.class.getName(),
          HashPartitioner.class.getName())
      .setFromConfiguration(tezConf)
      .build();

  Vertex lhsVertex = Vertex.create(LHS_INPUT_NAME, ProcessorDescriptor.create(
      ForwardingProcessor.class.getName())).addDataSource("lhs",
      MRInput
          .createConfigBuilder(new Configuration(tezConf), TextInputFormat.class,
              lhs.toUri().toString()).groupSplits(!isDisableSplitGrouping())
          .generateSplitsInAM(!isGenerateSplitInClient()).build());
  setVertexExecutionContext(lhsVertex, getLhsExecutionContext());

  Vertex rhsVertex = Vertex.create(RHS_INPUT_NAME, ProcessorDescriptor.create(
      ForwardingProcessor.class.getName())).addDataSource("rhs",
      MRInput
          .createConfigBuilder(new Configuration(tezConf), TextInputFormat.class,
              rhs.toUri().toString()).groupSplits(!isDisableSplitGrouping())
          .generateSplitsInAM(!isGenerateSplitInClient()).build());
  setVertexExecutionContext(rhsVertex, getRhsExecutionContext());

  Vertex joinValidateVertex = Vertex.create("joinvalidate", ProcessorDescriptor.create(
      JoinValidateProcessor.class.getName()), numPartitions);
  setVertexExecutionContext(joinValidateVertex, getValidateExecutionContext());

  Edge e1 = Edge.create(lhsVertex, joinValidateVertex, edgeConf.createDefaultEdgeProperty());
  Edge e2 = Edge.create(rhsVertex, joinValidateVertex, edgeConf.createDefaultEdgeProperty());

  dag.addVertex(lhsVertex).addVertex(rhsVertex).addVertex(joinValidateVertex).addEdge(e1)
      .addEdge(e2);
  return dag;
}
Example #14
Source File: FilterByWordInputProcessor.java From incubator-tez with Apache License 2.0
@Override
public void run(Map<String, LogicalInput> inputs,
    Map<String, LogicalOutput> outputs) throws Exception {

  if (inputs.size() != 1) {
    throw new IllegalStateException("FilterByWordInputProcessor processor can only work with a single input");
  }

  if (outputs.size() != 1) {
    throw new IllegalStateException("FilterByWordInputProcessor processor can only work with a single output");
  }

  for (LogicalInput input : inputs.values()) {
    input.start();
  }
  for (LogicalOutput output : outputs.values()) {
    output.start();
  }

  LogicalInput li = inputs.values().iterator().next();
  if (! (li instanceof MRInput)) {
    throw new IllegalStateException("FilterByWordInputProcessor processor can only work with MRInput");
  }

  LogicalOutput lo = outputs.values().iterator().next();
  if (! (lo instanceof OnFileUnorderedKVOutput)) {
    throw new IllegalStateException("FilterByWordInputProcessor processor can only work with OnFileUnorderedKVOutput");
  }

  MRInputLegacy mrInput = (MRInputLegacy) li;
  mrInput.init();
  OnFileUnorderedKVOutput kvOutput = (OnFileUnorderedKVOutput) lo;

  Configuration updatedConf = mrInput.getConfigUpdates();
  Text srcFile = new Text();
  srcFile.set("UNKNOWN_FILENAME_IN_PROCESSOR");
  if (updatedConf != null) {
    String fileName = updatedConf.get(MRJobConfig.MAP_INPUT_FILE);
    if (fileName != null) {
      LOG.info("Processing file: " + fileName);
      srcFile.set(fileName);
    }
  }

  KeyValueReader kvReader = mrInput.getReader();
  KeyValueWriter kvWriter = kvOutput.getWriter();
  while (kvReader.next()) {
    Object key = kvReader.getCurrentKey();
    Object val = kvReader.getCurrentValue();

    Text valText = (Text) val;
    String readVal = valText.toString();
    if (readVal.contains(filterWord)) {
      LongWritable lineNum = (LongWritable) key;
      TextLongPair outVal = new TextLongPair(srcFile, lineNum);
      kvWriter.write(valText, outVal);
    }
  }
}
Example #15
Source File: IntersectValidate.java From incubator-tez with Apache License 2.0
private DAG createDag(TezConfiguration tezConf, Path lhs, Path rhs, int numPartitions)
    throws IOException {
  DAG dag = new DAG("IntersectValidate");

  // Configuration for src1
  Configuration lhsInputConf = new Configuration(tezConf);
  lhsInputConf.set(FileInputFormat.INPUT_DIR, lhs.toUri().toString());
  byte[] streamInputPayload = MRInput.createUserPayload(lhsInputConf,
      TextInputFormat.class.getName(), true, false);

  // Configuration for src2
  Configuration rhsInputConf = new Configuration(tezConf);
  rhsInputConf.set(FileInputFormat.INPUT_DIR, rhs.toUri().toString());
  byte[] hashInputPayload = MRInput.createUserPayload(rhsInputConf,
      TextInputFormat.class.getName(), true, false);

  // Configuration for intermediate output - shared by Vertex1 and Vertex2
  // This should only be setting selective keys from the underlying conf. Fix after there's a
  // better mechanism to configure the IOs.
  OrderedPartitionedKVEdgeConfigurer edgeConf = OrderedPartitionedKVEdgeConfigurer
      .newBuilder(Text.class.getName(), NullWritable.class.getName(),
          HashPartitioner.class.getName(), null).build();

  // Change the way resources are setup - no MRHelpers
  Vertex lhsVertex = new Vertex(LHS_INPUT_NAME, new ProcessorDescriptor(
      ForwardingProcessor.class.getName()), -1,
      MRHelpers.getMapResource(tezConf)).addInput("lhs",
      new InputDescriptor(MRInput.class.getName()).setUserPayload(streamInputPayload),
      MRInputAMSplitGenerator.class);

  Vertex rhsVertex = new Vertex(RHS_INPUT_NAME, new ProcessorDescriptor(
      ForwardingProcessor.class.getName()), -1,
      MRHelpers.getMapResource(tezConf)).addInput("rhs",
      new InputDescriptor(MRInput.class.getName()).setUserPayload(hashInputPayload),
      MRInputAMSplitGenerator.class);

  Vertex intersectValidateVertex = new Vertex("intersectvalidate",
      new ProcessorDescriptor(IntersectValidateProcessor.class.getName()),
      numPartitions, MRHelpers.getReduceResource(tezConf));

  Edge e1 = new Edge(lhsVertex, intersectValidateVertex, edgeConf.createDefaultEdgeProperty());
  Edge e2 = new Edge(rhsVertex, intersectValidateVertex, edgeConf.createDefaultEdgeProperty());

  dag.addVertex(lhsVertex).addVertex(rhsVertex).addVertex(intersectValidateVertex).addEdge(e1)
      .addEdge(e2);
  return dag;
}
Example #16
Source File: IntersectExample.java From incubator-tez with Apache License 2.0
private DAG createDag(TezConfiguration tezConf, Path streamPath, Path hashPath, Path outPath,
    int numPartitions) throws IOException {
  DAG dag = new DAG("IntersectExample");

  // Configuration for src1
  Configuration streamInputConf = new Configuration(tezConf);
  streamInputConf.set(FileInputFormat.INPUT_DIR, streamPath.toUri().toString());
  byte[] streamInputPayload = MRInput.createUserPayload(streamInputConf,
      TextInputFormat.class.getName(), true, false);

  // Configuration for src2
  Configuration hashInputConf = new Configuration(tezConf);
  hashInputConf.set(FileInputFormat.INPUT_DIR, hashPath.toUri().toString());
  byte[] hashInputPayload = MRInput.createUserPayload(hashInputConf,
      TextInputFormat.class.getName(), true, false);

  // Configuration for intermediate output - shared by Vertex1 and Vertex2
  // This should only be setting selective keys from the underlying conf. Fix after there's a
  // better mechanism to configure the IOs.
  UnorderedPartitionedKVEdgeConfigurer edgeConf = UnorderedPartitionedKVEdgeConfigurer
      .newBuilder(Text.class.getName(), NullWritable.class.getName(),
          HashPartitioner.class.getName(), null).build();

  Configuration finalOutputConf = new Configuration(tezConf);
  finalOutputConf.set(FileOutputFormat.OUTDIR, outPath.toUri().toString());
  byte[] finalOutputPayload = MROutput.createUserPayload(finalOutputConf,
      TextOutputFormat.class.getName(), true);

  // Change the way resources are setup - no MRHelpers
  Vertex streamFileVertex = new Vertex("partitioner1",
      new ProcessorDescriptor(ForwardingProcessor.class.getName()), -1,
      MRHelpers.getMapResource(tezConf)).addInput("streamfile",
      new InputDescriptor(MRInput.class.getName())
          .setUserPayload(streamInputPayload), MRInputAMSplitGenerator.class);

  Vertex hashFileVertex = new Vertex("partitioner2", new ProcessorDescriptor(
      ForwardingProcessor.class.getName()), -1,
      MRHelpers.getMapResource(tezConf)).addInput("hashfile",
      new InputDescriptor(MRInput.class.getName())
          .setUserPayload(hashInputPayload), MRInputAMSplitGenerator.class);

  Vertex intersectVertex = new Vertex("intersect", new ProcessorDescriptor(
      IntersectProcessor.class.getName()), numPartitions,
      MRHelpers.getReduceResource(tezConf)).addOutput("finalOutput",
      new OutputDescriptor(MROutput.class.getName())
          .setUserPayload(finalOutputPayload), MROutputCommitter.class);

  Edge e1 = new Edge(streamFileVertex, intersectVertex, edgeConf.createDefaultEdgeProperty());
  Edge e2 = new Edge(hashFileVertex, intersectVertex, edgeConf.createDefaultEdgeProperty());

  dag.addVertex(streamFileVertex).addVertex(hashFileVertex).addVertex(intersectVertex)
      .addEdge(e1).addEdge(e2);
  return dag;
}
Example #17
Source File: UnionExample.java From incubator-tez with Apache License 2.0
private DAG createDAG(FileSystem fs, TezConfiguration tezConf,
    Map<String, LocalResource> localResources, Path stagingDir,
    String inputPath, String outputPath) throws IOException {
  DAG dag = new DAG("UnionExample");

  int numMaps = -1;
  Configuration inputConf = new Configuration(tezConf);
  inputConf.set(FileInputFormat.INPUT_DIR, inputPath);
  InputDescriptor id = new InputDescriptor(MRInput.class.getName())
      .setUserPayload(MRInput.createUserPayload(inputConf,
          TextInputFormat.class.getName(), true, true));

  Vertex mapVertex1 = new Vertex("map1", new ProcessorDescriptor(
      TokenProcessor.class.getName()), numMaps, MRHelpers.getMapResource(tezConf));
  mapVertex1.addInput("MRInput", id, MRInputAMSplitGenerator.class);

  Vertex mapVertex2 = new Vertex("map2", new ProcessorDescriptor(
      TokenProcessor.class.getName()), numMaps, MRHelpers.getMapResource(tezConf));
  mapVertex2.addInput("MRInput", id, MRInputAMSplitGenerator.class);

  Vertex mapVertex3 = new Vertex("map3", new ProcessorDescriptor(
      TokenProcessor.class.getName()), numMaps, MRHelpers.getMapResource(tezConf));
  mapVertex3.addInput("MRInput", id, MRInputAMSplitGenerator.class);

  Vertex checkerVertex = new Vertex("checker", new ProcessorDescriptor(
      UnionProcessor.class.getName()), 1, MRHelpers.getReduceResource(tezConf));

  Configuration outputConf = new Configuration(tezConf);
  outputConf.set(FileOutputFormat.OUTDIR, outputPath);
  OutputDescriptor od = new OutputDescriptor(MROutput.class.getName())
      .setUserPayload(MROutput.createUserPayload(
          outputConf, TextOutputFormat.class.getName(), true));
  checkerVertex.addOutput("union", od, MROutputCommitter.class);

  Configuration allPartsConf = new Configuration(tezConf);
  allPartsConf.set(FileOutputFormat.OUTDIR, outputPath + "-all-parts");
  OutputDescriptor od2 = new OutputDescriptor(MROutput.class.getName())
      .setUserPayload(MROutput.createUserPayload(
          allPartsConf, TextOutputFormat.class.getName(), true));
  checkerVertex.addOutput("all-parts", od2, MROutputCommitter.class);

  Configuration partsConf = new Configuration(tezConf);
  partsConf.set(FileOutputFormat.OUTDIR, outputPath + "-parts");
  VertexGroup unionVertex = dag.createVertexGroup("union", mapVertex1, mapVertex2);
  OutputDescriptor od1 = new OutputDescriptor(MROutput.class.getName())
      .setUserPayload(MROutput.createUserPayload(
          partsConf, TextOutputFormat.class.getName(), true));
  unionVertex.addOutput("parts", od1, MROutputCommitter.class);

  OrderedPartitionedKVEdgeConfigurer edgeConf = OrderedPartitionedKVEdgeConfigurer
      .newBuilder(Text.class.getName(), IntWritable.class.getName(),
          HashPartitioner.class.getName(), null).build();

  dag.addVertex(mapVertex1)
      .addVertex(mapVertex2)
      .addVertex(mapVertex3)
      .addVertex(checkerVertex)
      .addEdge(
          new Edge(mapVertex3, checkerVertex, edgeConf.createDefaultEdgeProperty()))
      .addEdge(
          new GroupInputEdge(unionVertex, checkerVertex, edgeConf.createDefaultEdgeProperty(),
              new InputDescriptor(
                  ConcatenatedMergedKeyValuesInput.class.getName())));
  return dag;
}
Example #18
Source File: MapProcessor.java From incubator-tez with Apache License 2.0
private NewRecordReader(MRInput in) throws IOException {
  this.in = in;
  this.reader = in.getReader();
}
Example #19
Source File: WordCount.java From tez with Apache License 2.0
private DAG createDAG(TezConfiguration tezConf, String inputPath, String outputPath,
    int numPartitions) throws IOException {

  // Create the descriptor that describes the input data to Tez. Using MRInput to read text
  // data from the given input path. The TextInputFormat is used to read the text data.
  DataSourceDescriptor dataSource = MRInput.createConfigBuilder(new Configuration(tezConf),
      TextInputFormat.class, inputPath).groupSplits(!isDisableSplitGrouping())
      .generateSplitsInAM(!isGenerateSplitInClient()).build();

  // Create a descriptor that describes the output data to Tez. Using MROutput to write text
  // data to the given output path. The TextOutputFormat is used to write the text data.
  DataSinkDescriptor dataSink = MROutput.createConfigBuilder(new Configuration(tezConf),
      TextOutputFormat.class, outputPath).build();

  // Create a vertex that reads the data from the data source and tokenizes it using the
  // TokenProcessor. The number of tasks that will do the work for this vertex will be decided
  // using the information provided by the data source descriptor.
  Vertex tokenizerVertex = Vertex.create(TOKENIZER, ProcessorDescriptor.create(
      TokenProcessor.class.getName())).addDataSource(INPUT, dataSource);

  // Create the edge that represents the movement and semantics of data between the producer
  // Tokenizer vertex and the consumer Summation vertex. In order to perform the summation in
  // parallel the tokenized data will be partitioned by word such that a given word goes to the
  // same partition. The counts for the words should be grouped together per word. To achieve this
  // we can use an edge that contains an input/output pair that handles partitioning and grouping
  // of key value data. We use the helper OrderedPartitionedKVEdgeConfig to create such an
  // edge. Internally, it sets up matching Tez inputs and outputs that can perform this logic.
  // We specify the key, value and partitioner type. Here the key type is Text (for word), the
  // value type is IntWritable (for count) and we are using a hash based partitioner. This is a
  // helper object. The edge can be configured by configuring the input, output etc individually
  // without using this helper. The setFromConfiguration call is optional and allows overriding
  // the config options with command line parameters.
  OrderedPartitionedKVEdgeConfig edgeConf = OrderedPartitionedKVEdgeConfig
      .newBuilder(Text.class.getName(), IntWritable.class.getName(),
          HashPartitioner.class.getName())
      .setFromConfiguration(tezConf)
      .build();

  // Create a vertex that reads the tokenized data and calculates the sum using the SumProcessor.
  // The number of tasks that do the work of this vertex depends on the number of partitions used
  // to distribute the sum processing. In this case, it's been made configurable via the
  // numPartitions parameter.
  Vertex summationVertex = Vertex.create(SUMMATION,
      ProcessorDescriptor.create(SumProcessor.class.getName()), numPartitions)
      .addDataSink(OUTPUT, dataSink);

  // No need to add jar containing this class as assumed to be part of the Tez jars. Otherwise
  // we would have to add the jars for this code as local files to the vertices.

  // Create DAG and add the vertices. Connect the producer and consumer vertices via the edge
  DAG dag = DAG.create("WordCount");
  dag.addVertex(tokenizerVertex)
      .addVertex(summationVertex)
      .addEdge(
          Edge.create(tokenizerVertex, summationVertex, edgeConf.createDefaultEdgeProperty()));
  return dag;
}
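The createDAG method above only builds the DAG description; a driver still has to start a TezClient and submit the DAG, as the TestHistoryParser example earlier does. The following is a hedged sketch of that submission step, not the actual main() of the Tez WordCount example; the driver class name is illustrative, and the createDAG stub stands in for the method shown in Example #19.

import org.apache.hadoop.conf.Configuration;
import org.apache.tez.client.TezClient;
import org.apache.tez.dag.api.DAG;
import org.apache.tez.dag.api.TezConfiguration;
import org.apache.tez.dag.api.client.DAGClient;
import org.apache.tez.dag.api.client.DAGStatus;

public class WordCountDriver {

  public static void main(String[] args) throws Exception {
    TezConfiguration tezConf = new TezConfiguration(new Configuration());
    TezClient tezClient = TezClient.create("WordCountDriver", tezConf);
    tezClient.start();
    try {
      // args: input path, output path, number of partitions.
      DAG dag = createDAG(tezConf, args[0], args[1], Integer.parseInt(args[2]));
      DAGClient dagClient = tezClient.submitDAG(dag);
      DAGStatus status = dagClient.waitForCompletion();
      System.out.println("DAG completed with state: " + status.getState());
    } finally {
      tezClient.stop();
    }
  }

  private static DAG createDAG(TezConfiguration tezConf, String inputPath, String outputPath,
      int numPartitions) {
    // Placeholder: the body would be the createDAG logic from Example #19 above.
    throw new UnsupportedOperationException("see Example #19 above");
  }
}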
Example #20
Source File: CartesianProduct.java From tez with Apache License 2.0
private DAG createDAG(TezConfiguration tezConf, String inputPath1, String inputPath2,
    String inputPath3, String outputPath, boolean isPartitioned) throws IOException {
  Vertex v1 = Vertex.create(VERTEX1, ProcessorDescriptor.create(TokenProcessor.class.getName()));
  // turn off groupSplit so that each input file incurs one task
  v1.addDataSource(INPUT,
      MRInput.createConfigBuilder(new Configuration(tezConf), TextInputFormat.class, inputPath1)
          .groupSplits(false).build());
  Vertex v2 = Vertex.create(VERTEX2, ProcessorDescriptor.create(TokenProcessor.class.getName()));
  v2.addDataSource(INPUT,
      MRInput.createConfigBuilder(new Configuration(tezConf), TextInputFormat.class, inputPath2)
          .groupSplits(false).build());
  Vertex v3 = Vertex.create(VERTEX3, ProcessorDescriptor.create(TokenProcessor.class.getName()));
  v3.addDataSource(INPUT,
      MRInput.createConfigBuilder(new Configuration(tezConf), TextInputFormat.class, inputPath3)
          .groupSplits(false).build());

  CartesianProductConfig cartesianProductConfig;
  if (isPartitioned) {
    Map<String, Integer> vertexPartitionMap = new HashMap<>();
    for (String vertex : cpSources) {
      vertexPartitionMap.put(vertex, numPartition);
    }
    cartesianProductConfig = new CartesianProductConfig(vertexPartitionMap);
  } else {
    cartesianProductConfig = new CartesianProductConfig(Arrays.asList(cpSources));
  }
  UserPayload userPayload = cartesianProductConfig.toUserPayload(tezConf);

  Vertex v4 = Vertex.create(VERTEX4, ProcessorDescriptor.create(JoinProcessor.class.getName()));
  v4.addDataSink(OUTPUT,
      MROutput.createConfigBuilder(new Configuration(tezConf), TextOutputFormat.class, outputPath)
          .build());
  v4.setVertexManagerPlugin(
      VertexManagerPluginDescriptor.create(CartesianProductVertexManager.class.getName())
          .setUserPayload(userPayload));

  EdgeManagerPluginDescriptor cpEdgeManager =
      EdgeManagerPluginDescriptor.create(CartesianProductEdgeManager.class.getName());
  cpEdgeManager.setUserPayload(userPayload);
  EdgeProperty cpEdgeProperty;
  if (isPartitioned) {
    UnorderedPartitionedKVEdgeConfig cpEdgeConf =
        UnorderedPartitionedKVEdgeConfig.newBuilder(Text.class.getName(),
            IntWritable.class.getName(), CustomPartitioner.class.getName()).build();
    cpEdgeProperty = cpEdgeConf.createDefaultCustomEdgeProperty(cpEdgeManager);
  } else {
    UnorderedKVEdgeConfig edgeConf =
        UnorderedKVEdgeConfig.newBuilder(Text.class.getName(), IntWritable.class.getName()).build();
    cpEdgeProperty = edgeConf.createDefaultCustomEdgeProperty(cpEdgeManager);
  }

  EdgeProperty broadcastEdgeProperty;
  UnorderedKVEdgeConfig broadcastEdgeConf =
      UnorderedKVEdgeConfig.newBuilder(Text.class.getName(), IntWritable.class.getName()).build();
  broadcastEdgeProperty = broadcastEdgeConf.createDefaultBroadcastEdgeProperty();

  return DAG.create("CartesianProduct")
      .addVertex(v1).addVertex(v2).addVertex(v3).addVertex(v4)
      .addEdge(Edge.create(v1, v4, cpEdgeProperty))
      .addEdge(Edge.create(v2, v4, cpEdgeProperty))
      .addEdge(Edge.create(v3, v4, broadcastEdgeProperty));
}
Example #21
Source File: MRInputHelpers.java From tez with Apache License 2.0
/**
 * @see {@link InputContext#getSourceVertexName}
 * @param conf configuration instance
 * @return source name
 */
@Public
public static String getInputName(Configuration conf) {
  return getStringProperty(conf, MRInput.TEZ_MAPREDUCE_INPUT_NAME);
}
Example #22
Source File: MRInputHelpers.java From tez with Apache License 2.0
/**
 * @see {@link InputContext#getDagIdentifier}
 * @param conf configuration instance
 * @return dag index
 */
@Public
public static int getDagIndex(Configuration conf) {
  return getIntProperty(conf, MRInput.TEZ_MAPREDUCE_DAG_INDEX);
}
Example #23
Source File: MRInputHelpers.java From tez with Apache License 2.0
/**
 * Returns string representation of full DAG identifier
 * @param conf configuration instance
 * @return dag identifier
 */
@Public
public static String getDagIdString(Configuration conf) {
  return getStringProperty(conf, MRInput.TEZ_MAPREDUCE_DAG_ID);
}
Example #24
Source File: MRInputHelpers.java From tez with Apache License 2.0
/**
 * @see {@link InputContext#getTaskVertexIndex}
 * @param conf configuration instance
 * @return vertex index
 */
@Public
public static int getVertexIndex(Configuration conf) {
  return getIntProperty(conf, MRInput.TEZ_MAPREDUCE_VERTEX_INDEX);
}
Example #25
Source File: MRInputHelpers.java From tez with Apache License 2.0
/**
 * @see {@link InputContext#getDAGAttemptNumber}
 * @param conf configuration instance
 * @return attempt number
 */
@Public
public static int getDagAttemptNumber(Configuration conf) {
  return getIntProperty(conf, MRInput.TEZ_MAPREDUCE_DAG_ATTEMPT_NUMBER);
}
Example #26
Source File: MRInputHelpers.java From tez with Apache License 2.0
/**
 * @see {@link InputContext#getUniqueIdentifier}
 * @param conf configuration instance
 * @return unique identifier for the input
 */
@Public
public static String getUniqueIdentifier(Configuration conf) {
  return getStringProperty(conf, MRInput.TEZ_MAPREDUCE_UNIQUE_IDENTIFIER);
}
Example #27
Source File: MRInputHelpers.java From tez with Apache License 2.0
/**
 * @see {@link InputContext#getApplicationId}
 * @param conf configuration instance
 * @return applicationId as a string
 */
@Public
public static String getApplicationIdString(Configuration conf) {
  return getStringProperty(conf, MRInput.TEZ_MAPREDUCE_APPLICATION_ID);
}
Example #28
Source File: MRInputHelpers.java From tez with Apache License 2.0
/**
 * Returns string representation of full vertex identifier
 * @param conf configuration instance
 * @return vertex identifier
 */
@Public
public static String getVertexIdString(Configuration conf) {
  return getStringProperty(conf, MRInput.TEZ_MAPREDUCE_VERTEX_ID);
}
Example #29
Source File: MRInputHelpers.java From tez with Apache License 2.0
/**
 * @see {@link InputContext#getTaskVertexName}
 * @param conf configuration instance
 * @return vertex name
 */
@Public
public static String getVertexName(Configuration conf) {
  return getStringProperty(conf, MRInput.TEZ_MAPREDUCE_VERTEX_NAME);
}
Example #30
Source File: MRInputHelpers.java From tez with Apache License 2.0
/**
 * @see {@link InputContext#getDAGName}
 * @param conf configuration instance
 * @return dag name
 */
@Public
public static String getDagName(Configuration conf) {
  return getStringProperty(conf, MRInput.TEZ_MAPREDUCE_DAG_NAME);
}
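Examples #21 through #30 all follow the same pattern: MRInput publishes its Tez context (DAG, vertex, task, and input identifiers) into the job configuration during initialization (see the MRInputBase example above), and MRInputHelpers reads those properties back out. As an illustration only, the class below is not part of Tez and assumes the configuration handed to the InputFormat is the one MRInput populated; under that assumption, a wrapped input format could log the surrounding Tez context when it opens a split.

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.tez.mapreduce.hadoop.MRInputHelpers;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

// Hypothetical helper, not part of Tez: logs the Tez context that MRInput
// placed into the job configuration before delegating to TextInputFormat.
public class ContextLoggingTextInputFormat extends TextInputFormat {

  private static final Logger LOG = LoggerFactory.getLogger(ContextLoggingTextInputFormat.class);

  @Override
  public RecordReader<LongWritable, Text> getRecordReader(InputSplit split, JobConf job,
      Reporter reporter) throws IOException {
    LOG.info("Opening split for DAG " + MRInputHelpers.getDagName(job)
        + " (attempt " + MRInputHelpers.getDagAttemptNumber(job) + ")"
        + ", vertex " + MRInputHelpers.getVertexName(job)
        + ", input " + MRInputHelpers.getInputName(job));
    return super.getRecordReader(split, job, reporter);
  }
}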