Java Code Examples for org.apache.tez.dag.api.Vertex#addOutput()

The following examples show how to use org.apache.tez.dag.api.Vertex#addOutput(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: WordCount.java (from incubator-tez, Apache License 2.0; 5 votes)
/**
 * Assembles the "WordCount" DAG: a "tokenizer" vertex fed by an MRInput and a
 * single-task "summer" vertex writing through an MROutput, joined by one
 * ordered, partitioned key/value edge (Text keys, IntWritable values,
 * HashPartitioner).
 *
 * @param fs not used by this method
 * @param tezConf base configuration; copied for the input and output settings
 *     and passed to the MRHelpers resource lookups
 * @param localResources not used by this method
 * @param stagingDir not used by this method
 * @param inputPath value stored under {@code FileInputFormat.INPUT_DIR}
 * @param outputPath value stored under {@code FileOutputFormat.OUTDIR}
 * @return the DAG holding both vertices and the edge between them
 * @throws IOException propagated from the calls made while building the DAG
 */
private DAG createDAG(FileSystem fs, TezConfiguration tezConf,
    Map<String, LocalResource> localResources, Path stagingDir,
    String inputPath, String outputPath) throws IOException {

  // Input side: copy of the base configuration pointing at the input directory.
  Configuration sourceConf = new Configuration(tezConf);
  sourceConf.set(FileInputFormat.INPUT_DIR, inputPath);
  byte[] sourcePayload = MRInput.createUserPayload(
      sourceConf, TextInputFormat.class.getName(), true, true);
  InputDescriptor sourceDescriptor =
      new InputDescriptor(MRInput.class.getName()).setUserPayload(sourcePayload);

  // Output side: copy of the base configuration pointing at the output directory.
  Configuration sinkConf = new Configuration(tezConf);
  sinkConf.set(FileOutputFormat.OUTDIR, outputPath);
  byte[] sinkPayload = MROutput.createUserPayload(
      sinkConf, TextOutputFormat.class.getName(), true);
  OutputDescriptor sinkDescriptor =
      new OutputDescriptor(MROutput.class.getName()).setUserPayload(sinkPayload);

  // NOTE(review): a task count of -1 is handed to Vertex as-is; presumably the
  // split generator attached below decides the real parallelism - confirm.
  ProcessorDescriptor tokenizerProcessor =
      new ProcessorDescriptor(TokenProcessor.class.getName());
  Vertex tokenizer = new Vertex(
      "tokenizer", tokenizerProcessor, -1, MRHelpers.getMapResource(tezConf));
  tokenizer.addInput("MRInput", sourceDescriptor, MRInputAMSplitGenerator.class);

  ProcessorDescriptor summerProcessor =
      new ProcessorDescriptor(SumProcessor.class.getName());
  Vertex summer = new Vertex(
      "summer", summerProcessor, 1, MRHelpers.getReduceResource(tezConf));
  summer.addOutput("MROutput", sinkDescriptor, MROutputCommitter.class);

  OrderedPartitionedKVEdgeConfigurer shuffleConf =
      OrderedPartitionedKVEdgeConfigurer
          .newBuilder(
              Text.class.getName(),
              IntWritable.class.getName(),
              HashPartitioner.class.getName(),
              null)
          .build();
  Edge tokenizerToSummer =
      new Edge(tokenizer, summer, shuffleConf.createDefaultEdgeProperty());

  DAG wordCountDag = new DAG("WordCount");
  wordCountDag.addVertex(tokenizer)
      .addVertex(summer)
      .addEdge(tokenizerToSummer);
  return wordCountDag;
}
 
Example 2
Source File: IntersectDataGen.java (from incubator-tez, Apache License 2.0; 5 votes)
/**
 * Builds the "IntersectDataGen" DAG, which consists of a single "datagen"
 * vertex with three MROutput-backed outputs (stream, hash and expected).
 *
 * <p>The per-task sizes handed to the processor are the total sizes divided by
 * {@code numTasks} using integer division, so any remainder is dropped and a
 * {@code numTasks} of zero raises {@link ArithmeticException}.
 *
 * @param tezConf configuration passed to the payload helper and to the
 *     MRHelpers resource lookup
 * @param largeOutPath path used for the payload of the stream output
 * @param smallOutPath path used for the payload of the hash output
 * @param expectedOutputPath path used for the payload of the expected output
 * @param numTasks task count given to the vertex and divisor for the sizes
 * @param largeOutSize total size for the large output, split across tasks
 * @param smallOutSize total size for the small output, split across tasks
 * @return the DAG containing the single data-generation vertex
 * @throws IOException propagated from the calls made while building the DAG
 */
private DAG createDag(TezConfiguration tezConf, Path largeOutPath, Path smallOutPath,
    Path expectedOutputPath, int numTasks, long largeOutSize, long smallOutSize)
    throws IOException {

  final long perTaskLarge = largeOutSize / numTasks;
  final long perTaskSmall = smallOutSize / numTasks;

  DAG dataGenDag = new DAG("IntersectDataGen");

  // Parallel arrays: outputNames[i] is registered with payloads[i].
  // Payloads are created in the order stream, hash, expected.
  String[] outputNames = {STREAM_OUTPUT_NAME, HASH_OUTPUT_NAME, EXPECTED_OUTPUT_NAME};
  byte[][] payloads = {
      createPayloadForOutput(largeOutPath, tezConf),
      createPayloadForOutput(smallOutPath, tezConf),
      createPayloadForOutput(expectedOutputPath, tezConf)
  };

  ProcessorDescriptor generatorProcessor =
      new ProcessorDescriptor(GenDataProcessor.class.getName())
          .setUserPayload(GenDataProcessor.createConfiguration(perTaskLarge, perTaskSmall));
  Vertex dataGenerator = new Vertex(
      "datagen", generatorProcessor, numTasks, MRHelpers.getMapResource(tezConf));

  for (int i = 0; i < outputNames.length; i++) {
    OutputDescriptor descriptor =
        new OutputDescriptor(MROutput.class.getName()).setUserPayload(payloads[i]);
    dataGenerator.addOutput(outputNames[i], descriptor, MROutputCommitter.class);
  }

  dataGenDag.addVertex(dataGenerator);

  return dataGenDag;
}
 
Example 3
Source File: UnionExample.java (from incubator-tez, Apache License 2.0; 4 votes)
/**
 * Assembles the "UnionExample" DAG.
 *
 * <p>Three map vertices ("map1", "map2", "map3") share one MRInput descriptor.
 * "map1" and "map2" are placed in a vertex group named "union" that has its own
 * "parts" output; "map3" feeds the "checker" vertex over a plain edge and the
 * group feeds it over a group input edge. The checker has two outputs of its
 * own, "union" and "all-parts".
 *
 * @param fs not used by this method
 * @param tezConf base configuration; copied for every input/output setting and
 *     passed to the MRHelpers resource lookups
 * @param localResources not used by this method
 * @param stagingDir not used by this method
 * @param inputPath value stored under {@code FileInputFormat.INPUT_DIR}
 * @param outputPath base output location; used as-is for "union" and with the
 *     suffixes "-all-parts" and "-parts" for the other two outputs
 * @return the DAG with four vertices, one vertex group and two edges
 * @throws IOException propagated from the calls made while building the DAG
 */
private DAG createDAG(FileSystem fs, TezConfiguration tezConf,
    Map<String, LocalResource> localResources, Path stagingDir,
    String inputPath, String outputPath) throws IOException {
  DAG unionDag = new DAG("UnionExample");

  // NOTE(review): -1 is passed to Vertex as the task count for every map
  // vertex; presumably the split generator decides the real number - confirm.
  final int mapParallelism = -1;
  final String mrOutputClass = MROutput.class.getName();
  final String textOutputFormat = TextOutputFormat.class.getName();

  Configuration sourceConf = new Configuration(tezConf);
  sourceConf.set(FileInputFormat.INPUT_DIR, inputPath);
  byte[] sourcePayload = MRInput.createUserPayload(
      sourceConf, TextInputFormat.class.getName(), true, true);
  InputDescriptor sourceDescriptor =
      new InputDescriptor(MRInput.class.getName()).setUserPayload(sourcePayload);

  // All three map vertices are configured identically apart from their names.
  String[] mapperNames = {"map1", "map2", "map3"};
  Vertex[] mappers = new Vertex[mapperNames.length];
  for (int i = 0; i < mapperNames.length; i++) {
    ProcessorDescriptor tokenProcessor =
        new ProcessorDescriptor(TokenProcessor.class.getName());
    mappers[i] = new Vertex(
        mapperNames[i], tokenProcessor, mapParallelism, MRHelpers.getMapResource(tezConf));
    mappers[i].addInput("MRInput", sourceDescriptor, MRInputAMSplitGenerator.class);
  }

  ProcessorDescriptor unionProcessor =
      new ProcessorDescriptor(UnionProcessor.class.getName());
  Vertex checker = new Vertex(
      "checker", unionProcessor, 1, MRHelpers.getReduceResource(tezConf));

  // Checker output "union" -> outputPath
  Configuration unionOutConf = new Configuration(tezConf);
  unionOutConf.set(FileOutputFormat.OUTDIR, outputPath);
  byte[] unionOutPayload = MROutput.createUserPayload(unionOutConf, textOutputFormat, true);
  OutputDescriptor unionOutDescriptor =
      new OutputDescriptor(mrOutputClass).setUserPayload(unionOutPayload);
  checker.addOutput("union", unionOutDescriptor, MROutputCommitter.class);

  // Checker output "all-parts" -> outputPath + "-all-parts"
  Configuration allPartsOutConf = new Configuration(tezConf);
  allPartsOutConf.set(FileOutputFormat.OUTDIR, outputPath + "-all-parts");
  byte[] allPartsPayload = MROutput.createUserPayload(allPartsOutConf, textOutputFormat, true);
  OutputDescriptor allPartsDescriptor =
      new OutputDescriptor(mrOutputClass).setUserPayload(allPartsPayload);
  checker.addOutput("all-parts", allPartsDescriptor, MROutputCommitter.class);

  // Vertex-group output "parts" -> outputPath + "-parts"
  Configuration partsOutConf = new Configuration(tezConf);
  partsOutConf.set(FileOutputFormat.OUTDIR, outputPath + "-parts");

  // Only map1 and map2 belong to the group; map3 is wired to the checker directly.
  VertexGroup mapGroup = unionDag.createVertexGroup("union", mappers[0], mappers[1]);
  byte[] partsPayload = MROutput.createUserPayload(partsOutConf, textOutputFormat, true);
  OutputDescriptor partsDescriptor =
      new OutputDescriptor(mrOutputClass).setUserPayload(partsPayload);
  mapGroup.addOutput("parts", partsDescriptor, MROutputCommitter.class);

  OrderedPartitionedKVEdgeConfigurer shuffleConf =
      OrderedPartitionedKVEdgeConfigurer
          .newBuilder(
              Text.class.getName(),
              IntWritable.class.getName(),
              HashPartitioner.class.getName(),
              null)
          .build();

  Edge map3ToChecker =
      new Edge(mappers[2], checker, shuffleConf.createDefaultEdgeProperty());
  InputDescriptor mergedInput =
      new InputDescriptor(ConcatenatedMergedKeyValuesInput.class.getName());
  GroupInputEdge groupToChecker = new GroupInputEdge(
      mapGroup, checker, shuffleConf.createDefaultEdgeProperty(), mergedInput);

  unionDag.addVertex(mappers[0])
      .addVertex(mappers[1])
      .addVertex(mappers[2])
      .addVertex(checker)
      .addEdge(map3ToChecker)
      .addEdge(groupToChecker);
  return unionDag;
}
 
Example 4
Source File: MRHelpers.java (from incubator-tez, Apache License 2.0; 4 votes)
/**
 * Adds an output named "MROutput" to the given vertex. The output descriptor
 * is created for {@code MROutputLegacy} and carries the supplied payload;
 * {@code MROutputCommitter.class} is passed along with it.
 *
 * @param vertex the vertex the output is added to
 * @param userPayload payload set on the output descriptor
 */
@Private
public static void addMROutputLegacy(Vertex vertex, byte[] userPayload) {
  String legacyOutputClass = MROutputLegacy.class.getName();
  OutputDescriptor legacyDescriptor =
      new OutputDescriptor(legacyOutputClass).setUserPayload(userPayload);
  vertex.addOutput("MROutput", legacyDescriptor, MROutputCommitter.class);
}
 
Example 5
Source File: MRHelpers.java (from incubator-tez, Apache License 2.0; 2 votes)
/**
 * Convenience method that adds an MR output to the specified vertex. The
 * output is registered under the name "MROutput", uses an {@code MROutput}
 * descriptor carrying the supplied payload, and is added together with
 * {@code MROutputCommitter.class}.
 *
 * <p>This should only be called for one vertex in a DAG.
 *
 * @param vertex the vertex the output is added to
 * @param userPayload payload set on the output descriptor
 */
public static void addMROutput(Vertex vertex, byte[] userPayload) {
  String outputClass = MROutput.class.getName();
  OutputDescriptor outputDescriptor =
      new OutputDescriptor(outputClass).setUserPayload(userPayload);
  vertex.addOutput("MROutput", outputDescriptor, MROutputCommitter.class);
}