Java Code Examples for org.apache.tez.mapreduce.hadoop.MRHelpers#getMapResource()

The following examples show how to use org.apache.tez.mapreduce.hadoop.MRHelpers#getMapResource(). The originating project, source file, and license are noted above each example.
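MRHelpers#getMapResource() builds the YARN Resource (container memory and virtual cores) for map-stage tasks from the MapReduce map-task settings in the supplied Configuration, and the examples below pass that Resource as the task resource when constructing a Vertex. The snippet below is a minimal sketch of that pattern rather than code from a project: MyProcessor is a hypothetical processor class and the memory/vcore values are illustrative.

Configuration conf = new Configuration();
conf.setInt(MRJobConfig.MAP_MEMORY_MB, 1024); // mapreduce.map.memory.mb (illustrative value)
conf.setInt(MRJobConfig.MAP_CPU_VCORES, 1);   // mapreduce.map.cpu.vcores (illustrative value)

// Resolve the per-task container resource for map-stage work.
Resource mapResource = MRHelpers.getMapResource(conf);

// Use it as the task resource of a vertex; MyProcessor is a placeholder processor class.
Vertex mapVertex = new Vertex("map",
    new ProcessorDescriptor(MyProcessor.class.getName()), -1, mapResource);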
Example 1
Source File: WordCount.java    From incubator-tez with Apache License 2.0
private DAG createDAG(FileSystem fs, TezConfiguration tezConf,
    Map<String, LocalResource> localResources, Path stagingDir,
    String inputPath, String outputPath) throws IOException {

  Configuration inputConf = new Configuration(tezConf);
  inputConf.set(FileInputFormat.INPUT_DIR, inputPath);
  InputDescriptor id = new InputDescriptor(MRInput.class.getName())
      .setUserPayload(MRInput.createUserPayload(inputConf,
          TextInputFormat.class.getName(), true, true));

  Configuration outputConf = new Configuration(tezConf);
  outputConf.set(FileOutputFormat.OUTDIR, outputPath);
  OutputDescriptor od = new OutputDescriptor(MROutput.class.getName())
    .setUserPayload(MROutput.createUserPayload(
        outputConf, TextOutputFormat.class.getName(), true));

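  // Tokenizer vertex: parallelism -1 lets the MRInput split generator decide the task count,
  // and MRHelpers.getMapResource(tezConf) supplies the per-task container size.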
  Vertex tokenizerVertex = new Vertex("tokenizer", new ProcessorDescriptor(
      TokenProcessor.class.getName()), -1, MRHelpers.getMapResource(tezConf));
  tokenizerVertex.addInput("MRInput", id, MRInputAMSplitGenerator.class);

  Vertex summerVertex = new Vertex("summer",
      new ProcessorDescriptor(
          SumProcessor.class.getName()), 1, MRHelpers.getReduceResource(tezConf));
  summerVertex.addOutput("MROutput", od, MROutputCommitter.class);

  OrderedPartitionedKVEdgeConfigurer edgeConf = OrderedPartitionedKVEdgeConfigurer
      .newBuilder(Text.class.getName(), IntWritable.class.getName(),
          HashPartitioner.class.getName(), null).build();

  DAG dag = new DAG("WordCount");
  dag.addVertex(tokenizerVertex)
      .addVertex(summerVertex)
      .addEdge(
          new Edge(tokenizerVertex, summerVertex, edgeConf.createDefaultEdgeProperty()));
  return dag;  
}
 
Example 2
Source File: IntersectDataGen.java    From incubator-tez with Apache License 2.0
private DAG createDag(TezConfiguration tezConf, Path largeOutPath, Path smallOutPath,
    Path expectedOutputPath, int numTasks, long largeOutSize, long smallOutSize)
    throws IOException {

  long largeOutSizePerTask = largeOutSize / numTasks;
  long smallOutSizePerTask = smallOutSize / numTasks;

  DAG dag = new DAG("IntersectDataGen");

  byte[] streamOutputPayload = createPayloadForOutput(largeOutPath, tezConf);
  byte[] hashOutputPayload = createPayloadForOutput(smallOutPath, tezConf);
  byte[] expectedOutputPayload = createPayloadForOutput(expectedOutputPath, tezConf);

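  // One generator vertex with numTasks tasks; each task container is sized via MRHelpers.getMapResource(tezConf).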
  Vertex genDataVertex = new Vertex("datagen", new ProcessorDescriptor(
      GenDataProcessor.class.getName()).setUserPayload(GenDataProcessor.createConfiguration(
      largeOutSizePerTask, smallOutSizePerTask)), numTasks, MRHelpers.getMapResource(tezConf));
  genDataVertex.addOutput(STREAM_OUTPUT_NAME,
      new OutputDescriptor(MROutput.class.getName()).setUserPayload(streamOutputPayload),
      MROutputCommitter.class);
  genDataVertex.addOutput(HASH_OUTPUT_NAME,
      new OutputDescriptor(MROutput.class.getName()).setUserPayload(hashOutputPayload),
      MROutputCommitter.class);
  genDataVertex.addOutput(EXPECTED_OUTPUT_NAME,
      new OutputDescriptor(MROutput.class.getName()).setUserPayload(expectedOutputPayload),
      MROutputCommitter.class);

  dag.addVertex(genDataVertex);

  return dag;
}
 
Example 3
Source File: BroadcastAndOneToOneExample.java    From incubator-tez with Apache License 2.0
private DAG createDAG(FileSystem fs, TezConfiguration tezConf,
    Path stagingDir, boolean doLocalityCheck) throws IOException, YarnException {

  JobConf mrConf = new JobConf(tezConf);

  int numBroadcastTasks = 2;
  int numOneToOneTasks = 3;
  if (doLocalityCheck) {
    YarnClient yarnClient = YarnClient.createYarnClient();
    yarnClient.init(tezConf);
    yarnClient.start();
    int numNMs = yarnClient.getNodeReports(NodeState.RUNNING).size();
    yarnClient.stop();
    // create enough 1-1 tasks to run in parallel
    numOneToOneTasks = numNMs - numBroadcastTasks - 1; // 1 AM
    if (numOneToOneTasks < 1) {
      numOneToOneTasks = 1;
    }
  }
  byte[] procPayload = {(byte) (doLocalityCheck ? 1 : 0), 1};

  System.out.println("Using " + numOneToOneTasks + " 1-1 tasks");

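  // Both the broadcast and input vertices run with the map-task resource derived from the JobConf.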
  Vertex broadcastVertex = new Vertex("Broadcast", new ProcessorDescriptor(
      InputProcessor.class.getName()),
      numBroadcastTasks, MRHelpers.getMapResource(mrConf));
  
  Vertex inputVertex = new Vertex("Input", new ProcessorDescriptor(
      InputProcessor.class.getName()).setUserPayload(procPayload),
      numOneToOneTasks, MRHelpers.getMapResource(mrConf));

  Vertex oneToOneVertex = new Vertex("OneToOne",
      new ProcessorDescriptor(
          OneToOneProcessor.class.getName()).setUserPayload(procPayload),
          -1, MRHelpers.getReduceResource(mrConf));
  oneToOneVertex.setVertexManagerPlugin(
          new VertexManagerPluginDescriptor(InputReadyVertexManager.class.getName()));

  UnorderedUnpartitionedKVEdgeConfigurer edgeConf = UnorderedUnpartitionedKVEdgeConfigurer
      .newBuilder(Text.class.getName(), IntWritable.class.getName()).build();

  DAG dag = new DAG("BroadcastAndOneToOneExample");
  dag.addVertex(inputVertex)
      .addVertex(broadcastVertex)
      .addVertex(oneToOneVertex)
      .addEdge(
          new Edge(inputVertex, oneToOneVertex, edgeConf.createDefaultOneToOneEdgeProperty()))
      .addEdge(
          new Edge(broadcastVertex, oneToOneVertex,
              edgeConf.createDefaultBroadcastEdgeProperty()));
  return dag;
}
 
Example 4
Source File: UnionExample.java    From incubator-tez with Apache License 2.0
private DAG createDAG(FileSystem fs, TezConfiguration tezConf,
    Map<String, LocalResource> localResources, Path stagingDir,
    String inputPath, String outputPath) throws IOException {
  DAG dag = new DAG("UnionExample");
  
  int numMaps = -1;
  Configuration inputConf = new Configuration(tezConf);
  inputConf.set(FileInputFormat.INPUT_DIR, inputPath);
  InputDescriptor id = new InputDescriptor(MRInput.class.getName())
      .setUserPayload(MRInput.createUserPayload(inputConf,
          TextInputFormat.class.getName(), true, true));

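  // Three identical map vertices: parallelism -1 is resolved from the input splits,
  // and each uses the map-task resource from tezConf.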
  Vertex mapVertex1 = new Vertex("map1", new ProcessorDescriptor(
      TokenProcessor.class.getName()),
      numMaps, MRHelpers.getMapResource(tezConf));
  mapVertex1.addInput("MRInput", id, MRInputAMSplitGenerator.class);

  Vertex mapVertex2 = new Vertex("map2", new ProcessorDescriptor(
      TokenProcessor.class.getName()),
      numMaps, MRHelpers.getMapResource(tezConf));
  mapVertex2.addInput("MRInput", id, MRInputAMSplitGenerator.class);

  Vertex mapVertex3 = new Vertex("map3", new ProcessorDescriptor(
      TokenProcessor.class.getName()),
      numMaps, MRHelpers.getMapResource(tezConf));
  mapVertex3.addInput("MRInput", id, MRInputAMSplitGenerator.class);

  Vertex checkerVertex = new Vertex("checker",
      new ProcessorDescriptor(
          UnionProcessor.class.getName()),
              1, MRHelpers.getReduceResource(tezConf));

  Configuration outputConf = new Configuration(tezConf);
  outputConf.set(FileOutputFormat.OUTDIR, outputPath);
  OutputDescriptor od = new OutputDescriptor(MROutput.class.getName())
    .setUserPayload(MROutput.createUserPayload(
        outputConf, TextOutputFormat.class.getName(), true));
  checkerVertex.addOutput("union", od, MROutputCommitter.class);

  Configuration allPartsConf = new Configuration(tezConf);
  allPartsConf.set(FileOutputFormat.OUTDIR, outputPath+"-all-parts");
  OutputDescriptor od2 = new OutputDescriptor(MROutput.class.getName())
    .setUserPayload(MROutput.createUserPayload(
        allPartsConf, TextOutputFormat.class.getName(), true));
  checkerVertex.addOutput("all-parts", od2, MROutputCommitter.class);

  Configuration partsConf = new Configuration(tezConf);
  partsConf.set(FileOutputFormat.OUTDIR, outputPath+"-parts");
  
  VertexGroup unionVertex = dag.createVertexGroup("union", mapVertex1, mapVertex2);
  OutputDescriptor od1 = new OutputDescriptor(MROutput.class.getName())
    .setUserPayload(MROutput.createUserPayload(
        partsConf, TextOutputFormat.class.getName(), true));
  unionVertex.addOutput("parts", od1, MROutputCommitter.class);

  OrderedPartitionedKVEdgeConfigurer edgeConf = OrderedPartitionedKVEdgeConfigurer
      .newBuilder(Text.class.getName(), IntWritable.class.getName(),
          HashPartitioner.class.getName(), null).build();

  dag.addVertex(mapVertex1)
      .addVertex(mapVertex2)
      .addVertex(mapVertex3)
      .addVertex(checkerVertex)
      .addEdge(
          new Edge(mapVertex3, checkerVertex, edgeConf.createDefaultEdgeProperty()))
      .addEdge(
          new GroupInputEdge(unionVertex, checkerVertex, edgeConf.createDefaultEdgeProperty(),
              new InputDescriptor(
                  ConcatenatedMergedKeyValuesInput.class.getName())));
  return dag;  
}
 
Example 5
Source File: YARNRunner.java    From incubator-tez with Apache License 2.0
private Vertex createVertexForStage(Configuration stageConf,
    Map<String, LocalResource> jobLocalResources,
    List<TaskLocationHint> locations, int stageNum, int totalStages)
    throws IOException {
  // stageNum starts from 0, goes till numStages - 1
  boolean isMap = false;
  if (stageNum == 0) {
    isMap = true;
  }

  int numTasks = isMap ? stageConf.getInt(MRJobConfig.NUM_MAPS, 0)
      : stageConf.getInt(MRJobConfig.NUM_REDUCES, 0);
  String processorName = isMap ? MapProcessor.class.getName()
      : ReduceProcessor.class.getName();
  String vertexName = null;
  if (isMap) {
    vertexName = MultiStageMRConfigUtil.getInitialMapVertexName();
  } else {
    if (stageNum == totalStages - 1) {
      vertexName = MultiStageMRConfigUtil.getFinalReduceVertexName();
    } else {
      vertexName = MultiStageMRConfigUtil
          .getIntermediateStageVertexName(stageNum);
    }
  }

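  // Choose the per-task container size from this stage's map or reduce resource settings.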
  Resource taskResource = isMap ? MRHelpers.getMapResource(stageConf)
      : MRHelpers.getReduceResource(stageConf);
  
  stageConf.set(MRJobConfig.MROUTPUT_FILE_NAME_PREFIX, "part");
  
  byte[] vertexUserPayload = MRHelpers.createUserPayloadFromConf(stageConf);
  Vertex vertex = new Vertex(vertexName, new ProcessorDescriptor(processorName).
      setUserPayload(vertexUserPayload),
      numTasks, taskResource);
  if (isMap) {
    byte[] mapInputPayload = MRHelpers.createMRInputPayload(vertexUserPayload, null);
    MRHelpers.addMRInput(vertex, mapInputPayload, null);
  }
  // The final stage gets the MR output; this also covers map-only jobs, where the map stage is the final stage.
  if (stageNum == totalStages -1) {
    MRHelpers.addMROutputLegacy(vertex, vertexUserPayload);
  }

  Map<String, String> taskEnv = new HashMap<String, String>();
  setupMapReduceEnv(stageConf, taskEnv, isMap);

  Map<String, LocalResource> taskLocalResources =
      new TreeMap<String, LocalResource>();
  // PRECOMMIT Remove split localization for reduce tasks if it's being set
  // here
  taskLocalResources.putAll(jobLocalResources);

  String taskJavaOpts = isMap ? MRHelpers.getMapJavaOpts(stageConf)
      : MRHelpers.getReduceJavaOpts(stageConf);

  vertex.setTaskEnvironment(taskEnv)
      .setTaskLocalFiles(taskLocalResources)
      .setTaskLocationsHint(locations)
      .setTaskLaunchCmdOpts(taskJavaOpts);
  
  if (!isMap) {
    vertex.setVertexManagerPlugin(new VertexManagerPluginDescriptor(
        ShuffleVertexManager.class.getName()));
  }

  if (LOG.isDebugEnabled()) {
    LOG.debug("Adding vertex to DAG" + ", vertexName="
        + vertex.getName() + ", processor="
        + vertex.getProcessorDescriptor().getClassName() + ", parallelism="
        + vertex.getParallelism() + ", javaOpts=" + vertex.getTaskLaunchCmdOpts()
        + ", resources=" + vertex.getTaskResource()
    // TODO Add localResources and Environment
    );
  }

  return vertex;
}