Java Code Examples for org.apache.tez.dag.api.Vertex#setVertexManagerPlugin()

The following examples show how to use org.apache.tez.dag.api.Vertex#setVertexManagerPlugin(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out related API usage in the sidebar.
Example 1
Source File: TestAMRecovery.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a two-vertex DAG, v1 --> v2, joined by a single edge of the requested data movement
 * type. <br>
 * v1 is managed by {@code ScheduleControlledVertexManager} and v2 by the supplied vertex manager
 * class; both managers receive the current {@code tezConf} as their user payload. Per the
 * original author's notes, v1's manager controls how tasks are scheduled in the
 * partially-finished test case and v2's manager controls when the AM is killed.
 * <p>
 * Side effect: {@code FAIL_ON_PARTIAL_FINISHED} is written into the shared {@code tezConf}
 * before the payloads are created from it.
 *
 * @param dagName name given to the created DAG
 * @param vertexManagerClass class whose name is registered as v2's vertex manager plugin
 * @param dmType data movement type of the v1 --> v2 edge
 * @param failOnParitialCompleted value stored under {@code FAIL_ON_PARTIAL_FINISHED}
 * @return the assembled DAG containing v1, v2 and the edge between them
 * @throws IOException propagated from the helper calls made while assembling the DAG
 */
private DAG createDAG(String dagName, Class<?> vertexManagerClass, DataMovementType dmType,
    boolean failOnParitialCompleted) throws IOException {
  // Stored as the text "true" or "false".
  tezConf.set(FAIL_ON_PARTIAL_FINISHED, String.valueOf(failOnParitialCompleted));

  DAG dag = DAG.create(dagName);
  // Payload created from null; it is handed to the edge's output and input descriptors below.
  UserPayload payload = UserPayload.create(null);

  Vertex v1 = Vertex.create("v1", MyProcessor.getProcDesc(), 2);
  v1.setVertexManagerPlugin(
      VertexManagerPluginDescriptor.create(ScheduleControlledVertexManager.class.getName())
          .setUserPayload(TezUtils.createUserPayloadFromConf(tezConf)));

  Vertex v2 = Vertex.create("v2", DoNothingProcessor.getProcDesc(), 2);
  v2.setVertexManagerPlugin(
      VertexManagerPluginDescriptor.create(vertexManagerClass.getName())
          .setUserPayload(TezUtils.createUserPayloadFromConf(tezConf)));

  dag.addVertex(v1).addVertex(v2);
  dag.addEdge(Edge.create(v1, v2, EdgeProperty.create(dmType,
      DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL,
      TestOutput.getOutputDesc(payload), TestInput.getInputDesc(payload))));
  return dag;
}
 
Example 2
Source File: MultiAttemptDAG.java    From incubator-tez with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the linear three-vertex DAG v1 --> v2 --> v3 in which every vertex runs
 * {@code TestProcessor} and is managed by {@code FailOnAttemptVertexManagerPlugin}.
 * The plugin payloads are the digits "1", "2" and "3"; per the original comment, they select
 * the attempt on which each vertex manager fails.
 *
 * @param name name given to the created DAG
 * @param conf optional configuration; when non-null it supplies the task count
 *             ({@code MULTI_ATTEMPT_DAG_VERTEX_NUM_TASKS}) and the processor/edge payload,
 *             otherwise the default task count and a null payload are used
 * @return the assembled DAG
 * @throws Exception propagated from the helper calls made while assembling the DAG
 */
public static DAG createDAG(String name,
    Configuration conf) throws Exception {
  int numTasks = MULTI_ATTEMPT_DAG_VERTEX_NUM_TASKS_DEFAULT;
  byte[] payload = null;
  if (conf != null) {
    numTasks = conf.getInt(MULTI_ATTEMPT_DAG_VERTEX_NUM_TASKS,
        MULTI_ATTEMPT_DAG_VERTEX_NUM_TASKS_DEFAULT);
    payload = TezUtils.createUserPayloadFromConf(conf);
  }

  DAG dag = new DAG(name);
  Vertex v1 = new Vertex("v1", TestProcessor.getProcDesc(payload), numTasks, defaultResource);
  Vertex v2 = new Vertex("v2", TestProcessor.getProcDesc(payload), numTasks, defaultResource);
  Vertex v3 = new Vertex("v3", TestProcessor.getProcDesc(payload), numTasks, defaultResource);

  // Make each vertex manager fail on appropriate attempt.
  // NOTE(review): getBytes() uses the platform default charset; this is harmless for ASCII
  // digits on ASCII-compatible platforms, but an explicit charset would be more robust.
  String failingManager = FailOnAttemptVertexManagerPlugin.class.getName();
  v1.setVertexManagerPlugin(
      new VertexManagerPluginDescriptor(failingManager).setUserPayload("1".getBytes()));
  v2.setVertexManagerPlugin(
      new VertexManagerPluginDescriptor(failingManager).setUserPayload("2".getBytes()));
  v3.setVertexManagerPlugin(
      new VertexManagerPluginDescriptor(failingManager).setUserPayload("3".getBytes()));

  dag.addVertex(v1).addVertex(v2).addVertex(v3);

  // Each edge gets its own EdgeProperty with freshly created output/input descriptors.
  EdgeProperty v1ToV2 = new EdgeProperty(DataMovementType.SCATTER_GATHER,
      DataSourceType.PERSISTED,
      SchedulingType.SEQUENTIAL,
      TestOutput.getOutputDesc(payload),
      TestInput.getInputDesc(payload));
  dag.addEdge(new Edge(v1, v2, v1ToV2));

  EdgeProperty v2ToV3 = new EdgeProperty(DataMovementType.SCATTER_GATHER,
      DataSourceType.PERSISTED,
      SchedulingType.SEQUENTIAL,
      TestOutput.getOutputDesc(payload),
      TestInput.getInputDesc(payload));
  dag.addEdge(new Edge(v2, v3, v2ToV3));
  return dag;
}
 
Example 3
Source File: CartesianProduct.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Assembles the "CrossProduct" DAG: two {@code TokenProcessor} vertices (VERTEX1, VERTEX2),
 * each reading from the same fake data source, feed a {@code JoinProcessor} vertex (VERTEX3)
 * over custom edges managed by {@code CartesianProductEdgeManager}. VERTEX3 writes to a fake
 * data sink and is managed by {@code CartesianProductVertexManager}.
 *
 * @param tezConf configuration used to serialize the cartesian product config into a payload
 * @return the assembled DAG
 * @throws IOException propagated from the helper calls made while assembling the DAG
 */
private DAG createDAG(TezConfiguration tezConf) throws IOException {
  // One fake data source descriptor, attached to both token vertices.
  DataSourceDescriptor fakeSource = DataSourceDescriptor.create(
    InputDescriptor.create(FakeInput.class.getName()),
    InputInitializerDescriptor.create(FakeInputInitializer.class.getName()),
    null);

  Vertex v1 = Vertex.create(VERTEX1, ProcessorDescriptor.create(TokenProcessor.class.getName()));
  v1.addDataSource(INPUT, fakeSource);
  Vertex v2 = Vertex.create(VERTEX2, ProcessorDescriptor.create(TokenProcessor.class.getName()));
  v2.addDataSource(INPUT, fakeSource);

  // Fake sink (output plus committer) for the join vertex.
  DataSinkDescriptor fakeSink = DataSinkDescriptor.create(
    OutputDescriptor.create(FakeOutput.class.getName()),
    OutputCommitterDescriptor.create(FakeOutputCommitter.class.getName()),
    null);

  // The same serialized cartesian product config is given to the vertex manager and the
  // edge manager.
  CartesianProductConfig cpConfig = new CartesianProductConfig(Arrays.asList(sourceVertices));
  UserPayload cpPayload = cpConfig.toUserPayload(tezConf);

  Vertex v3 = Vertex.create(VERTEX3, ProcessorDescriptor.create(JoinProcessor.class.getName()));
  v3.addDataSink(OUTPUT, fakeSink);
  v3.setVertexManagerPlugin(
    VertexManagerPluginDescriptor.create(CartesianProductVertexManager.class.getName())
      .setUserPayload(cpPayload));

  EdgeManagerPluginDescriptor cpEdgeManager =
    EdgeManagerPluginDescriptor.create(CartesianProductEdgeManager.class.getName());
  cpEdgeManager.setUserPayload(cpPayload);

  UnorderedPartitionedKVEdgeConfig kvEdgeConfig = UnorderedPartitionedKVEdgeConfig
    .newBuilder(Text.class.getName(), IntWritable.class.getName(),
      RoundRobinPartitioner.class.getName())
    .build();
  EdgeProperty cpEdgeProperty = kvEdgeConfig.createDefaultCustomEdgeProperty(cpEdgeManager);

  DAG dag = DAG.create("CrossProduct");
  return dag.addVertex(v1).addVertex(v2).addVertex(v3)
    .addEdge(Edge.create(v1, v3, cpEdgeProperty))
    .addEdge(Edge.create(v2, v3, cpEdgeProperty));
}
 
Example 4
Source File: MultiAttemptDAG.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the linear three-vertex DAG v1 --> v2 --> v3 in which every vertex runs
 * {@code TestProcessor} and is managed by {@code FailOnAttemptVertexManagerPlugin}.
 * The plugin payloads wrap the digits "1", "2" and "3"; per the original comment, they select
 * the attempt on which each vertex manager fails.
 *
 * @param name name given to the created DAG
 * @param conf optional configuration; when non-null it supplies the task count
 *             ({@code MULTI_ATTEMPT_DAG_VERTEX_NUM_TASKS}) and the processor/edge payload,
 *             otherwise the default task count and a payload created from null are used
 * @return the assembled DAG
 * @throws Exception propagated from the helper calls made while assembling the DAG
 */
public static DAG createDAG(String name,
    Configuration conf) throws Exception {
  int numTasks = MULTI_ATTEMPT_DAG_VERTEX_NUM_TASKS_DEFAULT;
  UserPayload payload = UserPayload.create(null);
  if (conf != null) {
    numTasks = conf.getInt(MULTI_ATTEMPT_DAG_VERTEX_NUM_TASKS,
        MULTI_ATTEMPT_DAG_VERTEX_NUM_TASKS_DEFAULT);
    payload = TezUtils.createUserPayloadFromConf(conf);
  }

  DAG dag = DAG.create(name);
  Vertex v1 = Vertex.create("v1", TestProcessor.getProcDesc(payload), numTasks, defaultResource);
  Vertex v2 = Vertex.create("v2", TestProcessor.getProcDesc(payload), numTasks, defaultResource);
  Vertex v3 = Vertex.create("v3", TestProcessor.getProcDesc(payload), numTasks, defaultResource);

  // Make each vertex manager fail on appropriate attempt.
  // NOTE(review): getBytes() uses the platform default charset; this is harmless for ASCII
  // digits on ASCII-compatible platforms, but an explicit charset would be more robust.
  String failingManager = FailOnAttemptVertexManagerPlugin.class.getName();
  v1.setVertexManagerPlugin(VertexManagerPluginDescriptor.create(failingManager)
      .setUserPayload(UserPayload.create(ByteBuffer.wrap("1".getBytes()))));
  v2.setVertexManagerPlugin(VertexManagerPluginDescriptor.create(failingManager)
      .setUserPayload(UserPayload.create(ByteBuffer.wrap("2".getBytes()))));
  v3.setVertexManagerPlugin(VertexManagerPluginDescriptor.create(failingManager)
      .setUserPayload(UserPayload.create(ByteBuffer.wrap("3".getBytes()))));

  dag.addVertex(v1).addVertex(v2).addVertex(v3);

  // Each edge gets its own EdgeProperty with freshly created output/input descriptors.
  EdgeProperty v1ToV2 = EdgeProperty.create(DataMovementType.SCATTER_GATHER,
      DataSourceType.PERSISTED,
      SchedulingType.SEQUENTIAL,
      TestOutput.getOutputDesc(payload),
      TestInput.getInputDesc(payload));
  dag.addEdge(Edge.create(v1, v2, v1ToV2));

  EdgeProperty v2ToV3 = EdgeProperty.create(DataMovementType.SCATTER_GATHER,
      DataSourceType.PERSISTED,
      SchedulingType.SEQUENTIAL,
      TestOutput.getOutputDesc(payload),
      TestInput.getInputDesc(payload));
  dag.addEdge(Edge.create(v2, v3, v2ToV3));
  return dag;
}
 
Example 5
Source File: TestExceptionPropagation.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a DAG with two vertices (v1 --> v2). The name of the given exception location is
 * wrapped into a payload that is set on the Input/Output/Processor descriptors, and the
 * location itself is passed to the vertex manager plugins, to control where an exception is
 * thrown.
 * <p>
 * Locations whose name starts with {@code "EM_"} connect the vertices with a custom edge
 * managed by {@code CustomEdgeManager}; all other locations use a ONE_TO_ONE edge and give v2
 * a custom vertex manager.
 *
 * @param exLocation where the exception should be thrown; also used to name the DAG
 * @return the assembled DAG named {@code "dag_" + exLocation.name()}
 * @throws IOException propagated from the helper calls made while assembling the DAG
 */
private DAG createDAG(ExceptionLocation exLocation) throws IOException {
  DAG dag = DAG.create("dag_" + exLocation.name());
  UserPayload payload =
      UserPayload.create(ByteBuffer.wrap(exLocation.name().getBytes()));

  // v1: processor, root input and input initializer all carry the payload.
  Vertex v1 =
      Vertex.create("v1", ProcessorWithException.getProcDesc(payload), 1);
  InputDescriptor rootInput = InputWithException.getInputDesc(payload);
  InputInitializerDescriptor rootInitializer =
      InputInitializerWithException.getIIDesc(payload);
  v1.addDataSource("input",
      DataSourceDescriptor.create(rootInput, rootInitializer, null));
  v1.setVertexManagerPlugin(
      RootInputVertexManagerWithException.getVMDesc(exLocation));

  // v2: no-op processor and input; only its input initializer carries the payload.
  Vertex v2 =
      Vertex.create("v2", DoNothingProcessor.getProcDesc(), 1);
  v2.addDataSource("input2",
      DataSourceDescriptor.create(
        InputDescriptor.create(NoOpInput.class.getName()),
        InputInitializerWithException2.getIIDesc(payload),
        null));

  dag.addVertex(v1)
    .addVertex(v2);

  final EdgeProperty edgeProperty;
  if (exLocation.name().startsWith("EM_")) {
    EdgeManagerPluginDescriptor edgeManager =
        EdgeManagerPluginDescriptor.create(CustomEdgeManager.class.getName())
          .setUserPayload(payload);
    edgeProperty = EdgeProperty.create(edgeManager,
        DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL,
        OutputWithException.getOutputDesc(payload), InputWithException.getInputDesc(payload));
  } else {
    // The customized VertexManager is set only here because it cannot be used together
    // with a custom edge.
    v2.setVertexManagerPlugin(InputReadyVertexManagerWithException.getVMDesc(exLocation));
    edgeProperty = EdgeProperty.create(DataMovementType.ONE_TO_ONE,
        DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL,
        OutputWithException.getOutputDesc(payload), InputWithException.getInputDesc(payload));
  }
  dag.addEdge(Edge.create(v1, v2, edgeProperty));

  return dag;
}
 
Example 6
Source File: BroadcastAndOneToOneExample.java    From incubator-tez with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the "BroadcastAndOneToOneExample" DAG: an "Input" vertex feeds a "OneToOne" vertex
 * over a one-to-one edge, and a "Broadcast" vertex feeds the same "OneToOne" vertex over a
 * broadcast edge. The "OneToOne" vertex is created with parallelism -1 and is managed by
 * {@code InputReadyVertexManager}.
 * <p>
 * When {@code doLocalityCheck} is set, the number of one-to-one tasks is derived from the
 * number of RUNNING nodes reported by YARN (minus the broadcast tasks and one slot for the AM,
 * but at least 1); otherwise 3 tasks are used.
 *
 * @param fs not used by this method
 * @param tezConf configuration used for the MR job conf and the YARN client
 * @param stagingDir not used by this method
 * @param doLocalityCheck whether to size the one-to-one vertex from the running node count;
 *                        also encoded as the first byte of the processor payload
 * @return the assembled DAG
 * @throws IOException propagated from the YARN client or helper calls
 * @throws YarnException propagated from the YARN client
 */
private DAG createDAG(FileSystem fs, TezConfiguration tezConf,
    Path stagingDir, boolean doLocalityCheck) throws IOException, YarnException {

  JobConf mrConf = new JobConf(tezConf);

  int numBroadcastTasks = 2;
  int numOneToOneTasks = 3;
  if (doLocalityCheck) {
    YarnClient yarnClient = YarnClient.createYarnClient();
    yarnClient.init(tezConf);
    yarnClient.start();
    int numNMs;
    try {
      numNMs = yarnClient.getNodeReports(NodeState.RUNNING).size();
    } finally {
      // Always release the started client, even when the node-report query throws.
      yarnClient.stop();
    }
    // create enough 1-1 tasks to run in parallel (reserving 1 slot for the AM),
    // but never fewer than one
    numOneToOneTasks = Math.max(1, numNMs - numBroadcastTasks - 1);
  }
  // Payload bytes: [locality-check flag, 1].
  byte[] procPayload = {(byte) (doLocalityCheck ? 1 : 0), 1};

  System.out.println("Using " + numOneToOneTasks + " 1-1 tasks");

  Vertex broadcastVertex = new Vertex("Broadcast", new ProcessorDescriptor(
      InputProcessor.class.getName()),
      numBroadcastTasks, MRHelpers.getMapResource(mrConf));

  Vertex inputVertex = new Vertex("Input", new ProcessorDescriptor(
      InputProcessor.class.getName()).setUserPayload(procPayload),
      numOneToOneTasks, MRHelpers.getMapResource(mrConf));

  // Parallelism -1: presumably left to be determined at runtime — TODO confirm.
  Vertex oneToOneVertex = new Vertex("OneToOne",
      new ProcessorDescriptor(
          OneToOneProcessor.class.getName()).setUserPayload(procPayload),
          -1, MRHelpers.getReduceResource(mrConf));
  oneToOneVertex.setVertexManagerPlugin(
          new VertexManagerPluginDescriptor(InputReadyVertexManager.class.getName()));

  UnorderedUnpartitionedKVEdgeConfigurer edgeConf = UnorderedUnpartitionedKVEdgeConfigurer
      .newBuilder(Text.class.getName(), IntWritable.class.getName()).build();

  DAG dag = new DAG("BroadcastAndOneToOneExample");
  dag.addVertex(inputVertex)
      .addVertex(broadcastVertex)
      .addVertex(oneToOneVertex)
      .addEdge(
          new Edge(inputVertex, oneToOneVertex, edgeConf.createDefaultOneToOneEdgeProperty()))
      .addEdge(
          new Edge(broadcastVertex, oneToOneVertex,
              edgeConf.createDefaultBroadcastEdgeProperty()));
  return dag;
}
 
Example 7
Source File: YARNRunner.java    From incubator-tez with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the vertex for one stage of a multi-stage MapReduce job. Stage 0 is the map stage
 * (runs {@code MapProcessor} and gets an MR input); every later stage runs
 * {@code ReduceProcessor} and is managed by {@code ShuffleVertexManager}. The last stage
 * additionally gets a legacy MR output.
 * <p>
 * Side effect: sets {@code MRJobConfig.MROUTPUT_FILE_NAME_PREFIX} to "part" on
 * {@code stageConf} before the vertex payload is created from it.
 *
 * @param stageConf configuration of this stage; modified as described above
 * @param jobLocalResources local resources copied into the vertex's task local files
 * @param locations task location hints set on the vertex
 * @param stageNum zero-based stage index, from 0 to {@code totalStages - 1}
 * @param totalStages total number of stages in the job
 * @return the configured vertex
 * @throws IOException propagated from the helper calls made while building the vertex
 */
private Vertex createVertexForStage(Configuration stageConf,
    Map<String, LocalResource> jobLocalResources,
    List<TaskLocationHint> locations, int stageNum, int totalStages)
    throws IOException {
  // stageNum starts from 0, goes till numStages - 1
  final boolean isMap = (stageNum == 0);
  final boolean isLastStage = (stageNum == totalStages - 1);

  final int numTasks;
  final String processorName;
  final String vertexName;
  final Resource taskResource;
  if (isMap) {
    numTasks = stageConf.getInt(MRJobConfig.NUM_MAPS, 0);
    processorName = MapProcessor.class.getName();
    vertexName = MultiStageMRConfigUtil.getInitialMapVertexName();
    taskResource = MRHelpers.getMapResource(stageConf);
  } else {
    numTasks = stageConf.getInt(MRJobConfig.NUM_REDUCES, 0);
    processorName = ReduceProcessor.class.getName();
    vertexName = isLastStage
        ? MultiStageMRConfigUtil.getFinalReduceVertexName()
        : MultiStageMRConfigUtil.getIntermediateStageVertexName(stageNum);
    taskResource = MRHelpers.getReduceResource(stageConf);
  }

  // Must happen before the payload is serialized so the prefix is part of it.
  stageConf.set(MRJobConfig.MROUTPUT_FILE_NAME_PREFIX, "part");

  byte[] vertexUserPayload = MRHelpers.createUserPayloadFromConf(stageConf);
  ProcessorDescriptor processorDescriptor =
      new ProcessorDescriptor(processorName).setUserPayload(vertexUserPayload);
  Vertex vertex = new Vertex(vertexName, processorDescriptor, numTasks, taskResource);

  if (isMap) {
    byte[] mapInputPayload = MRHelpers.createMRInputPayload(vertexUserPayload, null);
    MRHelpers.addMRInput(vertex, mapInputPayload, null);
  }
  // The last stage writes the job output; for map-only jobs that is the map stage itself.
  if (isLastStage) {
    MRHelpers.addMROutputLegacy(vertex, vertexUserPayload);
  }

  Map<String, String> taskEnv = new HashMap<String, String>();
  setupMapReduceEnv(stageConf, taskEnv, isMap);

  // PRECOMMIT Remove split localization for reduce tasks if it's being set
  // here
  Map<String, LocalResource> taskLocalResources =
      new TreeMap<String, LocalResource>(jobLocalResources);

  final String taskJavaOpts;
  if (isMap) {
    taskJavaOpts = MRHelpers.getMapJavaOpts(stageConf);
  } else {
    taskJavaOpts = MRHelpers.getReduceJavaOpts(stageConf);
  }

  vertex.setTaskEnvironment(taskEnv)
      .setTaskLocalFiles(taskLocalResources)
      .setTaskLocationsHint(locations)
      .setTaskLaunchCmdOpts(taskJavaOpts);

  if (!isMap) {
    vertex.setVertexManagerPlugin(new VertexManagerPluginDescriptor(
        ShuffleVertexManager.class.getName()));
  }

  if (LOG.isDebugEnabled()) {
    // TODO Add localResources and Environment
    LOG.debug("Adding vertex to DAG"
        + ", vertexName=" + vertex.getName()
        + ", processor=" + vertex.getProcessorDescriptor().getClassName()
        + ", parallelism=" + vertex.getParallelism()
        + ", javaOpts=" + vertex.getTaskLaunchCmdOpts()
        + ", resources=" + vertex.getTaskResource());
  }

  return vertex;
}
 
Example 8
Source File: CartesianProduct.java    From tez with Apache License 2.0 4 votes vote down vote up
/**
 * Assembles the "CartesianProduct" DAG. Three {@code TokenProcessor} vertices (VERTEX1..3)
 * each read one text input path with split grouping turned off. VERTEX1 and VERTEX2 are
 * connected to the {@code JoinProcessor} vertex (VERTEX4) through custom edges managed by
 * {@code CartesianProductEdgeManager}; VERTEX3 is connected to it through a broadcast edge.
 * VERTEX4 writes text output and is managed by {@code CartesianProductVertexManager}.
 *
 * @param tezConf base configuration; copied for every input/output and used to serialize the
 *                cartesian product config
 * @param inputPath1 text input of VERTEX1
 * @param inputPath2 text input of VERTEX2
 * @param inputPath3 text input of VERTEX3 (the broadcast side)
 * @param outputPath text output of VERTEX4
 * @param isPartitioned when true, every source in {@code cpSources} is mapped to
 *                      {@code numPartition} partitions and a partitioned edge config with
 *                      {@code CustomPartitioner} is used; otherwise the sources are passed as
 *                      a list and an unpartitioned edge config is used
 * @return the assembled DAG
 * @throws IOException propagated from the helper calls made while assembling the DAG
 */
private DAG createDAG(TezConfiguration tezConf, String inputPath1, String inputPath2,
                      String inputPath3, String outputPath, boolean isPartitioned)
  throws IOException {
  // turn off groupSplit so that each input file incurs one task
  Vertex v1 = Vertex.create(VERTEX1, ProcessorDescriptor.create(TokenProcessor.class.getName()));
  v1.addDataSource(INPUT,
    MRInput.createConfigBuilder(new Configuration(tezConf), TextInputFormat.class, inputPath1)
      .groupSplits(false)
      .build());

  Vertex v2 = Vertex.create(VERTEX2, ProcessorDescriptor.create(TokenProcessor.class.getName()));
  v2.addDataSource(INPUT,
    MRInput.createConfigBuilder(new Configuration(tezConf), TextInputFormat.class, inputPath2)
      .groupSplits(false)
      .build());

  Vertex v3 = Vertex.create(VERTEX3, ProcessorDescriptor.create(TokenProcessor.class.getName()));
  v3.addDataSource(INPUT,
    MRInput.createConfigBuilder(new Configuration(tezConf), TextInputFormat.class, inputPath3)
      .groupSplits(false)
      .build());

  final CartesianProductConfig cpConfig;
  if (!isPartitioned) {
    cpConfig = new CartesianProductConfig(Arrays.asList(cpSources));
  } else {
    Map<String, Integer> partitionsBySource = new HashMap<>();
    for (String source : cpSources) {
      partitionsBySource.put(source, numPartition);
    }
    cpConfig = new CartesianProductConfig(partitionsBySource);
  }
  // The same serialized config goes to both the vertex manager and the edge manager.
  UserPayload cpPayload = cpConfig.toUserPayload(tezConf);

  Vertex v4 = Vertex.create(VERTEX4, ProcessorDescriptor.create(JoinProcessor.class.getName()));
  v4.addDataSink(OUTPUT,
    MROutput.createConfigBuilder(new Configuration(tezConf), TextOutputFormat.class, outputPath)
      .build());
  v4.setVertexManagerPlugin(
    VertexManagerPluginDescriptor.create(CartesianProductVertexManager.class.getName())
      .setUserPayload(cpPayload));

  EdgeManagerPluginDescriptor cpEdgeManager =
    EdgeManagerPluginDescriptor.create(CartesianProductEdgeManager.class.getName());
  cpEdgeManager.setUserPayload(cpPayload);

  final EdgeProperty cpEdgeProperty;
  if (!isPartitioned) {
    UnorderedKVEdgeConfig unpartitionedConf =
      UnorderedKVEdgeConfig.newBuilder(Text.class.getName(), IntWritable.class.getName()).build();
    cpEdgeProperty = unpartitionedConf.createDefaultCustomEdgeProperty(cpEdgeManager);
  } else {
    UnorderedPartitionedKVEdgeConfig partitionedConf =
      UnorderedPartitionedKVEdgeConfig.newBuilder(Text.class.getName(),
        IntWritable.class.getName(), CustomPartitioner.class.getName()).build();
    cpEdgeProperty = partitionedConf.createDefaultCustomEdgeProperty(cpEdgeManager);
  }

  UnorderedKVEdgeConfig broadcastConf =
    UnorderedKVEdgeConfig.newBuilder(Text.class.getName(), IntWritable.class.getName()).build();
  EdgeProperty broadcastEdgeProperty = broadcastConf.createDefaultBroadcastEdgeProperty();

  DAG dag = DAG.create("CartesianProduct");
  return dag
    .addVertex(v1).addVertex(v2).addVertex(v3).addVertex(v4)
    .addEdge(Edge.create(v1, v4, cpEdgeProperty))
    .addEdge(Edge.create(v2, v4, cpEdgeProperty))
    .addEdge(Edge.create(v3, v4, broadcastEdgeProperty));
}
 
Example 9
Source File: BroadcastAndOneToOneExample.java    From tez with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the "BroadcastAndOneToOneExample" DAG: an "Input" vertex feeds a "OneToOne" vertex
 * over a one-to-one edge, and a "Broadcast" vertex feeds the same "OneToOne" vertex over a
 * broadcast edge. The "OneToOne" vertex is created without an explicit task count and is
 * managed by {@code InputReadyVertexManager}.
 * <p>
 * When {@code doLocalityCheck} is set, the number of one-to-one tasks is derived from the
 * number of RUNNING nodes reported by YARN (minus the broadcast tasks and one slot for the AM,
 * but at least 1); otherwise 3 tasks are used.
 *
 * @param fs not used by this method
 * @param tezConf configuration used for the YARN client and the edge configuration
 * @param stagingDir not used by this method
 * @param doLocalityCheck whether to size the one-to-one vertex from the running node count;
 *                        also encoded as the first byte of the processor payload
 * @return the assembled DAG
 * @throws IOException propagated from the YARN client or helper calls
 * @throws YarnException propagated from the YARN client
 */
private DAG createDAG(FileSystem fs, TezConfiguration tezConf,
    Path stagingDir, boolean doLocalityCheck) throws IOException, YarnException {

  int numBroadcastTasks = 2;
  int numOneToOneTasks = 3;
  if (doLocalityCheck) {
    YarnClient yarnClient = YarnClient.createYarnClient();
    yarnClient.init(tezConf);
    yarnClient.start();
    int numNMs;
    try {
      numNMs = yarnClient.getNodeReports(NodeState.RUNNING).size();
    } finally {
      // Always release the started client, even when the node-report query throws.
      yarnClient.stop();
    }
    // create enough 1-1 tasks to run in parallel (reserving 1 slot for the AM),
    // but never fewer than one
    numOneToOneTasks = Math.max(1, numNMs - numBroadcastTasks - 1);
  }
  // Payload bytes: [locality-check flag, 1].
  byte[] procByte = {(byte) (doLocalityCheck ? 1 : 0), 1};
  UserPayload procPayload = UserPayload.create(ByteBuffer.wrap(procByte));

  System.out.println("Using " + numOneToOneTasks + " 1-1 tasks");

  Vertex broadcastVertex = Vertex.create("Broadcast", ProcessorDescriptor.create(
      InputProcessor.class.getName()), numBroadcastTasks);

  Vertex inputVertex = Vertex.create("Input", ProcessorDescriptor.create(
      InputProcessor.class.getName()).setUserPayload(procPayload), numOneToOneTasks);

  // No task count is given here; presumably it is determined at runtime — TODO confirm.
  Vertex oneToOneVertex = Vertex.create("OneToOne",
      ProcessorDescriptor.create(
          OneToOneProcessor.class.getName()).setUserPayload(procPayload));
  oneToOneVertex.setVertexManagerPlugin(
      VertexManagerPluginDescriptor.create(InputReadyVertexManager.class.getName()));

  UnorderedKVEdgeConfig edgeConf = UnorderedKVEdgeConfig
      .newBuilder(Text.class.getName(), IntWritable.class.getName())
      .setFromConfiguration(tezConf).build();

  DAG dag = DAG.create("BroadcastAndOneToOneExample");
  dag.addVertex(inputVertex)
      .addVertex(broadcastVertex)
      .addVertex(oneToOneVertex)
      .addEdge(
          Edge.create(inputVertex, oneToOneVertex, edgeConf.createDefaultOneToOneEdgeProperty()))
      .addEdge(
          Edge.create(broadcastVertex, oneToOneVertex,
              edgeConf.createDefaultBroadcastEdgeProperty()));
  return dag;
}
 
Example 10
Source File: TestFaultTolerance.java    From tez with Apache License 2.0 4 votes vote down vote up
/**
 * In an unpartitioned cartesian product, the failure fraction should be
 * (#unique failures) / (#consumers that depend on the source task). This test builds a 2x2
 * cartesian product (v1 x v2 feeding v3) and lets the 4th destination task (index 3) fail on
 * its input. With a failure fraction limit of 0.25, the failure fraction should be 1/2,
 * not 1/4.
 *
 * @throws Exception if building or running the DAG fails
 */
@Test
public void testCartesianProduct() throws Exception {
  // NOTE(review): dagConf is populated here but never read again within this method, so the
  // 0.25 limit does not visibly reach the DAG — confirm whether it should be applied.
  Configuration dagConf = new Configuration();
  dagConf.setDouble(TezConfiguration.TEZ_TASK_MAX_ALLOWED_OUTPUT_FAILURES_FRACTION, 0.25);
  DAG dag = DAG.create("dag");

  final String consumer = "v3";

  // Processor settings scoped to v3: verify task index 3 against value 5.
  Configuration processorConf = new Configuration();
  processorConf.setInt(TestProcessor.getVertexConfName(
    TestProcessor.TEZ_FAILING_PROCESSOR_VERIFY_TASK_INDEX, consumer), 3);
  processorConf.setInt(TestProcessor.getVertexConfName(
    TestProcessor.TEZ_FAILING_PROCESSOR_VERIFY_VALUE, consumer), 5);
  UserPayload processorPayload = TezUtils.createUserPayloadFromConf(processorConf);
  ProcessorDescriptor sharedProcessor =
    ProcessorDescriptor.create(TestProcessor.class.getName()).setUserPayload(processorPayload);

  // Two sources with 2 tasks each; v3 is created without an explicit task count.
  Vertex v1 = Vertex.create("v1", sharedProcessor, 2);
  Vertex v2 = Vertex.create("v2", sharedProcessor, 2);
  Vertex v3 = Vertex.create(consumer, sharedProcessor);

  // Cartesian product of v1 x v2 with one partition and grouping disabled.
  CartesianProductConfig cpConfig = new CartesianProductConfig(Arrays.asList("v1", "v2"));
  TezConfiguration cpTezConf = new TezConfiguration();
  cpTezConf.setInt(CartesianProductVertexManager.TEZ_CARTESIAN_PRODUCT_NUM_PARTITIONS, 1);
  cpTezConf.setBoolean(CartesianProductVertexManager.TEZ_CARTESIAN_PRODUCT_ENABLE_GROUPING, false);
  UserPayload cpPayload = cpConfig.toUserPayload(cpTezConf);

  v3.setVertexManagerPlugin(
    VertexManagerPluginDescriptor.create(CartesianProductVertexManager.class.getName())
      .setUserPayload(cpPayload));

  EdgeManagerPluginDescriptor cpEdgeManager =
    EdgeManagerPluginDescriptor.create(CartesianProductEdgeManager.class.getName())
      .setUserPayload(cpPayload);

  // Failing-input settings scoped to v3: task index 3, attempt 0, input index 0,
  // up to input attempt 0.
  Configuration failingInputConf = new Configuration();
  failingInputConf.setBoolean(TestInput.getVertexConfName(
    TestInput.TEZ_FAILING_INPUT_DO_FAIL, consumer), true);
  failingInputConf.setInt(TestInput.getVertexConfName(
    TestInput.TEZ_FAILING_INPUT_FAILING_TASK_INDEX, consumer), 3);
  failingInputConf.setInt(TestInput.getVertexConfName(
    TestInput.TEZ_FAILING_INPUT_FAILING_TASK_ATTEMPT, consumer), 0);
  failingInputConf.setInt(TestInput.getVertexConfName(
    TestInput.TEZ_FAILING_INPUT_FAILING_INPUT_INDEX, consumer), 0);
  failingInputConf.setInt(TestInput.getVertexConfName(
    TestInput.TEZ_FAILING_INPUT_FAILING_UPTO_INPUT_ATTEMPT, consumer), 0);
  UserPayload failingInputPayload = TezUtils.createUserPayloadFromConf(failingInputConf);

  // Both edges share one custom edge property.
  EdgeProperty cpEdgeProperty =
    EdgeProperty.create(cpEdgeManager, DataMovementType.CUSTOM,
      DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL, TestOutput.getOutputDesc(null),
      TestInput.getInputDesc(failingInputPayload));
  Edge v1ToV3 = Edge.create(v1, v3, cpEdgeProperty);
  Edge v2ToV3 = Edge.create(v2, v3, cpEdgeProperty);

  dag.addVertex(v1).addVertex(v2).addVertex(v3);
  dag.addEdge(v1ToV3).addEdge(v2ToV3);

  // run dag
  runDAGAndVerify(dag, DAGStatus.State.SUCCEEDED);
}
 
Example 11
Source File: YARNRunner.java    From tez with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the vertex for one stage of a multi-stage MapReduce job. Stage 0 is the map stage
 * (runs {@code MapProcessor} and gets an "MRInput" data source); every later stage runs
 * {@code ReduceProcessor} and is managed by a {@code ShuffleVertexManager} built from
 * {@code stageConf}. The last stage additionally gets an "MROutput" data sink backed by
 * {@code MROutputLegacy} and {@code MROutputCommitter}.
 * <p>
 * Side effect: sets {@code MRJobConfig.MROUTPUT_FILE_NAME_PREFIX} to "part" on
 * {@code stageConf} before the vertex payload is created from it.
 *
 * @param stageConf configuration of this stage; modified as described above
 * @param jobLocalResources local resources added to the vertex's task local files
 * @param locations task location hints wrapped into the vertex's location hint
 * @param stageNum zero-based stage index, from 0 to {@code totalStages - 1}
 * @param totalStages total number of stages in the job
 * @return the configured vertex
 * @throws IOException propagated from the helper calls made while building the vertex
 */
private Vertex createVertexForStage(Configuration stageConf,
    Map<String, LocalResource> jobLocalResources,
    List<TaskLocationHint> locations, int stageNum, int totalStages)
    throws IOException {
  // stageNum starts from 0, goes till numStages - 1
  final boolean isMap = (stageNum == 0);
  final boolean isLastStage = (stageNum == totalStages - 1);

  final int numTasks;
  final String processorName;
  final String vertexName;
  final Resource taskResource;
  if (isMap) {
    numTasks = stageConf.getInt(MRJobConfig.NUM_MAPS, 0);
    processorName = MapProcessor.class.getName();
    vertexName = MultiStageMRConfigUtil.getInitialMapVertexName();
    taskResource = MRHelpers.getResourceForMRMapper(stageConf);
  } else {
    numTasks = stageConf.getInt(MRJobConfig.NUM_REDUCES, 0);
    processorName = ReduceProcessor.class.getName();
    vertexName = isLastStage
        ? MultiStageMRConfigUtil.getFinalReduceVertexName()
        : MultiStageMRConfigUtil.getIntermediateStageVertexName(stageNum);
    taskResource = MRHelpers.getResourceForMRReducer(stageConf);
  }

  // Must happen before the payload is serialized so the prefix is part of it.
  stageConf.set(MRJobConfig.MROUTPUT_FILE_NAME_PREFIX, "part");

  UserPayload vertexUserPayload = TezUtils.createUserPayloadFromConf(stageConf);
  ProcessorDescriptor processorDescriptor =
      ProcessorDescriptor.create(processorName).setUserPayload(vertexUserPayload);
  Vertex vertex = Vertex.create(vertexName, processorDescriptor, numTasks, taskResource);

  // Optionally attach the configuration as history text on the processor descriptor.
  if (stageConf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT,
      TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT_DEFAULT)) {
    vertex.getProcessorDescriptor().setHistoryText(TezUtils.convertToHistoryText(stageConf));
  }

  if (isMap) {
    vertex.addDataSource("MRInput",
        configureMRInputWithLegacySplitsGenerated(stageConf, true));
  }

  // The last stage writes the job output; for map-only jobs that is the map stage itself.
  if (isLastStage) {
    OutputDescriptor outputDescriptor =
        OutputDescriptor.create(MROutputLegacy.class.getName())
            .setUserPayload(vertexUserPayload);
    // Re-read the flag here rather than caching it, exactly as the stage conf stands now.
    if (stageConf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT,
        TezRuntimeConfiguration.TEZ_RUNTIME_CONVERT_USER_PAYLOAD_TO_HISTORY_TEXT_DEFAULT)) {
      outputDescriptor.setHistoryText(TezUtils.convertToHistoryText(stageConf));
    }
    OutputCommitterDescriptor committerDescriptor =
        OutputCommitterDescriptor.create(MROutputCommitter.class.getName());
    vertex.addDataSink("MROutput",
        DataSinkDescriptor.create(outputDescriptor, committerDescriptor, null));
  }

  Map<String, String> taskEnv = new HashMap<String, String>();
  setupMapReduceEnv(stageConf, taskEnv, isMap);

  // PRECOMMIT Remove split localization for reduce tasks if it's being set
  // here
  Map<String, LocalResource> taskLocalResources =
      new TreeMap<String, LocalResource>(jobLocalResources);

  final String taskJavaOpts;
  if (isMap) {
    taskJavaOpts = MRHelpers.getJavaOptsForMRMapper(stageConf);
  } else {
    taskJavaOpts = MRHelpers.getJavaOptsForMRReducer(stageConf);
  }

  vertex.setTaskEnvironment(taskEnv)
      .addTaskLocalFiles(taskLocalResources)
      .setLocationHint(VertexLocationHint.create(locations))
      .setTaskLaunchCmdOpts(taskJavaOpts);

  if (!isMap) {
    vertex.setVertexManagerPlugin((ShuffleVertexManager.createConfigBuilder(stageConf).build()));
  }

  if (LOG.isDebugEnabled()) {
    // TODO Add localResources and Environment
    LOG.debug("Adding vertex to DAG"
        + ", vertexName=" + vertex.getName()
        + ", processor=" + vertex.getProcessorDescriptor().getClassName()
        + ", parallelism=" + vertex.getParallelism()
        + ", javaOpts=" + vertex.getTaskLaunchCmdOpts()
        + ", resources=" + vertex.getTaskResource());
  }

  return vertex;
}