Java Code Examples for org.apache.tez.dag.api.DAG#addEdge()

The following examples show how to use org.apache.tez.dag.api.DAG#addEdge(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestAMRecovery.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a two-vertex DAG (v1 --> v2) whose vertex managers are configured from {@code tezConf}.
 * <p>
 * v1 always gets {@code ScheduleControlledVertexManager}; per the original author's notes this
 * manager controls whether only one second task is scheduled in the partially-finished test case.
 * v2 gets the caller-supplied vertex manager, which (again per the original notes) can control
 * when the AM is killed. Both managers receive {@code tezConf} serialized as their user payload.
 *
 * @param dagName name passed to {@code DAG.create}
 * @param vertexManagerClass vertex manager plugin class installed on v2
 * @param dmType data movement type of the v1 --> v2 edge
 * @param failOnParitialCompleted stored as "true"/"false" under {@code FAIL_ON_PARTIAL_FINISHED}
 *        in {@code tezConf} before the payloads are created
 * @return the DAG containing v1, v2 and the single edge between them
 * @throws IOException declared by this method; presumably raised while serializing
 *         {@code tezConf} into a payload - confirm against TezUtils
 */
private DAG createDAG(String dagName, Class vertexManagerClass, DataMovementType dmType,
    boolean failOnParitialCompleted) throws IOException {
  // The flag must be written before tezConf is serialized into the plugin payloads below.
  tezConf.set(FAIL_ON_PARTIAL_FINISHED, failOnParitialCompleted ? "true" : "false");

  DAG dag = DAG.create(dagName);
  UserPayload edgePayload = UserPayload.create(null);

  // Upstream vertex: two tasks, fixed scheduling-controlled vertex manager.
  Vertex v1 = Vertex.create("v1", MyProcessor.getProcDesc(), 2);
  VertexManagerPluginDescriptor v1Manager = VertexManagerPluginDescriptor
      .create(ScheduleControlledVertexManager.class.getName())
      .setUserPayload(TezUtils.createUserPayloadFromConf(tezConf));
  v1.setVertexManagerPlugin(v1Manager);

  // Downstream vertex: two tasks, vertex manager chosen by the caller.
  Vertex v2 = Vertex.create("v2", DoNothingProcessor.getProcDesc(), 2);
  VertexManagerPluginDescriptor v2Manager = VertexManagerPluginDescriptor
      .create(vertexManagerClass.getName())
      .setUserPayload(TezUtils.createUserPayloadFromConf(tezConf));
  v2.setVertexManagerPlugin(v2Manager);

  dag.addVertex(v1).addVertex(v2);

  EdgeProperty v1ToV2 = EdgeProperty.create(dmType,
      DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL,
      TestOutput.getOutputDesc(edgePayload), TestInput.getInputDesc(edgePayload));
  dag.addEdge(Edge.create(v1, v2, v1ToV2));
  return dag;
}
 
Example 2
Source File: MultiAttemptDAG.java    From incubator-tez with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a linear three-vertex DAG (v1 --> v2 --> v3) in which every vertex uses
 * {@code FailOnAttemptVertexManagerPlugin}.
 * <p>
 * Each vertex manager receives the bytes of "1", "2" or "3" (for v1, v2, v3 respectively) as
 * its user payload; per the original comment this makes each manager fail on the appropriate
 * attempt.
 *
 * @param name name of the DAG
 * @param conf optional configuration; when non-null it supplies the per-vertex task count and
 *        is serialized into the processor/input/output payload, when null the default task
 *        count and a null payload are used
 * @return the assembled DAG
 * @throws Exception declared by this method for failures in the calls it makes
 */
public static DAG createDAG(String name,
    Configuration conf) throws Exception {
  final byte[] payload;
  final int taskCount;
  if (conf == null) {
    payload = null;
    taskCount = MULTI_ATTEMPT_DAG_VERTEX_NUM_TASKS_DEFAULT;
  } else {
    taskCount = conf.getInt(MULTI_ATTEMPT_DAG_VERTEX_NUM_TASKS, MULTI_ATTEMPT_DAG_VERTEX_NUM_TASKS_DEFAULT);
    payload = TezUtils.createUserPayloadFromConf(conf);
  }

  DAG dag = new DAG(name);
  Vertex v1 = new Vertex("v1", TestProcessor.getProcDesc(payload), taskCount, defaultResource);
  Vertex v2 = new Vertex("v2", TestProcessor.getProcDesc(payload), taskCount, defaultResource);
  Vertex v3 = new Vertex("v3", TestProcessor.getProcDesc(payload), taskCount, defaultResource);

  // Vertex at position i gets the payload "i+1" ("1", "2", "3").
  Vertex[] ordered = {v1, v2, v3};
  for (int idx = 0; idx < ordered.length; idx++) {
    String attemptMarker = String.valueOf(idx + 1);
    ordered[idx].setVertexManagerPlugin(new VertexManagerPluginDescriptor(
        FailOnAttemptVertexManagerPlugin.class.getName())
        .setUserPayload(attemptMarker.getBytes()));
  }

  dag.addVertex(v1).addVertex(v2).addVertex(v3);

  // v1 --> v2
  EdgeProperty firstHop = new EdgeProperty(DataMovementType.SCATTER_GATHER,
      DataSourceType.PERSISTED,
      SchedulingType.SEQUENTIAL,
      TestOutput.getOutputDesc(payload),
      TestInput.getInputDesc(payload));
  dag.addEdge(new Edge(v1, v2, firstHop));

  // v2 --> v3
  EdgeProperty secondHop = new EdgeProperty(DataMovementType.SCATTER_GATHER,
      DataSourceType.PERSISTED,
      SchedulingType.SEQUENTIAL,
      TestOutput.getOutputDesc(payload),
      TestInput.getInputDesc(payload));
  dag.addEdge(new Edge(v2, v3, secondHop));
  return dag;
}
 
Example 3
Source File: SimpleReverseVTestDAG.java    From incubator-tez with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a three-vertex "reverse V" DAG: v1 fans out to both v2 and v3
 * (edges v1 --> v2 and v1 --> v3).
 *
 * @param name name of the DAG
 * @param conf optional configuration; when non-null it supplies the per-vertex task count and
 *        is serialized into the payload handed to processors, inputs and outputs, when null
 *        the default task count and a null payload are used
 * @return the assembled DAG
 * @throws Exception declared by this method for failures in the calls it makes
 */
public static DAG createDAG(String name, 
    Configuration conf) throws Exception {
  final byte[] payload;
  final int taskCount;
  if (conf == null) {
    payload = null;
    taskCount = TEZ_SIMPLE_REVERSE_V_DAG_NUM_TASKS_DEFAULT;
  } else {
    taskCount = conf.getInt(TEZ_SIMPLE_REVERSE_V_DAG_NUM_TASKS, TEZ_SIMPLE_REVERSE_V_DAG_NUM_TASKS_DEFAULT);
    payload = TezUtils.createUserPayloadFromConf(conf);
  }

  DAG dag = new DAG(name);
  Vertex v1 = new Vertex("v1", TestProcessor.getProcDesc(payload), taskCount, defaultResource);
  Vertex v2 = new Vertex("v2", TestProcessor.getProcDesc(payload), taskCount, defaultResource);
  Vertex v3 = new Vertex("v3", TestProcessor.getProcDesc(payload), taskCount, defaultResource);
  dag.addVertex(v1).addVertex(v2).addVertex(v3);

  // v1 --> v2
  EdgeProperty toV2 = new EdgeProperty(DataMovementType.SCATTER_GATHER,
      DataSourceType.PERSISTED,
      SchedulingType.SEQUENTIAL,
      TestOutput.getOutputDesc(payload),
      TestInput.getInputDesc(payload));
  dag.addEdge(new Edge(v1, v2, toV2));

  // v1 --> v3
  EdgeProperty toV3 = new EdgeProperty(DataMovementType.SCATTER_GATHER,
      DataSourceType.PERSISTED,
      SchedulingType.SEQUENTIAL,
      TestOutput.getOutputDesc(payload),
      TestInput.getInputDesc(payload));
  dag.addEdge(new Edge(v1, v3, toV3));
  return dag;
}
 
Example 4
Source File: SimpleVTestDAG.java    From incubator-tez with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a three-vertex "V" DAG: v1 and v2 both feed v3
 * (edges v1 --> v3 and v2 --> v3).
 *
 * @param name name of the DAG
 * @param conf optional configuration; when non-null it supplies the per-vertex task count and
 *        is serialized into the payload handed to processors, inputs and outputs, when null
 *        the default task count and a null payload are used
 * @return the assembled DAG
 * @throws Exception declared by this method for failures in the calls it makes
 */
public static DAG createDAG(String name, 
    Configuration conf) throws Exception {
  final byte[] payload;
  final int taskCount;
  if (conf == null) {
    payload = null;
    taskCount = TEZ_SIMPLE_V_DAG_NUM_TASKS_DEFAULT;
  } else {
    taskCount = conf.getInt(TEZ_SIMPLE_V_DAG_NUM_TASKS, TEZ_SIMPLE_V_DAG_NUM_TASKS_DEFAULT);
    payload = TezUtils.createUserPayloadFromConf(conf);
  }

  DAG dag = new DAG(name);
  Vertex v1 = new Vertex("v1", TestProcessor.getProcDesc(payload), taskCount, defaultResource);
  Vertex v2 = new Vertex("v2", TestProcessor.getProcDesc(payload), taskCount, defaultResource);
  Vertex v3 = new Vertex("v3", TestProcessor.getProcDesc(payload), taskCount, defaultResource);
  dag.addVertex(v1).addVertex(v2).addVertex(v3);

  // v1 --> v3
  EdgeProperty fromV1 = new EdgeProperty(DataMovementType.SCATTER_GATHER,
      DataSourceType.PERSISTED,
      SchedulingType.SEQUENTIAL,
      TestOutput.getOutputDesc(payload),
      TestInput.getInputDesc(payload));
  dag.addEdge(new Edge(v1, v3, fromV1));

  // v2 --> v3
  EdgeProperty fromV2 = new EdgeProperty(DataMovementType.SCATTER_GATHER,
      DataSourceType.PERSISTED,
      SchedulingType.SEQUENTIAL,
      TestOutput.getOutputDesc(payload),
      TestInput.getInputDesc(payload));
  dag.addEdge(new Edge(v2, v3, fromV2));
  return dag;
}
 
Example 5
Source File: MultiAttemptDAG.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a linear three-vertex DAG (v1 --> v2 --> v3) in which every vertex uses
 * {@code FailOnAttemptVertexManagerPlugin}.
 * <p>
 * Each vertex manager receives a payload wrapping the bytes of "1", "2" or "3" (for v1, v2,
 * v3 respectively); per the original comment this makes each manager fail on the appropriate
 * attempt.
 *
 * @param name name of the DAG
 * @param conf optional configuration; when non-null it supplies the per-vertex task count and
 *        is serialized into the processor/input/output payload, when null the default task
 *        count and a payload created from {@code null} are used
 * @return the assembled DAG
 * @throws Exception declared by this method for failures in the calls it makes
 */
public static DAG createDAG(String name,
    Configuration conf) throws Exception {
  final boolean hasConf = conf != null;
  final int taskCount = hasConf
      ? conf.getInt(MULTI_ATTEMPT_DAG_VERTEX_NUM_TASKS, MULTI_ATTEMPT_DAG_VERTEX_NUM_TASKS_DEFAULT)
      : MULTI_ATTEMPT_DAG_VERTEX_NUM_TASKS_DEFAULT;
  UserPayload payload = UserPayload.create(null);
  if (hasConf) {
    payload = TezUtils.createUserPayloadFromConf(conf);
  }

  DAG dag = DAG.create(name);
  Vertex v1 = Vertex.create("v1", TestProcessor.getProcDesc(payload), taskCount, defaultResource);
  Vertex v2 = Vertex.create("v2", TestProcessor.getProcDesc(payload), taskCount, defaultResource);
  Vertex v3 = Vertex.create("v3", TestProcessor.getProcDesc(payload), taskCount, defaultResource);

  // Vertex at position i gets the payload "i+1" ("1", "2", "3").
  Vertex[] ordered = {v1, v2, v3};
  for (int idx = 0; idx < ordered.length; idx++) {
    String attemptMarker = String.valueOf(idx + 1);
    ordered[idx].setVertexManagerPlugin(VertexManagerPluginDescriptor.create(
        FailOnAttemptVertexManagerPlugin.class.getName())
        .setUserPayload(UserPayload.create(ByteBuffer.wrap(attemptMarker.getBytes()))));
  }

  dag.addVertex(v1).addVertex(v2).addVertex(v3);

  // v1 --> v2
  EdgeProperty firstHop = EdgeProperty.create(DataMovementType.SCATTER_GATHER,
      DataSourceType.PERSISTED,
      SchedulingType.SEQUENTIAL,
      TestOutput.getOutputDesc(payload),
      TestInput.getInputDesc(payload));
  dag.addEdge(Edge.create(v1, v2, firstHop));

  // v2 --> v3
  EdgeProperty secondHop = EdgeProperty.create(DataMovementType.SCATTER_GATHER,
      DataSourceType.PERSISTED,
      SchedulingType.SEQUENTIAL,
      TestOutput.getOutputDesc(payload),
      TestInput.getInputDesc(payload));
  dag.addEdge(Edge.create(v2, v3, secondHop));
  return dag;
}
 
Example 6
Source File: SimpleReverseVTestDAG.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a three-vertex "reverse V" DAG: v1 fans out to both v2 and v3
 * (edges v1 --> v2 and v1 --> v3).
 *
 * @param name name of the DAG
 * @param conf optional configuration; when non-null it supplies the per-vertex task count and
 *        is serialized into the payload handed to processors, inputs and outputs, when null
 *        the default task count and a payload created from {@code null} are used
 * @return the assembled DAG
 * @throws Exception declared by this method for failures in the calls it makes
 */
public static DAG createDAG(String name, 
    Configuration conf) throws Exception {
  final boolean hasConf = conf != null;
  final int taskCount = hasConf
      ? conf.getInt(TEZ_SIMPLE_REVERSE_V_DAG_NUM_TASKS, TEZ_SIMPLE_REVERSE_V_DAG_NUM_TASKS_DEFAULT)
      : TEZ_SIMPLE_REVERSE_V_DAG_NUM_TASKS_DEFAULT;
  UserPayload payload = UserPayload.create(null);
  if (hasConf) {
    payload = TezUtils.createUserPayloadFromConf(conf);
  }

  DAG dag = DAG.create(name);
  Vertex v1 = Vertex.create("v1", TestProcessor.getProcDesc(payload), taskCount, defaultResource);
  Vertex v2 = Vertex.create("v2", TestProcessor.getProcDesc(payload), taskCount, defaultResource);
  Vertex v3 = Vertex.create("v3", TestProcessor.getProcDesc(payload), taskCount, defaultResource);
  dag.addVertex(v1).addVertex(v2).addVertex(v3);

  // v1 --> v2
  EdgeProperty toV2 = EdgeProperty.create(DataMovementType.SCATTER_GATHER,
      DataSourceType.PERSISTED,
      SchedulingType.SEQUENTIAL,
      TestOutput.getOutputDesc(payload),
      TestInput.getInputDesc(payload));
  dag.addEdge(Edge.create(v1, v2, toV2));

  // v1 --> v3
  EdgeProperty toV3 = EdgeProperty.create(DataMovementType.SCATTER_GATHER,
      DataSourceType.PERSISTED,
      SchedulingType.SEQUENTIAL,
      TestOutput.getOutputDesc(payload),
      TestInput.getInputDesc(payload));
  dag.addEdge(Edge.create(v1, v3, toV3));
  return dag;
}
 
Example 7
Source File: SimpleVTestDAG.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a three-vertex "V" DAG: v1 and v2 both feed v3
 * (edges v1 --> v3 and v2 --> v3).
 *
 * @param name name of the DAG
 * @param conf optional configuration; when non-null it supplies the per-vertex task count and
 *        is serialized into the payload handed to processors, inputs and outputs, when null
 *        the default task count and a payload created from {@code null} are used
 * @return the assembled DAG
 * @throws Exception declared by this method for failures in the calls it makes
 */
public static DAG createDAG(String name, 
    Configuration conf) throws Exception {
  final boolean hasConf = conf != null;
  final int taskCount = hasConf
      ? conf.getInt(TEZ_SIMPLE_V_DAG_NUM_TASKS, TEZ_SIMPLE_V_DAG_NUM_TASKS_DEFAULT)
      : TEZ_SIMPLE_V_DAG_NUM_TASKS_DEFAULT;
  UserPayload payload = UserPayload.create(null);
  if (hasConf) {
    payload = TezUtils.createUserPayloadFromConf(conf);
  }

  DAG dag = DAG.create(name);
  Vertex v1 = Vertex.create("v1", TestProcessor.getProcDesc(payload), taskCount, defaultResource);
  Vertex v2 = Vertex.create("v2", TestProcessor.getProcDesc(payload), taskCount, defaultResource);
  Vertex v3 = Vertex.create("v3", TestProcessor.getProcDesc(payload), taskCount, defaultResource);
  dag.addVertex(v1).addVertex(v2).addVertex(v3);

  // v1 --> v3
  EdgeProperty fromV1 = EdgeProperty.create(DataMovementType.SCATTER_GATHER,
      DataSourceType.PERSISTED,
      SchedulingType.SEQUENTIAL,
      TestOutput.getOutputDesc(payload),
      TestInput.getInputDesc(payload));
  dag.addEdge(Edge.create(v1, v3, fromV1));

  // v2 --> v3
  EdgeProperty fromV2 = EdgeProperty.create(DataMovementType.SCATTER_GATHER,
      DataSourceType.PERSISTED,
      SchedulingType.SEQUENTIAL,
      TestOutput.getOutputDesc(payload),
      TestInput.getInputDesc(payload));
  dag.addEdge(Edge.create(v2, v3, fromV2));
  return dag;
}
 
Example 8
Source File: TestExceptionPropagation.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a two-vertex DAG (v1 --> v2) in which the exception location name is passed as the
 * payload to the Input, Output, Processor and initializer descriptors, and the location itself
 * to the VertexManagerPlugin descriptors, so that the chosen component can decide where to
 * throw.
 * <p>
 * Locations whose name starts with "EM_" connect the vertices with a custom edge manager
 * ({@code CustomEdgeManager}); all other locations use a ONE_TO_ONE edge and additionally
 * install {@code InputReadyVertexManagerWithException} on v2.
 *
 * @param exLocation the location at which the exception should be raised; its name is also
 *        used to name the DAG ("dag_" + name)
 * @return the assembled DAG
 * @throws IOException declared by this method for failures in the calls it makes
 */
private DAG createDAG(ExceptionLocation exLocation) throws IOException {
  DAG dag = DAG.create("dag_" + exLocation.name());
  UserPayload payload =
      UserPayload.create(ByteBuffer.wrap(exLocation.name().getBytes()));

  // v1: single task, root input with an initializer, custom root-input vertex manager.
  Vertex v1 =
      Vertex.create("v1", ProcessorWithException.getProcDesc(payload), 1);
  InputDescriptor v1Input = InputWithException.getInputDesc(payload);
  InputInitializerDescriptor v1Initializer =
      InputInitializerWithException.getIIDesc(payload);
  DataSourceDescriptor v1Source = DataSourceDescriptor.create(v1Input, v1Initializer, null);
  v1.addDataSource("input", v1Source);
  v1.setVertexManagerPlugin(RootInputVertexManagerWithException
      .getVMDesc(exLocation));

  // v2: single task, no-op root input with a second initializer.
  Vertex v2 = 
      Vertex.create("v2", DoNothingProcessor.getProcDesc(), 1);
  DataSourceDescriptor v2Source =
      DataSourceDescriptor.create(InputDescriptor.create(NoOpInput.class.getName()),
        InputInitializerWithException2.getIIDesc(payload), null);
  v2.addDataSource("input2", v2Source);

  dag.addVertex(v1)
    .addVertex(v2);

  final EdgeProperty edgeProperty;
  if (exLocation.name().startsWith("EM_")) {
    // Edge-manager locations: route through the custom edge manager plugin.
    edgeProperty = EdgeProperty.create(
        EdgeManagerPluginDescriptor.create(CustomEdgeManager.class.getName())
          .setUserPayload(payload),
        DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL,
        OutputWithException.getOutputDesc(payload), InputWithException.getInputDesc(payload));
  } else {
    // The customized vertex manager is set only here because, per the original note,
    // it cannot be used together with a custom edge.
    v2.setVertexManagerPlugin(InputReadyVertexManagerWithException.getVMDesc(exLocation));
    edgeProperty = EdgeProperty.create(DataMovementType.ONE_TO_ONE,
        DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL,
        OutputWithException.getOutputDesc(payload), InputWithException.getInputDesc(payload));
  }
  dag.addEdge(Edge.create(v1, v2, edgeProperty));

  return dag;
}
 
Example 9
Source File: YARNRunner.java    From incubator-tez with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a linear DAG with one vertex per stage configuration, connecting each stage to the
 * next with an ordered, partitioned key-value edge.
 * <p>
 * The DAG name comes from the first stage's {@code MRJobConfig.JOB_NAME} (falling back to
 * {@code YarnConfiguration.DEFAULT_APPLICATION_NAME}). Only the first stage is given location
 * hints, derived from the input splits; later stages are passed {@code null} hints.
 *
 * @param fs file system handed to the split-location lookup
 * @param jobId job id handed to the split-location lookup
 * @param stageConfs one configuration per stage; index 0 is read unconditionally
 * @param jobSubmitDir job submission directory handed to the split-location lookup
 * @param ts credentials; not read by this method
 * @param jobLocalResources local resources passed to every created vertex
 * @return the assembled DAG
 * @throws IOException declared by this method for failures in the calls it makes
 */
private DAG createDAG(FileSystem fs, JobID jobId, Configuration[] stageConfs,
    String jobSubmitDir, Credentials ts,
    Map<String, LocalResource> jobLocalResources) throws IOException {

  final int stageCount = stageConfs.length;
  String jobName = stageConfs[0].get(MRJobConfig.JOB_NAME,
      YarnConfiguration.DEFAULT_APPLICATION_NAME);
  DAG dag = new DAG(jobName);

  LOG.info("Number of stages: " + stageConfs.length);

  List<TaskLocationHint> mapInputLocations =
      getMapLocationHintsFromInputSplits(
          jobId, fs, stageConfs[0], jobSubmitDir);
  List<TaskLocationHint> reduceInputLocations = null;

  // Create every vertex up front; only stage 0 receives split-based location hints.
  Vertex[] vertices = new Vertex[stageCount];
  for (int stage = 0; stage < stageCount; stage++) {
    List<TaskLocationHint> hints = (stage == 0) ? mapInputLocations : reduceInputLocations;
    vertices[stage] = createVertexForStage(stageConfs[stage], jobLocalResources,
        hints, stage, stageCount);
  }

  for (int stage = 0; stage < vertices.length; stage++) {
    dag.addVertex(vertices[stage]);
    if (stage == 0) {
      // The first vertex has no upstream stage, so there is no edge to add.
      continue;
    }

    // Set edge conf based on Input conf (compression etc properties for MapReduce are
    // w.r.t Outputs - MAP_OUTPUT_COMPRESS for example)
    Configuration upstreamConf = stageConfs[stage - 1];
    String keyClass = upstreamConf.get(TezJobConfig.TEZ_RUNTIME_KEY_CLASS);
    String valueClass = upstreamConf.get(TezJobConfig.TEZ_RUNTIME_VALUE_CLASS);
    OrderedPartitionedKVEdgeConfigurer edgeConf =
        OrderedPartitionedKVEdgeConfigurer.newBuilder(keyClass, valueClass,
            MRPartitioner.class.getName(), upstreamConf)
            .configureInput().useLegacyInput().done()
            .setFromConfiguration(upstreamConf).build();
    dag.addEdge(
        new Edge(vertices[stage - 1], vertices[stage], edgeConf.createDefaultEdgeProperty()));
  }
  return dag;
}
 
Example 10
Source File: YARNRunner.java    From tez with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a linear DAG with one vertex per stage configuration, connecting each stage to the
 * next with an ordered, partitioned key-value edge.
 * <p>
 * The DAG name comes from the first stage's {@code MRJobConfig.JOB_NAME} (falling back to
 * {@code YarnConfiguration.DEFAULT_APPLICATION_NAME}). Only the first stage is given location
 * hints, derived from the input splits; later stages are passed {@code null} hints. For each
 * edge, the upstream stage's configuration entries are copied into a map that is handed to
 * the edge builder as the partitioner configuration.
 *
 * @param fs file system handed to the split-location lookup
 * @param jobId job id handed to the split-location lookup
 * @param stageConfs one configuration per stage; index 0 is read unconditionally
 * @param jobSubmitDir job submission directory handed to the split-location lookup
 * @param ts credentials; not read by this method
 * @param jobLocalResources local resources passed to every created vertex
 * @return the assembled DAG
 * @throws IOException declared by this method for failures in the calls it makes
 */
private DAG createDAG(FileSystem fs, JobID jobId, Configuration[] stageConfs,
    String jobSubmitDir, Credentials ts,
    Map<String, LocalResource> jobLocalResources) throws IOException {

  final int stageCount = stageConfs.length;
  String jobName = stageConfs[0].get(MRJobConfig.JOB_NAME,
      YarnConfiguration.DEFAULT_APPLICATION_NAME);
  DAG dag = DAG.create(jobName);

  LOG.info("Number of stages: " + stageConfs.length);

  List<TaskLocationHint> mapInputLocations =
      getMapLocationHintsFromInputSplits(
          jobId, fs, stageConfs[0], jobSubmitDir);
  List<TaskLocationHint> reduceInputLocations = null;

  // Create every vertex up front; only stage 0 receives split-based location hints.
  Vertex[] vertices = new Vertex[stageCount];
  for (int stage = 0; stage < stageCount; stage++) {
    List<TaskLocationHint> hints = (stage == 0) ? mapInputLocations : reduceInputLocations;
    vertices[stage] = createVertexForStage(stageConfs[stage], jobLocalResources,
        hints, stage, stageCount);
  }

  for (int stage = 0; stage < vertices.length; stage++) {
    dag.addVertex(vertices[stage]);
    if (stage == 0) {
      // The first vertex has no upstream stage, so there is no edge to add.
      continue;
    }

    // Set edge conf based on Input conf (compression etc properties for MapReduce are
    // w.r.t Outputs - MAP_OUTPUT_COMPRESS for example)
    Configuration upstreamConf = stageConfs[stage - 1];

    // NOTE(review): the null check below guards only the copy; upstreamConf is dereferenced
    // unconditionally right after, so a null entry would still fail there - confirm intent.
    Map<String, String> partitionerConf = null;
    if (upstreamConf != null) {
      partitionerConf = Maps.newHashMap();
      for (Map.Entry<String, String> property : upstreamConf) {
        partitionerConf.put(property.getKey(), property.getValue());
      }
    }

    String keyClass = upstreamConf.get(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS);
    String valueClass = upstreamConf.get(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS);
    OrderedPartitionedKVEdgeConfig edgeConf =
        OrderedPartitionedKVEdgeConfig.newBuilder(keyClass, valueClass,
            MRPartitioner.class.getName(), partitionerConf)
            .setFromConfigurationUnfiltered(upstreamConf)
            .configureInput().useLegacyInput().done()
            .build();
    dag.addEdge(
        Edge.create(vertices[stage - 1], vertices[stage], edgeConf.createDefaultEdgeProperty()));
  }
  return dag;
}
 
Example 11
Source File: TestSpeculation.java    From tez with Apache License 2.0 4 votes vote down vote up
/**
 * Test basic speculation per vertex conf.
 * <p>
 * Builds a two-vertex DAG (A --> B) where speculation is explicitly disabled on vertex A via
 * its vertex-level conf and left untouched on vertex B, then checks that vertex B records
 * speculations while vertex A records none.
 *
 * @throws Exception the exception
 */
@Retry
@Test (timeout=10000)
public void testBasicSpeculationPerVertexConf() throws Exception {
  DAG dag = DAG.create("test");
  String vNameNoSpec = "A";
  String vNameSpec = "B";
  Vertex vA = Vertex.create(vNameNoSpec, ProcessorDescriptor.create("Proc.class"), 5);
  Vertex vB = Vertex.create(vNameSpec, ProcessorDescriptor.create("Proc.class"), 5);
  // Speculation is switched off for vertex A only.
  vA.setConf(TezConfiguration.TEZ_AM_SPECULATION_ENABLED, "false");
  dag.addVertex(vA);
  dag.addVertex(vB);
  // min/max src fraction is set to 1. So vertices will run sequentially
  dag.addEdge(
      Edge.create(vA, vB,
          EdgeProperty.create(DataMovementType.SCATTER_GATHER, DataSourceType.PERSISTED,
              SchedulingType.SEQUENTIAL, OutputDescriptor.create("O"),
              InputDescriptor.create("I"))));

  MockTezClient tezClient = createTezSession();
  try {
    DAGClient dagClient = tezClient.submitDAG(dag);
    DAGImpl dagImpl = (DAGImpl) mockApp.getContext().getCurrentDAG();
    TezVertexID vertexId = dagImpl.getVertex(vNameSpec).getVertexId();
    TezVertexID vertexIdNoSpec = dagImpl.getVertex(vNameNoSpec).getVertexId();
    // original attempt is killed and speculative one is successful
    TezTaskAttemptID killedTaId =
        TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexId, 0), 0);
    TezTaskAttemptID noSpecTaId = TezTaskAttemptID
        .getInstance(TezTaskID.getInstance(vertexIdNoSpec, 0), 0);

    // cause speculation trigger for both
    mockLauncher.setStatusUpdatesForTask(killedTaId, 100);
    mockLauncher.setStatusUpdatesForTask(noSpecTaId, 100);

    mockLauncher.startScheduling(true);
    org.apache.tez.dag.app.dag.Vertex vSpec = dagImpl.getVertex(vertexId);
    org.apache.tez.dag.app.dag.Vertex vNoSpec = dagImpl.getVertex(vertexIdNoSpec);
    // Wait enough time to give chance for the speculator to trigger
    // speculation on vB.
    // If speculation never triggers, this loop does not exit on its own and the test
    // fails through the JUnit timeout declared on the method.
    do {
      Thread.sleep(100);
    } while (vSpec.getAllCounters().findCounter(TaskCounter.NUM_SPECULATIONS)
        .getValue() <= 0);
    dagClient.waitForCompletion();
    // speculation for vB (enabled) but not for vA (disabled above)
    Assert.assertTrue("Num Speculations is not higher than 0",
        vSpec.getAllCounters().findCounter(TaskCounter.NUM_SPECULATIONS)
            .getValue() > 0);
    Assert.assertEquals(0,
        vNoSpec.getAllCounters().findCounter(TaskCounter.NUM_SPECULATIONS)
            .getValue());
  } finally {
    // Always stop the client so a failed assertion, an exception or an interrupted wait
    // does not leave the session running.
    tezClient.stop();
  }
}