Java Code Examples for org.apache.tez.dag.api.Vertex#create()

The following examples show how to use org.apache.tez.dag.api.Vertex#create(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TopKDataGen.java    From sequenceiq-samples with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a single-vertex DAG whose "datagen" vertex runs {@code GenDataProcessor} and writes
 * its output as text to {@code outPath}.
 *
 * @param tezConf      configuration that is copied for the output sink
 * @param outPath      location the generated data is written to
 * @param outSize      total output size; split across tasks using integer division
 * @param extraColumns forwarded to {@code GenDataProcessor.createConfiguration}
 * @param numTasks     parallelism of the data generation vertex
 * @return the DAG containing the data generation vertex
 * @throws IOException declared for callers; presumably raised while building the processor
 *                     configuration (not visible from here)
 */
private DAG createDag(TezConfiguration tezConf, Path outPath, long outSize, int extraColumns, int numTasks)
        throws IOException {

    final long sizePerTask = outSize / numTasks;
    final DAG dataGenDag = DAG.create("TopK DataGen");

    // Processor description carrying its serialized configuration as user payload.
    ProcessorDescriptor generator = ProcessorDescriptor
            .create(GenDataProcessor.class.getName())
            .setUserPayload(UserPayload.create(
                    ByteBuffer.wrap(GenDataProcessor.createConfiguration(sizePerTask, extraColumns))));

    Vertex generatorVertex = Vertex.create("datagen", generator, numTasks);

    // The sink works on its own copy of the configuration.
    generatorVertex.addDataSink(
            OUTPUT,
            MROutput.createConfigBuilder(
                    new Configuration(tezConf), TextOutputFormat.class, outPath.toUri().toString())
                    .build());

    dataGenDag.addVertex(generatorVertex);
    return dataGenDag;
}
 
Example 2
Source File: TestTezClient.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Submitting a DAG must fail with {@code SessionNotRunning}, and the exception message must
 * carry the YARN diagnostics, when the mocked application report says the app was KILLED.
 */
@Test(timeout = 5000)
public void testSubmitDAGAppFailed() throws Exception {
  final TezClientForTest client = configureAndCreateTezClient();
  client.start();

  // Go through the real session AM proxy lookup so the mocked YARN report is consulted.
  client.callRealGetSessionAMProxy = true;

  final String diagnostics = "Application Test Failed";
  when(client.mockYarnClient.getApplicationReport(client.mockAppId).getYarnApplicationState())
      .thenReturn(YarnApplicationState.KILLED);
  when(client.mockYarnClient.getApplicationReport(client.mockAppId).getDiagnostics())
      .thenReturn(diagnostics);

  Vertex onlyVertex =
      Vertex.create("Vertex", ProcessorDescriptor.create("P"), 1, Resource.newInstance(1, 1));
  DAG dag = DAG.create("DAG").addVertex(onlyVertex);

  try {
    client.submitDAG(dag);
    fail();
  } catch (SessionNotRunning expected) {
    assertTrue(expected.getMessage().contains(diagnostics));
  }
  client.stop();
}
 
Example 3
Source File: TestTezClient.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Every submission of a DAG whose vertices carry conflicting GC options must be rejected with
 * a {@code TezUncheckedException}, including repeated submissions of the same DAG object.
 */
@Test(timeout = 5000)
public void testClientResubmit() throws Exception {
  // Two collectors requested at once; the assertion below expects this to be reported as
  // "Invalid/conflicting GC options found".
  final String conflictingGcOpts = "-XX:+UseParallelGC -XX:+UseG1GC";
  final int submissions = 3;

  TezClientForTest client = configureAndCreateTezClient(null, true, null);
  client.start();

  Map<String, LocalResource> lrDAG = Collections.singletonMap(
      "LR1",
      LocalResource.newInstance(
          URL.newInstance("file", "localhost", 0, "/test1"),
          LocalResourceType.FILE, LocalResourceVisibility.PUBLIC, 1, 1));

  Vertex vertex1 =
      Vertex.create("Vertex1", ProcessorDescriptor.create("P1"), 1, Resource.newInstance(1, 1));
  vertex1.setTaskLaunchCmdOpts(conflictingGcOpts);

  Vertex vertex2 =
      Vertex.create("Vertex2", ProcessorDescriptor.create("P2"), 1, Resource.newInstance(1, 1));
  vertex2.setTaskLaunchCmdOpts(conflictingGcOpts);

  DAG dag = DAG.create("DAG")
      .addVertex(vertex1)
      .addVertex(vertex2)
      .addTaskLocalFiles(lrDAG);

  for (int attempt = 1; attempt <= submissions; attempt++) {
    try {
      client.submitDAG(dag);
      Assert.fail("Expected TezUncheckedException here.");
    } catch (TezUncheckedException rejected) {
      Assert.assertTrue(rejected.getMessage().contains("Invalid/conflicting GC options found"));
    }
  }
  client.stop();
}
 
Example 4
Source File: TestAMRecovery.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the two-vertex DAG {@code v1 --> v2} used by the recovery tests.
 * <p>
 * v1 runs {@code MyProcessor} under {@code ScheduleControlledVertexManager}; v2 runs
 * {@code DoNothingProcessor} under the supplied vertex manager. Both vertex managers receive
 * the current {@code tezConf} as their user payload. Per the original description, v1's
 * manager controls task scheduling in the "partially finished" scenario and v2's manager
 * controls when the AM is killed (the managers themselves are not visible here).
 *
 * @param dagName name given to the DAG
 * @param vertexManagerClass vertex manager plugin class installed on v2
 * @param dmType data movement type of the v1 -> v2 edge
 * @param failOnParitialCompleted value written to {@code FAIL_ON_PARTIAL_FINISHED} in
 *        {@code tezConf} before the payloads are created
 * @return the DAG with both vertices and the connecting edge
 * @throws IOException declared for callers; presumably raised while serializing the
 *         configuration into a user payload
 */
private DAG createDAG(String dagName, Class<?> vertexManagerClass, DataMovementType dmType,
    boolean failOnParitialCompleted) throws IOException {
  // Must happen before the payloads below are created so that they carry the flag.
  tezConf.set(FAIL_ON_PARTIAL_FINISHED, String.valueOf(failOnParitialCompleted));

  DAG dag = DAG.create(dagName);
  UserPayload payload = UserPayload.create(null);

  Vertex v1 = Vertex.create("v1", MyProcessor.getProcDesc(), 2);
  v1.setVertexManagerPlugin(VertexManagerPluginDescriptor.create(
      ScheduleControlledVertexManager.class.getName()).setUserPayload(
      TezUtils.createUserPayloadFromConf(tezConf)));

  Vertex v2 = Vertex.create("v2", DoNothingProcessor.getProcDesc(), 2);
  v2.setVertexManagerPlugin(VertexManagerPluginDescriptor.create(
      vertexManagerClass.getName()).setUserPayload(
      TezUtils.createUserPayloadFromConf(tezConf)));

  dag.addVertex(v1).addVertex(v2);
  dag.addEdge(Edge.create(v1, v2, EdgeProperty.create(dmType,
      DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL,
      TestOutput.getOutputDesc(payload), TestInput.getInputDesc(payload))));
  return dag;
}
 
Example 5
Source File: TestSpeculation.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a single-vertex DAG with five tasks and checks the case where speculation does not
 * help: for task 0 the original attempt (attempt 0) succeeds and the second attempt
 * (attempt 1) ends with termination cause {@code TERMINATED_INEFFECTIVE_SPECULATION}.
 * Also checks that {@code NUM_SPECULATIONS} is 1 at task, vertex and DAG level.
 *
 * @throws Exception the exception
 */
@Retry
@Test (timeout=10000)
public void testBasicSpeculationNotUseful() throws Exception {
  DAG dag = DAG.create("test");
  Vertex vA = Vertex.create("A", ProcessorDescriptor.create("Proc.class"), 5);
  dag.addVertex(vA);

  MockTezClient tezClient = createTezSession();
  
  DAGClient dagClient = tezClient.submitDAG(dag);
  DAGImpl dagImpl = (DAGImpl) mockApp.getContext().getCurrentDAG();
  TezVertexID vertexId = TezVertexID.getInstance(dagImpl.getID(), 0);
  // original attempt is successful and speculative one is killed
  TezTaskAttemptID successTaId = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexId, 0), 0);
  TezTaskAttemptID killedTaId = TezTaskAttemptID.getInstance(TezTaskID.getInstance(vertexId, 0), 1);

  // Both attempts of task 0 get the same status-update setting (100).
  mockLauncher.setStatusUpdatesForTask(successTaId, 100);
  mockLauncher.setStatusUpdatesForTask(killedTaId, 100);

  mockLauncher.startScheduling(true);
  dagClient.waitForCompletion();
  Assert.assertEquals(DAGStatus.State.SUCCEEDED, dagClient.getDAGStatus(null).getState());
  Task task = dagImpl.getTask(killedTaId.getTaskID());
  Assert.assertEquals(2, task.getAttempts().size());
  Assert.assertEquals(successTaId, task.getSuccessfulAttempt().getID());
  TaskAttempt killedAttempt = task.getAttempt(killedTaId);
  // NOTE(review): the boolean returned by contains(...) is discarded, so this line verifies
  // nothing. It was presumably meant to be wrapped in Assert.assertTrue; confirm the exact
  // diagnostics text produced for the killed attempt before turning it into an assertion.
  Joiner.on(",").join(killedAttempt.getDiagnostics()).contains("Killed speculative attempt as");
  Assert.assertEquals(TaskAttemptTerminationCause.TERMINATED_INEFFECTIVE_SPECULATION, 
      killedAttempt.getTerminationCause());
  // Exactly one speculation is expected at task, DAG and vertex level.
  Assert.assertEquals(1, task.getCounters().findCounter(TaskCounter.NUM_SPECULATIONS)
      .getValue());
  Assert.assertEquals(1, dagImpl.getAllCounters().findCounter(TaskCounter.NUM_SPECULATIONS)
      .getValue());
  // Fully qualified: the runtime Vertex type, not the org.apache.tez.dag.api.Vertex used above.
  org.apache.tez.dag.app.dag.Vertex v = dagImpl.getVertex(killedTaId.getTaskID().getVertexID());
  Assert.assertEquals(1, v.getAllCounters().findCounter(TaskCounter.NUM_SPECULATIONS)
      .getValue());
  tezClient.stop();
}
 
Example 6
Source File: SimpleReverseVTestDAG.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a "reverse V" DAG: a single source vertex v1 feeding both v2 and v3 through
 * scatter-gather edges.
 *
 * @param name name given to the DAG
 * @param conf optional configuration; when non-null it supplies the task count (falling back
 *             to the default) and is serialized into the payload handed to every processor,
 *             input and output
 * @return the assembled DAG
 * @throws Exception declared for callers; presumably raised while serializing {@code conf}
 */
public static DAG createDAG(String name, 
    Configuration conf) throws Exception {
  int numTasks = TEZ_SIMPLE_REVERSE_V_DAG_NUM_TASKS_DEFAULT;
  UserPayload payload = UserPayload.create(null);
  if (conf != null) {
    numTasks = conf.getInt(TEZ_SIMPLE_REVERSE_V_DAG_NUM_TASKS, TEZ_SIMPLE_REVERSE_V_DAG_NUM_TASKS_DEFAULT);
    payload = TezUtils.createUserPayloadFromConf(conf);
  }

  DAG dag = DAG.create(name);
  Vertex v1 = Vertex.create("v1", TestProcessor.getProcDesc(payload), numTasks, defaultResource);
  Vertex v2 = Vertex.create("v2", TestProcessor.getProcDesc(payload), numTasks, defaultResource);
  Vertex v3 = Vertex.create("v3", TestProcessor.getProcDesc(payload), numTasks, defaultResource);
  dag.addVertex(v1).addVertex(v2).addVertex(v3);

  // v1 fans out to v2 and then v3; each edge gets its own freshly created descriptors.
  for (Vertex downstream : new Vertex[] {v2, v3}) {
    dag.addEdge(Edge.create(v1, downstream,
        EdgeProperty.create(
            DataMovementType.SCATTER_GATHER,
            DataSourceType.PERSISTED,
            SchedulingType.SEQUENTIAL,
            TestOutput.getOutputDesc(payload),
            TestInput.getInputDesc(payload))));
  }
  return dag;
}
 
Example 7
Source File: TestATSHistoryWithMiniCluster.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a one-task sleep DAG in a Tez session that logs history through
 * {@code ATSHistoryLoggingService} with {@code TEZ_AM_ALLOW_DISABLED_TIMELINE_DOMAINS} set to
 * true, and expects the DAG to succeed.
 */
@Test (timeout=50000)
public void testDisabledACls() throws Exception {
  TezClient tezSession = null;
  try {
    // Session configuration: ATS history logging plus a randomly named staging directory.
    TezConfiguration tezConf = new TezConfiguration(mrrTezCluster.getConfig());
    tezConf.setBoolean(TezConfiguration.TEZ_AM_ALLOW_DISABLED_TIMELINE_DOMAINS, true);
    tezConf.set(TezConfiguration.TEZ_HISTORY_LOGGING_SERVICE_CLASS,
        ATSHistoryLoggingService.class.getName());
    Path remoteStagingDir =
        remoteFs.makeQualified(new Path("/tmp", String.valueOf(random.nextInt(100000))));
    remoteFs.mkdirs(remoteStagingDir);
    tezConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, remoteStagingDir.toString());

    // DAG with a single sleeping task.
    SleepProcessorConfig spConf = new SleepProcessorConfig(1);
    ProcessorDescriptor sleeper = ProcessorDescriptor.create(SleepProcessor.class.getName())
        .setUserPayload(spConf.toUserPayload());
    DAG dag = DAG.create("TezSleepProcessor");
    dag.addVertex(Vertex.create("SleepVertex", sleeper, 1, Resource.newInstance(256, 1)));

    tezSession = TezClient.create("TezSleepProcessor", tezConf, true);
    tezSession.start();

    DAGClient dagClient = tezSession.submitDAG(dag);

    // Poll every 500 ms until the DAG reaches a terminal state.
    DAGStatus dagStatus = dagClient.getDAGStatus(null);
    for (; !dagStatus.isCompleted(); dagStatus = dagClient.getDAGStatus(null)) {
      LOG.info("Waiting for job to complete. Sleeping for 500ms. Current state: "
          + dagStatus.getState());
      Thread.sleep(500L);
    }
    Assert.assertEquals(DAGStatus.State.SUCCEEDED, dagStatus.getState());
  } finally {
    if (tezSession != null) {
      tezSession.stop();
    }
  }
}
 
Example 8
Source File: TestMemoryWithEvents.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Memory test (currently ignored) for two vertices of {@code numTasks} tasks each, connected
 * by a broadcast edge; the DAG is handed to {@code testMemory}.
 */
@Ignore
@Test (timeout = 600000)
public void testMemoryBroadcast() throws Exception {
  Vertex producer = Vertex.create("A", ProcessorDescriptor.create("Proc.class"), numTasks);
  Vertex consumer = Vertex.create("B", ProcessorDescriptor.create("Proc.class"), numTasks);

  EdgeProperty broadcast = EdgeProperty.create(
      DataMovementType.BROADCAST,
      DataSourceType.PERSISTED,
      SchedulingType.SEQUENTIAL,
      OutputDescriptor.create("Out"),
      InputDescriptor.create("In"));

  DAG dag = DAG.create("testMemoryBroadcast");
  dag.addVertex(producer)
      .addVertex(consumer)
      .addEdge(Edge.create(producer, consumer, broadcast));

  testMemory(dag, true);
}
 
Example 9
Source File: TwoLevelsFailingDAG.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the four level-1/level-2 vertex pairs and hands each pair, together with the data
 * movement type of its edge, to {@code addVerticesAndEdgeInternal}.
 */
protected static void addDAGVerticesAndEdges() {
    final DataMovementType scatterGather = DataMovementType.SCATTER_GATHER;
    final DataMovementType broadcast = DataMovementType.BROADCAST;

    // Pair 1: 1 task -> 1 task.
    l1v1 = Vertex.create("l1v1", TestProcessor.getProcDesc(payload), 1, defaultResource);
    l2v1 = Vertex.create("l2v1", TestProcessor.getProcDesc(payload), 1, defaultResource);
    addVerticesAndEdgeInternal(l1v1, l2v1, scatterGather);

    // Pair 2: 2 tasks -> 3 tasks.
    l1v2 = Vertex.create("l1v2", TestProcessor.getProcDesc(payload), 2, defaultResource);
    l2v2 = Vertex.create("l2v2", TestProcessor.getProcDesc(payload), 3, defaultResource);
    addVerticesAndEdgeInternal(l1v2, l2v2, scatterGather);

    // Pair 3: 3 tasks -> 2 tasks.
    l1v3 = Vertex.create("l1v3", TestProcessor.getProcDesc(payload), 3, defaultResource);
    l2v3 = Vertex.create("l2v3", TestProcessor.getProcDesc(payload), 2, defaultResource);
    addVerticesAndEdgeInternal(l1v3, l2v3, scatterGather);

    // Pair 4: 2 tasks -> 3 tasks, the only pair connected by a broadcast edge.
    l1v4 = Vertex.create("l1v4", TestProcessor.getProcDesc(payload), 2, defaultResource);
    l2v4 = Vertex.create("l2v4", TestProcessor.getProcDesc(payload), 3, defaultResource);
    addVerticesAndEdgeInternal(l1v4, l2v4, broadcast);
}
 
Example 10
Source File: TestTezJobs.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a Parent -> Child DAG of {@code FailingAttemptProcessor} tasks with
 * {@code TEZ_VERTEX_FAILURES_MAXPERCENT} set to "50.0f" and a single allowed attempt per task,
 * and expects the DAG to finish as SUCCEEDED.
 */
@Test(timeout = 60000)
public void testVertexFailuresMaxPercent() throws TezException, InterruptedException, IOException {

  TezConfiguration tezConf = new TezConfiguration(mrrTezCluster.getConfig());
  tezConf.set(TezConfiguration.TEZ_VERTEX_FAILURES_MAXPERCENT, "50.0f");
  tezConf.setInt(TezConfiguration.TEZ_AM_TASK_MAX_FAILED_ATTEMPTS, 1);

  TezClient tezClient = TezClient.create("TestVertexFailuresMaxPercent", tezConf);
  tezClient.start();

  try {
    DAG dag = DAG.create("TestVertexFailuresMaxPercent");

    Vertex parent = Vertex.create(
        "Parent", ProcessorDescriptor.create(FailingAttemptProcessor.class.getName()), 2);
    Vertex child = Vertex.create(
        "Child", ProcessorDescriptor.create(FailingAttemptProcessor.class.getName()), 2);

    OrderedPartitionedKVEdgeConfig edgeConfig = OrderedPartitionedKVEdgeConfig
        .newBuilder(
            Text.class.getName(), IntWritable.class.getName(), HashPartitioner.class.getName())
        .setFromConfiguration(tezConf)
        .build();

    dag.addVertex(parent)
        .addVertex(child)
        .addEdge(Edge.create(parent, child, edgeConfig.createDefaultEdgeProperty()));

    DAGClient dagClient = tezClient.submitDAG(dag);
    dagClient.waitForCompletion();
    Assert.assertEquals(DAGStatus.State.SUCCEEDED, dagClient.getDAGStatus(null).getState());
  } finally {
    tezClient.stop();
  }
}
 
Example 11
Source File: ThreeLevelsFailingDAG.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Adds a third level to the DAG built by {@code TwoLevelsFailingDAG.addDAGVerticesAndEdges()}:
 * l3v1 is fed by l2v1 and l2v2, and l3v2 is fed by l2v2 (broadcast), l2v3 and l2v4.
 */
protected static void addDAGVerticesAndEdges() {
    // The first two levels come from the two-level DAG.
    TwoLevelsFailingDAG.addDAGVerticesAndEdges();

    final DataMovementType scatterGather = DataMovementType.SCATTER_GATHER;
    final DataMovementType broadcast = DataMovementType.BROADCAST;

    // l3v1: four tasks, two scatter-gather inputs.
    l3v1 = Vertex.create("l3v1", TestProcessor.getProcDesc(payload), 4, defaultResource);
    dag.addVertex(l3v1);
    addEdge(l2v1, l3v1, scatterGather);
    addEdge(l2v2, l3v1, scatterGather);

    // l3v2: four tasks, one broadcast input followed by two scatter-gather inputs.
    l3v2 = Vertex.create("l3v2", TestProcessor.getProcDesc(payload), 4, defaultResource);
    dag.addVertex(l3v2);
    addEdge(l2v2, l3v2, broadcast);
    addEdge(l2v3, l3v2, scatterGather);
    addEdge(l2v4, l3v2, scatterGather);
}
 
Example 12
Source File: TestFaultTolerance.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Runs two 2-task vertices connected by a broadcast edge and expects the DAG to succeed.
 */
@Test (timeout=60000)
public void testBasicSuccessBroadcast() throws Exception {
  DAG dag = DAG.create("testBasicSuccessBroadcast");

  Vertex v1 =
      Vertex.create("v1", TestProcessor.getProcDesc(null), 2, SimpleTestDAG.defaultResource);
  Vertex v2 =
      Vertex.create("v2", TestProcessor.getProcDesc(null), 2, SimpleTestDAG.defaultResource);
  dag.addVertex(v1).addVertex(v2);

  EdgeProperty broadcast = EdgeProperty.create(
      DataMovementType.BROADCAST,
      DataSourceType.PERSISTED,
      SchedulingType.SEQUENTIAL,
      TestOutput.getOutputDesc(null),
      TestInput.getInputDesc(null));
  dag.addEdge(Edge.create(v1, v2, broadcast));

  runDAGAndVerify(dag, DAGStatus.State.SUCCEEDED);
}
 
Example 13
Source File: TestTezJobs.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a DAG with two unconnected vertices: one whose data source is set up by
 * {@code InputInitializerForTest}, and one running
 * {@code InputInitializerEventGeneratingProcessor} with five tasks. Expects the DAG to
 * finish as SUCCEEDED.
 */
@Test(timeout = 60000)
public void testInputInitializerEvents() throws TezException, InterruptedException, IOException {

  TezConfiguration tezConf = new TezConfiguration(mrrTezCluster.getConfig());
  TezClient tezClient = TezClient.create("TestInputInitializerEvents", tezConf);
  tezClient.start();

  try {
    DAG dag = DAG.create("TestInputInitializerEvents");

    // Vertex with the initializer: a single sleeping task reading from a no-op input.
    ProcessorDescriptor sleeper = ProcessorDescriptor.create(SleepProcessor.class.getName())
        .setUserPayload(new SleepProcessor.SleepProcessorConfig(1).toUserPayload());
    Vertex vertex1 = Vertex.create(VERTEX_WITH_INITIALIZER_NAME, sleeper, 1)
        .addDataSource(
            INPUT1_NAME,
            DataSourceDescriptor.create(
                InputDescriptor.create(MultiAttemptDAG.NoOpInput.class.getName()),
                InputInitializerDescriptor.create(InputInitializerForTest.class.getName()),
                null));

    // Vertex whose processor generates the events.
    ProcessorDescriptor eventGenerator =
        ProcessorDescriptor.create(InputInitializerEventGeneratingProcessor.class.getName());
    Vertex vertex2 = Vertex.create(EVENT_GENERATING_VERTEX_NAME, eventGenerator, 5);

    dag.addVertex(vertex1).addVertex(vertex2);

    DAGClient dagClient = tezClient.submitDAG(dag);
    dagClient.waitForCompletion();
    Assert.assertEquals(DAGStatus.State.SUCCEEDED, dagClient.getDAGStatus(null).getState());
  } finally {
    tezClient.stop();
  }
}
 
Example 14
Source File: SimpleTestDAG.java    From tez with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the six-vertex DAG below, registering the vertices in a non-topological order.
 * <pre>
 *  v1  v2
 *   \  /
 *    v3
 *   /  \
 *  v4  v5
 *   \  /
 *    v6
 * </pre>
 *
 * @param name name given to the DAG
 * @param conf optional configuration; when non-null it supplies the task count (falling back
 *             to the default) and is serialized into the payload handed to every processor,
 *             input and output
 * @return the assembled DAG
 * @throws Exception declared for callers; presumably raised while serializing {@code conf}
 */
public static DAG createDAGForVertexOrder(String name, Configuration conf) throws Exception{
  int numTasks = TEZ_SIMPLE_DAG_NUM_TASKS_DEFAULT;
  UserPayload payload = UserPayload.create(null);
  if (conf != null) {
    numTasks = conf.getInt(TEZ_SIMPLE_DAG_NUM_TASKS, TEZ_SIMPLE_DAG_NUM_TASKS_DEFAULT);
    payload = TezUtils.createUserPayloadFromConf(conf);
  }
  DAG dag = DAG.create(name);

  Vertex v1 = Vertex.create("v1", TestProcessor.getProcDesc(payload), numTasks, defaultResource);
  Vertex v2 = Vertex.create("v2", TestProcessor.getProcDesc(payload), numTasks, defaultResource);
  Vertex v3 = Vertex.create("v3", TestProcessor.getProcDesc(payload), numTasks, defaultResource);
  Vertex v4 = Vertex.create("v4", TestProcessor.getProcDesc(payload), numTasks, defaultResource);
  Vertex v5 = Vertex.create("v5", TestProcessor.getProcDesc(payload), numTasks, defaultResource);
  Vertex v6 = Vertex.create("v6", TestProcessor.getProcDesc(payload), numTasks, defaultResource);

  // add vertex not in the topological order, since we are using this dag for testing vertex topological order
  for (Vertex vertex : new Vertex[] {v4, v5, v6, v1, v2, v3}) {
    dag.addVertex(vertex);
  }

  // {source, destination} pairs, listed in the order the edges are added.
  Vertex[][] connections = {
      {v1, v3},
      {v2, v3},
      {v3, v4},
      {v3, v5},
      {v4, v6},
      {v5, v6},
  };
  for (Vertex[] connection : connections) {
    // Every edge gets its own freshly created property and descriptors.
    dag.addEdge(Edge.create(connection[0], connection[1],
        EdgeProperty.create(
            DataMovementType.SCATTER_GATHER,
            DataSourceType.PERSISTED,
            SchedulingType.SEQUENTIAL,
            TestOutput.getOutputDesc(payload),
            TestInput.getInputDesc(payload))));
  }

  return dag;
}
 
Example 15
Source File: TopK.java    From sequenceiq-samples with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the three-stage "topk" DAG: tokenizer -> sum -> writer.
 * The tokenizer reads text from {@code inputPath}, the sum vertex runs with
 * {@code numPartitions} tasks, and a single writer task writes text to {@code outputPath}.
 *
 * @param tezConf       configuration copied for the source and the sink
 * @param inputPath     location of the text input
 * @param outputPath    location of the text output
 * @param columnIndex   integer (as text) passed as payload to the tokenizer
 * @param top           integer (as text) passed as payload to the sum and writer processors
 * @param numPartitions integer (as text) used as the parallelism of the sum vertex
 * @return the assembled DAG
 * @throws IOException declared for callers; presumably raised while building payloads
 */
private DAG createDAG(TezConfiguration tezConf, String inputPath, String outputPath,
        String columnIndex, String top, String numPartitions) throws IOException {

    DataSourceDescriptor textSource = MRInput
            .createConfigBuilder(new Configuration(tezConf), TextInputFormat.class, inputPath)
            .build();
    DataSinkDescriptor textSink = MROutput
            .createConfigBuilder(new Configuration(tezConf), TextOutputFormat.class, outputPath)
            .build();

    // Stage 1: tokenizer, created without an explicit task count.
    ProcessorDescriptor tokenizer = ProcessorDescriptor.create(TokenProcessor.class.getName())
            .setUserPayload(createPayload(Integer.valueOf(columnIndex)));
    Vertex tokenizerVertex = Vertex.create(TOKENIZER, tokenizer)
            .addDataSource(INPUT, textSource);

    // Stage 2: sum, one task per requested partition.
    int topK = Integer.parseInt(top);
    ProcessorDescriptor summer = ProcessorDescriptor.create(SumProcessor.class.getName())
            .setUserPayload(createPayload(topK));
    Vertex sumVertex = Vertex.create(SUM, summer, Integer.parseInt(numPartitions));

    // Stage 3: writer. Parallelism must be 1 because the writer needs the global picture of
    // the data set; several writer tasks would each emit the top K of their own partition
    // and so produce several top-K lists.
    ProcessorDescriptor writer = ProcessorDescriptor.create(Writer.class.getName())
            .setUserPayload(createPayload(topK));
    Vertex writerVertex = Vertex.create(WRITER, writer, 1)
            .addDataSink(OUTPUT, textSink);

    OrderedPartitionedKVEdgeConfig tokenSumEdge = OrderedPartitionedKVEdgeConfig
            .newBuilder(
                    Text.class.getName(), IntWritable.class.getName(), HashPartitioner.class.getName())
            .build();
    UnorderedKVEdgeConfig sumWriterEdge = UnorderedKVEdgeConfig
            .newBuilder(IntWritable.class.getName(), Text.class.getName())
            .build();

    return DAG.create("topk")
            .addVertex(tokenizerVertex)
            .addVertex(sumVertex)
            .addVertex(writerVertex)
            .addEdge(Edge.create(tokenizerVertex, sumVertex, tokenSumEdge.createDefaultEdgeProperty()))
            .addEdge(Edge.create(sumVertex, writerVertex, sumWriterEdge.createDefaultBroadcastEdgeProperty()));
}
 
Example 16
Source File: TestMockDAGAppMaster.java    From tez with Apache License 2.0 4 votes vote down vote up
/**
 * Runs a two-vertex DAG (A with 3 tasks, B with 2 tasks) against the mock AM, has every task
 * report one unit of data size and one processed item per input/output, and verifies that the
 * per-vertex statistics equal the vertex's task count.
 */
@Test (timeout = 10000)
public void testBasicStatistics() throws Exception {
  TezConfiguration tezconf = new TezConfiguration(defaultConf);
  MockTezClient tezClient = new MockTezClient("testMockAM", tezconf, true, null, null, null,
      null, false, false);
  tezClient.start();

  final String vAName = "A";
  final String vBName = "B";
  final String sourceName = "In";
  final String sinkName = "Out";
  DAG dag = DAG.create("testBasisStatistics");
  Vertex vA = Vertex.create(vAName, ProcessorDescriptor.create("Proc.class"), 3);
  Vertex vB = Vertex.create(vBName, ProcessorDescriptor.create("Proc.class"), 2);
  // A reads from a root data source; B writes to a leaf data sink.
  vA.addDataSource(sourceName,
      DataSourceDescriptor.create(InputDescriptor.create("In"), null, null));
  vB.addDataSink(sinkName, DataSinkDescriptor.create(OutputDescriptor.create("Out"), null, null));
  dag.addVertex(vA)
      .addVertex(vB)
      .addEdge(
          Edge.create(vA, vB, EdgeProperty.create(DataMovementType.SCATTER_GATHER,
              DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL,
              OutputDescriptor.create("Out"), InputDescriptor.create("In"))));
  // One shared IO record (size 1, 1 item) registered under each input/output name of a task.
  IOStatistics ioStats = new IOStatistics();
  ioStats.setDataSize(1);
  ioStats.setItemsProcessed(1);
  TaskStatistics vAStats = new TaskStatistics();
  vAStats.addIO(vBName, ioStats);
  vAStats.addIO(sourceName, ioStats);
  TaskStatistics vBStats = new TaskStatistics();
  vBStats.addIO(vAName, ioStats);
  vBStats.addIO(sinkName, ioStats);
  // Serialize both statistics objects once; the delegate below deserializes them per task.
  ByteArrayOutputStream bosA = new ByteArrayOutputStream();
  DataOutput outA = new DataOutputStream(bosA);
  vAStats.write(outA);
  final byte[] payloadA = bosA.toByteArray();
  ByteArrayOutputStream bosB = new ByteArrayOutputStream();
  DataOutput outB = new DataOutputStream(bosB);
  vBStats.write(outB);
  final byte[] payloadB = bosB.toByteArray();
  
  MockDAGAppMaster mockApp = tezClient.getLocalClient().getMockApp();
  MockContainerLauncher mockLauncher = mockApp.getContainerLauncher();
  // Scheduling is switched off here and switched back on after the containers are launched.
  mockLauncher.startScheduling(false);
  mockApp.statsDelegate = new StatisticsDelegate() {
    @Override
    public TaskStatistics getStatistics(TaskSpec taskSpec) {
      // Tasks of vertex B get B's serialized statistics; every other task gets A's.
      byte[] payload = payloadA;
      TaskStatistics stats = new TaskStatistics();
      if (taskSpec.getVertexName().equals(vBName)) {
        payload = payloadB;
      }
      final DataInputByteBuffer in = new DataInputByteBuffer();
      in.reset(ByteBuffer.wrap(payload));
      try {
        // this ensures that the serde code path is covered.
        stats.readFields(in);
      } catch (IOException e) {
        Assert.fail(e.getMessage());
      }
      return stats;
    }
  };
  mockApp.doSleep = false;
  DAGClient dagClient = tezClient.submitDAG(dag);
  mockLauncher.waitTillContainersLaunched();
  DAGImpl dagImpl = (DAGImpl) mockApp.getContext().getCurrentDAG();
  mockLauncher.startScheduling(true);
  DAGStatus status = dagClient.waitForCompletion();
  Assert.assertEquals(DAGStatus.State.SUCCEEDED, status.getState());
  
  // verify that the values have been correct aggregated
  // (each task contributes 1, so the expected totals are 3 for vertex A and 2 for vertex B)
  for (org.apache.tez.dag.app.dag.Vertex v : dagImpl.getVertices().values()) {
    VertexStatistics vStats = v.getStatistics();
    if (v.getName().equals(vAName)) {
      Assert.assertEquals(3, vStats.getOutputStatistics(vBName).getDataSize());
      Assert.assertEquals(3, vStats.getInputStatistics(sourceName).getDataSize());
      Assert.assertEquals(3, vStats.getOutputStatistics(vBName).getItemsProcessed());
      Assert.assertEquals(3, vStats.getInputStatistics(sourceName).getItemsProcessed());
    } else {
      Assert.assertEquals(2, vStats.getInputStatistics(vAName).getDataSize());
      Assert.assertEquals(2, vStats.getOutputStatistics(sinkName).getDataSize());
      Assert.assertEquals(2, vStats.getInputStatistics(vAName).getItemsProcessed());
      Assert.assertEquals(2, vStats.getOutputStatistics(sinkName).getItemsProcessed());
    }
  }
  
  tezClient.stop();
}
 
Example 17
Source File: TestMockDAGAppMaster.java    From tez with Apache License 2.0 4 votes vote down vote up
/**
 * Runs a two-vertex DAG (A with 10 tasks, B with 1 task) against the mock AM, has every task
 * report a fixed set of counters, and verifies the aggregated processor, edge and global
 * counter values as well as the sharing of counter/group name {@code String} instances
 * between the two vertices.
 */
@Test (timeout = 10000)
public void testBasicCounters() throws Exception {
  TezConfiguration tezconf = new TezConfiguration(defaultConf);
  MockTezClient tezClient = new MockTezClient("testMockAM", tezconf, true, null, null, null,
      null, false, false);
  tezClient.start();

  final String vAName = "A";
  final String vBName = "B";
  final String procCounterName = "Proc";
  final String globalCounterName = "Global";
  DAG dag = DAG.create("testBasicCounters");
  Vertex vA = Vertex.create(vAName, ProcessorDescriptor.create("Proc.class"), 10);
  Vertex vB = Vertex.create(vBName, ProcessorDescriptor.create("Proc.class"), 1);
  dag.addVertex(vA)
      .addVertex(vB)
      .addEdge(
          Edge.create(vA, vB, EdgeProperty.create(DataMovementType.SCATTER_GATHER,
              DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL,
              OutputDescriptor.create("Out"), InputDescriptor.create("In"))));
  TezCounters temp = new TezCounters();
  // new String(...) deliberately creates distinct String instances, so the identity checks
  // at the end of the test can only pass if the names were interned along the way.
  temp.findCounter(new String(globalCounterName), new String(globalCounterName)).increment(1);
  ByteArrayOutputStream bos = new ByteArrayOutputStream();
  DataOutput out = new DataOutputStream(bos);
  temp.write(out);
  final byte[] payload = bos.toByteArray();

  MockDAGAppMaster mockApp = tezClient.getLocalClient().getMockApp();
  MockContainerLauncher mockLauncher = mockApp.getContainerLauncher();
  // Scheduling is switched off here and switched back on after the containers are launched.
  mockLauncher.startScheduling(false);
  mockApp.countersDelegate = new CountersDelegate() {
    @Override
    public TezCounters getCounters(TaskSpec taskSpec) {
      String vName = taskSpec.getVertexName();
      TezCounters counters = new TezCounters();
      final DataInputByteBuffer in  = new DataInputByteBuffer();
      in.reset(ByteBuffer.wrap(payload));
      try {
        // this ensures that the serde code path is covered.
        // the internal merges of counters covers the constructor code path.
        counters.readFields(in);
      } catch (IOException e) {
        Assert.fail(e.getMessage());
      }
      // One processor counter per task, plus one counter per output and per input edge.
      counters.findCounter(vName, procCounterName).increment(1);
      for (OutputSpec output : taskSpec.getOutputs()) {
        counters.findCounter(vName, output.getDestinationVertexName()).increment(1);
      }
      for (InputSpec input : taskSpec.getInputs()) {
        counters.findCounter(vName, input.getSourceVertexName()).increment(1);
      }
      return counters;
    }
  };
  mockApp.doSleep = false;
  DAGClient dagClient = tezClient.submitDAG(dag);
  mockLauncher.waitTillContainersLaunched();
  DAGImpl dagImpl = (DAGImpl) mockApp.getContext().getCurrentDAG();
  mockLauncher.startScheduling(true);
  DAGStatus status = dagClient.waitForCompletion();
  Assert.assertEquals(DAGStatus.State.SUCCEEDED, status.getState());
  TezCounters counters = dagImpl.getAllCounters();

  // The AM CPU counter is only checked on Linux.
  if (SystemUtils.IS_OS_LINUX) {
    Assert.assertTrue(counters.findCounter(DAGCounter.AM_CPU_MILLISECONDS).getValue() > 0);
  }

  // verify processor counters
  Assert.assertEquals(10, counters.findCounter(vAName, procCounterName).getValue());
  Assert.assertEquals(1, counters.findCounter(vBName, procCounterName).getValue());
  // verify edge counters
  Assert.assertEquals(10, counters.findCounter(vAName, vBName).getValue());
  Assert.assertEquals(1, counters.findCounter(vBName, vAName).getValue());
  // verify global counters (10 tasks of A + 1 task of B, one increment each)
  Assert.assertEquals(11, counters.findCounter(globalCounterName, globalCounterName).getValue());
  VertexImpl vAImpl = (VertexImpl) dagImpl.getVertex(vAName);
  VertexImpl vBImpl = (VertexImpl) dagImpl.getVertex(vBName);
  TezCounters vACounters = vAImpl.getAllCounters();
  TezCounters vBCounters = vBImpl.getAllCounters();
  String vACounterName = vACounters.findCounter(globalCounterName, globalCounterName).getName();
  String vBCounterName = vBCounters.findCounter(globalCounterName, globalCounterName).getName();
  // Reference comparison is intentional: both vertices must share the same String instance.
  if (vACounterName != vBCounterName) {
    Assert.fail("String counter name objects dont match despite interning.");
  }
  CounterGroup vAGroup = vACounters.getGroup(globalCounterName);
  String vAGroupName = vAGroup.getName();
  CounterGroup vBGroup = vBCounters.getGroup(globalCounterName);
  String vBGroupName = vBGroup.getName();
  // Reference comparison is intentional here as well.
  if (vAGroupName != vBGroupName) {
    Assert.fail("String group name objects dont match despite interning.");
  }
  
  tezClient.stop();
}
 
Example 18
Source File: TestMockDAGAppMaster.java    From tez with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies the min/max aggregation of per-task counters at vertex level.
 *
 * <p>Submits a DAG with vertex A (parallelism 10) feeding vertex B (parallelism 1) to the mock
 * application master. The installed {@code countersDelegate} hands out a strictly increasing
 * value for every counter it sets, and the test then checks the {@code getMin()} and
 * {@code getMax()} values exposed by the {@link AggregateTezCounterDelegate} counters of
 * each vertex.
 *
 * @throws Exception if the client, the DAG submission or the counter serialization fails
 */
@Test
public void testCountersAggregation() throws Exception {
  TezConfiguration tezconf = new TezConfiguration(defaultConf);
  MockTezClient tezClient = new MockTezClient("testMockAM", tezconf, true, null, null, null,
                                              null, false, false);
  tezClient.start();

  final String vAName = "A";
  final String vBName = "B";
  final String procCounterName = "Proc";
  final String globalCounterName = "Global";
  // DAG under test: A (10 tasks) --scatter-gather--> B (1 task).
  DAG dag = DAG.create("testCountersAggregation");
  Vertex vA = Vertex.create(vAName, ProcessorDescriptor.create("Proc.class"), 10);
  Vertex vB = Vertex.create(vBName, ProcessorDescriptor.create("Proc.class"), 1);
  dag.addVertex(vA)
      .addVertex(vB)
      .addEdge(
          Edge.create(vA, vB, EdgeProperty.create(DataMovementType.SCATTER_GATHER,
                                                  DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL,
                                                  OutputDescriptor.create("Out"), InputDescriptor.create("In"))));
  // Serialize a counters object holding a single Global/Global counter (value 1); the bytes are
  // replayed into every task's counters by the delegate below.
  // NOTE(review): new String(...) yields fresh, non-literal String instances for group and
  // counter name; presumably this is meant to exercise name interning - confirm.
  TezCounters temp = new TezCounters();
  temp.findCounter(new String(globalCounterName), new String(globalCounterName)).increment(1);
  ByteArrayOutputStream bos = new ByteArrayOutputStream();
  DataOutput out = new DataOutputStream(bos);
  temp.write(out);
  final byte[] payload = bos.toByteArray();

  MockDAGAppMaster mockApp = tezClient.getLocalClient().getMockApp();
  MockContainerLauncher mockLauncher = mockApp.getContainerLauncher();
  // NOTE(review): presumably holds task execution back until startScheduling(true) below - confirm.
  mockLauncher.startScheduling(false);
  mockApp.countersDelegate = new CountersDelegate() {
    // Shared across all getCounters() calls, so every counter set below receives a unique,
    // strictly increasing value.
    int counterValue = 0;
    @Override
    public TezCounters getCounters(TaskSpec taskSpec) {
      String vName = taskSpec.getVertexName();
      TezCounters counters = new TezCounters();
      final DataInputByteBuffer in  = new DataInputByteBuffer();
      in.reset(ByteBuffer.wrap(payload));
      try {
        // this ensures that the serde code path is covered.
        // the internal merges of counters covers the constructor code path.
        counters.readFields(in);
      } catch (IOException e) {
        Assert.fail(e.getMessage());
      }
      // Processor counter: group = vertex name, counter = "Proc".
      counters.findCounter(vName, procCounterName).setValue(++counterValue);
      // Edge counters: group = this vertex, counter = name of the vertex at the other end.
      for (OutputSpec output : taskSpec.getOutputs()) {
        counters.findCounter(vName, output.getDestinationVertexName()).setValue(++counterValue);
      }
      for (InputSpec input : taskSpec.getInputs()) {
        counters.findCounter(vName, input.getSourceVertexName()).setValue(++counterValue);
      }
      return counters;
    }
  };
  mockApp.doSleep = false;
  DAGClient dagClient = tezClient.submitDAG(dag);
  mockLauncher.waitTillContainersLaunched();
  DAGImpl dagImpl = (DAGImpl) mockApp.getContext().getCurrentDAG();
  mockLauncher.startScheduling(true);
  DAGStatus status = dagClient.waitForCompletion();
  Assert.assertEquals(DAGStatus.State.SUCCEEDED, status.getState());
  // NOTE(review): 'counters' is never read below; it is unclear from here whether the
  // getAllCounters() call itself is needed - confirm before removing.
  TezCounters counters = dagImpl.getAllCounters();

  // verify processor counters
  VertexImpl vAImpl = (VertexImpl) dagImpl.getVertex(vAName);
  VertexImpl vBImpl = (VertexImpl) dagImpl.getVertex(vBName);
  TezCounters vACounters = vAImpl.getAllCounters();
  TezCounters vBCounters = vBImpl.getAllCounters();

  // The delegate assigns 1, 2, 3, ... in call order. Assuming it is invoked exactly once per
  // task and all ten A tasks are served before the single B task (TODO confirm), A's tasks get
  // Proc = 1, 3, ..., 19 and A->B = 2, 4, ..., 20, while B's task gets Proc = 21 and B->A = 22.
  Assert.assertEquals(19, ((AggregateTezCounterDelegate)vACounters.findCounter(vAName, procCounterName)).getMax());
  Assert.assertEquals(1, ((AggregateTezCounterDelegate)vACounters.findCounter(vAName, procCounterName)).getMin());
  Assert.assertEquals(20, ((AggregateTezCounterDelegate)vACounters.findCounter(vAName, vBName)).getMax());
  Assert.assertEquals(2, ((AggregateTezCounterDelegate)vACounters.findCounter(vAName, vBName)).getMin());

  // B has a single task, so min and max coincide.
  Assert.assertEquals(21, ((AggregateTezCounterDelegate)vBCounters.findCounter(vBName, procCounterName)).getMin());
  Assert.assertEquals(21, ((AggregateTezCounterDelegate)vBCounters.findCounter(vBName, procCounterName)).getMax());
  Assert.assertEquals(22, ((AggregateTezCounterDelegate)vBCounters.findCounter(vBName, vAName)).getMin());
  Assert.assertEquals(22, ((AggregateTezCounterDelegate)vBCounters.findCounter(vBName, vAName)).getMax());

  tezClient.stop();
}
 
Example 19
Source File: UnionExample.java    From tez with Apache License 2.0 4 votes vote down vote up
/**
 * Assembles the "UnionExample" DAG.
 *
 * <p>Three map vertices ("map1", "map2", "map3") read the same text input. "map1" and "map2"
 * are combined into the vertex group "union"; the group and "map3" both feed the single-task
 * "checker" vertex. Output is written through three data sinks: "union" and "all-parts" on the
 * checker vertex, and "parts" on the vertex group.
 *
 * @param fs not used by this method
 * @param tezConf base configuration copied for every input/output configuration
 * @param localResources not used by this method
 * @param stagingDir not used by this method
 * @param inputPath input directory handed to the MR input
 * @param outputPath output location; "-all-parts" and "-parts" suffixed variants are used for
 *        the two additional sinks
 * @return the fully wired DAG
 * @throws IOException declared by the signature for failures while building the descriptors
 */
private DAG createDAG(FileSystem fs, TezConfiguration tezConf,
    Map<String, LocalResource> localResources, Path stagingDir,
    String inputPath, String outputPath) throws IOException {
  DAG dag = DAG.create("UnionExample");

  // Old-API (mapred) text input; the resulting descriptor is shared by all three map vertices.
  Configuration mrInputConf = new Configuration(tezConf);
  mrInputConf.setBoolean("mapred.mapper.new-api", false);
  mrInputConf.set("mapred.input.format.class", TextInputFormat.class.getName());
  mrInputConf.set(FileInputFormat.INPUT_DIR, inputPath);
  DataSourceDescriptor sharedInput =
      MRInput.createConfigBuilder(mrInputConf, null).generateSplitsInAM(false).build();

  // NOTE(review): -1 presumably leaves the map parallelism to be decided at runtime - confirm.
  final int mapParallelism = -1;
  final String tokenProcessorClass = TokenProcessor.class.getName();

  Vertex mapVertex1 =
      Vertex.create("map1", ProcessorDescriptor.create(tokenProcessorClass), mapParallelism)
          .addDataSource("MRInput", sharedInput);
  Vertex mapVertex2 =
      Vertex.create("map2", ProcessorDescriptor.create(tokenProcessorClass), mapParallelism)
          .addDataSource("MRInput", sharedInput);
  Vertex mapVertex3 =
      Vertex.create("map3", ProcessorDescriptor.create(tokenProcessorClass), mapParallelism)
          .addDataSource("MRInput", sharedInput);

  Vertex checkerVertex =
      Vertex.create("checker", ProcessorDescriptor.create(UnionProcessor.class.getName()), 1);

  // Sink "union": old-API (mapred) text output written directly to outputPath.
  Configuration unionSinkConf = new Configuration(tezConf);
  unionSinkConf.setBoolean("mapred.reducer.new-api", false);
  unionSinkConf.set("mapred.output.format.class", TextOutputFormat.class.getName());
  unionSinkConf.set(FileOutputFormat.OUTDIR, outputPath);
  DataSinkDescriptor unionSink = MROutput.createConfigBuilder(unionSinkConf, null).build();
  checkerVertex.addDataSink("union", unionSink);

  // Sink "all-parts": second output of the checker vertex.
  Configuration allPartsSinkConf = new Configuration(tezConf);
  DataSinkDescriptor allPartsSink = MROutput
      .createConfigBuilder(allPartsSinkConf, TextOutputFormat.class, outputPath + "-all-parts")
      .build();
  checkerVertex.addDataSink("all-parts", allPartsSink);

  // Sink "parts": attached to the vertex group formed by map1 and map2.
  Configuration partsSinkConf = new Configuration(tezConf);
  DataSinkDescriptor partsSink = MROutput
      .createConfigBuilder(partsSinkConf, TextOutputFormat.class, outputPath + "-parts")
      .build();
  VertexGroup unionGroup = dag.createVertexGroup("union", mapVertex1, mapVertex2);
  unionGroup.addDataSink("parts", partsSink);

  OrderedPartitionedKVEdgeConfig shuffleConf = OrderedPartitionedKVEdgeConfig
      .newBuilder(Text.class.getName(), IntWritable.class.getName(),
          HashPartitioner.class.getName())
      .build();

  // Register the vertices first, then the plain edge, then the group edge.
  dag.addVertex(mapVertex1);
  dag.addVertex(mapVertex2);
  dag.addVertex(mapVertex3);
  dag.addVertex(checkerVertex);

  Edge map3ToChecker =
      Edge.create(mapVertex3, checkerVertex, shuffleConf.createDefaultEdgeProperty());
  dag.addEdge(map3ToChecker);

  GroupInputEdge unionToChecker = GroupInputEdge.create(
      unionGroup, checkerVertex, shuffleConf.createDefaultEdgeProperty(),
      InputDescriptor.create(ConcatenatedMergedKeyValuesInput.class.getName()));
  dag.addEdge(unionToChecker);

  return dag;
}
 
Example 20
Source File: JoinValidate.java    From tez with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the join-validation DAG: two input vertices, one per side of the join, both feeding
 * a single "joinvalidate" vertex over ordered, partitioned key-value edges.
 *
 * @param tezConf base configuration; copied for each input and applied to the edge config
 * @param lhs path read by the left-hand-side input vertex
 * @param rhs path read by the right-hand-side input vertex
 * @param numPartitions parallelism of the "joinvalidate" vertex
 * @return the fully wired DAG
 * @throws IOException declared by the signature for failures while building the descriptors
 */
@VisibleForTesting
DAG createDag(TezConfiguration tezConf, Path lhs, Path rhs, int numPartitions)
    throws IOException {
  DAG dag = DAG.create(getDagName());
  if (getDefaultExecutionContext() != null) {
    dag.setExecutionContext(getDefaultExecutionContext());
  }

  // Intermediate-output configuration used by both input vertices. Ideally only selected keys
  // of the underlying conf would be applied here; revisit once there is a better mechanism to
  // configure the IOs. setFromConfiguration is optional and lets command line parameters
  // override the config options.
  OrderedPartitionedKVEdgeConfig shuffleConf = OrderedPartitionedKVEdgeConfig
      .newBuilder(
          Text.class.getName(), NullWritable.class.getName(), HashPartitioner.class.getName())
      .setFromConfiguration(tezConf)
      .build();

  final String forwardingProcessorClass = ForwardingProcessor.class.getName();

  // Left-hand side of the join.
  Vertex lhsVertex = Vertex
      .create(LHS_INPUT_NAME, ProcessorDescriptor.create(forwardingProcessorClass))
      .addDataSource(
          "lhs",
          MRInput
              .createConfigBuilder(
                  new Configuration(tezConf), TextInputFormat.class, lhs.toUri().toString())
              .groupSplits(!isDisableSplitGrouping())
              .generateSplitsInAM(!isGenerateSplitInClient())
              .build());
  setVertexExecutionContext(lhsVertex, getLhsExecutionContext());

  // Right-hand side of the join.
  Vertex rhsVertex = Vertex
      .create(RHS_INPUT_NAME, ProcessorDescriptor.create(forwardingProcessorClass))
      .addDataSource(
          "rhs",
          MRInput
              .createConfigBuilder(
                  new Configuration(tezConf), TextInputFormat.class, rhs.toUri().toString())
              .groupSplits(!isDisableSplitGrouping())
              .generateSplitsInAM(!isGenerateSplitInClient())
              .build());
  setVertexExecutionContext(rhsVertex, getRhsExecutionContext());

  // Consumer of both sides.
  Vertex joinValidateVertex = Vertex.create(
      "joinvalidate",
      ProcessorDescriptor.create(JoinValidateProcessor.class.getName()),
      numPartitions);
  setVertexExecutionContext(joinValidateVertex, getValidateExecutionContext());

  Edge lhsEdge =
      Edge.create(lhsVertex, joinValidateVertex, shuffleConf.createDefaultEdgeProperty());
  Edge rhsEdge =
      Edge.create(rhsVertex, joinValidateVertex, shuffleConf.createDefaultEdgeProperty());

  dag.addVertex(lhsVertex);
  dag.addVertex(rhsVertex);
  dag.addVertex(joinValidateVertex);
  dag.addEdge(lhsEdge);
  dag.addEdge(rhsEdge);
  return dag;
}