org.apache.tez.common.counters.DAGCounter Java Examples

The following examples show how to use org.apache.tez.common.counters.DAGCounter. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TaskAttemptImpl.java    From incubator-tez with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the DAG counter update event sent when a task attempt terminates.
 *
 * <p>A {@code FAILED} attempt bumps {@code NUM_FAILED_TASKS} by one and a {@code KILLED}
 * attempt bumps {@code NUM_KILLED_TASKS} by one; any other state yields an event with no
 * counter updates.
 *
 * @param taskAttempt the attempt whose DAG id the event is addressed to
 * @param taskAlreadyCompleted currently unused; it only guarded the disabled
 *     SLOTS_MILLIS_TASKS accounting (see the note in the body)
 * @param taState the internal terminal state of the attempt
 * @return the counter update event for the attempt's DAG
 */
private static DAGEventCounterUpdate createJobCounterUpdateEventTATerminated(
      TaskAttemptImpl taskAttempt, boolean taskAlreadyCompleted,
      TaskAttemptStateInternal taState) {
    final DAGEventCounterUpdate counterUpdate =
        new DAGEventCounterUpdate(taskAttempt.getDAGID());

    // Pick the counter that matches the terminal state, if any.
    DAGCounter terminalCounter = null;
    if (taState == TaskAttemptStateInternal.FAILED) {
      terminalCounter = DAGCounter.NUM_FAILED_TASKS;
    } else if (taState == TaskAttemptStateInternal.KILLED) {
      terminalCounter = DAGCounter.NUM_KILLED_TASKS;
    }
    if (terminalCounter != null) {
      counterUpdate.addCounterUpdate(terminalCounter, 1);
    }

    // NOTE: slot-millis accounting is disabled. It used to add
    // computeSlotMillis(taskAttempt) to DAGCounter.SLOTS_MILLIS_TASKS, but only when
    // !taskAlreadyCompleted so the elapsed time was not double counted.

    return counterUpdate;
  }
 
Example #2
Source File: TestAMRecovery.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Fine-grained task-level recovery over a BROADCAST edge. In a vertex (v1), task 0 is
 * done and task 1 is not started when the history flush happens and the AM dies. Once
 * the AM is recovered, task 0 is not re-run and task 1 is re-run.
 *
 * @throws Exception if running the DAG or reading the recovery logs fails
 */
@Test(timeout = 120000)
public void testVertexPartiallyFinished_Broadcast() throws Exception {
  DAG dag =
      createDAG("VertexPartiallyFinished_Broadcast", ControlledImmediateStartVertexManager.class,
          DataMovementType.BROADCAST, true);
  TezCounters counters = runDAGAndVerify(dag, DAGStatus.State.SUCCEEDED);
  assertEquals(4, counters.findCounter(DAGCounter.NUM_SUCCEEDED_TASKS).getValue());
  assertEquals(2, counters.findCounter(TestCounter.Counter_1).getValue());

  List<HistoryEvent> historyEvents1 = readRecoveryLog(1);
  List<HistoryEvent> historyEvents2 = readRecoveryLog(2);
  printHistoryEvents(historyEvents1, 1);
  // Print the attempt-2 log under the attempt-2 label; the attempt-1 events used to be
  // printed a second time here.
  printHistoryEvents(historyEvents2, 2);
  // task_0 of v1 is finished in attempt 1, task_1 of v1 is not finished in
  // attempt 1
  assertEquals(1, findTaskAttemptFinishedEvent(historyEvents1, 0, 0).size());
  assertEquals(0, findTaskAttemptFinishedEvent(historyEvents1, 0, 1).size());

  // task_0 of v1 is finished in attempt 1 and not rerun, task_1 of v1 is
  // finished in attempt 2
  assertEquals(1, findTaskAttemptFinishedEvent(historyEvents2, 0, 0).size());
  assertEquals(1, findTaskAttemptFinishedEvent(historyEvents2, 0, 1).size());
}
 
Example #3
Source File: TestAMRecovery.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Fine-grained task-level recovery over a BROADCAST edge. In a vertex (v1), task 0 and
 * task 1 are both done when the history flush happens and the AM dies. Once the AM is
 * recovered, neither task 0 nor task 1 is re-run.
 *
 * @throws Exception if running the DAG or reading the recovery logs fails
 */
@Test(timeout = 120000)
public void testVertexCompletelyFinished_Broadcast() throws Exception {
  DAG dag =
      createDAG("VertexCompletelyFinished_Broadcast", ControlledImmediateStartVertexManager.class,
          DataMovementType.BROADCAST, false);
  TezCounters counters = runDAGAndVerify(dag, DAGStatus.State.SUCCEEDED);

  assertEquals(4, counters.findCounter(DAGCounter.NUM_SUCCEEDED_TASKS).getValue());
  assertEquals(2, counters.findCounter(TestCounter.Counter_1).getValue());

  List<HistoryEvent> historyEvents1 = readRecoveryLog(1);
  List<HistoryEvent> historyEvents2 = readRecoveryLog(2);
  printHistoryEvents(historyEvents1, 1);
  // Print the attempt-2 log under the attempt-2 label; the attempt-1 events used to be
  // printed a second time here.
  printHistoryEvents(historyEvents2, 2);
  // both task_0 and task_1 of v1 are finished in attempt 1
  assertEquals(1, findTaskAttemptFinishedEvent(historyEvents1, 0, 0).size());
  assertEquals(1, findTaskAttemptFinishedEvent(historyEvents1, 0, 1).size());

  // neither task is rerun: the attempt-2 log still holds exactly one finished
  // event for each of task_0 and task_1
  assertEquals(1, findTaskAttemptFinishedEvent(historyEvents2, 0, 0).size());
  assertEquals(1, findTaskAttemptFinishedEvent(historyEvents2, 0, 1).size());
}
 
Example #4
Source File: TestAMRecovery.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Fine-grained task-level recovery over a ONE_TO_ONE edge. In a vertex (v1), task 0 is
 * done and task 1 is not started when the history flush happens and the AM dies. Once
 * the AM is recovered, task 0 is not re-run and task 1 is re-run.
 *
 * @throws Exception if running the DAG or reading the recovery logs fails
 */
@Test(timeout = 120000)
public void testVertexPartialFinished_One2One() throws Exception {
  DAG dag =
      createDAG("VertexPartialFinished_One2One", ControlledInputReadyVertexManager.class,
          DataMovementType.ONE_TO_ONE, true);
  TezCounters counters = runDAGAndVerify(dag, DAGStatus.State.SUCCEEDED);
  assertEquals(4, counters.findCounter(DAGCounter.NUM_SUCCEEDED_TASKS).getValue());
  assertEquals(2, counters.findCounter(TestCounter.Counter_1).getValue());

  List<HistoryEvent> historyEvents1 = readRecoveryLog(1);
  List<HistoryEvent> historyEvents2 = readRecoveryLog(2);
  printHistoryEvents(historyEvents1, 1);
  // Print the attempt-2 log under the attempt-2 label; the attempt-1 events used to be
  // printed a second time here.
  printHistoryEvents(historyEvents2, 2);
  // task_0 of v1 is finished in attempt 1, task_1 of v1 is not finished in
  // attempt 1
  assertEquals(1, findTaskAttemptFinishedEvent(historyEvents1, 0, 0).size());
  assertEquals(0, findTaskAttemptFinishedEvent(historyEvents1, 0, 1).size());

  // task_0 of v1 is finished in attempt 1 and not rerun, task_1 of v1 is
  // finished in attempt 2
  assertEquals(1, findTaskAttemptFinishedEvent(historyEvents2, 0, 0).size());
  assertEquals(1, findTaskAttemptFinishedEvent(historyEvents2, 0, 1).size());

}
 
Example #5
Source File: TestAMRecovery.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Fine-grained task-level recovery over a ONE_TO_ONE edge. In a vertex (v1), task 0 and
 * task 1 are both done when the history flush happens and the AM dies. Once the AM is
 * recovered, neither task 0 nor task 1 is re-run.
 *
 * @throws Exception if running the DAG or reading the recovery logs fails
 */
@Test(timeout = 120000)
public void testVertexCompletelyFinished_One2One() throws Exception {
  DAG dag =
      createDAG("VertexCompletelyFinished_One2One", ControlledInputReadyVertexManager.class,
          DataMovementType.ONE_TO_ONE, false);
  TezCounters counters = runDAGAndVerify(dag, DAGStatus.State.SUCCEEDED);
  assertEquals(4, counters.findCounter(DAGCounter.NUM_SUCCEEDED_TASKS).getValue());
  assertEquals(2, counters.findCounter(TestCounter.Counter_1).getValue());

  List<HistoryEvent> historyEvents1 = readRecoveryLog(1);
  List<HistoryEvent> historyEvents2 = readRecoveryLog(2);
  printHistoryEvents(historyEvents1, 1);
  // Print the attempt-2 log under the attempt-2 label; the attempt-1 events used to be
  // printed a second time here.
  printHistoryEvents(historyEvents2, 2);
  // both task_0 and task_1 of v1 are finished in attempt 1
  assertEquals(1, findTaskAttemptFinishedEvent(historyEvents1, 0, 0).size());
  assertEquals(1, findTaskAttemptFinishedEvent(historyEvents1, 0, 1).size());

  // neither task is rerun: the attempt-2 log still holds exactly one finished
  // event for each of task_0 and task_1
  assertEquals(1, findTaskAttemptFinishedEvent(historyEvents2, 0, 0).size());
  assertEquals(1, findTaskAttemptFinishedEvent(historyEvents2, 0, 1).size());

}
 
Example #6
Source File: TestAMRecovery.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Fine-grained task-level recovery over a SCATTER_GATHER edge. In a vertex (v1), task 0
 * is done and task 1 is not started when the history flush happens and the AM dies.
 * Once the AM is recovered, task 0 is not re-run and task 1 is re-run.
 *
 * @throws Exception if running the DAG or reading the recovery logs fails
 */
@Test(timeout = 120000)
public void testVertexPartiallyFinished_ScatterGather() throws Exception {
  DAG dag =
      createDAG("VertexPartiallyFinished_ScatterGather", ControlledShuffleVertexManager.class,
          DataMovementType.SCATTER_GATHER, true);
  TezCounters counters = runDAGAndVerify(dag, DAGStatus.State.SUCCEEDED);
  assertEquals(4, counters.findCounter(DAGCounter.NUM_SUCCEEDED_TASKS).getValue());
  assertEquals(2, counters.findCounter(TestCounter.Counter_1).getValue());

  List<HistoryEvent> historyEvents1 = readRecoveryLog(1);
  List<HistoryEvent> historyEvents2 = readRecoveryLog(2);
  printHistoryEvents(historyEvents1, 1);
  // Print the attempt-2 log under the attempt-2 label; the attempt-1 events used to be
  // printed a second time here.
  printHistoryEvents(historyEvents2, 2);
  // task_0 of v1 is finished in attempt 1, task_1 of v1 is not finished in
  // attempt 1
  assertEquals(1, findTaskAttemptFinishedEvent(historyEvents1, 0, 0).size());
  assertEquals(0, findTaskAttemptFinishedEvent(historyEvents1, 0, 1).size());

  // task_0 of v1 is finished in attempt 1 and not rerun, task_1 of v1 is
  // finished in attempt 2
  assertEquals(1, findTaskAttemptFinishedEvent(historyEvents2, 0, 0).size());
  assertEquals(1, findTaskAttemptFinishedEvent(historyEvents2, 0, 1).size());

}
 
Example #7
Source File: TaskAttemptInfo.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the locality counter recorded for this task attempt.
 *
 * <p>The data-local, rack-local and other-local counters of the {@code DAGCounter} group
 * are all looked up; the first non-empty lookup (in that order) supplies the result.
 *
 * @return the counter stored under the {@code DAGCounter} class name in the first
 *     non-empty lookup, or {@code null} when all three lookups are empty
 */
public final TezCounter getLocalityInfo() {
  final String counterGroup = DAGCounter.class.getName();

  // All three lookups are performed up front, before any of them is inspected.
  final Map<String, TezCounter> dataLocal =
      getCounter(counterGroup, DAGCounter.DATA_LOCAL_TASKS.toString());
  final Map<String, TezCounter> rackLocal =
      getCounter(counterGroup, DAGCounter.RACK_LOCAL_TASKS.toString());
  final Map<String, TezCounter> otherLocal =
      getCounter(counterGroup, DAGCounter.OTHER_LOCAL_TASKS.toString());

  // Preference order: data-local, then rack-local, then other-local.
  Map<String, TezCounter> chosen = null;
  if (!dataLocal.isEmpty()) {
    chosen = dataLocal;
  } else if (!rackLocal.isEmpty()) {
    chosen = rackLocal;
  } else if (!otherLocal.isEmpty()) {
    chosen = otherLocal;
  }

  return chosen == null ? null : chosen.get(counterGroup);
}
 
Example #8
Source File: TaskAttemptImpl.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the DAG counter update event sent when a task attempt finishes.
 *
 * <p>The event carries a +1 on the counter matching the final state (failed, killed or
 * succeeded; nothing for any other state), followed by the attempt's duration, converted
 * from nanoseconds to milliseconds, on {@code WALL_CLOCK_MILLIS}.
 *
 * @param taskAttempt the finished attempt; supplies the DAG id and the duration
 * @param taState the final state of the attempt
 * @return the counter update event for the attempt's DAG
 */
private static DAGEventCounterUpdate createDAGCounterUpdateEventTAFinished(
    TaskAttemptImpl taskAttempt, TaskAttemptState taState) {
  final DAGEventCounterUpdate counterUpdate = new DAGEventCounterUpdate(taskAttempt.getDAGID());

  // Map the final state to its outcome counter, if it has one.
  DAGCounter outcomeCounter = null;
  if (taState == TaskAttemptState.FAILED) {
    outcomeCounter = DAGCounter.NUM_FAILED_TASKS;
  } else if (taState == TaskAttemptState.KILLED) {
    outcomeCounter = DAGCounter.NUM_KILLED_TASKS;
  } else if (taState == TaskAttemptState.SUCCEEDED) {
    outcomeCounter = DAGCounter.NUM_SUCCEEDED_TASKS;
  }
  if (outcomeCounter != null) {
    counterUpdate.addCounterUpdate(outcomeCounter, 1);
  }

  // The wall-clock contribution is recorded regardless of the outcome.
  final long durationNs = taskAttempt.getDurationNs();
  counterUpdate.addCounterUpdate(
      DAGCounter.WALL_CLOCK_MILLIS, TimeUnit.NANOSECONDS.toMillis(durationNs));

  return counterUpdate;
}
 
Example #9
Source File: TaskAttemptImpl.java    From incubator-tez with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the DAG counter update event that records one more launched task.
 *
 * @param ta the launched task attempt; supplies the DAG id
 * @return an event adding 1 to {@code TOTAL_LAUNCHED_TASKS}
 */
private static DAGEventCounterUpdate createJobCounterUpdateEventTALaunched(
    TaskAttemptImpl ta) {
  final DAGEventCounterUpdate launchedUpdate = new DAGEventCounterUpdate(ta.getDAGID());
  launchedUpdate.addCounterUpdate(DAGCounter.TOTAL_LAUNCHED_TASKS, 1);
  return launchedUpdate;
}
 
Example #10
Source File: TestAMRecovery.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Fine-grained task-level recovery over a SCATTER_GATHER edge. In a vertex (v1), task 0
 * and task 1 are both done when the history flush happens and the AM dies. Once the AM
 * is recovered, neither task 0 nor task 1 is re-run.
 *
 * @throws Exception if running the DAG or reading the recovery logs fails
 */
@Test(timeout = 120000)
public void testVertexCompletelyFinished_ScatterGather() throws Exception {
  DAG dag =
      createDAG("VertexCompletelyFinished_ScatterGather", ControlledShuffleVertexManager.class,
          DataMovementType.SCATTER_GATHER, false);
  TezCounters counters = runDAGAndVerify(dag, DAGStatus.State.SUCCEEDED);
  assertEquals(4, counters.findCounter(DAGCounter.NUM_SUCCEEDED_TASKS).getValue());
  assertEquals(2, counters.findCounter(TestCounter.Counter_1).getValue());
  TezCounter outputCounter = counters.findCounter(TestOutput.COUNTER_NAME, TestOutput.COUNTER_NAME);
  TezCounter inputCounter = counters.findCounter(TestInput.COUNTER_NAME, TestInput.COUNTER_NAME);
  // verify that processor, input and output counters, are all being collected
  Assert.assertTrue(outputCounter.getValue() > 0);
  Assert.assertTrue(inputCounter.getValue() > 0);

  List<HistoryEvent> historyEvents1 = readRecoveryLog(1);
  List<HistoryEvent> historyEvents2 = readRecoveryLog(2);
  printHistoryEvents(historyEvents1, 1);
  // Print the attempt-2 log under the attempt-2 label; the attempt-1 events used to be
  // printed a second time here.
  printHistoryEvents(historyEvents2, 2);
  // both task_0 and task_1 of v1 are finished in attempt 1
  assertEquals(1, findTaskAttemptFinishedEvent(historyEvents1, 0, 0).size());
  assertEquals(1, findTaskAttemptFinishedEvent(historyEvents1, 0, 1).size());

  // neither task is rerun: the attempt-2 log still holds exactly one finished
  // event for each of task_0 and task_1
  assertEquals(1, findTaskAttemptFinishedEvent(historyEvents2, 0, 0).size());
  assertEquals(1, findTaskAttemptFinishedEvent(historyEvents2, 0, 1).size());
}
 
Example #11
Source File: LocalityAnalyzer.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Computes per-attempt averages for the attempts of a vertex that carry a positive value
 * for the given locality counter.
 *
 * <p>For every such attempt the time taken and the sum of its HDFS_BYTES_READ counters
 * are accumulated; the averages are the totals divided by the number of matching
 * attempts. When no attempt matches, the freshly created result is returned untouched.
 *
 * @param vertexInfo vertex whose task attempts are inspected
 * @param counter locality counter an attempt must have (with value &gt; 0) to be counted
 * @return task attempt details holding the average runtime and average HDFS bytes read
 */
private TaskAttemptDetails computeAverages(VertexInfo vertexInfo, DAGCounter counter) {
  TaskAttemptDetails averages = new TaskAttemptDetails();

  long runtimeSum = 0;
  long matchedAttempts = 0;
  long hdfsBytesReadSum = 0;

  for (TaskAttemptInfo attempt : vertexInfo.getTaskAttempts()) {
    Map<String, TezCounter> locality =
        attempt.getCounter(DAGCounter.class.getName(), counter.toString());

    // Skip attempts that did not record this locality counter with a positive value.
    if (locality.isEmpty()) {
      continue;
    }
    if (locality.get(DAGCounter.class.getName()).getValue() <= 0) {
      continue;
    }

    runtimeSum += attempt.getTimeTaken();
    matchedAttempts++;

    // Add up every HDFS bytes-read counter reported for the attempt.
    Map<String, TezCounter> hdfsBytesRead = attempt.getCounter(
        FileSystemCounter.class.getName(), FileSystemCounter.HDFS_BYTES_READ.name());
    for (TezCounter bytesRead : hdfsBytesRead.values()) {
      hdfsBytesReadSum += bytesRead.getValue();
    }
  }

  if (matchedAttempts > 0) {
    averages.avgRuntime = (runtimeSum * 1.0f / matchedAttempts);
    averages.avgHDFSBytesRead = (hdfsBytesReadSum * 1.0f / matchedAttempts);
  }
  return averages;
}
 
Example #12
Source File: DAGImpl.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Stores the CPU time and GC time consumed since the recorded DAG start values into the
 * {@code AM_CPU_MILLISECONDS} and {@code AM_GC_TIME_MILLIS} DAG counters.
 *
 * <p>Each value is the current cumulative reading from the app context minus the
 * corresponding {@code startDAG*} field.
 */
private void updateCpuCounters() {
  // Take both readings before touching the counters.
  final long cpuTimeDelta = appContext.getCumulativeCPUTime() - startDAGCpuTime;
  final long gcTimeDelta = appContext.getCumulativeGCTime() - startDAGGCTime;

  dagCounters.findCounter(DAGCounter.AM_CPU_MILLISECONDS).setValue(cpuTimeDelta);
  dagCounters.findCounter(DAGCounter.AM_GC_TIME_MILLIS).setValue(gcTimeDelta);
}
 
Example #13
Source File: TaskAttemptImpl.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the DAG counter update event that records one more launched task.
 *
 * @param ta the launched task attempt; supplies the DAG id
 * @return an event adding 1 to {@code TOTAL_LAUNCHED_TASKS}
 */
private static DAGEventCounterUpdate createDAGCounterUpdateEventTALaunched(
    TaskAttemptImpl ta) {
  final DAGEventCounterUpdate launchedUpdate = new DAGEventCounterUpdate(ta.getDAGID());
  launchedUpdate.addCounterUpdate(DAGCounter.TOTAL_LAUNCHED_TASKS, 1);
  return launchedUpdate;
}
 
Example #14
Source File: TaskAttemptInfo.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Tells whether any locality counter was recorded for this task attempt.
 *
 * @return {@code true} when at least one of the data-local, rack-local or other-local
 *     lookups in the {@code DAGCounter} group is non-empty
 */
public final boolean isLocalityInfoAvailable() {
  final String counterGroup = DAGCounter.class.getName();

  // All three lookups are performed up front, before any of them is inspected.
  final Map<String, TezCounter> dataLocal =
      getCounter(counterGroup, DAGCounter.DATA_LOCAL_TASKS.toString());
  final Map<String, TezCounter> rackLocal =
      getCounter(counterGroup, DAGCounter.RACK_LOCAL_TASKS.toString());
  final Map<String, TezCounter> otherLocal =
      getCounter(counterGroup, DAGCounter.OTHER_LOCAL_TASKS.toString());

  final boolean nothingRecorded =
      dataLocal.isEmpty() && rackLocal.isEmpty() && otherLocal.isEmpty();
  return !nothingRecorded;
}
 
Example #15
Source File: TestHistoryParser.java    From tez with Apache License 2.0 4 votes vote down vote up
/**
 * Run a failed job and parse the data from ATS.
 *
 * <p>The word-count DAG is run with {@code FailProcessor} as its second processor, its
 * data is exported with {@code ATSImportTool}, and the parsed {@code DagInfo} is checked
 * for the expected failed/succeeded vertices, task counters, attempt causality and
 * diagnostics.
 *
 * @throws Exception if running the job, exporting or parsing the data fails
 */
@Test
public void testParserWithFailedJob() throws Exception {
  //Run a job which would fail
  String dagId = runWordCount(WordCount.TokenProcessor.class.getName(), FailProcessor.class
      .getName(), "WordCount-With-Exception", true);

  //Export the data from ATS
  String[] args = { "--dagId=" + dagId, "--downloadDir=" + DOWNLOAD_DIR, "--yarnTimelineAddress=" + yarnTimelineAddress };

  // A zero return code is treated as a successful export.
  int result = ATSImportTool.process(args);
  assertTrue(result == 0);

  //Parse ATS data
  DagInfo dagInfo = getDagInfo(dagId);
  //Applicable for ATS dataset
  checkConfig(dagInfo);

  //Verify DAGInfo. Verifies vertex, task, taskAttempts in recursive manner
  verifyDagInfo(dagInfo, true);

  //Dag specific
  VertexInfo summationVertex = dagInfo.getVertex(SUMMATION);
  assertTrue(summationVertex.getFailedTasks().size() == 1); //1 task, 4 attempts failed
  assertTrue(summationVertex.getFailedTasks().get(0).getFailedTaskAttempts().size() == 4);
  assertTrue(summationVertex.getStatus().equals(VertexState.FAILED.toString()));

  // Exactly one vertex failed (SUMMATION) and exactly one succeeded (TOKENIZER).
  assertTrue(dagInfo.getFailedVertices().size() == 1);
  assertTrue(dagInfo.getFailedVertices().get(0).getVertexName().equals(SUMMATION));
  assertTrue(dagInfo.getSuccessfullVertices().size() == 1);
  assertTrue(dagInfo.getSuccessfullVertices().get(0).getVertexName().equals(TOKENIZER));

  assertTrue(dagInfo.getStatus().equals(DAGState.FAILED.toString()));

  // DAG-level task counters: 4 failed + 1 succeeded = 5 launched.
  verifyCounter(dagInfo.getCounter(DAGCounter.NUM_FAILED_TASKS.toString()), null, 4);
  verifyCounter(dagInfo.getCounter(DAGCounter.NUM_SUCCEEDED_TASKS.toString()), null, 1);
  verifyCounter(dagInfo.getCounter(DAGCounter.TOTAL_LAUNCHED_TASKS.toString()), null, 5);

  // Task counters of the tokenizer vertex, checked per counter group name.
  verifyCounter(dagInfo.getCounter(TaskCounter.INPUT_RECORDS_PROCESSED.toString()),
      "TaskCounter_Tokenizer_INPUT_Input", 10);
  verifyCounter(dagInfo.getCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ.toString()),
      "TaskCounter_Tokenizer_OUTPUT_Summation", 0);
  verifyCounter(dagInfo.getCounter(TaskCounter.OUTPUT_RECORDS.toString()),
      "TaskCounter_Tokenizer_OUTPUT_Summation",
      20); //Every line has 2 words. 10 lines x 2 words = 20
  verifyCounter(dagInfo.getCounter(TaskCounter.SPILLED_RECORDS.toString()),
      "TaskCounter_Tokenizer_OUTPUT_Summation", 20); //Same as above

  // Walk each task's attempts in iteration order and check that every attempt after the
  // first names its predecessor as creation-causal attempt.
  for (TaskInfo taskInfo : summationVertex.getTasks()) {
    TaskAttemptInfo lastAttempt = null;
    for (TaskAttemptInfo attemptInfo : taskInfo.getTaskAttempts()) {
      if (lastAttempt != null) {
        // failed attempt should be causal TA of next attempt
        assertTrue(lastAttempt.getTaskAttemptId().equals(attemptInfo.getCreationCausalTA()));
        assertTrue(lastAttempt.getTerminationCause() != null);
      }
      lastAttempt = attemptInfo;
    }
  }

  //TODO: Need to check for SUMMATION vertex counters. Since all attempts are failed, counters are not getting populated.
  //TaskCounter.REDUCE_INPUT_RECORDS

  //Verify if the processor exception is given in diagnostics
  assertTrue(dagInfo.getDiagnostics().contains("Failing this processor for some reason"));

}
 
Example #16
Source File: TestMockDAGAppMaster.java    From tez with Apache License 2.0 4 votes vote down vote up
/**
 * Runs a two-vertex DAG (A with 10 tasks, B with 1 task, SCATTER_GATHER edge) on the mock
 * AM and checks the counters aggregated at DAG level and per vertex.
 *
 * <p>Every task reports a deserialized "Global" counter, a per-vertex "Proc" counter and
 * one counter per input/output edge. The test also checks that counter and group name
 * strings are the same object across vertices, i.e. that they were interned.
 *
 * <p>The client is stopped in a {@code finally} block so that a failing assertion does
 * not leave the mock client running.
 *
 * @throws Exception if client start-up, DAG submission or completion polling fails
 */
@Test (timeout = 10000)
public void testBasicCounters() throws Exception {
  TezConfiguration tezconf = new TezConfiguration(defaultConf);
  MockTezClient tezClient = new MockTezClient("testMockAM", tezconf, true, null, null, null,
      null, false, false);
  tezClient.start();

  try {
    final String vAName = "A";
    final String vBName = "B";
    final String procCounterName = "Proc";
    final String globalCounterName = "Global";
    DAG dag = DAG.create("testBasicCounters");
    Vertex vA = Vertex.create(vAName, ProcessorDescriptor.create("Proc.class"), 10);
    Vertex vB = Vertex.create(vBName, ProcessorDescriptor.create("Proc.class"), 1);
    dag.addVertex(vA)
        .addVertex(vB)
        .addEdge(
            Edge.create(vA, vB, EdgeProperty.create(DataMovementType.SCATTER_GATHER,
                DataSourceType.PERSISTED, SchedulingType.SEQUENTIAL,
                OutputDescriptor.create("Out"), InputDescriptor.create("In"))));

    // Serialize a counters object holding the global counter. Fresh String instances are
    // used on purpose so the identity checks at the end can only pass through interning.
    TezCounters temp = new TezCounters();
    temp.findCounter(new String(globalCounterName), new String(globalCounterName)).increment(1);
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    DataOutput out = new DataOutputStream(bos);
    temp.write(out);
    final byte[] payload = bos.toByteArray();

    MockDAGAppMaster mockApp = tezClient.getLocalClient().getMockApp();
    MockContainerLauncher mockLauncher = mockApp.getContainerLauncher();
    mockLauncher.startScheduling(false);
    mockApp.countersDelegate = new CountersDelegate() {
      @Override
      public TezCounters getCounters(TaskSpec taskSpec) {
        String vName = taskSpec.getVertexName();
        TezCounters counters = new TezCounters();
        final DataInputByteBuffer in  = new DataInputByteBuffer();
        in.reset(ByteBuffer.wrap(payload));
        try {
          // this ensures that the serde code path is covered.
          // the internal merges of counters covers the constructor code path.
          counters.readFields(in);
        } catch (IOException e) {
          Assert.fail(e.getMessage());
        }
        counters.findCounter(vName, procCounterName).increment(1);
        for (OutputSpec output : taskSpec.getOutputs()) {
          counters.findCounter(vName, output.getDestinationVertexName()).increment(1);
        }
        for (InputSpec input : taskSpec.getInputs()) {
          counters.findCounter(vName, input.getSourceVertexName()).increment(1);
        }
        return counters;
      }
    };
    mockApp.doSleep = false;
    DAGClient dagClient = tezClient.submitDAG(dag);
    mockLauncher.waitTillContainersLaunched();
    DAGImpl dagImpl = (DAGImpl) mockApp.getContext().getCurrentDAG();
    mockLauncher.startScheduling(true);
    DAGStatus status = dagClient.waitForCompletion();
    Assert.assertEquals(DAGStatus.State.SUCCEEDED, status.getState());
    TezCounters counters = dagImpl.getAllCounters();

    // The AM CPU counter is only asserted on Linux.
    if (SystemUtils.IS_OS_LINUX) {
      Assert.assertTrue(counters.findCounter(DAGCounter.AM_CPU_MILLISECONDS).getValue() > 0);
    }

    // verify processor counters
    Assert.assertEquals(10, counters.findCounter(vAName, procCounterName).getValue());
    Assert.assertEquals(1, counters.findCounter(vBName, procCounterName).getValue());
    // verify edge counters
    Assert.assertEquals(10, counters.findCounter(vAName, vBName).getValue());
    Assert.assertEquals(1, counters.findCounter(vBName, vAName).getValue());
    // verify global counters
    Assert.assertEquals(11, counters.findCounter(globalCounterName, globalCounterName).getValue());
    VertexImpl vAImpl = (VertexImpl) dagImpl.getVertex(vAName);
    VertexImpl vBImpl = (VertexImpl) dagImpl.getVertex(vBName);
    TezCounters vACounters = vAImpl.getAllCounters();
    TezCounters vBCounters = vBImpl.getAllCounters();
    String vACounterName = vACounters.findCounter(globalCounterName, globalCounterName).getName();
    String vBCounterName = vBCounters.findCounter(globalCounterName, globalCounterName).getName();
    // Reference comparison is intentional: interned names must be the same object.
    if (vACounterName != vBCounterName) {
      Assert.fail("String counter name objects dont match despite interning.");
    }
    CounterGroup vaGroup = vACounters.getGroup(globalCounterName);
    String vaGrouName = vaGroup.getName();
    CounterGroup vBGroup = vBCounters.getGroup(globalCounterName);
    String vBGrouName = vBGroup.getName();
    // Reference comparison is intentional here as well.
    if (vaGrouName != vBGrouName) {
      Assert.fail("String group name objects dont match despite interning.");
    }
  } finally {
    // Always stop the client, even when an assertion above fails.
    tezClient.stop();
  }
}
 
Example #17
Source File: TaskAttemptImpl.java    From tez with Apache License 2.0 4 votes vote down vote up
/**
 * Handles a {@code TaskAttemptEventSubmitted} for a task attempt.
 *
 * <p>In order, it copies the container and node details for the event's container id
 * onto the attempt, sets the launch time, sends the launched-task DAG counter update,
 * logs the history "attempt started" entry, picks the locality counter, and then
 * notifies the task, the speculator (only when speculation is enabled) and the
 * scheduler, and finally registers the attempt with the task heartbeat handler.
 *
 * @param ta the task attempt being transitioned
 * @param origEvent the triggering event; cast to {@code TaskAttemptEventSubmitted}
 */
@Override
public void transition(TaskAttemptImpl ta, TaskAttemptEvent origEvent) {
  TaskAttemptEventSubmitted event = (TaskAttemptEventSubmitted) origEvent;

  // NOTE(review): assumes the event's container id is present in getAllContainers();
  // a missing entry would make amContainer null and fail on the next line - confirm.
  AMContainer amContainer = ta.appContext.getAllContainers().get(event.getContainerId());
  Container container = amContainer.getContainer();

  ta.allocationTime = amContainer.getCurrentTaskAttemptAllocationTime();
  ta.container = container;
  ta.containerId = event.getContainerId();
  ta.containerNodeId = container.getNodeId();
  ta.nodeHttpAddress = StringInterner.weakIntern(container.getNodeHttpAddress());
  ta.nodeRackName = StringInterner.weakIntern(RackResolver.resolve(ta.containerNodeId.getHost())
      .getNetworkLocation());
  ta.lastNotifyProgressTimestamp = ta.clock.getTime();

  ta.setLaunchTime();

  // TODO Resolve to host / IP in case of a local address.
  InetSocketAddress nodeHttpInetAddr = NetUtils
      .createSocketAddr(ta.nodeHttpAddress); // TODO: Costly?
  ta.trackerName = StringInterner.weakIntern(nodeHttpInetAddr.getHostName());
  ta.httpPort = nodeHttpInetAddr.getPort();
  ta.sendEvent(createDAGCounterUpdateEventTALaunched(ta));

  LOG.info("TaskAttempt: [" + ta.attemptId + "] submitted."
      + " Is using containerId: [" + ta.containerId + "]" + " on NM: ["
      + ta.containerNodeId + "]");

  // JobHistoryEvent.
  // The started event represents when the attempt was submitted to the executor.
  ta.logJobHistoryAttemptStarted();

  // TODO Remove after HDFS-5098
  // Compute LOCALITY counter for this task.
  // Host match wins over rack match; OTHER_LOCAL is only used when a location hint exists.
  if (ta.taskHosts.contains(ta.containerNodeId.getHost())) {
    ta.localityCounter = DAGCounter.DATA_LOCAL_TASKS;
  } else if (ta.taskRacks.contains(ta.nodeRackName)) {
    ta.localityCounter = DAGCounter.RACK_LOCAL_TASKS;
  } else {
    // Not computing this if the task does not have locality information.
    if (ta.getTaskLocationHint() != null) {
      ta.localityCounter = DAGCounter.OTHER_LOCAL_TASKS;
    }
  }

  // Inform the Task
  ta.sendEvent(new TaskEventTALaunched(ta.attemptId));

  // The speculator is only told about the attempt when speculation is enabled.
  if (ta.isSpeculationEnabled()) {
    ta.sendEvent(new SpeculatorEventTaskAttemptStatusUpdate(ta.attemptId, TaskAttemptState.RUNNING,
        ta.launchTime, true));
  }

  // Tell the scheduler the attempt is now in the SUBMITTED state.
  ta.sendEvent(
      new AMSchedulerEventTAStateUpdated(ta, TaskScheduler.SchedulerTaskState.SUBMITTED,
          ta.getVertex().getTaskSchedulerIdentifier()));
  ta.taskHeartbeatHandler.register(ta.attemptId);
}
 
Example #18
Source File: LocalityAnalyzer.java    From tez with Apache License 2.0 4 votes vote down vote up
/**
 * Analyzes task locality for every vertex of the DAG and appends one CSV record per
 * vertex that has at least one data-local or rack-local task.
 *
 * <p>Each record holds, in order: vertex name, total task count, data-local / rack-local /
 * other ratios, average runtimes per locality type, number of vertex inputs, average HDFS
 * bytes read per locality type, and a tuning recommendation (empty unless the data-local
 * ratio is below the configured threshold).
 *
 * @param dagInfo the parsed DAG to analyze
 * @throws TezException declared by the overridden method
 */
@Override
public void analyze(DagInfo dagInfo) throws TezException {
  final String counterGroup = DAGCounter.class.getName();

  for (VertexInfo vertexInfo : dagInfo.getVertices()) {
    String vertexName = vertexInfo.getVertexName();

    Map<String, TezCounter> dataLocalTask =
        vertexInfo.getCounter(counterGroup, DAGCounter.DATA_LOCAL_TASKS.toString());
    Map<String, TezCounter> rackLocalTask =
        vertexInfo.getCounter(counterGroup, DAGCounter.RACK_LOCAL_TASKS.toString());

    // A missing counter counts as zero tasks of that locality type.
    long dataLocalTasks =
        dataLocalTask.isEmpty() ? 0 : dataLocalTask.get(counterGroup).getValue();
    long rackLocalTasks =
        rackLocalTask.isEmpty() ? 0 : rackLocalTask.get(counterGroup).getValue();

    long totalVertexTasks = vertexInfo.getNumTasks();

    // Vertices without any data-local or rack-local task produce no record.
    if (dataLocalTasks <= 0 && rackLocalTasks <= 0) {
      continue;
    }

    //compute locality details.
    float dataLocalRatio = dataLocalTasks * 1.0f / totalVertexTasks;
    float rackLocalRatio = rackLocalTasks * 1.0f / totalVertexTasks;
    float othersRatio = (totalVertexTasks - (dataLocalTasks + rackLocalTasks)) * 1.0f /
        totalVertexTasks;

    List<String> record = Lists.newLinkedList();
    record.add(vertexName);
    record.add(String.valueOf(totalVertexTasks));
    record.add(String.valueOf(dataLocalRatio));
    record.add(String.valueOf(rackLocalRatio));
    record.add(String.valueOf(othersRatio));

    TaskAttemptDetails dataLocalResult =
        computeAverages(vertexInfo, DAGCounter.DATA_LOCAL_TASKS);
    TaskAttemptDetails rackLocalResult =
        computeAverages(vertexInfo, DAGCounter.RACK_LOCAL_TASKS);
    TaskAttemptDetails otherTaskResult =
        computeAverages(vertexInfo, DAGCounter.OTHER_LOCAL_TASKS);

    record.add(String.valueOf(dataLocalResult.avgRuntime));
    record.add(String.valueOf(rackLocalResult.avgRuntime));
    record.add(String.valueOf(otherTaskResult.avgRuntime));

    //Get the number of inputs to this vertex
    int inputCount = vertexInfo.getInputEdges().size()
        + vertexInfo.getAdditionalInputInfoList().size();
    record.add(String.valueOf(inputCount));

    //Get the avg HDFS bytes read in this vertex for different type of locality
    record.add(String.valueOf(dataLocalResult.avgHDFSBytesRead));
    record.add(String.valueOf(rackLocalResult.avgHDFSBytesRead));
    record.add(String.valueOf(otherTaskResult.avgHDFSBytesRead));

    // Recommend tuning only when the data-local ratio is below the configured threshold.
    boolean poorLocality =
        dataLocalRatio < config.getFloat(DATA_LOCAL_RATIO, DATA_LOCAL_RATIO_DEFAULT);
    String recommendation = poorLocality
        ? "Data locality is poor for this vertex. Try tuning "
            + TezConfiguration.TEZ_AM_CONTAINER_REUSE_LOCALITY_DELAY_ALLOCATION_MILLIS + ", "
            + TezConfiguration.TEZ_AM_CONTAINER_REUSE_RACK_FALLBACK_ENABLED + ", "
            + TezConfiguration.TEZ_AM_CONTAINER_REUSE_NON_LOCAL_FALLBACK_ENABLED
        : "";

    record.add(recommendation);
    csvResult.addRecord(record.toArray(new String[record.size()]));
  }
}
 
Example #19
Source File: TaskAttemptImpl.java    From incubator-tez with Apache License 2.0 4 votes vote down vote up
/**
 * Handles a {@code TaskAttemptEventStartedRemotely} for a task attempt.
 *
 * <p>In order, it copies the container and node details for the event's container id
 * onto the attempt, stamps the launch time from the attempt's clock, sends the
 * launched-task counter update, logs the history "attempt started" entry, picks the
 * locality counter, informs the task that the attempt was launched and registers the
 * attempt with the task heartbeat handler.
 *
 * @param ta the task attempt being transitioned
 * @param origEvent the triggering event; cast to {@code TaskAttemptEventStartedRemotely}
 */
@Override
public void transition(TaskAttemptImpl ta, TaskAttemptEvent origEvent) {
  TaskAttemptEventStartedRemotely event = (TaskAttemptEventStartedRemotely) origEvent;

  // NOTE(review): assumes the event's container id is present in getAllContainers();
  // a missing entry would fail on .getContainer() - confirm.
  Container container = ta.appContext.getAllContainers()
      .get(event.getContainerId()).getContainer();

  ta.container = container;
  ta.containerId = event.getContainerId();
  ta.containerNodeId = container.getNodeId();
  ta.nodeHttpAddress = StringInterner.weakIntern(container.getNodeHttpAddress());
  ta.nodeRackName = StringInterner.weakIntern(RackResolver.resolve(ta.containerNodeId.getHost())
      .getNetworkLocation());

  ta.launchTime = ta.clock.getTime();

  // TODO Resolve to host / IP in case of a local address.
  InetSocketAddress nodeHttpInetAddr = NetUtils
      .createSocketAddr(ta.nodeHttpAddress); // TODO: Costly?
  ta.trackerName = StringInterner.weakIntern(nodeHttpInetAddr.getHostName());
  ta.httpPort = nodeHttpInetAddr.getPort();
  ta.sendEvent(createJobCounterUpdateEventTALaunched(ta));

  LOG.info("TaskAttempt: [" + ta.attemptId + "] started."
      + " Is using containerId: [" + ta.containerId + "]" + " on NM: ["
      + ta.containerNodeId + "]");

  // JobHistoryEvent
  ta.logJobHistoryAttemptStarted();

  // TODO Remove after HDFS-5098
  // Compute LOCALITY counter for this task.
  // Host match wins over rack match; OTHER_LOCAL is only used when a location hint exists.
  if (ta.taskHosts.contains(ta.containerNodeId.getHost())) {
    ta.localityCounter = DAGCounter.DATA_LOCAL_TASKS;
  } else if (ta.taskRacks.contains(ta.nodeRackName)) {
    ta.localityCounter = DAGCounter.RACK_LOCAL_TASKS;
  } else {
    // Not computing this if the task does not have locality information.
    if (ta.getTaskLocationHint() != null) {
      ta.localityCounter = DAGCounter.OTHER_LOCAL_TASKS;
    }
  }

  // Inform the Task
  ta.sendEvent(new TaskEventTAUpdate(ta.attemptId,
      TaskEventType.T_ATTEMPT_LAUNCHED));

  ta.taskHeartbeatHandler.register(ta.attemptId);
}