org.apache.hadoop.yarn.api.records.ContainerExitStatus Java Examples

The following examples show how to use org.apache.hadoop.yarn.api.records.ContainerExitStatus. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestContainer.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Kills a container that is still in the NEW state and checks that it ends
 * up DONE, reports KILLED_BY_RESOURCEMANAGER with a "KillRequest" diagnostic,
 * and raises the killed-containers metric by one.
 */
@Test
public void testKillOnNew() throws Exception {
  WrappedContainer wrapped = null;
  try {
    wrapped = new WrappedContainer(13, 314159265358979L, 4344, "yak");
    assertEquals(ContainerState.NEW, wrapped.c.getContainerState());

    // Snapshot the metric before the kill so the delta can be verified.
    final int killedBefore = metrics.getKilledContainers();
    wrapped.killContainer();
    assertEquals(ContainerState.DONE, wrapped.c.getContainerState());

    final int reportedExit =
        wrapped.c.cloneAndGetContainerStatus().getExitStatus();
    assertEquals(ContainerExitStatus.KILLED_BY_RESOURCEMANAGER, reportedExit);

    final String reportedDiagnostics =
        wrapped.c.cloneAndGetContainerStatus().getDiagnostics();
    assertTrue(reportedDiagnostics.contains("KillRequest"));

    assertEquals(killedBefore + 1, metrics.getKilledContainers());
  } finally {
    // Always release the wrapped container, even when an assertion fails.
    if (wrapped != null) {
      wrapped.finished();
    }
  }
}
 
Example #2
Source File: TestContainer.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Kills a container while it is LOCALIZING and checks that it first moves to
 * KILLING with a KILLED_BY_RESOURCEMANAGER exit status and a "KillRequest"
 * diagnostic, then reaches DONE after resource cleanup, raising the
 * killed-containers metric by one.
 */
@Test
public void testKillOnLocalizing() throws Exception {
  WrappedContainer wrapped = null;
  try {
    wrapped = new WrappedContainer(14, 314159265358979L, 4344, "yak");
    wrapped.initContainer();
    assertEquals(ContainerState.LOCALIZING, wrapped.c.getContainerState());

    wrapped.killContainer();
    assertEquals(ContainerState.KILLING, wrapped.c.getContainerState());

    final int reportedExit =
        wrapped.c.cloneAndGetContainerStatus().getExitStatus();
    assertEquals(ContainerExitStatus.KILLED_BY_RESOURCEMANAGER, reportedExit);

    final String reportedDiagnostics =
        wrapped.c.cloneAndGetContainerStatus().getDiagnostics();
    assertTrue(reportedDiagnostics.contains("KillRequest"));

    // The metric is sampled after the kill request but before cleanup.
    final int killedBeforeCleanup = metrics.getKilledContainers();
    wrapped.containerResourcesCleanup();
    assertEquals(ContainerState.DONE, wrapped.c.getContainerState());
    assertEquals(killedBeforeCleanup + 1, metrics.getKilledContainers());
  } finally {
    // Always release the wrapped container, even when an assertion fails.
    if (wrapped != null) {
      wrapped.finished();
    }
  }
}
 
Example #3
Source File: RMContainerImpl.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Handles a container-finished event for a container that is being
 * suspended: records the suspend timestamp, stores the remote container
 * status, flags the container as suspending and, when the exit status is
 * PREEMPTED, reports the last preempted resource to the current attempt's
 * metrics.
 *
 * @param container the container being transitioned
 * @param event     the triggering event; cast to RMContainerFinishedEvent
 */
@Override
public void transition(RMContainerImpl container, RMContainerEvent event) {
  RMContainerFinishedEvent finishedEvent = (RMContainerFinishedEvent) event;

  // Add the suspend time.
  container.suspendTime.add(System.currentTimeMillis());
  Resource resource = container.getLastPreemptedResource();
  container.finishedStatus = finishedEvent.getRemoteContainerStatus();
  container.isSuspending = true;

  // Update preempt metrics.
  RMAppAttempt rmAttempt = container.rmContext.getRMApps()
      .get(container.getApplicationAttemptId().getApplicationId())
      .getCurrentAppAttempt();

  // The current attempt can be absent; skip the metrics update in that case
  // instead of failing the transition with a NullPointerException.
  if (ContainerExitStatus.PREEMPTED == container.finishedStatus.getExitStatus()
      && rmAttempt != null) {
    rmAttempt.getRMAppAttemptMetrics().updatePreemptionInfo(resource, container);
  }
}
 
Example #4
Source File: ApplicationImpl.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Reacts to an application-finish event. With no containers left, app-level
 * cleanup starts immediately; otherwise a kill event is dispatched for every
 * container and the application waits for them to finish.
 *
 * @param app   the application being transitioned
 * @param event the triggering event; cast to ApplicationFinishEvent
 * @return APPLICATION_RESOURCES_CLEANINGUP when there were no containers,
 *         FINISHING_CONTAINERS_WAIT otherwise
 */
@Override
public ApplicationState transition(ApplicationImpl app,
    ApplicationEvent event) {
  final ApplicationFinishEvent finishEvent = (ApplicationFinishEvent) event;

  if (!app.containers.isEmpty()) {
    // Ask for every container of this application to be killed.
    for (ContainerId runningId : app.containers.keySet()) {
      app.dispatcher.getEventHandler().handle(
          new ContainerKillEvent(
              runningId,
              ContainerExitStatus.KILLED_AFTER_APP_COMPLETION,
              "Container killed on application-finish event: "
                  + finishEvent.getDiagnostic()));
    }
    return ApplicationState.FINISHING_CONTAINERS_WAIT;
  }

  // Nothing to kill: go straight to cleaning up app-level resources.
  app.handleAppFinishWithContainersCleanedup();
  return ApplicationState.APPLICATION_RESOURCES_CLEANINGUP;
}
 
Example #5
Source File: AMContainerImpl.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Handles a second task attempt being assigned to this container while one
 * is already present. Marks the container as errored, terminates both
 * attempts with FRAMEWORK_ERROR, registers the new attempt as failed, and
 * stops the container.
 *
 * @param event       the assignment event carrying the extra attempt
 * @param currentTaId the attempt already associated with this container
 */
private void handleExtraTAAssign(
    AMContainerEventAssignTA event, TezTaskAttemptID currentTaId) {
  setError();

  final TezTaskAttemptID extraTaId = event.getTaskAttemptId();
  final String errorMessage = new StringBuilder()
      .append("AMScheduler Error: Multiple simultaneous ")
      .append("taskAttempt allocations to: ").append(this.getContainerId())
      .append(". Attempts: ").append(currentTaId)
      .append(", ").append(extraTaId)
      .append(". Current state: ").append(this.getState())
      .toString();

  this.maybeSendNodeFailureForFailedAssignment(extraTaId);

  // Both the newly assigned and the existing attempt are terminated.
  this.sendTerminatingToTaskAttempt(extraTaId, errorMessage,
      TaskAttemptTerminationCause.FRAMEWORK_ERROR);
  this.sendTerminatingToTaskAttempt(currentTaId, errorMessage,
      TaskAttemptTerminationCause.FRAMEWORK_ERROR);
  this.registerFailedAttempt(extraTaId);

  LOG.warn(errorMessage);
  this.logStopped(ContainerExitStatus.INVALID);
  this.sendStopRequestToNM();
  this.unregisterFromTAListener(ContainerEndReason.FRAMEWORK_ERROR, errorMessage);
  this.unregisterFromContainerListener();
}
 
Example #6
Source File: TaskSchedulerEventHandler.java    From incubator-tez with Apache License 2.0 6 votes vote down vote up
/**
 * Forwards a container completion to the matching AMContainer as an
 * AMContainerEventCompleted. The message starts with a prefix chosen from the
 * exit status (preempted, disk failed, or generic failure), followed by the
 * status diagnostics when present. Unknown containers are ignored.
 *
 * @param task            not read by this method
 * @param containerStatus the completion status reported for the container
 */
@Override
public synchronized void containerCompleted(Object task, ContainerStatus containerStatus) {
  final AMContainer knownContainer =
      appContext.getAllContainers().get(containerStatus.getContainerId());
  if (knownContainer == null) {
    return;
  }

  final int exitStatus = containerStatus.getExitStatus();
  final StringBuilder message = new StringBuilder();
  if (exitStatus == ContainerExitStatus.PREEMPTED) {
    message.append("Container preempted externally. ");
  } else if (exitStatus == ContainerExitStatus.DISKS_FAILED) {
    message.append("Container disk failed. ");
  } else {
    message.append("Container failed. ");
  }

  final String diagnostics = containerStatus.getDiagnostics();
  if (diagnostics != null) {
    message.append(diagnostics);
  }

  // Inform the container about its completion.
  sendEvent(new AMContainerEventCompleted(
      knownContainer.getContainerId(), exitStatus, message.toString()));
}
 
Example #7
Source File: AMContainerImpl.java    From incubator-tez with Apache License 2.0 6 votes vote down vote up
/**
 * Handles a second task attempt being assigned to this container while one
 * is already present. Flags the container as in error, terminates both
 * attempts, registers the new attempt as failed, and stops the container.
 *
 * @param event       the assignment event carrying the extra attempt
 * @param currentTaId the attempt already associated with this container
 */
private void handleExtraTAAssign(
    AMContainerEventAssignTA event, TezTaskAttemptID currentTaId) {
  this.inError = true;

  final TezTaskAttemptID extraTaId = event.getTaskAttemptId();
  final String errorMessage = new StringBuilder()
      .append("AMScheduler Error: Multiple simultaneous ")
      .append("taskAttempt allocations to: ").append(this.getContainerId())
      .append(". Attempts: ").append(currentTaId)
      .append(", ").append(extraTaId)
      .append(". Current state: ").append(this.getState())
      .toString();

  this.maybeSendNodeFailureForFailedAssignment(extraTaId);

  // Both the newly assigned and the existing attempt are terminated.
  this.sendTerminatingToTaskAttempt(extraTaId, errorMessage);
  this.sendTerminatingToTaskAttempt(currentTaId, errorMessage);
  this.registerFailedAttempt(extraTaId);

  LOG.warn(errorMessage);
  this.logStopped(ContainerExitStatus.INVALID);
  this.sendStopRequestToNM();
  this.unregisterFromTAListener();
  this.unregisterFromContainerListener();
}
 
Example #8
Source File: RMContainerImpl.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Updates the metrics of the application attempt that currently owns the
 * given finished container: preemption info when the container exited with
 * PREEMPTED, and the aggregate memory/vcore/gcore usage in resource-seconds.
 * Does nothing when the application has no current attempt.
 *
 * @param container the finished container whose usage is being recorded
 */
private static void updateAttemptMetrics(RMContainerImpl container) {
  Resource resource = container.getContainer().getResource();
  RMAppAttempt rmAttempt = container.rmContext.getRMApps()
      .get(container.getApplicationAttemptId().getApplicationId())
      .getCurrentAppAttempt();

  // Guard before ANY use of rmAttempt. The preemption branch used to
  // dereference it ahead of the null check, so a preempted container with no
  // current attempt raised a NullPointerException.
  if (rmAttempt == null) {
    return;
  }

  // If this is a preempted container, update preemption metrics.
  if (ContainerExitStatus.PREEMPTED == container.finishedStatus
      .getExitStatus()) {
    rmAttempt.getRMAppAttemptMetrics().updatePreemptionInfo(resource,
        container);
  }

  // usedMillis is a long, so the products below are evaluated in long
  // arithmetic before the division to seconds.
  long usedMillis = container.finishTime - container.creationTime;
  long memorySeconds = resource.getMemory()
      * usedMillis / DateUtils.MILLIS_PER_SECOND;
  long vcoreSeconds = resource.getVirtualCores()
      * usedMillis / DateUtils.MILLIS_PER_SECOND;
  long gcoreSeconds = resource.getGpuCores()
      * usedMillis / DateUtils.MILLIS_PER_SECOND;
  rmAttempt.getRMAppAttemptMetrics()
      .updateAggregateAppResourceUsage(memorySeconds, vcoreSeconds, gcoreSeconds);
}
 
Example #9
Source File: TestProtocolRecords.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Builds an NMContainerStatus, rebuilds it from its protobuf form, and
 * checks that every field read back from the rebuilt record matches the
 * value it was created with.
 */
@Test
public void testNMContainerStatus() {
  final ApplicationId applicationId = ApplicationId.newInstance(123456789, 1);
  final ApplicationAttemptId applicationAttemptId =
      ApplicationAttemptId.newInstance(applicationId, 1);
  final ContainerId containerId =
      ContainerId.newContainerId(applicationAttemptId, 1);
  final Resource resource = Resource.newInstance(1000, 200, 300);

  final NMContainerStatus original = NMContainerStatus.newInstance(
      containerId, ContainerState.COMPLETE, resource, "diagnostics",
      ContainerExitStatus.ABORTED, Priority.newInstance(10), 1234);

  // Rebuild the record from the proto of the original.
  final NMContainerStatus rebuilt = new NMContainerStatusPBImpl(
      ((NMContainerStatusPBImpl) original).getProto());

  Assert.assertEquals("diagnostics", rebuilt.getDiagnostics());
  Assert.assertEquals(resource, rebuilt.getAllocatedResource());
  Assert.assertEquals(ContainerExitStatus.ABORTED,
      rebuilt.getContainerExitStatus());
  Assert.assertEquals(ContainerState.COMPLETE, rebuilt.getContainerState());
  Assert.assertEquals(containerId, rebuilt.getContainerId());
  Assert.assertEquals(Priority.newInstance(10), rebuilt.getPriority());
  Assert.assertEquals(1234, rebuilt.getCreationTime());
}
 
Example #10
Source File: TestContainer.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Kills a container while it is LOCALIZING and checks that it first moves to
 * KILLING with a KILLED_BY_RESOURCEMANAGER exit status and a "KillRequest"
 * diagnostic, then reaches DONE after resource cleanup, raising the
 * killed-containers metric by one.
 */
@Test
public void testKillOnLocalizing() throws Exception {
  WrappedContainer wrapped = null;
  try {
    wrapped = new WrappedContainer(14, 314159265358979L, 4344, "yak");
    wrapped.initContainer();
    assertEquals(ContainerState.LOCALIZING, wrapped.c.getContainerState());

    wrapped.killContainer();
    assertEquals(ContainerState.KILLING, wrapped.c.getContainerState());

    final int reportedExit =
        wrapped.c.cloneAndGetContainerStatus().getExitStatus();
    assertEquals(ContainerExitStatus.KILLED_BY_RESOURCEMANAGER, reportedExit);

    final String reportedDiagnostics =
        wrapped.c.cloneAndGetContainerStatus().getDiagnostics();
    assertTrue(reportedDiagnostics.contains("KillRequest"));

    // The metric is sampled after the kill request but before cleanup.
    final int killedBeforeCleanup = metrics.getKilledContainers();
    wrapped.containerResourcesCleanup();
    assertEquals(ContainerState.DONE, wrapped.c.getContainerState());
    assertEquals(killedBeforeCleanup + 1, metrics.getKilledContainers());
  } finally {
    // Always release the wrapped container, even when an assertion fails.
    if (wrapped != null) {
      wrapped.finished();
    }
  }
}
 
Example #11
Source File: ApplicationImpl.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Reacts to an application-finish event. With no containers left, app-level
 * cleanup starts immediately; otherwise a kill event is dispatched for every
 * container and the application waits for them to finish.
 *
 * @param app   the application being transitioned
 * @param event the triggering event; cast to ApplicationFinishEvent
 * @return APPLICATION_RESOURCES_CLEANINGUP when there were no containers,
 *         FINISHING_CONTAINERS_WAIT otherwise
 */
@Override
public ApplicationState transition(ApplicationImpl app,
    ApplicationEvent event) {
  final ApplicationFinishEvent finishEvent = (ApplicationFinishEvent) event;

  if (!app.containers.isEmpty()) {
    // Ask for every container of this application to be killed.
    for (ContainerId runningId : app.containers.keySet()) {
      app.dispatcher.getEventHandler().handle(
          new ContainerKillEvent(
              runningId,
              ContainerExitStatus.KILLED_AFTER_APP_COMPLETION,
              "Container killed on application-finish event: "
                  + finishEvent.getDiagnostic()));
    }
    return ApplicationState.FINISHING_CONTAINERS_WAIT;
  }

  // Nothing to kill: go straight to cleaning up app-level resources.
  app.handleAppFinishWithContainersCleanedup();
  return ApplicationState.APPLICATION_RESOURCES_CLEANINGUP;
}
 
Example #12
Source File: AbstractApplicationMaster.java    From Scribengin with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Callback for completed containers. Updates the success/failure counters
 * for each status, re-requests as many containers as are missing from the
 * requested total, and sets {@code done} once the completed count equals the
 * total container count.
 *
 * @param statuses completion statuses of the containers that finished
 */
public void onContainersCompleted(List<ContainerStatus> statuses) {
  LOG.info("onContainersCompleted");

  for (ContainerStatus status : statuses) {
    assert (status.getState() == ContainerState.COMPLETE);

    final int exitStatus = status.getExitStatus();
    if (exitStatus == ContainerExitStatus.SUCCESS) {
      completedContainerCount.incrementAndGet();
      continue;
    }

    // Aborted containers are released but not counted as failures.
    if (exitStatus != ContainerExitStatus.ABORTED) {
      failedContainerCount.incrementAndGet();
    }
    allocatedContainerCount.decrementAndGet();
    requestedContainerCount.decrementAndGet();
    recordFailedCommand(status.getContainerId());
  }

  final int shortfall = totalContainerCount - requestedContainerCount.get();
  requestedContainerCount.addAndGet(shortfall);

  // Re-request one container per missing slot; the loop body never runs
  // when the shortfall is zero or negative.
  for (int remaining = shortfall; remaining > 0; remaining--) {
    ContainerRequest replacement = setupContainerReqForRM();
    resourceManager.addContainerRequest(replacement);
  }

  if (completedContainerCount.get() == totalContainerCount) {
    done = true;
  }
}
 
Example #13
Source File: TestTaskSchedulerEventHandler.java    From incubator-tez with Apache License 2.0 5 votes vote down vote up
/**
 * Reports a container completion with exit status DISKS_FAILED and checks
 * that exactly one C_COMPLETED event is emitted, carrying the container id,
 * the "Container disk failed. " prefix plus the diagnostics, and the
 * disk-failed (not preempted) flags.
 */
@Test (timeout = 5000)
public void testContainerDiskFailed() throws IOException {
  final Configuration configuration = new Configuration(false);
  schedulerHandler.init(configuration);
  schedulerHandler.start();

  final String nmDiagnostics = "NM disk failed.";
  final TaskAttemptImpl taskMock = mock(TaskAttemptImpl.class);
  final ContainerStatus statusMock = mock(ContainerStatus.class);
  final ContainerId containerIdMock = mock(ContainerId.class);
  final AMContainer amContainerMock = mock(AMContainer.class);

  // Wire the mocks so the handler can resolve the container from the status.
  when(mockAMContainerMap.get(containerIdMock)).thenReturn(amContainerMock);
  when(amContainerMock.getContainerId()).thenReturn(containerIdMock);
  when(statusMock.getContainerId()).thenReturn(containerIdMock);
  when(statusMock.getDiagnostics()).thenReturn(nmDiagnostics);
  when(statusMock.getExitStatus()).thenReturn(ContainerExitStatus.DISKS_FAILED);

  schedulerHandler.containerCompleted(taskMock, statusMock);

  Assert.assertEquals(1, mockEventHandler.events.size());
  final Event emitted = mockEventHandler.events.get(0);
  Assert.assertEquals(AMContainerEventType.C_COMPLETED, emitted.getType());

  final AMContainerEventCompleted completed = (AMContainerEventCompleted) emitted;
  Assert.assertEquals(containerIdMock, completed.getContainerId());
  Assert.assertEquals("Container disk failed. NM disk failed.",
      completed.getDiagnostics());
  Assert.assertFalse(completed.isPreempted());
  Assert.assertTrue(completed.isDiskFailed());

  schedulerHandler.stop();
  schedulerHandler.close();
}
 
Example #14
Source File: TestSchedulerUtils.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that SchedulerUtils.createAbnormalContainerStatus produces a status
 * whose exit status is ABORTED.
 */
@Test
public void testCreateAbnormalContainerStatus() {
  final ApplicationId applicationId =
      ApplicationId.newInstance(System.currentTimeMillis(), 1);
  final ApplicationAttemptId applicationAttemptId =
      ApplicationAttemptId.newInstance(applicationId, 1);
  final ContainerId containerId =
      ContainerId.newContainerId(applicationAttemptId, 1);

  final ContainerStatus abnormalStatus =
      SchedulerUtils.createAbnormalContainerStatus(containerId, "x");

  Assert.assertEquals(ContainerExitStatus.ABORTED,
      abnormalStatus.getExitStatus());
}
 
Example #15
Source File: TestAMContainer.java    From incubator-tez with Apache License 2.0 5 votes vote down vote up
/**
 * Brings a container to RUNNING, completes it with DISKS_FAILED, and checks
 * the resulting state, listener registrations and the single outgoing
 * TA_CONTAINER_TERMINATED_BY_SYSTEM event. A late task-attempt-succeeded
 * notification must then produce no further events and leave the container
 * out of the error state.
 */
@SuppressWarnings("rawtypes")
@Test
public void testContainerDiskFailedAtRunning() {
  final WrappedContainer wrapped = new WrappedContainer();

  wrapped.launchContainer();

  wrapped.assignTaskAttempt(wrapped.taskAttemptID);
  wrapped.containerLaunched();
  wrapped.pullTaskToRun();
  wrapped.verifyState(AMContainerState.RUNNING);

  wrapped.containerCompleted(ContainerExitStatus.DISKS_FAILED);
  wrapped.verifyState(AMContainerState.COMPLETED);
  verify(wrapped.tal).registerRunningContainer(wrapped.containerID);
  verify(wrapped.tal).unregisterRunningContainer(wrapped.containerID);
  verify(wrapped.chh).register(wrapped.containerID);
  verify(wrapped.chh).unregister(wrapped.containerID);

  final List<Event> emitted = wrapped.verifyCountAndGetOutgoingEvents(1);
  verifyUnOrderedOutgoingEventTypes(emitted,
      TaskAttemptEventType.TA_CONTAINER_TERMINATED_BY_SYSTEM);

  assertFalse(wrapped.amContainer.isInErrorState());

  // Pending task complete. (Ideally, the container should be dead at this
  // point and this event should not be generated. A network timeout on the
  // NM-RM heartbeat can cause it to be generated.)
  wrapped.taskAttemptSucceeded(wrapped.taskAttemptID);
  wrapped.verifyNoOutgoingEvents();
  wrapped.verifyHistoryStopEvent();

  assertFalse(wrapped.amContainer.isInErrorState());
}
 
Example #16
Source File: TaskSchedulerEventHandler.java    From incubator-tez with Apache License 2.0 5 votes vote down vote up
/**
 * Deallocates the given container from the task scheduler and then emits an
 * AMContainerEventCompleted with exit status PREEMPTED.
 *
 * @param containerId the container being preempted
 */
@Override
public void preemptContainer(ContainerId containerId) {
  taskScheduler.deallocateContainer(containerId);

  // Inform the container about its completion.
  final AMContainerEventCompleted preemptedEvent = new AMContainerEventCompleted(
      containerId, ContainerExitStatus.PREEMPTED, "Container preempted internally");
  sendEvent(preemptedEvent);
}
 
Example #17
Source File: AMContainerImpl.java    From incubator-tez with Apache License 2.0 5 votes vote down vote up
/**
 * Stops the container. A pending attempt, if any, is sent a terminating
 * message; the container is then unregistered from the task-attempt
 * listener, logged as stopped, and a stop request is sent to the NM.
 *
 * @param container the container being transitioned
 * @param cEvent    the triggering event, used to build the termination message
 */
@Override
public void transition(AMContainerImpl container, AMContainerEvent cEvent) {
  if (container.pendingAttempt != null) {
    container.sendTerminatingToTaskAttempt(container.pendingAttempt,
        getMessage(container, cEvent));
  }
  container.unregisterFromTAListener();

  // SUCCESS is logged when nothing was pending, INVALID otherwise.
  final int stoppedStatus;
  if (container.pendingAttempt == null) {
    stoppedStatus = ContainerExitStatus.SUCCESS;
  } else {
    stoppedStatus = ContainerExitStatus.INVALID;
  }
  container.logStopped(stoppedStatus);

  container.sendStopRequestToNM();
}
 
Example #18
Source File: AMContainerImpl.java    From incubator-tez with Apache License 2.0 5 votes vote down vote up
/**
 * Handles an invalid transition. After delegating to the parent transition,
 * a pending attempt (if any) is told which event hit which state; the
 * container is then logged as stopped with ABORTED, a stop request is sent
 * to the NM, and the container is unregistered from the task-attempt listener.
 *
 * @param container the container being transitioned
 * @param cEvent    the event that triggered the invalid transition
 */
@Override
public void transition(AMContainerImpl container, AMContainerEvent cEvent) {
  super.transition(container, cEvent);

  if (container.pendingAttempt != null) {
    final String invalidTransitionMessage = "Container " + container.getContainerId()
        + " hit an invalid transition - " + cEvent.getType() + " at "
        + container.getState();
    container.sendTerminatingToTaskAttempt(container.pendingAttempt,
        invalidTransitionMessage);
  }

  container.logStopped(ContainerExitStatus.ABORTED);
  container.sendStopRequestToNM();
  container.unregisterFromTAListener();
}
 
Example #19
Source File: TestTaskSchedulerManager.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Reports a container completion with exit status PREEMPTED and checks that
 * exactly one C_COMPLETED event is emitted, carrying the container id, the
 * "Container preempted externally. " prefix plus the diagnostics, the
 * preempted flag, and the EXTERNAL_PREEMPTION termination cause.
 */
@Test (timeout = 5000)
public void testContainerPreempted() throws IOException {
  final Configuration configuration = new Configuration(false);
  schedulerHandler.init(configuration);
  schedulerHandler.start();

  final String rmDiagnostics = "Container preempted by RM.";
  final TaskAttemptImpl taskMock = mock(TaskAttemptImpl.class);
  final ContainerStatus statusMock = mock(ContainerStatus.class);
  final ContainerId containerIdMock = mock(ContainerId.class);
  final AMContainer amContainerMock = mock(AMContainer.class);

  // Wire the mocks so the handler can resolve the container from the status.
  when(mockAMContainerMap.get(containerIdMock)).thenReturn(amContainerMock);
  when(amContainerMock.getContainerId()).thenReturn(containerIdMock);
  when(statusMock.getContainerId()).thenReturn(containerIdMock);
  when(statusMock.getDiagnostics()).thenReturn(rmDiagnostics);
  when(statusMock.getExitStatus()).thenReturn(ContainerExitStatus.PREEMPTED);

  schedulerHandler.containerCompleted(0, taskMock, statusMock);

  assertEquals(1, mockEventHandler.events.size());
  final Event emitted = mockEventHandler.events.get(0);
  assertEquals(AMContainerEventType.C_COMPLETED, emitted.getType());

  final AMContainerEventCompleted completed = (AMContainerEventCompleted) emitted;
  assertEquals(containerIdMock, completed.getContainerId());
  assertEquals("Container preempted externally. Container preempted by RM.",
      completed.getDiagnostics());
  assertTrue(completed.isPreempted());
  assertEquals(TaskAttemptTerminationCause.EXTERNAL_PREEMPTION,
      completed.getTerminationCause());
  Assert.assertFalse(completed.isDiskFailed());

  schedulerHandler.stop();
  schedulerHandler.close();
}
 
Example #20
Source File: TestTaskSchedulerEventHandler.java    From incubator-tez with Apache License 2.0 5 votes vote down vote up
/**
 * Reports a container completion with exit status PREEMPTED and checks that
 * exactly one C_COMPLETED event is emitted, carrying the container id, the
 * "Container preempted externally. " prefix plus the diagnostics, and the
 * preempted (not disk-failed) flags.
 */
@Test (timeout = 5000)
public void testContainerPreempted() throws IOException {
  final Configuration configuration = new Configuration(false);
  schedulerHandler.init(configuration);
  schedulerHandler.start();

  final String rmDiagnostics = "Container preempted by RM.";
  final TaskAttemptImpl taskMock = mock(TaskAttemptImpl.class);
  final ContainerStatus statusMock = mock(ContainerStatus.class);
  final ContainerId containerIdMock = mock(ContainerId.class);
  final AMContainer amContainerMock = mock(AMContainer.class);

  // Wire the mocks so the handler can resolve the container from the status.
  when(mockAMContainerMap.get(containerIdMock)).thenReturn(amContainerMock);
  when(amContainerMock.getContainerId()).thenReturn(containerIdMock);
  when(statusMock.getContainerId()).thenReturn(containerIdMock);
  when(statusMock.getDiagnostics()).thenReturn(rmDiagnostics);
  when(statusMock.getExitStatus()).thenReturn(ContainerExitStatus.PREEMPTED);

  schedulerHandler.containerCompleted(taskMock, statusMock);

  Assert.assertEquals(1, mockEventHandler.events.size());
  final Event emitted = mockEventHandler.events.get(0);
  Assert.assertEquals(AMContainerEventType.C_COMPLETED, emitted.getType());

  final AMContainerEventCompleted completed = (AMContainerEventCompleted) emitted;
  Assert.assertEquals(containerIdMock, completed.getContainerId());
  Assert.assertEquals("Container preempted externally. Container preempted by RM.",
      completed.getDiagnostics());
  Assert.assertTrue(completed.isPreempted());
  Assert.assertFalse(completed.isDiskFailed());

  schedulerHandler.stop();
  schedulerHandler.close();
}
 
Example #21
Source File: TestAMContainer.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Completes a container with DISKS_FAILED / NODE_DISK_ERROR right after a
 * task attempt was assigned and before the launch notification arrives.
 * Checks the COMPLETED state, the listener calls, the two outgoing events
 * and the termination cause, then verifies that a late container-launched
 * notification produces no further events.
 */
@SuppressWarnings("rawtypes")
@Test (timeout=5000)
public void testContainerCompletedAtLaunchingSpecificClusterError() {
  final WrappedContainer wrapped = new WrappedContainer();

  wrapped.launchContainer();

  wrapped.assignTaskAttempt(wrapped.taskAttemptID);

  wrapped.containerCompleted(ContainerExitStatus.DISKS_FAILED,
      TaskAttemptTerminationCause.NODE_DISK_ERROR, "DiskFailed");
  wrapped.verifyState(AMContainerState.COMPLETED);
  verify(wrapped.tal).registerRunningContainer(wrapped.containerID, 0);
  verifyUnregisterRunningContainer(wrapped.tal, wrapped.containerID, 0,
      ContainerEndReason.OTHER, "DiskFailed");

  final List<Event> emitted = wrapped.verifyCountAndGetOutgoingEvents(2);
  verifyUnOrderedOutgoingEventTypes(emitted,
      TaskAttemptEventType.TA_CONTAINER_TERMINATED_BY_SYSTEM,
      AMNodeEventType.N_CONTAINER_COMPLETED);
  Assert.assertEquals(TaskAttemptTerminationCause.NODE_DISK_ERROR,
      ((TaskAttemptEventContainerTerminatedBySystem) emitted.get(0))
          .getTerminationCause());

  assertFalse(wrapped.amContainer.isInErrorState());

  // Container launched generated by NM call.
  wrapped.containerLaunched();
  wrapped.verifyNoOutgoingEvents();

  assertFalse(wrapped.amContainer.isInErrorState());
}
 
Example #22
Source File: ApplicationAttemptStateData.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Convenience factory that delegates to the longer {@code newInstance}
 * overload, supplying fixed placeholder values (including
 * ContainerExitStatus.INVALID as the exit status) for the arguments this
 * overload does not take.
 *
 * @param attemptId       id of the application attempt
 * @param masterContainer the attempt's master container
 * @param attemptTokens   credentials of the attempt
 * @param startTime       start time of the attempt
 * @param memorySeconds   aggregate memory usage
 * @param vcoreSeconds    aggregate vcore usage
 * @return the newly created state data
 */
public static ApplicationAttemptStateData newInstance(
    ApplicationAttemptId attemptId, Container masterContainer,
    Credentials attemptTokens, long startTime, long memorySeconds,
    long vcoreSeconds) {
  final int exitStatusPlaceholder = ContainerExitStatus.INVALID;
  return newInstance(
      attemptId, masterContainer, attemptTokens, startTime,
      null, "N/A", "", null,
      exitStatusPlaceholder, 0,
      memorySeconds, vcoreSeconds);
}
 
Example #23
Source File: CapacityScheduler.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Processes a node-labels update on a node.
 *
 * TODO: Currently the capacity scheduler kills the containers on a node when
 * the labels on that node change. This is a simple way to ensure guaranteed
 * capacity on the labels of queues. Once YARN-2498 is completed, the
 * preemption policy can decide whether such containers need to be killed or
 * can keep running.
 *
 * @param nodeId    id of the node whose labels changed
 * @param newLabels the labels the node should carry from now on
 */
private synchronized void updateLabelsOnNode(NodeId nodeId,
    Set<String> newLabels) {
  final FiCaSchedulerNode target = nodes.get(nodeId);
  if (target == null) {
    return;
  }

  // Same size and every new label already present: nothing to update.
  final boolean labelsUnchanged =
      target.getLabels().size() == newLabels.size()
          && target.getLabels().containsAll(newLabels);
  if (labelsUnchanged) {
    return;
  }

  // Kill running containers since the labels changed.
  for (RMContainer running : target.getRunningContainers()) {
    final ContainerId runningId = running.getContainerId();
    final String reason = String.format(
        "Container=%s killed since labels on the node=%s changed",
        runningId.toString(), nodeId.toString());
    final ContainerStatus killedStatus = ContainerStatus.newInstance(
        runningId, ContainerState.COMPLETE, reason,
        ContainerExitStatus.KILLED_BY_RESOURCEMANAGER);
    completedContainer(running, killedStatus, RMContainerEventType.KILL);
  }

  // Drop the reservation held on this node, if there is one.
  final RMContainer reserved = target.getReservedContainer();
  if (reserved != null) {
    dropContainerReservation(reserved);
  }

  // Apply the new labels only after the containers have been dealt with.
  target.updateLabels(newLabels);
}
 
Example #24
Source File: RMAppAttemptImpl.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Tells whether this attempt counts towards the maximum attempt retries.
 *
 * @return {@code false} when the AM container exit status is PREEMPTED,
 *         ABORTED, DISKS_FAILED or KILLED_BY_RESOURCEMANAGER; {@code true}
 *         for any other exit status
 */
@Override
public boolean shouldCountTowardsMaxAttemptRetry() {
  // Acquire the lock BEFORE entering the try block: if lock() itself fails,
  // the finally clause must not call unlock() on a lock that is not held.
  this.readLock.lock();
  try {
    int exitStatus = getAMContainerExitStatus();
    return !(exitStatus == ContainerExitStatus.PREEMPTED
        || exitStatus == ContainerExitStatus.ABORTED
        || exitStatus == ContainerExitStatus.DISKS_FAILED
        || exitStatus == ContainerExitStatus.KILLED_BY_RESOURCEMANAGER);
  } finally {
    this.readLock.unlock();
  }
}
 
Example #25
Source File: RMContainerImpl.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Updates the metrics of the application attempt that currently owns the
 * given finished container: preemption info when the container exited with
 * PREEMPTED, and the aggregate memory/vcore usage in resource-seconds scaled
 * by the container's utilization. Does nothing when the application has no
 * current attempt.
 *
 * @param container the finished container whose usage is being recorded
 */
private static void updateAttemptMetrics(RMContainerImpl container) {
  Resource resource = container.getContainer().getResource();
  RMAppAttempt rmAttempt = container.rmContext.getRMApps()
      .get(container.getApplicationAttemptId().getApplicationId())
      .getCurrentAppAttempt();

  // Guard before ANY use of rmAttempt. The preemption branch used to
  // dereference it ahead of the null check, so a preempted container with no
  // current attempt raised a NullPointerException.
  if (rmAttempt == null) {
    return;
  }

  // If this is a preempted container, update preemption metrics.
  if (ContainerExitStatus.PREEMPTED == container.finishedStatus
      .getExitStatus()) {
    rmAttempt.getRMAppAttemptMetrics().updatePreemptionInfo(resource,
        container);
  }

  long usedMillis = container.finishTime - container.creationTime;
  long memorySeconds = (long) (resource.getMemory() * container.utilization)
      * usedMillis / DateUtils.MILLIS_PER_SECOND;
  long vcoreSeconds = (long) (resource.getVirtualCores() * container.utilization)
      * usedMillis / DateUtils.MILLIS_PER_SECOND;

  // NOTE(review): utilization is recomputed only AFTER memorySeconds and
  // vcoreSeconds were derived from its previous value, so the figures
  // reported below do not reflect the value stored here - confirm that this
  // ordering is intended.
  if (container.suspendTime.size() > 0 && container.resumeTime.size() > 0
      && container.suspendTime.size() == container.resumeTime.size()) {
    // Sum the (resume - suspend) interval of every recorded pair.
    double acc = 0;
    for (int i = 0; i < container.suspendTime.size(); i++) {
      acc = acc + (container.resumeTime.get(i) - container.suspendTime.get(i));
    }
    container.utilization = acc / usedMillis;
  }

  rmAttempt.getRMAppAttemptMetrics()
      .updateAggregateAppResourceUsage(memorySeconds, vcoreSeconds);
}
 
Example #26
Source File: YarnManager.java    From Scribengin with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Callback for completed containers. Currently it only logs entry and exit
 * and, with assertions enabled, checks that each status is COMPLETE; the
 * per-status handling is still a placeholder.
 *
 * @param statuses completion statuses of the containers that finished
 */
public void onContainersCompleted(List<ContainerStatus> statuses) {
  logger.info("Start onContainersCompleted(List<ContainerStatus> statuses)");
  for (ContainerStatus completed : statuses) {
    assert (completed.getState() == ContainerState.COMPLETE);
    final boolean succeeded =
        completed.getExitStatus() == ContainerExitStatus.SUCCESS;
    //TODO: update vm descriptor status
    if (succeeded) {
      // Nothing is done yet for containers that exited with SUCCESS.
    } else {
      // Nothing is done yet for containers with any other exit status.
    }
  }
  logger.info("Finish onContainersCompleted(List<ContainerStatus> statuses)");
}
 
Example #27
Source File: NMMemoryStateStoreService.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Marks the recovered state of the given container as LAUNCHED.
 *
 * @param containerId id of the container that was launched
 * @throws IOException if the recorded exit code is anything other than
 *         ContainerExitStatus.INVALID, i.e. the container already completed
 */
@Override
public synchronized void storeContainerLaunched(ContainerId containerId)
    throws IOException {
  final RecoveredContainerState recovered = getRecoveredContainerState(containerId);

  final boolean alreadyCompleted = recovered.exitCode != ContainerExitStatus.INVALID;
  if (alreadyCompleted) {
    throw new IOException("Container already completed");
  }

  recovered.status = RecoveredContainerStatus.LAUNCHED;
}
 
Example #28
Source File: TestAMContainer.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Brings a container to RUNNING, completes it with DISKS_FAILED /
 * NODE_DISK_ERROR, and checks the resulting state, listener calls, the two
 * outgoing events and the termination cause. A late task-attempt-succeeded
 * notification must then produce no further events and leave the container
 * out of the error state.
 */
@SuppressWarnings("rawtypes")
@Test (timeout=5000)
public void testContainerDiskFailedAtRunning() {
  final WrappedContainer wrapped = new WrappedContainer();

  wrapped.launchContainer();

  wrapped.assignTaskAttempt(wrapped.taskAttemptID);
  wrapped.containerLaunched();
  wrapped.verifyState(AMContainerState.RUNNING);

  wrapped.containerCompleted(ContainerExitStatus.DISKS_FAILED,
      TaskAttemptTerminationCause.NODE_DISK_ERROR, "NodeDiskError");
  wrapped.verifyState(AMContainerState.COMPLETED);
  verify(wrapped.tal).registerRunningContainer(wrapped.containerID, 0);
  verifyUnregisterRunningContainer(wrapped.tal, wrapped.containerID, 0,
      ContainerEndReason.OTHER, "NodeDiskError");
  verify(wrapped.chh).register(wrapped.containerID);
  verify(wrapped.chh).unregister(wrapped.containerID);

  final List<Event> emitted = wrapped.verifyCountAndGetOutgoingEvents(2);
  final Event terminated = findEventByType(emitted,
      TaskAttemptEventType.TA_CONTAINER_TERMINATED_BY_SYSTEM);
  Assert.assertEquals(TaskAttemptTerminationCause.NODE_DISK_ERROR,
      ((TaskAttemptEventContainerTerminatedBySystem) terminated).getTerminationCause());
  verifyUnOrderedOutgoingEventTypes(emitted,
      TaskAttemptEventType.TA_CONTAINER_TERMINATED_BY_SYSTEM,
      AMNodeEventType.N_CONTAINER_COMPLETED);

  assertFalse(wrapped.amContainer.isInErrorState());

  // Pending task complete. (Ideally, the container should be dead at this
  // point and this event should not be generated. A network timeout on the
  // NM-RM heartbeat can cause it to be generated.)
  wrapped.taskAttemptSucceeded(wrapped.taskAttemptID);
  wrapped.verifyNoOutgoingEvents();
  wrapped.verifyHistoryStopEvent();

  assertFalse(wrapped.amContainer.isInErrorState());
}
 
Example #29
Source File: AMContainerImpl.java    From tez with Apache License 2.0 5 votes vote down vote up
/**
 * Stops the container. A current attempt, if any, is sent a terminating
 * message with cause CONTAINER_STOPPED; the container is then unregistered
 * from the task-attempt listener, logged as stopped, and a stop request is
 * sent to the NM.
 *
 * @param container the container being transitioned
 * @param cEvent    the triggering event, used to build the messages
 */
@Override
public void transition(AMContainerImpl container, AMContainerEvent cEvent) {
  if (container.currentAttempt != null) {
    container.sendTerminatingToTaskAttempt(container.currentAttempt,
        getMessage(container, cEvent), TaskAttemptTerminationCause.CONTAINER_STOPPED);
  }
  container.unregisterFromTAListener(ContainerEndReason.OTHER,
      getMessage(container, cEvent));

  // SUCCESS is logged when no attempt was current, INVALID otherwise.
  final int stoppedStatus;
  if (container.currentAttempt == null) {
    stoppedStatus = ContainerExitStatus.SUCCESS;
  } else {
    stoppedStatus = ContainerExitStatus.INVALID;
  }
  container.logStopped(stoppedStatus);

  container.sendStopRequestToNM();
}
 
Example #30
Source File: RMContainerAllocator.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the task-attempt event that corresponds to a finished container.
 *
 * @param cont      status of the finished container
 * @param attemptID the task attempt that ran in the container
 * @return a TA_KILL event when the exit status is ABORTED or PREEMPTED,
 *         otherwise a TA_CONTAINER_COMPLETED event
 */
@VisibleForTesting
public TaskAttemptEvent createContainerFinishedEvent(ContainerStatus cont,
    TaskAttemptId attemptID) {
  // ABORTED and PREEMPTED both mean the container was killed by the framework.
  final boolean killedByFramework =
      cont.getExitStatus() == ContainerExitStatus.ABORTED
          || cont.getExitStatus() == ContainerExitStatus.PREEMPTED;

  final TaskAttemptEventType eventType = killedByFramework
      ? TaskAttemptEventType.TA_KILL
      : TaskAttemptEventType.TA_CONTAINER_COMPLETED;
  return new TaskAttemptEvent(attemptID, eventType);
}