Java Code Examples for org.apache.hadoop.yarn.api.records.ContainerExitStatus

The following examples show how to use org.apache.hadoop.yarn.api.records.ContainerExitStatus. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: hadoop   Source File: TestProtocolRecords.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds an {@code NMContainerStatus}, rebuilds it from its protobuf
 * message, and checks that every field read back matches what was put in.
 */
@Test
public void testNMContainerStatus() {
  ApplicationId applicationId = ApplicationId.newInstance(123456789, 1);
  ApplicationAttemptId appAttemptId =
      ApplicationAttemptId.newInstance(applicationId, 1);
  ContainerId cid = ContainerId.newContainerId(appAttemptId, 1);
  Resource allocated = Resource.newInstance(1000, 200, 300);

  // Create the record, then reconstruct it from nothing but its proto.
  NMContainerStatus original = NMContainerStatus.newInstance(cid,
      ContainerState.COMPLETE, allocated, "diagnostics",
      ContainerExitStatus.ABORTED, Priority.newInstance(10), 1234);
  NMContainerStatusPBImpl originalImpl = (NMContainerStatusPBImpl) original;
  NMContainerStatus rebuilt =
      new NMContainerStatusPBImpl(originalImpl.getProto());

  Assert.assertEquals("diagnostics", rebuilt.getDiagnostics());
  Assert.assertEquals(allocated, rebuilt.getAllocatedResource());
  Assert.assertEquals(ContainerExitStatus.ABORTED,
      rebuilt.getContainerExitStatus());
  Assert.assertEquals(ContainerState.COMPLETE, rebuilt.getContainerState());
  Assert.assertEquals(cid, rebuilt.getContainerId());
  Assert.assertEquals(Priority.newInstance(10), rebuilt.getPriority());
  Assert.assertEquals(1234, rebuilt.getCreationTime());
}
 
Example 2
Source Project: hadoop   Source File: ApplicationImpl.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Reacts to an application-finish event.
 *
 * <p>When the application has no containers, app-level cleanup is started
 * right away. Otherwise a kill event is dispatched for every container of
 * the application.
 *
 * @param app the application being finished
 * @param event the triggering event; cast to {@code ApplicationFinishEvent}
 * @return the state the application moves to
 */
@Override
public ApplicationState transition(ApplicationImpl app,
    ApplicationEvent event) {
  final ApplicationFinishEvent finishEvent = (ApplicationFinishEvent) event;

  // Nothing to kill: go straight to cleaning up app-level resources.
  if (app.containers.isEmpty()) {
    app.handleAppFinishWithContainersCleanedup();
    return ApplicationState.APPLICATION_RESOURCES_CLEANINGUP;
  }

  // The diagnostic text is the same for every container of this app.
  final String killReason =
      "Container killed on application-finish event: "
          + finishEvent.getDiagnostic();
  for (ContainerId runningId : app.containers.keySet()) {
    app.dispatcher.getEventHandler().handle(
        new ContainerKillEvent(runningId,
            ContainerExitStatus.KILLED_AFTER_APP_COMPLETION, killReason));
  }
  return ApplicationState.FINISHING_CONTAINERS_WAIT;
}
 
Example 3
Source Project: hadoop   Source File: TestContainer.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Kills a container while it is in the LOCALIZING state and checks the
 * resulting state, exit status, diagnostics and killed-container metric.
 */
@Test
public void testKillOnLocalizing() throws Exception {
  WrappedContainer wrapped = null;
  try {
    wrapped = new WrappedContainer(14, 314159265358979L, 4344, "yak");
    wrapped.initContainer();
    assertEquals(ContainerState.LOCALIZING,
        wrapped.c.getContainerState());

    // After the kill request the container is expected to be KILLING.
    wrapped.killContainer();
    assertEquals(ContainerState.KILLING, wrapped.c.getContainerState());
    assertEquals(ContainerExitStatus.KILLED_BY_RESOURCEMANAGER,
        wrapped.c.cloneAndGetContainerStatus().getExitStatus());
    assertTrue(wrapped.c.cloneAndGetContainerStatus().getDiagnostics()
        .contains("KillRequest"));

    // The killed counter is expected to grow by one across the cleanup.
    final int killedBeforeCleanup = metrics.getKilledContainers();
    wrapped.containerResourcesCleanup();
    assertEquals(ContainerState.DONE, wrapped.c.getContainerState());
    assertEquals(killedBeforeCleanup + 1, metrics.getKilledContainers());
  } finally {
    if (wrapped != null) {
      wrapped.finished();
    }
  }
}
 
Example 4
Source Project: hadoop   Source File: RMContainerImpl.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Folds a finished container's usage into the metrics of its application's
 * current attempt.
 *
 * <p>If there is no current attempt, nothing is updated. Otherwise
 * preemption info is recorded when the container's exit status is
 * {@code ContainerExitStatus.PREEMPTED}, and the memory-, vcore- and
 * gcore-seconds consumed between creation and finish are added to the
 * attempt's aggregate usage.
 *
 * @param container the finished container whose usage is being recorded
 */
private static void updateAttemptMetrics(RMContainerImpl container) {
  Resource resource = container.getContainer().getResource();
  RMAppAttempt rmAttempt = container.rmContext.getRMApps()
      .get(container.getApplicationAttemptId().getApplicationId())
      .getCurrentAppAttempt();

  // Guard first: the preemption update below used to dereference rmAttempt
  // before it was null-checked, which could throw a NullPointerException.
  if (rmAttempt == null) {
    return;
  }

  // If this is a preempted container, update preemption metrics
  if (ContainerExitStatus.PREEMPTED == container.finishedStatus
      .getExitStatus()) {
    rmAttempt.getRMAppAttemptMetrics().updatePreemptionInfo(resource,
        container);
  }

  long usedMillis = container.finishTime - container.creationTime;
  long memorySeconds = resource.getMemory()
      * usedMillis / DateUtils.MILLIS_PER_SECOND;
  long vcoreSeconds = resource.getVirtualCores()
      * usedMillis / DateUtils.MILLIS_PER_SECOND;
  long gcoreSeconds = resource.getGpuCores()
      * usedMillis / DateUtils.MILLIS_PER_SECOND;
  rmAttempt.getRMAppAttemptMetrics()
      .updateAggregateAppResourceUsage(memorySeconds, vcoreSeconds,
          gcoreSeconds);
}
 
Example 5
Source Project: tez   Source File: AMContainerImpl.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Handles a second task attempt being assigned to this container while one
 * is already assigned.
 *
 * <p>Marks the container as in error, sends a terminating notification for
 * both attempts with cause {@code FRAMEWORK_ERROR}, logs the problem, asks
 * the NM to stop the container and unregisters from the listeners.
 *
 * @param event the assignment event carrying the extra task attempt
 * @param currentTaId the task attempt already associated with the container
 */
private void handleExtraTAAssign(
    AMContainerEventAssignTA event, TezTaskAttemptID currentTaId) {
  setError();

  final StringBuilder text = new StringBuilder();
  text.append("AMScheduler Error: Multiple simultaneous ")
      .append("taskAttempt allocations to: ").append(getContainerId())
      .append(". Attempts: ").append(currentTaId)
      .append(", ").append(event.getTaskAttemptId())
      .append(". Current state: ").append(getState());
  final String errorMessage = text.toString();

  maybeSendNodeFailureForFailedAssignment(event.getTaskAttemptId());

  // Both the newly assigned and the current attempt are terminated.
  sendTerminatingToTaskAttempt(event.getTaskAttemptId(), errorMessage,
      TaskAttemptTerminationCause.FRAMEWORK_ERROR);
  sendTerminatingToTaskAttempt(currentTaId, errorMessage,
      TaskAttemptTerminationCause.FRAMEWORK_ERROR);
  registerFailedAttempt(event.getTaskAttemptId());

  LOG.warn(errorMessage);
  logStopped(ContainerExitStatus.INVALID);
  sendStopRequestToNM();
  unregisterFromTAListener(ContainerEndReason.FRAMEWORK_ERROR,
      errorMessage);
  unregisterFromContainerListener();
}
 
Example 6
Source Project: big-c   Source File: ApplicationImpl.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Reacts to an application-finish event.
 *
 * <p>When the application has no containers, app-level cleanup is started
 * right away. Otherwise a kill event is dispatched for every container of
 * the application.
 *
 * @param app the application being finished
 * @param event the triggering event; cast to {@code ApplicationFinishEvent}
 * @return the state the application moves to
 */
@Override
public ApplicationState transition(ApplicationImpl app,
    ApplicationEvent event) {
  final ApplicationFinishEvent finishEvent = (ApplicationFinishEvent) event;

  // Nothing to kill: go straight to cleaning up app-level resources.
  if (app.containers.isEmpty()) {
    app.handleAppFinishWithContainersCleanedup();
    return ApplicationState.APPLICATION_RESOURCES_CLEANINGUP;
  }

  // The diagnostic text is the same for every container of this app.
  final String killReason =
      "Container killed on application-finish event: "
          + finishEvent.getDiagnostic();
  for (ContainerId runningId : app.containers.keySet()) {
    app.dispatcher.getEventHandler().handle(
        new ContainerKillEvent(runningId,
            ContainerExitStatus.KILLED_AFTER_APP_COMPLETION, killReason));
  }
  return ApplicationState.FINISHING_CONTAINERS_WAIT;
}
 
Example 7
Source Project: big-c   Source File: TestContainer.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Kills a container that is still in the NEW state and checks that it goes
 * directly to DONE with the expected exit status, diagnostics and metric.
 */
@Test
public void testKillOnNew() throws Exception {
  WrappedContainer wrapped = null;
  try {
    wrapped = new WrappedContainer(13, 314159265358979L, 4344, "yak");
    assertEquals(ContainerState.NEW, wrapped.c.getContainerState());

    // Snapshot the counter so the increment caused by the kill is visible.
    final int killedBeforeKill = metrics.getKilledContainers();
    wrapped.killContainer();

    assertEquals(ContainerState.DONE, wrapped.c.getContainerState());
    assertEquals(ContainerExitStatus.KILLED_BY_RESOURCEMANAGER,
        wrapped.c.cloneAndGetContainerStatus().getExitStatus());
    assertTrue(wrapped.c.cloneAndGetContainerStatus().getDiagnostics()
        .contains("KillRequest"));
    assertEquals(killedBeforeKill + 1, metrics.getKilledContainers());
  } finally {
    if (wrapped != null) {
      wrapped.finished();
    }
  }
}
 
Example 8
Source Project: big-c   Source File: TestContainer.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Kills a container while it is in the LOCALIZING state and checks the
 * resulting state, exit status, diagnostics and killed-container metric.
 */
@Test
public void testKillOnLocalizing() throws Exception {
  WrappedContainer wrapped = null;
  try {
    wrapped = new WrappedContainer(14, 314159265358979L, 4344, "yak");
    wrapped.initContainer();
    assertEquals(ContainerState.LOCALIZING,
        wrapped.c.getContainerState());

    // After the kill request the container is expected to be KILLING.
    wrapped.killContainer();
    assertEquals(ContainerState.KILLING, wrapped.c.getContainerState());
    assertEquals(ContainerExitStatus.KILLED_BY_RESOURCEMANAGER,
        wrapped.c.cloneAndGetContainerStatus().getExitStatus());
    assertTrue(wrapped.c.cloneAndGetContainerStatus().getDiagnostics()
        .contains("KillRequest"));

    // The killed counter is expected to grow by one across the cleanup.
    final int killedBeforeCleanup = metrics.getKilledContainers();
    wrapped.containerResourcesCleanup();
    assertEquals(ContainerState.DONE, wrapped.c.getContainerState());
    assertEquals(killedBeforeCleanup + 1, metrics.getKilledContainers());
  } finally {
    if (wrapped != null) {
      wrapped.finished();
    }
  }
}
 
Example 9
Source Project: big-c   Source File: RMContainerImpl.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Handles a finished-container event for a container that is being
 * suspended.
 *
 * <p>Appends the current time to the container's suspend times, stores the
 * remote container status as its finished status, flags it as suspending,
 * and records preemption info on the current application attempt when the
 * exit status is {@code ContainerExitStatus.PREEMPTED}.
 *
 * @param container the container the event applies to
 * @param event the triggering event; cast to
 *     {@code RMContainerFinishedEvent}
 */
@Override
public void transition(RMContainerImpl container, RMContainerEvent event) {
  RMContainerFinishedEvent finishedEvent = (RMContainerFinishedEvent) event;

  // Add the suspend time.
  container.suspendTime.add(System.currentTimeMillis());
  Resource resource = container.getLastPreemptedResource();
  container.finishedStatus = finishedEvent.getRemoteContainerStatus();
  container.isSuspending = true;

  // Update preempt metrics.
  RMAppAttempt rmAttempt = container.rmContext.getRMApps()
      .get(container.getApplicationAttemptId().getApplicationId())
      .getCurrentAppAttempt();

  // updateAttemptMetrics in this class treats the current attempt as
  // possibly null; guard here too so a missing attempt does not throw a
  // NullPointerException in the middle of the transition.
  if (rmAttempt != null
      && ContainerExitStatus.PREEMPTED
          == container.finishedStatus.getExitStatus()) {
    rmAttempt.getRMAppAttemptMetrics().updatePreemptionInfo(resource,
        container);
  }
}
 
Example 10
Source Project: incubator-tez   Source File: TaskSchedulerEventHandler.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Forwards a container completion to the matching {@code AMContainer}.
 *
 * <p>Containers that are not known to the application context are ignored.
 * For known containers a message is built from a prefix chosen by exit
 * status, followed by the container's diagnostics when present, and sent as
 * an {@code AMContainerEventCompleted}.
 *
 * @param task not used by this method
 * @param containerStatus the status reported for the completed container
 */
@Override
public synchronized void containerCompleted(Object task, ContainerStatus containerStatus) {
  AMContainer amContainer = appContext.getAllContainers().get(containerStatus.getContainerId());
  if (amContainer == null) {
    return;
  }

  final int exitStatus = containerStatus.getExitStatus();
  final String prefix;
  if (exitStatus == ContainerExitStatus.PREEMPTED) {
    prefix = "Container preempted externally. ";
  } else if (exitStatus == ContainerExitStatus.DISKS_FAILED) {
    prefix = "Container disk failed. ";
  } else {
    prefix = "Container failed. ";
  }

  // Diagnostics are optional; append them only when they were reported.
  final String diagnostics = containerStatus.getDiagnostics();
  final String message = (diagnostics == null) ? prefix : prefix + diagnostics;

  // Inform the Containers about completion.
  sendEvent(new AMContainerEventCompleted(amContainer.getContainerId(), exitStatus, message));
}
 
Example 11
Source Project: incubator-tez   Source File: AMContainerImpl.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Handles a second task attempt being assigned to this container while one
 * is already assigned.
 *
 * <p>Flags the container as in error, sends a terminating notification for
 * both attempts, logs the problem, asks the NM to stop the container and
 * unregisters from the listeners.
 *
 * @param event the assignment event carrying the extra task attempt
 * @param currentTaId the task attempt already associated with the container
 */
private void handleExtraTAAssign(
    AMContainerEventAssignTA event, TezTaskAttemptID currentTaId) {
  this.inError = true;

  final StringBuilder text = new StringBuilder();
  text.append("AMScheduler Error: Multiple simultaneous ")
      .append("taskAttempt allocations to: ").append(getContainerId())
      .append(". Attempts: ").append(currentTaId)
      .append(", ").append(event.getTaskAttemptId())
      .append(". Current state: ").append(getState());
  final String errorMessage = text.toString();

  maybeSendNodeFailureForFailedAssignment(event.getTaskAttemptId());

  // Both the newly assigned and the current attempt are terminated.
  sendTerminatingToTaskAttempt(event.getTaskAttemptId(), errorMessage);
  sendTerminatingToTaskAttempt(currentTaId, errorMessage);
  registerFailedAttempt(event.getTaskAttemptId());

  LOG.warn(errorMessage);
  logStopped(ContainerExitStatus.INVALID);
  sendStopRequestToNM();
  unregisterFromTAListener();
  unregisterFromContainerListener();
}
 
Example 12
Source Project: hadoop   Source File: ContainerInfo.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Populates this info object from a container and the node context.
 *
 * <p>The exit status is rendered as {@code "N/A"} when the exit code equals
 * {@code ContainerExitStatus.INVALID}; missing diagnostics become the empty
 * string; {@code null} link components are treated as empty strings.
 *
 * @param nmContext context supplying the node id
 * @param container the container to describe
 * @param requestUri first component of the logs link; may be {@code null}
 * @param pathPrefix second component of the logs link; may be {@code null}
 */
public ContainerInfo(final Context nmContext, final Container container,
     String requestUri, String pathPrefix) {

  this.id = container.getContainerId().toString();
  this.nodeId = nmContext.getNodeId().toString();

  final ContainerStatus status = container.cloneAndGetContainerStatus();
  this.exitCode = status.getExitStatus();
  if (this.exitCode == ContainerExitStatus.INVALID) {
    this.exitStatus = "N/A";
  } else {
    this.exitStatus = String.valueOf(exitCode);
  }
  this.state = container.getContainerState().toString();

  final String reported = status.getDiagnostics();
  this.diagnostics =
      (reported == null || reported.isEmpty()) ? "" : reported;

  this.user = container.getUser();
  final Resource needed = container.getResource();
  if (needed != null) {
    this.totalMemoryNeededMB = needed.getMemory();
    this.totalVCoresNeeded = needed.getVirtualCores();
  }
  this.containerLogsShortLink = ujoin("containerlogs", this.id,
      container.getUser());

  // Null link components are joined as empty strings.
  final String uriPart = (requestUri == null) ? "" : requestUri;
  final String prefixPart = (pathPrefix == null) ? "" : pathPrefix;
  this.containerLogsLink = join(uriPart, prefixPart,
      this.containerLogsShortLink);
}
 
Example 13
Source Project: hadoop   Source File: ContainerManagerImpl.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Stops a single container on behalf of the caller identified by the token.
 *
 * <p>After authorization, an unknown container is accepted silently only if
 * the node status updater reports it as recently stopped. A known container
 * is recorded as killed in the state store, sent a kill event, audited, and
 * followed by an out-of-band heartbeat.
 *
 * @param nmTokenIdentifier identity used for the authorization check
 * @param containerID the container to stop
 * @throws YarnException if the container is unknown and not recently stopped
 * @throws IOException declared by this method; raised by callees
 */
@SuppressWarnings("unchecked")
private void stopContainerInternal(NMTokenIdentifier nmTokenIdentifier,
    ContainerId containerID) throws YarnException, IOException {
  final String idText = containerID.toString();
  final Container target = this.context.getContainers().get(containerID);
  LOG.info("Stopping container with container Id: " + idText);
  authorizeGetAndStopContainerRequest(containerID, target, true,
      nmTokenIdentifier);

  if (target == null) {
    // Unknown container: tolerated only when it was stopped recently.
    if (nodeStatusUpdater.isContainerRecentlyStopped(containerID)) {
      return;
    }
    throw RPCUtil.getRemoteException("Container " + idText
        + " is not handled by this NodeManager");
  }

  context.getNMStateStore().storeContainerKilled(containerID);
  dispatcher.getEventHandler().handle(
      new ContainerKillEvent(containerID,
          ContainerExitStatus.KILLED_BY_APPMASTER,
          "Container killed by the ApplicationMaster."));

  NMAuditLogger.logSuccess(target.getUser(),
      AuditConstants.STOP_CONTAINER, "ContainerManageImpl",
      containerID.getApplicationAttemptId().getApplicationId(),
      containerID);

  // TODO: Move this code to appropriate place once kill_container is
  // implemented.
  nodeStatusUpdater.sendOutofBandHeartBeat();
}
 
Example 14
Source Project: hadoop   Source File: ContainerManagerImpl.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Dispatches container-manager events.
 *
 * <p>{@code FINISH_APPS}: for each application to clean up, stores it as
 * finished (a store failure is logged and does not stop processing) and
 * dispatches an {@code ApplicationFinishEvent} with a diagnostic derived
 * from the event's reason. {@code FINISH_CONTAINERS}: dispatches a kill
 * event for each container to clean up.
 *
 * @param event the event to process
 * @throws YarnRuntimeException for any other event type
 */
@SuppressWarnings("unchecked")
@Override
public void handle(ContainerManagerEvent event) {
  switch (event.getType()) {
  case FINISH_APPS:
    CMgrCompletedAppsEvent completedApps = (CMgrCompletedAppsEvent) event;
    for (ApplicationId finishedApp : completedApps.getAppsToCleanup()) {
      // Any reason other than the two below yields an empty diagnostic.
      final String reasonText;
      if (completedApps.getReason()
          == CMgrCompletedAppsEvent.Reason.ON_SHUTDOWN) {
        reasonText = "Application killed on shutdown";
      } else if (completedApps.getReason()
          == CMgrCompletedAppsEvent.Reason.BY_RESOURCEMANAGER) {
        reasonText = "Application killed by ResourceManager";
      } else {
        reasonText = "";
      }

      try {
        this.context.getNMStateStore().storeFinishedApplication(finishedApp);
      } catch (IOException e) {
        LOG.error("Unable to update application state in store", e);
      }
      this.dispatcher.getEventHandler().handle(
          new ApplicationFinishEvent(finishedApp, reasonText));
    }
    break;
  case FINISH_CONTAINERS:
    CMgrCompletedContainersEvent completedContainers =
        (CMgrCompletedContainersEvent) event;
    for (ContainerId toKill
        : completedContainers.getContainersToCleanup()) {
      this.dispatcher.getEventHandler().handle(
          new ContainerKillEvent(toKill,
              ContainerExitStatus.KILLED_BY_RESOURCEMANAGER,
              "Container Killed by ResourceManager"));
    }
    break;
  default:
    throw new YarnRuntimeException(
        "Got an unknown ContainerManagerEvent type: " + event.getType());
  }
}
 
Example 15
Source Project: hadoop   Source File: NMMemoryStateStoreService.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Marks the recovered state of a container as launched.
 *
 * @param containerId the container that was launched
 * @throws IOException if the container's recorded exit code is anything
 *     other than {@code ContainerExitStatus.INVALID}
 */
@Override
public synchronized void storeContainerLaunched(ContainerId containerId)
    throws IOException {
  final RecoveredContainerState recovered =
      getRecoveredContainerState(containerId);

  // A recorded exit code means the container has already finished.
  final boolean alreadyCompleted =
      recovered.exitCode != ContainerExitStatus.INVALID;
  if (alreadyCompleted) {
    throw new IOException("Container already completed");
  }

  recovered.status = RecoveredContainerStatus.LAUNCHED;
}
 
Example 16
Source Project: hadoop   Source File: RMAppAttemptImpl.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Tells whether this attempt counts towards the maximum attempt retries.
 *
 * @return {@code false} when the AM container exit status is PREEMPTED,
 *     ABORTED, DISKS_FAILED or KILLED_BY_RESOURCEMANAGER; {@code true}
 *     otherwise
 */
@Override
public boolean shouldCountTowardsMaxAttemptRetry() {
  // Acquire before entering try: if lock() itself fails, the finally block
  // must not try to release a lock this thread never obtained.
  this.readLock.lock();
  try {
    int exitStatus = getAMContainerExitStatus();
    return !(exitStatus == ContainerExitStatus.PREEMPTED
        || exitStatus == ContainerExitStatus.ABORTED
        || exitStatus == ContainerExitStatus.DISKS_FAILED
        || exitStatus == ContainerExitStatus.KILLED_BY_RESOURCEMANAGER);
  } finally {
    this.readLock.unlock();
  }
}
 
Example 17
Source Project: hadoop   Source File: CapacityScheduler.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Process node labels update on a node.
 *
 * <p>Does nothing when the node is unknown or its labels already match
 * {@code newLabels}. Otherwise every running container on the node is
 * completed with a KILL event, any reserved container is dropped, and the
 * node's labels are replaced.
 *
 * TODO: Currently capacity scheduler will kill containers on a node when
 * labels on the node changed. It is a simple solution to ensure guaranteed
 * capacity on labels of queues. When YARN-2498 completed, we can let
 * preemption policy to decide if such containers need to be killed or just
 * keep them running.
 *
 * @param nodeId the node whose labels changed
 * @param newLabels the labels the node should carry afterwards
 */
private synchronized void updateLabelsOnNode(NodeId nodeId,
    Set<String> newLabels) {
  final FiCaSchedulerNode schedulerNode = nodes.get(nodeId);
  if (schedulerNode == null) {
    return;
  }

  // Same size plus containment means the two label sets are equal.
  final boolean labelsUnchanged =
      schedulerNode.getLabels().size() == newLabels.size()
          && schedulerNode.getLabels().containsAll(newLabels);
  if (labelsUnchanged) {
    return;
  }

  // Kill running containers since label is changed
  for (RMContainer running : schedulerNode.getRunningContainers()) {
    final ContainerId runningId = running.getContainerId();
    final String diagnostics = String.format(
        "Container=%s killed since labels on the node=%s changed",
        runningId.toString(), nodeId.toString());
    final ContainerStatus killedStatus = ContainerStatus.newInstance(
        runningId, ContainerState.COMPLETE, diagnostics,
        ContainerExitStatus.KILLED_BY_RESOURCEMANAGER);
    completedContainer(running, killedStatus, RMContainerEventType.KILL);
  }

  // Unreserve container on this node
  final RMContainer reserved = schedulerNode.getReservedContainer();
  if (reserved != null) {
    dropContainerReservation(reserved);
  }

  // Update node labels after we've done this
  schedulerNode.updateLabels(newLabels);
}
 
Example 18
Source Project: hadoop   Source File: ApplicationAttemptStateData.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Convenience factory that delegates to the longer {@code newInstance}
 * overload, supplying fixed values ({@code null}, {@code "N/A"},
 * {@code ""}, {@code null}, {@code ContainerExitStatus.INVALID} and
 * {@code 0}) for the arguments this overload does not take.
 *
 * @param attemptId passed through unchanged
 * @param masterContainer passed through unchanged
 * @param attemptTokens passed through unchanged
 * @param startTime passed through unchanged
 * @param memorySeconds passed through unchanged
 * @param vcoreSeconds passed through unchanged
 * @param gcoreSeconds passed through unchanged
 * @return whatever the delegated overload returns
 */
public static ApplicationAttemptStateData newInstance(
    ApplicationAttemptId attemptId, Container masterContainer,
    Credentials attemptTokens, long startTime, long memorySeconds,
    long vcoreSeconds, long gcoreSeconds) {
  return newInstance(
      attemptId,
      masterContainer,
      attemptTokens,
      startTime,
      null,
      "N/A",
      "",
      null,
      ContainerExitStatus.INVALID,
      0,
      memorySeconds,
      vcoreSeconds,
      gcoreSeconds);
}
 
Example 19
Source Project: hadoop   Source File: TestSchedulerUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that {@code SchedulerUtils.createAbnormalContainerStatus} yields a
 * status whose exit status is {@code ContainerExitStatus.ABORTED}.
 */
@Test
public void testCreateAbnormalContainerStatus() {
  // Build the container id step by step instead of in one nested call.
  ApplicationId appId =
      ApplicationId.newInstance(System.currentTimeMillis(), 1);
  ApplicationAttemptId attemptId =
      ApplicationAttemptId.newInstance(appId, 1);
  ContainerId containerId = ContainerId.newContainerId(attemptId, 1);

  ContainerStatus abnormal =
      SchedulerUtils.createAbnormalContainerStatus(containerId, "x");

  Assert.assertEquals(ContainerExitStatus.ABORTED, abnormal.getExitStatus());
}
 
Example 20
Source Project: hadoop   Source File: TestSchedulerUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that {@code SchedulerUtils.createPreemptedContainerStatus} yields
 * a status whose exit status is {@code ContainerExitStatus.PREEMPTED}.
 */
@Test
public void testCreatePreemptedContainerStatus() {
  // Build the container id step by step instead of in one nested call.
  ApplicationId appId =
      ApplicationId.newInstance(System.currentTimeMillis(), 1);
  ApplicationAttemptId attemptId =
      ApplicationAttemptId.newInstance(appId, 1);
  ContainerId containerId = ContainerId.newContainerId(attemptId, 1);

  ContainerStatus preempted =
      SchedulerUtils.createPreemptedContainerStatus(containerId, "x");

  Assert.assertEquals(ContainerExitStatus.PREEMPTED,
      preempted.getExitStatus());
}
 
Example 21
Source Project: hadoop   Source File: NodeInfo.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds the container updates for this node.
 *
 * <p>Creates one status per id in {@code toCleanUpContainers} (state
 * RUNNING, empty diagnostics, exit status SUCCESS) and wraps them in a
 * single {@code UpdatedContainerInfo} whose first list is empty and whose
 * second list holds those statuses.
 *
 * @return a list containing exactly that one {@code UpdatedContainerInfo}
 */
public List<UpdatedContainerInfo> pullContainerUpdates() {
  ArrayList<ContainerStatus> cleanupStatuses =
      new ArrayList<ContainerStatus>();
  for (ContainerId id : this.toCleanUpContainers) {
    ContainerStatus status = ContainerStatus.newInstance(
        id, ContainerState.RUNNING, "", ContainerExitStatus.SUCCESS);
    cleanupStatuses.add(status);
  }

  UpdatedContainerInfo update = new UpdatedContainerInfo(
      new ArrayList<ContainerStatus>(), cleanupStatuses);

  ArrayList<UpdatedContainerInfo> updates =
      new ArrayList<UpdatedContainerInfo>();
  updates.add(update);
  return updates;
}
 
Example 22
Source Project: tez   Source File: TestAMContainer.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Completes a container with exit status ABORTED and cause NODE_FAILED
 * after a task attempt was assigned but before the launch was reported,
 * then checks the resulting state, listener calls and outgoing events.
 */
@SuppressWarnings("rawtypes")
@Test (timeout=5000)
public void testContainerCompletedAtLaunchingSpecificError() {
  WrappedContainer wrapped = new WrappedContainer();

  wrapped.launchContainer();
  wrapped.assignTaskAttempt(wrapped.taskAttemptID);

  wrapped.containerCompleted(ContainerExitStatus.ABORTED,
      TaskAttemptTerminationCause.NODE_FAILED, "NodeFailed");
  wrapped.verifyState(AMContainerState.COMPLETED);
  verify(wrapped.tal).registerRunningContainer(wrapped.containerID, 0);
  verifyUnregisterRunningContainer(wrapped.tal, wrapped.containerID, 0,
      ContainerEndReason.NODE_FAILED, "NodeFailed");

  // Exactly two events are expected, in no particular order.
  List<Event> events = wrapped.verifyCountAndGetOutgoingEvents(2);
  verifyUnOrderedOutgoingEventTypes(events,
      TaskAttemptEventType.TA_CONTAINER_TERMINATED,
      AMNodeEventType.N_CONTAINER_COMPLETED);
  Assert.assertEquals(TaskAttemptTerminationCause.NODE_FAILED,
      ((TaskAttemptEventContainerTerminated) events.get(0))
          .getTerminationCause());

  assertFalse(wrapped.amContainer.isInErrorState());

  // Container launched generated by NM call.
  wrapped.containerLaunched();
  wrapped.verifyNoOutgoingEvents();

  assertFalse(wrapped.amContainer.isInErrorState());
}
 
Example 23
Source Project: big-c   Source File: ContainerManagerImpl.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Stops a single container on behalf of the caller identified by the token.
 *
 * <p>After authorization, an unknown container is accepted silently only if
 * the node status updater reports it as recently stopped. A known container
 * is recorded as killed in the state store, sent a kill event, audited, and
 * followed by an out-of-band heartbeat.
 *
 * @param nmTokenIdentifier identity used for the authorization check
 * @param containerID the container to stop
 * @throws YarnException if the container is unknown and not recently stopped
 * @throws IOException declared by this method; raised by callees
 */
@SuppressWarnings("unchecked")
private void stopContainerInternal(NMTokenIdentifier nmTokenIdentifier,
    ContainerId containerID) throws YarnException, IOException {
  final String idText = containerID.toString();
  final Container target = this.context.getContainers().get(containerID);
  LOG.info("Stopping container with container Id: " + idText);
  authorizeGetAndStopContainerRequest(containerID, target, true,
      nmTokenIdentifier);

  if (target == null) {
    // Unknown container: tolerated only when it was stopped recently.
    if (nodeStatusUpdater.isContainerRecentlyStopped(containerID)) {
      return;
    }
    throw RPCUtil.getRemoteException("Container " + idText
        + " is not handled by this NodeManager");
  }

  context.getNMStateStore().storeContainerKilled(containerID);
  dispatcher.getEventHandler().handle(
      new ContainerKillEvent(containerID,
          ContainerExitStatus.KILLED_BY_APPMASTER,
          "Container killed by the ApplicationMaster."));

  NMAuditLogger.logSuccess(target.getUser(),
      AuditConstants.STOP_CONTAINER, "ContainerManageImpl",
      containerID.getApplicationAttemptId().getApplicationId(),
      containerID);

  // TODO: Move this code to appropriate place once kill_container is
  // implemented.
  nodeStatusUpdater.sendOutofBandHeartBeat();
}
 
Example 24
Source Project: big-c   Source File: NMMemoryStateStoreService.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Marks the recovered state of a container as launched.
 *
 * @param containerId the container that was launched
 * @throws IOException if the container's recorded exit code is anything
 *     other than {@code ContainerExitStatus.INVALID}
 */
@Override
public synchronized void storeContainerLaunched(ContainerId containerId)
    throws IOException {
  final RecoveredContainerState recovered =
      getRecoveredContainerState(containerId);

  // A recorded exit code means the container has already finished.
  final boolean alreadyCompleted =
      recovered.exitCode != ContainerExitStatus.INVALID;
  if (alreadyCompleted) {
    throw new IOException("Container already completed");
  }

  recovered.status = RecoveredContainerStatus.LAUNCHED;
}
 
Example 25
Source Project: big-c   Source File: RMContainerImpl.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Folds a finished container's usage into the metrics of its application's
 * current attempt.
 *
 * <p>If there is no current attempt, nothing is updated. Otherwise
 * preemption info is recorded when the container's exit status is
 * {@code ContainerExitStatus.PREEMPTED}, the memory- and vcore-seconds
 * (scaled by {@code container.utilization}) are added to the attempt's
 * aggregate usage, and {@code container.utilization} is recomputed from the
 * recorded suspend/resume times when both lists are non-empty and of equal
 * length.
 *
 * @param container the finished container whose usage is being recorded
 */
private static void updateAttemptMetrics(RMContainerImpl container) {
  Resource resource = container.getContainer().getResource();
  RMAppAttempt rmAttempt = container.rmContext.getRMApps()
      .get(container.getApplicationAttemptId().getApplicationId())
      .getCurrentAppAttempt();

  // Guard first: the preemption update below used to dereference rmAttempt
  // before it was null-checked, which could throw a NullPointerException.
  if (rmAttempt == null) {
    return;
  }

  // If this is a preempted container, update preemption metrics
  if (ContainerExitStatus.PREEMPTED == container.finishedStatus
      .getExitStatus()) {
    rmAttempt.getRMAppAttemptMetrics().updatePreemptionInfo(resource,
        container);
  }

  long usedMillis = container.finishTime - container.creationTime;
  long memorySeconds = (long)(resource.getMemory()*container.utilization)
      * usedMillis / DateUtils.MILLIS_PER_SECOND;
  long vcoreSeconds = (long)(resource.getVirtualCores()*container.utilization)
      * usedMillis / DateUtils.MILLIS_PER_SECOND;

  // NOTE(review): utilization is recomputed only after memorySeconds and
  // vcoreSeconds were derived from its previous value, and usedMillis may be
  // zero here. Order kept as in the original -- confirm this is intended.
  if (container.suspendTime.size() > 0 && container.resumeTime.size() > 0
      && container.suspendTime.size() == container.resumeTime.size()) {
    double acc = 0;
    for (int i = 0; i < container.suspendTime.size(); i++) {
      acc = acc + (container.resumeTime.get(i) - container.suspendTime.get(i));
    }
    container.utilization = acc/usedMillis;
  }
  rmAttempt.getRMAppAttemptMetrics()
      .updateAggregateAppResourceUsage(memorySeconds, vcoreSeconds);
}
 
Example 26
Source Project: big-c   Source File: RMAppAttemptImpl.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Tells whether this attempt counts towards the maximum attempt retries.
 *
 * @return {@code false} when the AM container exit status is PREEMPTED,
 *     ABORTED, DISKS_FAILED or KILLED_BY_RESOURCEMANAGER; {@code true}
 *     otherwise
 */
@Override
public boolean shouldCountTowardsMaxAttemptRetry() {
  // Acquire before entering try: if lock() itself fails, the finally block
  // must not try to release a lock this thread never obtained.
  this.readLock.lock();
  try {
    int exitStatus = getAMContainerExitStatus();
    return !(exitStatus == ContainerExitStatus.PREEMPTED
        || exitStatus == ContainerExitStatus.ABORTED
        || exitStatus == ContainerExitStatus.DISKS_FAILED
        || exitStatus == ContainerExitStatus.KILLED_BY_RESOURCEMANAGER);
  } finally {
    this.readLock.unlock();
  }
}
 
Example 27
Source Project: big-c   Source File: CapacityScheduler.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Process node labels update on a node.
 *
 * <p>Does nothing when the node is unknown or its labels already match
 * {@code newLabels}. Otherwise every running container on the node is
 * completed with a KILL event, any reserved container is dropped, and the
 * node's labels are replaced.
 *
 * TODO: Currently capacity scheduler will kill containers on a node when
 * labels on the node changed. It is a simple solution to ensure guaranteed
 * capacity on labels of queues. When YARN-2498 completed, we can let
 * preemption policy to decide if such containers need to be killed or just
 * keep them running.
 *
 * @param nodeId the node whose labels changed
 * @param newLabels the labels the node should carry afterwards
 */
private synchronized void updateLabelsOnNode(NodeId nodeId,
    Set<String> newLabels) {
  final FiCaSchedulerNode schedulerNode = nodes.get(nodeId);
  if (schedulerNode == null) {
    return;
  }

  // Same size plus containment means the two label sets are equal.
  final boolean labelsUnchanged =
      schedulerNode.getLabels().size() == newLabels.size()
          && schedulerNode.getLabels().containsAll(newLabels);
  if (labelsUnchanged) {
    return;
  }

  // Kill running containers since label is changed
  for (RMContainer running : schedulerNode.getRunningContainers()) {
    final ContainerId runningId = running.getContainerId();
    final String diagnostics = String.format(
        "Container=%s killed since labels on the node=%s changed",
        runningId.toString(), nodeId.toString());
    final ContainerStatus killedStatus = ContainerStatus.newInstance(
        runningId, ContainerState.COMPLETE, diagnostics,
        ContainerExitStatus.KILLED_BY_RESOURCEMANAGER);
    completedContainer(running, killedStatus, RMContainerEventType.KILL);
  }

  // Unreserve container on this node
  final RMContainer reserved = schedulerNode.getReservedContainer();
  if (reserved != null) {
    dropContainerReservation(reserved);
  }

  // Update node labels after we've done this
  schedulerNode.updateLabels(newLabels);
}
 
Example 28
Source Project: big-c   Source File: ApplicationAttemptStateData.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Convenience factory that delegates to the longer {@code newInstance}
 * overload, supplying fixed values ({@code null}, {@code "N/A"},
 * {@code ""}, {@code null}, {@code ContainerExitStatus.INVALID} and
 * {@code 0}) for the arguments this overload does not take.
 *
 * @param attemptId passed through unchanged
 * @param masterContainer passed through unchanged
 * @param attemptTokens passed through unchanged
 * @param startTime passed through unchanged
 * @param memorySeconds passed through unchanged
 * @param vcoreSeconds passed through unchanged
 * @return whatever the delegated overload returns
 */
public static ApplicationAttemptStateData newInstance(
    ApplicationAttemptId attemptId, Container masterContainer,
    Credentials attemptTokens, long startTime, long memorySeconds,
    long vcoreSeconds) {
  return newInstance(
      attemptId,
      masterContainer,
      attemptTokens,
      startTime,
      null,
      "N/A",
      "",
      null,
      ContainerExitStatus.INVALID,
      0,
      memorySeconds,
      vcoreSeconds);
}
 
Example 29
Source Project: tez   Source File: TestAMContainer.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Completes a container with exit status DISKS_FAILED and cause
 * NODE_DISK_ERROR after a task attempt was assigned but before the launch
 * was reported, then checks the resulting state, listener calls and
 * outgoing events.
 */
@SuppressWarnings("rawtypes")
@Test (timeout=5000)
public void testContainerCompletedAtLaunchingSpecificClusterError() {
  WrappedContainer wrapped = new WrappedContainer();

  wrapped.launchContainer();
  wrapped.assignTaskAttempt(wrapped.taskAttemptID);

  wrapped.containerCompleted(ContainerExitStatus.DISKS_FAILED,
      TaskAttemptTerminationCause.NODE_DISK_ERROR, "DiskFailed");
  wrapped.verifyState(AMContainerState.COMPLETED);
  verify(wrapped.tal).registerRunningContainer(wrapped.containerID, 0);
  verifyUnregisterRunningContainer(wrapped.tal, wrapped.containerID, 0,
      ContainerEndReason.OTHER, "DiskFailed");

  // Exactly two events are expected, in no particular order.
  List<Event> events = wrapped.verifyCountAndGetOutgoingEvents(2);
  verifyUnOrderedOutgoingEventTypes(events,
      TaskAttemptEventType.TA_CONTAINER_TERMINATED_BY_SYSTEM,
      AMNodeEventType.N_CONTAINER_COMPLETED);
  Assert.assertEquals(TaskAttemptTerminationCause.NODE_DISK_ERROR,
      ((TaskAttemptEventContainerTerminatedBySystem) events.get(0))
          .getTerminationCause());

  assertFalse(wrapped.amContainer.isInErrorState());

  // Container launched generated by NM call.
  wrapped.containerLaunched();
  wrapped.verifyNoOutgoingEvents();

  assertFalse(wrapped.amContainer.isInErrorState());
}
 
Example 30
Source Project: big-c   Source File: TestSchedulerUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that {@code SchedulerUtils.createAbnormalContainerStatus} yields a
 * status whose exit status is {@code ContainerExitStatus.ABORTED}.
 */
@Test
public void testCreateAbnormalContainerStatus() {
  // Build the container id step by step instead of in one nested call.
  ApplicationId appId =
      ApplicationId.newInstance(System.currentTimeMillis(), 1);
  ApplicationAttemptId attemptId =
      ApplicationAttemptId.newInstance(appId, 1);
  ContainerId containerId = ContainerId.newContainerId(attemptId, 1);

  ContainerStatus abnormal =
      SchedulerUtils.createAbnormalContainerStatus(containerId, "x");

  Assert.assertEquals(ContainerExitStatus.ABORTED, abnormal.getExitStatus());
}