Java Code Examples for org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils

The following examples show how to use org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils. These examples are extracted from open source projects; the originating project and source file are listed above each example.
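
Most of the snippets below revolve around a small set of static helpers: createAbnormalContainerStatus and createPreemptedContainerStatus build a terminal ContainerStatus carrying a diagnostic string such as SchedulerUtils.LOST_CONTAINER or SchedulerUtils.PREEMPTED_CONTAINER, the normalize/validate helpers sanitize incoming ResourceRequests, and toAccessType maps a QueueACL onto an AccessType for ACL lookups. The minimal sketch below exercises the two status helpers on their own; it is illustrative only, the class name and ids are made up, and it assumes BuilderUtils from org.apache.hadoop.yarn.server.utils is available, as in the test examples further down.

import org.apache.hadoop.yarn.api.records.ApplicationAttemptId;
import org.apache.hadoop.yarn.api.records.ApplicationId;
import org.apache.hadoop.yarn.api.records.ContainerId;
import org.apache.hadoop.yarn.api.records.ContainerStatus;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils;
import org.apache.hadoop.yarn.server.utils.BuilderUtils;

public class SchedulerUtilsStatusSketch {
  public static void main(String[] args) {
    // Build an illustrative container id, the same way the tests below do.
    ApplicationId appId = ApplicationId.newInstance(System.currentTimeMillis(), 1);
    ApplicationAttemptId attemptId = ApplicationAttemptId.newInstance(appId, 1);
    ContainerId containerId = BuilderUtils.newContainerId(attemptId, 1);

    // An "abnormal" status marks the container as finished with the given
    // diagnostics; Example 30 below asserts it reports an ABORTED exit status.
    ContainerStatus lost = SchedulerUtils.createAbnormalContainerStatus(
        containerId, SchedulerUtils.LOST_CONTAINER);

    // The preempted variant is what the schedulers pass to completedContainer
    // when killing a container for preemption (see Examples 8 and 12).
    ContainerStatus preempted = SchedulerUtils.createPreemptedContainerStatus(
        containerId, SchedulerUtils.PREEMPTED_CONTAINER);

    System.out.println(lost.getDiagnostics() + " / " + preempted.getDiagnostics());
  }
}
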
Example 1
Source Project: hadoop   Source File: RMServerUtils.java    License: Apache License 2.0
/**
 * Utility method to validate a list of resource requests, by ensuring that the
 * requested memory/vcores are non-negative and not greater than the maximum
 */
public static void normalizeAndValidateRequests(List<ResourceRequest> ask,
    Resource maximumResource, String queueName, YarnScheduler scheduler,
    RMContext rmContext)
    throws InvalidResourceRequestException {

  QueueInfo queueInfo = null;
  try {
    queueInfo = scheduler.getQueueInfo(queueName, false, false);
  } catch (IOException e) {
    // Queue lookup is best-effort; validation continues with a null QueueInfo.
  }

  for (ResourceRequest resReq : ask) {
    SchedulerUtils.normalizeAndvalidateRequest(resReq, maximumResource,
        queueName, scheduler, rmContext, queueInfo);
  }
}
 
Example 2
Source Project: hadoop   Source File: FifoScheduler.java    License: Apache License 2.0
private synchronized void removeNode(RMNode nodeInfo) {
  FiCaSchedulerNode node = getNode(nodeInfo.getNodeID());
  if (node == null) {
    return;
  }
  // Kill running containers
  for(RMContainer container : node.getRunningContainers()) {
    completedContainer(container, 
        SchedulerUtils.createAbnormalContainerStatus(
            container.getContainerId(), 
            SchedulerUtils.LOST_CONTAINER),
            RMContainerEventType.KILL);
  }
  
  //Remove the node
  this.nodes.remove(nodeInfo.getNodeID());
  updateMaximumAllocation(node, false);
  
  // Update cluster metrics
  Resources.subtractFrom(clusterResource, node.getRMNode().getTotalCapability());
}
 
Example 3
Source Project: hadoop   Source File: TestRMAppAttemptTransitions.java    License: Apache License 2.0
@Test
public void testAMCrashAtScheduled() {
  // This is to test sending CONTAINER_FINISHED event at SCHEDULED state.
  // Verify the state transition is correct.
  scheduleApplicationAttempt();
  ContainerStatus cs =
      SchedulerUtils.createAbnormalContainerStatus(
          BuilderUtils.newContainerId(
              applicationAttempt.getAppAttemptId(), 1),
          SchedulerUtils.LOST_CONTAINER);
  // send CONTAINER_FINISHED event at SCHEDULED state,
  // The state should be FINAL_SAVING with previous state SCHEDULED
  NodeId anyNodeId = NodeId.newInstance("host", 1234);
  applicationAttempt.handle(new RMAppAttemptContainerFinishedEvent(
      applicationAttempt.getAppAttemptId(), cs, anyNodeId));
  // createApplicationAttemptState will return previous state (SCHEDULED),
  // if the current state is FINAL_SAVING.
  assertEquals(YarnApplicationAttemptState.SCHEDULED,
      applicationAttempt.createApplicationAttemptState());
  // send ATTEMPT_UPDATE_SAVED event,
  // verify the state is changed to state FAILED.
  sendAttemptUpdateSavedEvent(applicationAttempt);
  assertEquals(RMAppAttemptState.FAILED,
      applicationAttempt.getAppAttemptState());
  verifyApplicationAttemptFinished(RMAppAttemptState.FAILED);
}
 
Example 4
Source Project: big-c   Source File: RMServerUtils.java    License: Apache License 2.0
/**
 * Utility method to validate a list of resource requests, by ensuring that the
 * requested memory/vcores are non-negative and not greater than the maximum
 */
public static void normalizeAndValidateRequests(List<ResourceRequest> ask,
    Resource maximumResource, String queueName, YarnScheduler scheduler,
    RMContext rmContext)
    throws InvalidResourceRequestException {

  QueueInfo queueInfo = null;
  try {
    queueInfo = scheduler.getQueueInfo(queueName, false, false);
  } catch (IOException e) {
    // Queue lookup is best-effort; validation continues with a null QueueInfo.
  }

  for (ResourceRequest resReq : ask) {
    SchedulerUtils.normalizeAndvalidateRequest(resReq, maximumResource,
        queueName, scheduler, rmContext, queueInfo);
  }
}
 
Example 5
Source Project: big-c   Source File: FifoScheduler.java    License: Apache License 2.0
private synchronized void removeNode(RMNode nodeInfo) {
  FiCaSchedulerNode node = getNode(nodeInfo.getNodeID());
  if (node == null) {
    return;
  }
  // Kill running containers
  for(RMContainer container : node.getRunningContainers()) {
    completedContainer(container, 
        SchedulerUtils.createAbnormalContainerStatus(
            container.getContainerId(), 
            SchedulerUtils.LOST_CONTAINER),
            RMContainerEventType.KILL);
  }
  
  //Remove the node
  this.nodes.remove(nodeInfo.getNodeID());
  updateMaximumAllocation(node, false);
  
  // Update cluster metrics
  Resources.subtractFrom(clusterResource, node.getRMNode().getTotalCapability());
}
 
Example 6
Source Project: big-c   Source File: TestRMAppAttemptTransitions.java    License: Apache License 2.0
@Test
public void testAMCrashAtScheduled() {
  // This is to test sending CONTAINER_FINISHED event at SCHEDULED state.
  // Verify the state transition is correct.
  scheduleApplicationAttempt();
  ContainerStatus cs =
      SchedulerUtils.createAbnormalContainerStatus(
          BuilderUtils.newContainerId(
              applicationAttempt.getAppAttemptId(), 1),
          SchedulerUtils.LOST_CONTAINER);
  // send CONTAINER_FINISHED event at SCHEDULED state,
  // The state should be FINAL_SAVING with previous state SCHEDULED
  NodeId anyNodeId = NodeId.newInstance("host", 1234);
  applicationAttempt.handle(new RMAppAttemptContainerFinishedEvent(
      applicationAttempt.getAppAttemptId(), cs, anyNodeId));
  // createApplicationAttemptState will return previous state (SCHEDULED),
  // if the current state is FINAL_SAVING.
  assertEquals(YarnApplicationAttemptState.SCHEDULED,
      applicationAttempt.createApplicationAttemptState());
  // send ATTEMPT_UPDATE_SAVED event,
  // verify the state is changed to state FAILED.
  sendAttemptUpdateSavedEvent(applicationAttempt);
  assertEquals(RMAppAttemptState.FAILED,
      applicationAttempt.getAppAttemptState());
  verifyApplicationAttemptFinished(RMAppAttemptState.FAILED);
}
 
Example 7
Source Project: hadoop   Source File: FifoScheduler.java    License: Apache License 2.0
private synchronized void doneApplicationAttempt(
    ApplicationAttemptId applicationAttemptId,
    RMAppAttemptState rmAppAttemptFinalState, boolean keepContainers)
    throws IOException {
  FiCaSchedulerApp attempt = getApplicationAttempt(applicationAttemptId);
  SchedulerApplication<FiCaSchedulerApp> application =
      applications.get(applicationAttemptId.getApplicationId());
  if (application == null || attempt == null) {
    throw new IOException("Unknown application " + applicationAttemptId + 
    " has completed!");
  }

  // Kill all 'live' containers
  for (RMContainer container : attempt.getLiveContainers()) {
    if (keepContainers
        && container.getState().equals(RMContainerState.RUNNING)) {
      // do not kill the running container in the case of work-preserving AM
      // restart.
      LOG.info("Skip killing " + container.getContainerId());
      continue;
    }
    completedContainer(container,
      SchedulerUtils.createAbnormalContainerStatus(
        container.getContainerId(), SchedulerUtils.COMPLETED_APPLICATION),
      RMContainerEventType.KILL);
  }

  // Clean up pending requests, metrics etc.
  attempt.stop(rmAppAttemptFinalState);
}
 
Example 8
Source Project: hadoop   Source File: FairScheduler.java    License: Apache License 2.0
protected void warnOrKillContainer(RMContainer container) {
  ApplicationAttemptId appAttemptId = container.getApplicationAttemptId();
  FSAppAttempt app = getSchedulerApp(appAttemptId);
  FSLeafQueue queue = app.getQueue();
  LOG.info("Preempting container (prio=" + container.getContainer().getPriority() +
      "res=" + container.getContainer().getResource() +
      ") from queue " + queue.getName());
  
  Long time = app.getContainerPreemptionTime(container);

  if (time != null) {
    // if we asked for preemption more than maxWaitTimeBeforeKill ms ago,
    // proceed with kill
    if (time + waitTimeBeforeKill < getClock().getTime()) {
      ContainerStatus status =
        SchedulerUtils.createPreemptedContainerStatus(
          container.getContainerId(), SchedulerUtils.PREEMPTED_CONTAINER);

      recoverResourceRequestForContainer(container);
      // TODO: Not sure if this ever actually adds this to the list of cleanup
      // containers on the RMNode (see SchedulerNode.releaseContainer()).
      completedContainer(container, status, RMContainerEventType.KILL);
      LOG.info("Killing container" + container +
          " (after waiting for premption for " +
          (getClock().getTime() - time) + "ms)");
    }
  } else {
    // track the request in the FSAppAttempt itself
    app.addPreemption(container, getClock().getTime());
  }
}
 
Example 9
Source Project: hadoop   Source File: FairScheduler.java    License: Apache License 2.0
private synchronized void removeNode(RMNode rmNode) {
  FSSchedulerNode node = getFSSchedulerNode(rmNode.getNodeID());
  // This can occur when an UNHEALTHY node reconnects
  if (node == null) {
    return;
  }
  Resources.subtractFrom(clusterResource, rmNode.getTotalCapability());
  updateRootQueueMetrics();

  // Remove running containers
  List<RMContainer> runningContainers = node.getRunningContainers();
  for (RMContainer container : runningContainers) {
    completedContainer(container,
        SchedulerUtils.createAbnormalContainerStatus(
            container.getContainerId(),
            SchedulerUtils.LOST_CONTAINER),
        RMContainerEventType.KILL);
  }

  // Remove reservations, if any
  RMContainer reservedContainer = node.getReservedContainer();
  if (reservedContainer != null) {
    completedContainer(reservedContainer,
        SchedulerUtils.createAbnormalContainerStatus(
            reservedContainer.getContainerId(),
            SchedulerUtils.LOST_CONTAINER),
        RMContainerEventType.KILL);
  }

  nodes.remove(rmNode.getNodeID());
  queueMgr.getRootQueue().setSteadyFairShare(clusterResource);
  queueMgr.getRootQueue().recomputeSteadyShares();
  updateMaximumAllocation(node, false);
  LOG.info("Removed node " + rmNode.getNodeAddress() +
      " cluster capacity: " + clusterResource);
}
 
Example 10
Source Project: hadoop   Source File: CapacitySchedulerConfiguration.java    License: Apache License 2.0
public Map<AccessType, AccessControlList> getAcls(String queue) {
  Map<AccessType, AccessControlList> acls =
    new HashMap<AccessType, AccessControlList>();
  for (QueueACL acl : QueueACL.values()) {
    acls.put(SchedulerUtils.toAccessType(acl), getAcl(queue, acl));
  }
  return acls;
}
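
For context, a hedged sketch of how a map built by getAcls might be consumed: it looks up the SUBMIT_APPLICATIONS entry via SchedulerUtils.toAccessType and checks the current user against it. The helper name canSubmit is invented for illustration, and the sketch assumes AccessType resolves to org.apache.hadoop.yarn.security.AccessType.

import java.io.IOException;
import java.util.Map;

import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.authorize.AccessControlList;
import org.apache.hadoop.yarn.api.records.QueueACL;
import org.apache.hadoop.yarn.security.AccessType;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils;

public class QueueAclSketch {
  // Hypothetical helper: true when the ACL map built by getAcls above allows
  // the current user to submit applications to the queue.
  public static boolean canSubmit(Map<AccessType, AccessControlList> acls)
      throws IOException {
    AccessControlList submitAcl =
        acls.get(SchedulerUtils.toAccessType(QueueACL.SUBMIT_APPLICATIONS));
    UserGroupInformation user = UserGroupInformation.getCurrentUser();
    return submitAcl != null && submitAcl.isUserAllowed(user);
  }
}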
 
Example 11
Source Project: hadoop   Source File: CapacityScheduler.java    License: Apache License 2.0
@Override
public void dropContainerReservation(RMContainer container) {
  if(LOG.isDebugEnabled()){
    LOG.debug("DROP_RESERVATION:" + container.toString());
  }
  completedContainer(container,
      SchedulerUtils.createAbnormalContainerStatus(
          container.getContainerId(),
          SchedulerUtils.UNRESERVED_CONTAINER),
      RMContainerEventType.KILL);
}
 
Example 12
Source Project: hadoop   Source File: CapacityScheduler.java    License: Apache License 2.0
@Override
public void killContainer(RMContainer cont) {
  if (LOG.isDebugEnabled()) {
    LOG.debug("KILL_CONTAINER: container" + cont.toString());
  }
  recoverResourceRequestForContainer(cont);
  completedContainer(cont, SchedulerUtils.createPreemptedContainerStatus(
    cont.getContainerId(), SchedulerUtils.PREEMPTED_CONTAINER),
    RMContainerEventType.KILL);
}
 
Example 13
Source Project: big-c   Source File: FifoScheduler.java    License: Apache License 2.0
private synchronized void doneApplicationAttempt(
    ApplicationAttemptId applicationAttemptId,
    RMAppAttemptState rmAppAttemptFinalState, boolean keepContainers)
    throws IOException {
  FiCaSchedulerApp attempt = getApplicationAttempt(applicationAttemptId);
  SchedulerApplication<FiCaSchedulerApp> application =
      applications.get(applicationAttemptId.getApplicationId());
  if (application == null || attempt == null) {
    throw new IOException("Unknown application " + applicationAttemptId + 
    " has completed!");
  }

  // Kill all 'live' containers
  for (RMContainer container : attempt.getLiveContainers()) {
    if (keepContainers
        && container.getState().equals(RMContainerState.RUNNING)) {
      // do not kill the running container in the case of work-preserving AM
      // restart.
      LOG.info("Skip killing " + container.getContainerId());
      continue;
    }
    completedContainer(container,
      SchedulerUtils.createAbnormalContainerStatus(
        container.getContainerId(), SchedulerUtils.COMPLETED_APPLICATION),
      RMContainerEventType.KILL);
  }

  // Clean up pending requests, metrics etc.
  attempt.stop(rmAppAttemptFinalState);
}
 
Example 14
Source Project: big-c   Source File: FairScheduler.java    License: Apache License 2.0
protected void warnOrKillContainer(RMContainer container) {
  ApplicationAttemptId appAttemptId = container.getApplicationAttemptId();
  FSAppAttempt app = getSchedulerApp(appAttemptId);
  FSLeafQueue queue = app.getQueue();
  LOG.info("Preempting container (prio=" + container.getContainer().getPriority() +
      "res=" + container.getContainer().getResource() +
      ") from queue " + queue.getName());
  
  Long time = app.getContainerPreemptionTime(container);

  if (time != null) {
    // if we asked for preemption more than maxWaitTimeBeforeKill ms ago,
    // proceed with kill
    if (time + waitTimeBeforeKill < getClock().getTime()) {
      ContainerStatus status =
        SchedulerUtils.createPreemptedContainerStatus(
          container.getContainerId(), SchedulerUtils.PREEMPTED_CONTAINER);

      recoverResourceRequestForContainer(container);
      // TODO: Not sure if this ever actually adds this to the list of cleanup
      // containers on the RMNode (see SchedulerNode.releaseContainer()).
      completedContainer(container, status, RMContainerEventType.KILL);
      LOG.info("Killing container" + container +
          " (after waiting for premption for " +
          (getClock().getTime() - time) + "ms)");
    }
  } else {
    // track the request in the FSAppAttempt itself
    app.addPreemption(container, getClock().getTime());
  }
}
 
Example 15
Source Project: big-c   Source File: FairScheduler.java    License: Apache License 2.0
private synchronized void removeNode(RMNode rmNode) {
  FSSchedulerNode node = getFSSchedulerNode(rmNode.getNodeID());
  // This can occur when an UNHEALTHY node reconnects
  if (node == null) {
    return;
  }
  Resources.subtractFrom(clusterResource, rmNode.getTotalCapability());
  updateRootQueueMetrics();

  // Remove running containers
  List<RMContainer> runningContainers = node.getRunningContainers();
  for (RMContainer container : runningContainers) {
    completedContainer(container,
        SchedulerUtils.createAbnormalContainerStatus(
            container.getContainerId(),
            SchedulerUtils.LOST_CONTAINER),
        RMContainerEventType.KILL);
  }

  // Remove reservations, if any
  RMContainer reservedContainer = node.getReservedContainer();
  if (reservedContainer != null) {
    completedContainer(reservedContainer,
        SchedulerUtils.createAbnormalContainerStatus(
            reservedContainer.getContainerId(),
            SchedulerUtils.LOST_CONTAINER),
        RMContainerEventType.KILL);
  }

  nodes.remove(rmNode.getNodeID());
  queueMgr.getRootQueue().setSteadyFairShare(clusterResource);
  queueMgr.getRootQueue().recomputeSteadyShares();
  updateMaximumAllocation(node, false);
  LOG.info("Removed node " + rmNode.getNodeAddress() +
      " cluster capacity: " + clusterResource);
}
 
Example 16
Source Project: big-c   Source File: CapacitySchedulerConfiguration.java    License: Apache License 2.0
public Map<AccessType, AccessControlList> getAcls(String queue) {
  Map<AccessType, AccessControlList> acls =
    new HashMap<AccessType, AccessControlList>();
  for (QueueACL acl : QueueACL.values()) {
    acls.put(SchedulerUtils.toAccessType(acl), getAcl(queue, acl));
  }
  return acls;
}
 
Example 17
Source Project: big-c   Source File: CapacityScheduler.java    License: Apache License 2.0
@Override
public void dropContainerReservation(RMContainer container) {
  if(LOG.isDebugEnabled()){
    LOG.debug("DROP_RESERVATION:" + container.toString());
  }
  completedContainer(container,
      SchedulerUtils.createAbnormalContainerStatus(
          container.getContainerId(),
          SchedulerUtils.UNRESERVED_CONTAINER),
      RMContainerEventType.KILL);
}
 
Example 18
Source Project: big-c   Source File: CapacityScheduler.java    License: Apache License 2.0
@Override
public void killContainer(RMContainer cont) {
  if (LOG.isDebugEnabled()) {
    LOG.debug("KILL_CONTAINER: container" + cont.toString());
  }
  recoverResourceRequestForContainer(cont);
  completedContainer(cont, SchedulerUtils.createPreemptedContainerStatus(
    cont.getContainerId(), SchedulerUtils.PREEMPTED_CONTAINER),
    RMContainerEventType.KILL);
}
 
Example 19
Source Project: hadoop   Source File: FifoScheduler.java    License: Apache License 2.0
@Override
public Allocation allocate(
    ApplicationAttemptId applicationAttemptId, List<ResourceRequest> ask,
    List<ContainerId> release, List<String> blacklistAdditions, List<String> blacklistRemovals) {
  FiCaSchedulerApp application = getApplicationAttempt(applicationAttemptId);
  if (application == null) {
    LOG.error("Calling allocate on removed " +
        "or non existant application " + applicationAttemptId);
    return EMPTY_ALLOCATION;
  }

  // Sanity check
  SchedulerUtils.normalizeRequests(ask, resourceCalculator, 
      clusterResource, minimumAllocation, getMaximumResourceCapability());

  // Release containers
  releaseContainers(release, application);

  synchronized (application) {

    // make sure we aren't stopping/removing the application
    // when the allocate comes in
    if (application.isStopped()) {
      LOG.info("Calling allocate on a stopped " +
          "application " + applicationAttemptId);
      return EMPTY_ALLOCATION;
    }

    if (!ask.isEmpty()) {
      LOG.debug("allocate: pre-update" +
          " applicationId=" + applicationAttemptId + 
          " application=" + application);
      application.showRequests();

      // Update application requests
      application.updateResourceRequests(ask);

      LOG.debug("allocate: post-update" +
          " applicationId=" + applicationAttemptId + 
          " application=" + application);
      application.showRequests();

      LOG.debug("allocate:" +
          " applicationId=" + applicationAttemptId + 
          " #ask=" + ask.size());
    }

    application.updateBlacklist(blacklistAdditions, blacklistRemovals);
    ContainersAndNMTokensAllocation allocation =
        application.pullNewlyAllocatedContainersAndNMTokens();
    Resource headroom = application.getHeadroom();
    application.setApplicationHeadroomForMetrics(headroom);
    return new Allocation(allocation.getContainerList(), headroom, null,
        null, null, allocation.getNMTokenList());
  }
}
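
The "sanity check" above is easy to exercise in isolation. The minimal sketch below (illustrative sizes and class name, not taken from the project) runs a single off-switch request through SchedulerUtils.normalizeRequests with a DefaultResourceCalculator; per the way the schedulers use it, the requested memory is expected to come back rounded up to a multiple of the minimum allocation (1024 MB here) and capped at the maximum.

import java.util.Collections;
import java.util.List;

import org.apache.hadoop.yarn.api.records.Priority;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.hadoop.yarn.api.records.ResourceRequest;
import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils;
import org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator;

public class NormalizeRequestsSketch {
  public static void main(String[] args) {
    // Illustrative sizes only: 700 MB asked, 1024 MB minimum, 8192 MB maximum.
    Resource clusterResource = Resource.newInstance(64 * 1024, 64);
    Resource minimumAllocation = Resource.newInstance(1024, 1);
    Resource maximumAllocation = Resource.newInstance(8 * 1024, 8);

    ResourceRequest ask = ResourceRequest.newInstance(
        Priority.newInstance(1), ResourceRequest.ANY,
        Resource.newInstance(700, 1), 3);
    List<ResourceRequest> asks = Collections.singletonList(ask);

    // Same call the FifoScheduler makes before handling the allocation.
    SchedulerUtils.normalizeRequests(asks, new DefaultResourceCalculator(),
        clusterResource, minimumAllocation, maximumAllocation);

    // Expected (per the schedulers' usage): capability normalized in place.
    System.out.println("normalized capability: " + ask.getCapability());
  }
}
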
 
Example 20
Source Project: hadoop   Source File: FifoScheduler.java    License: Apache License 2.0
@Override
public void handle(SchedulerEvent event) {
  switch(event.getType()) {
  case NODE_ADDED:
  {
    NodeAddedSchedulerEvent nodeAddedEvent = (NodeAddedSchedulerEvent)event;
    addNode(nodeAddedEvent.getAddedRMNode());
    recoverContainersOnNode(nodeAddedEvent.getContainerReports(),
      nodeAddedEvent.getAddedRMNode());

  }
  break;
  case NODE_REMOVED:
  {
    NodeRemovedSchedulerEvent nodeRemovedEvent = (NodeRemovedSchedulerEvent)event;
    removeNode(nodeRemovedEvent.getRemovedRMNode());
  }
  break;
  case NODE_RESOURCE_UPDATE:
  {
    NodeResourceUpdateSchedulerEvent nodeResourceUpdatedEvent = 
        (NodeResourceUpdateSchedulerEvent)event;
    updateNodeResource(nodeResourceUpdatedEvent.getRMNode(),
      nodeResourceUpdatedEvent.getResourceOption());
  }
  break;
  case NODE_UPDATE:
  {
    NodeUpdateSchedulerEvent nodeUpdatedEvent = 
    (NodeUpdateSchedulerEvent)event;
    nodeUpdate(nodeUpdatedEvent.getRMNode());
  }
  break;
  case APP_ADDED:
  {
    AppAddedSchedulerEvent appAddedEvent = (AppAddedSchedulerEvent) event;
    addApplication(appAddedEvent.getApplicationId(),
      appAddedEvent.getQueue(), appAddedEvent.getUser(),
      appAddedEvent.getIsAppRecovering());
  }
  break;
  case APP_REMOVED:
  {
    AppRemovedSchedulerEvent appRemovedEvent = (AppRemovedSchedulerEvent)event;
    doneApplication(appRemovedEvent.getApplicationID(),
      appRemovedEvent.getFinalState());
  }
  break;
  case APP_ATTEMPT_ADDED:
  {
    AppAttemptAddedSchedulerEvent appAttemptAddedEvent =
        (AppAttemptAddedSchedulerEvent) event;
    addApplicationAttempt(appAttemptAddedEvent.getApplicationAttemptId(),
      appAttemptAddedEvent.getTransferStateFromPreviousAttempt(),
      appAttemptAddedEvent.getIsAttemptRecovering());
  }
  break;
  case APP_ATTEMPT_REMOVED:
  {
    AppAttemptRemovedSchedulerEvent appAttemptRemovedEvent =
        (AppAttemptRemovedSchedulerEvent) event;
    try {
      doneApplicationAttempt(
        appAttemptRemovedEvent.getApplicationAttemptID(),
        appAttemptRemovedEvent.getFinalAttemptState(),
        appAttemptRemovedEvent.getKeepContainersAcrossAppAttempts());
    } catch(IOException ie) {
      LOG.error("Unable to remove application "
          + appAttemptRemovedEvent.getApplicationAttemptID(), ie);
    }
  }
  break;
  case CONTAINER_EXPIRED:
  {
    ContainerExpiredSchedulerEvent containerExpiredEvent = 
        (ContainerExpiredSchedulerEvent) event;
    ContainerId containerid = containerExpiredEvent.getContainerId();
    completedContainer(getRMContainer(containerid), 
        SchedulerUtils.createAbnormalContainerStatus(
            containerid, 
            SchedulerUtils.EXPIRED_CONTAINER),
        RMContainerEventType.EXPIRE);
  }
  break;
  default:
    LOG.error("Invalid eventtype " + event.getType() + ". Ignoring!");
  }
}
 
Example 21
Source Project: hadoop   Source File: FairScheduler.java    License: Apache License 2.0
@Override
public Allocation allocate(ApplicationAttemptId appAttemptId,
    List<ResourceRequest> ask, List<ContainerId> release,
    List<String> blacklistAdditions, List<String> blacklistRemovals) {

  // Make sure this application exists
  FSAppAttempt application = getSchedulerApp(appAttemptId);
  if (application == null) {
    LOG.info("Calling allocate on removed " +
        "or non existant application " + appAttemptId);
    return EMPTY_ALLOCATION;
  }

  // Sanity check
  SchedulerUtils.normalizeRequests(ask, DOMINANT_RESOURCE_CALCULATOR,
      clusterResource, minimumAllocation, getMaximumResourceCapability(),
      incrAllocation);

  // Set amResource for this app
  if (!application.getUnmanagedAM() && ask.size() == 1
      && application.getLiveContainers().isEmpty()) {
    application.setAMResource(ask.get(0).getCapability());
  }

  // Release containers
  releaseContainers(release, application);

  synchronized (application) {
    if (!ask.isEmpty()) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("allocate: pre-update" +
            " applicationAttemptId=" + appAttemptId +
            " application=" + application.getApplicationId());
      }
      application.showRequests();

      // Update application requests
      application.updateResourceRequests(ask);

      application.showRequests();
    }

    if (LOG.isDebugEnabled()) {
      LOG.debug("allocate: post-update" +
          " applicationAttemptId=" + appAttemptId +
          " #ask=" + ask.size() +
          " reservation= " + application.getCurrentReservation());

      LOG.debug("Preempting " + application.getPreemptionContainers().size()
          + " container(s)");
    }
    
    Set<ContainerId> preemptionContainerIds = new HashSet<ContainerId>();
    for (RMContainer container : application.getPreemptionContainers()) {
      preemptionContainerIds.add(container.getContainerId());
    }

    application.updateBlacklist(blacklistAdditions, blacklistRemovals);
    ContainersAndNMTokensAllocation allocation =
        application.pullNewlyAllocatedContainersAndNMTokens();
    Resource headroom = application.getHeadroom();
    application.setApplicationHeadroomForMetrics(headroom);
    return new Allocation(allocation.getContainerList(), headroom,
        preemptionContainerIds, null, null, allocation.getNMTokenList());
  }
}
 
Example 22
Source Project: hadoop   Source File: FairScheduler.java    License: Apache License 2.0
@Override
public void handle(SchedulerEvent event) {
  switch (event.getType()) {
  case NODE_ADDED:
    if (!(event instanceof NodeAddedSchedulerEvent)) {
      throw new RuntimeException("Unexpected event type: " + event);
    }
    NodeAddedSchedulerEvent nodeAddedEvent = (NodeAddedSchedulerEvent)event;
    addNode(nodeAddedEvent.getAddedRMNode());
    recoverContainersOnNode(nodeAddedEvent.getContainerReports(),
        nodeAddedEvent.getAddedRMNode());
    break;
  case NODE_REMOVED:
    if (!(event instanceof NodeRemovedSchedulerEvent)) {
      throw new RuntimeException("Unexpected event type: " + event);
    }
    NodeRemovedSchedulerEvent nodeRemovedEvent = (NodeRemovedSchedulerEvent)event;
    removeNode(nodeRemovedEvent.getRemovedRMNode());
    break;
  case NODE_UPDATE:
    if (!(event instanceof NodeUpdateSchedulerEvent)) {
      throw new RuntimeException("Unexpected event type: " + event);
    }
    NodeUpdateSchedulerEvent nodeUpdatedEvent = (NodeUpdateSchedulerEvent)event;
    nodeUpdate(nodeUpdatedEvent.getRMNode());
    break;
  case APP_ADDED:
    if (!(event instanceof AppAddedSchedulerEvent)) {
      throw new RuntimeException("Unexpected event type: " + event);
    }
    AppAddedSchedulerEvent appAddedEvent = (AppAddedSchedulerEvent) event;
    String queueName =
        resolveReservationQueueName(appAddedEvent.getQueue(),
            appAddedEvent.getApplicationId(),
            appAddedEvent.getReservationID());
    if (queueName != null) {
      addApplication(appAddedEvent.getApplicationId(),
          queueName, appAddedEvent.getUser(),
          appAddedEvent.getIsAppRecovering());
    }
    break;
  case APP_REMOVED:
    if (!(event instanceof AppRemovedSchedulerEvent)) {
      throw new RuntimeException("Unexpected event type: " + event);
    }
    AppRemovedSchedulerEvent appRemovedEvent = (AppRemovedSchedulerEvent)event;
    removeApplication(appRemovedEvent.getApplicationID(),
      appRemovedEvent.getFinalState());
    break;
  case NODE_RESOURCE_UPDATE:
    if (!(event instanceof NodeResourceUpdateSchedulerEvent)) {
      throw new RuntimeException("Unexpected event type: " + event);
    }
    NodeResourceUpdateSchedulerEvent nodeResourceUpdatedEvent = 
        (NodeResourceUpdateSchedulerEvent)event;
    updateNodeResource(nodeResourceUpdatedEvent.getRMNode(),
          nodeResourceUpdatedEvent.getResourceOption());
    break;
  case APP_ATTEMPT_ADDED:
    if (!(event instanceof AppAttemptAddedSchedulerEvent)) {
      throw new RuntimeException("Unexpected event type: " + event);
    }
    AppAttemptAddedSchedulerEvent appAttemptAddedEvent =
        (AppAttemptAddedSchedulerEvent) event;
    addApplicationAttempt(appAttemptAddedEvent.getApplicationAttemptId(),
      appAttemptAddedEvent.getTransferStateFromPreviousAttempt(),
      appAttemptAddedEvent.getIsAttemptRecovering());
    break;
  case APP_ATTEMPT_REMOVED:
    if (!(event instanceof AppAttemptRemovedSchedulerEvent)) {
      throw new RuntimeException("Unexpected event type: " + event);
    }
    AppAttemptRemovedSchedulerEvent appAttemptRemovedEvent =
        (AppAttemptRemovedSchedulerEvent) event;
    removeApplicationAttempt(
        appAttemptRemovedEvent.getApplicationAttemptID(),
        appAttemptRemovedEvent.getFinalAttemptState(),
        appAttemptRemovedEvent.getKeepContainersAcrossAppAttempts());
    break;
  case CONTAINER_EXPIRED:
    if (!(event instanceof ContainerExpiredSchedulerEvent)) {
      throw new RuntimeException("Unexpected event type: " + event);
    }
    ContainerExpiredSchedulerEvent containerExpiredEvent =
        (ContainerExpiredSchedulerEvent)event;
    ContainerId containerId = containerExpiredEvent.getContainerId();
    completedContainer(getRMContainer(containerId),
        SchedulerUtils.createAbnormalContainerStatus(
            containerId,
            SchedulerUtils.EXPIRED_CONTAINER),
        RMContainerEventType.EXPIRE);
    break;
  default:
    LOG.error("Unknown event arrived at FairScheduler: " + event.toString());
  }
}
 
Example 23
Source Project: hadoop   Source File: ParentQueue.java    License: Apache License 2.0
@Override
public synchronized CSAssignment assignContainers(Resource clusterResource,
    FiCaSchedulerNode node, ResourceLimits resourceLimits) {
  CSAssignment assignment = 
      new CSAssignment(Resources.createResource(0, 0, 0), NodeType.NODE_LOCAL);
  Set<String> nodeLabels = node.getLabels();
  
  // if our queue cannot access this node, just return
  if (!SchedulerUtils.checkQueueAccessToNode(accessibleLabels, nodeLabels)) {
    return assignment;
  }
  
  while (canAssign(clusterResource, node)) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Trying to assign containers to child-queue of "
        + getQueueName());
    }
    
    // Are we over maximum-capacity for this queue?
    // This will also consider parent's limits and also continuous reservation
    // looking
    if (!super.canAssignToThisQueue(clusterResource, nodeLabels, resourceLimits,
        minimumAllocation, Resources.createResource(getMetrics()
            .getReservedMB(), getMetrics().getReservedVirtualCores(), getMetrics().getReservedGpuCores()))) {
      break;
    }
    
    // Schedule
    CSAssignment assignedToChild = 
        assignContainersToChildQueues(clusterResource, node, resourceLimits);
    assignment.setType(assignedToChild.getType());
    
    // Done if no child-queue assigned anything
    if (Resources.greaterThan(
            resourceCalculator, clusterResource, 
            assignedToChild.getResource(), Resources.none())) {
      // Track resource utilization for the parent-queue
      super.allocateResource(clusterResource, assignedToChild.getResource(),
          nodeLabels);
      
      // Track resource utilization in this pass of the scheduler
      Resources.addTo(assignment.getResource(), assignedToChild.getResource());
      
      LOG.info("assignedContainer" +
          " queue=" + getQueueName() + 
          " usedCapacity=" + getUsedCapacity() +
          " absoluteUsedCapacity=" + getAbsoluteUsedCapacity() +
          " used=" + queueUsage.getUsed() + 
          " cluster=" + clusterResource);

    } else {
      break;
    }

    if (LOG.isDebugEnabled()) {
      LOG.debug("ParentQ=" + getQueueName()
        + " assignedSoFarInThisIteration=" + assignment.getResource()
        + " usedCapacity=" + getUsedCapacity()
        + " absoluteUsedCapacity=" + getAbsoluteUsedCapacity());
    }

    // Do not assign more than one container if this isn't the root queue
    // or if we've already assigned an off-switch container
    if (!rootQueue || assignment.getType() == NodeType.OFF_SWITCH) {
      if (LOG.isDebugEnabled()) {
        if (rootQueue && assignment.getType() == NodeType.OFF_SWITCH) {
          LOG.debug("Not assigning more than one off-switch container," +
              " assignments so far: " + assignment);
        }
      }
      break;
    }
  } 
  
  return assignment;
}
 
Example 24
Source Project: hadoop   Source File: AbstractCSQueue.java    License: Apache License 2.0
@Override
public boolean hasAccess(QueueACL acl, UserGroupInformation user) {
  return authorizer.checkPermission(SchedulerUtils.toAccessType(acl),
    queueEntity, user);
}
 
Example 25
Source Project: hadoop   Source File: CapacityScheduler.java    License: Apache License 2.0
@Override
@Lock(Lock.NoLock.class)
public Allocation allocate(ApplicationAttemptId applicationAttemptId,
    List<ResourceRequest> ask, List<ContainerId> release, 
    List<String> blacklistAdditions, List<String> blacklistRemovals) {

  FiCaSchedulerApp application = getApplicationAttempt(applicationAttemptId);
  if (application == null) {
    LOG.info("Calling allocate on removed " +
        "or non existant application " + applicationAttemptId);
    return EMPTY_ALLOCATION;
  }
  
  // Sanity check
  SchedulerUtils.normalizeRequests(
      ask, getResourceCalculator(), getClusterResource(),
      getMinimumResourceCapability(), getMaximumResourceCapability());

  // Release containers
  releaseContainers(release, application);

  synchronized (application) {

    // make sure we aren't stopping/removing the application
    // when the allocate comes in
    if (application.isStopped()) {
      LOG.info("Calling allocate on a stopped " +
          "application " + applicationAttemptId);
      return EMPTY_ALLOCATION;
    }

    if (!ask.isEmpty()) {

      if(LOG.isDebugEnabled()) {
        LOG.debug("allocate: pre-update" +
          " applicationAttemptId=" + applicationAttemptId + 
          " application=" + application);
      }
      application.showRequests();

      // Update application requests
      application.updateResourceRequests(ask);

      LOG.debug("allocate: post-update");
      application.showRequests();
    }

    if(LOG.isDebugEnabled()) {
      LOG.debug("allocate:" +
        " applicationAttemptId=" + applicationAttemptId + 
        " #ask=" + ask.size());
    }

    application.updateBlacklist(blacklistAdditions, blacklistRemovals);

    return application.getAllocation(getResourceCalculator(),
                 clusterResource, getMinimumResourceCapability());
  }
}
 
Example 26
Source Project: hadoop   Source File: CapacityScheduler.java    License: Apache License 2.0
private synchronized void allocateContainersToNode(FiCaSchedulerNode node) {
  if (rmContext.isWorkPreservingRecoveryEnabled()
      && !rmContext.isSchedulerReadyForAllocatingContainers()) {
    return;
  }

  // Assign new containers...
  // 1. Check for reserved applications
  // 2. Schedule if there are no reservations

  RMContainer reservedContainer = node.getReservedContainer();
  if (reservedContainer != null) {
    FiCaSchedulerApp reservedApplication =
        getCurrentAttemptForContainer(reservedContainer.getContainerId());
    
    // Try to fulfill the reservation
    LOG.info("Trying to fulfill reservation for application " + 
        reservedApplication.getApplicationId() + " on node: " + 
        node.getNodeID());
    
    LeafQueue queue = ((LeafQueue)reservedApplication.getQueue());
    CSAssignment assignment =
        queue.assignContainers(
            clusterResource,
            node,
            // TODO, now we only consider limits for parent for non-labeled
            // resources, should consider labeled resources as well.
            new ResourceLimits(labelManager.getResourceByLabel(
                RMNodeLabelsManager.NO_LABEL, clusterResource)));
    
    RMContainer excessReservation = assignment.getExcessReservation();
    if (excessReservation != null) {
      Container container = excessReservation.getContainer();
      queue.completedContainer(
          clusterResource, assignment.getApplication(), node,
          excessReservation,
          SchedulerUtils.createAbnormalContainerStatus(
              container.getId(),
              SchedulerUtils.UNRESERVED_CONTAINER),
          RMContainerEventType.RELEASED, null, true);
    }

  }

  // Try to schedule more if there are no reservations to fulfill
  if (node.getReservedContainer() == null) {
    if (calculator.computeAvailableContainers(node.getAvailableResource(),
      minimumAllocation) > 0) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Trying to schedule on node: " + node.getNodeName() +
            ", available: " + node.getAvailableResource());
      }
      root.assignContainers(
          clusterResource,
          node,
          // TODO, now we only consider limits for parent for non-labeled
          // resources, should consider labeled resources as well.
          new ResourceLimits(labelManager.getResourceByLabel(
              RMNodeLabelsManager.NO_LABEL, clusterResource)));
    }
  } else {
    LOG.info("Skipping scheduling since node " + node.getNodeID() + 
        " is reserved by application " + 
        node.getReservedContainer().getContainerId().getApplicationAttemptId()
        );
  }

}
 
Example 27
Source Project: hadoop   Source File: CapacityScheduler.java    License: Apache License 2.0
private synchronized void removeNode(RMNode nodeInfo) {
  // update this node to node label manager
  if (labelManager != null) {
    labelManager.deactivateNode(nodeInfo.getNodeID());
  }
  
  FiCaSchedulerNode node = nodes.get(nodeInfo.getNodeID());
  if (node == null) {
    return;
  }
  Resources.subtractFrom(clusterResource, node.getRMNode().getTotalCapability());
  root.updateClusterResource(clusterResource, new ResourceLimits(
      clusterResource));
  int numNodes = numNodeManagers.decrementAndGet();

  if (scheduleAsynchronously && numNodes == 0) {
    asyncSchedulerThread.suspendSchedule();
  }
  
  // Remove running containers
  List<RMContainer> runningContainers = node.getRunningContainers();
  for (RMContainer container : runningContainers) {
    completedContainer(container, 
        SchedulerUtils.createAbnormalContainerStatus(
            container.getContainerId(), 
            SchedulerUtils.LOST_CONTAINER), 
        RMContainerEventType.KILL);
  }
  
  // Remove reservations, if any
  RMContainer reservedContainer = node.getReservedContainer();
  if (reservedContainer != null) {
    completedContainer(reservedContainer, 
        SchedulerUtils.createAbnormalContainerStatus(
            reservedContainer.getContainerId(), 
            SchedulerUtils.LOST_CONTAINER), 
        RMContainerEventType.KILL);
  }

  this.nodes.remove(nodeInfo.getNodeID());
  updateMaximumAllocation(node, false);

  LOG.info("Removed node " + nodeInfo.getNodeAddress() + 
      " clusterResource: " + clusterResource);
}
 
Example 28
Source Project: hadoop   Source File: LeafQueue.java    License: Apache License 2.0
@Private
protected boolean findNodeToUnreserve(Resource clusterResource,
    FiCaSchedulerNode node, FiCaSchedulerApp application, Priority priority,
    Resource minimumUnreservedResource) {
  // need to unreserve some other container first
  NodeId idToUnreserve =
      application.getNodeIdToUnreserve(priority, minimumUnreservedResource,
          resourceCalculator, clusterResource);
  if (idToUnreserve == null) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("checked to see if could unreserve for app but nothing "
          + "reserved that matches for this app");
    }
    return false;
  }
  FiCaSchedulerNode nodeToUnreserve = scheduler.getNode(idToUnreserve);
  if (nodeToUnreserve == null) {
    LOG.error("node to unreserve doesn't exist, nodeid: " + idToUnreserve);
    return false;
  }
  if (LOG.isDebugEnabled()) {
    LOG.debug("unreserving for app: " + application.getApplicationId()
      + " on nodeId: " + idToUnreserve
      + " in order to replace reserved application and place it on node: "
      + node.getNodeID() + " needing: " + minimumUnreservedResource);
  }

  // headroom
  Resources.addTo(application.getHeadroom(), nodeToUnreserve
      .getReservedContainer().getReservedResource());

  // Make sure to not have completedContainers sort the queues here since
  // we are already inside an iterator loop for the queues and this would
  // cause a concurrent modification exception.
  completedContainer(clusterResource, application, nodeToUnreserve,
      nodeToUnreserve.getReservedContainer(),
      SchedulerUtils.createAbnormalContainerStatus(nodeToUnreserve
          .getReservedContainer().getContainerId(),
          SchedulerUtils.UNRESERVED_CONTAINER),
      RMContainerEventType.RELEASED, null, false);
  return true;
}
 
Example 29
Source Project: hadoop   Source File: RMAppManager.java    License: Apache License 2.0
private ResourceRequest validateAndCreateResourceRequest(
    ApplicationSubmissionContext submissionContext, boolean isRecovery)
    throws InvalidResourceRequestException {
  // Validation of the ApplicationSubmissionContext needs to be completed
  // here. Only those fields that are dependent on RM's configuration are
  // checked here as they have to be validated whether they are part of new
  // submission or just being recovered.

  // Check whether AM resource requirements are within required limits
  if (!submissionContext.getUnmanagedAM()) {
    ResourceRequest amReq = submissionContext.getAMContainerResourceRequest();
    if (amReq == null) {
      amReq = BuilderUtils
          .newResourceRequest(RMAppAttemptImpl.AM_CONTAINER_PRIORITY,
              ResourceRequest.ANY, submissionContext.getResource(), 1);
    }

    // set label expression for AM container
    if (null == amReq.getNodeLabelExpression()) {
      amReq.setNodeLabelExpression(submissionContext
          .getNodeLabelExpression());
    }

    try {
      SchedulerUtils.normalizeAndValidateRequest(amReq,
          scheduler.getMaximumResourceCapability(),
          submissionContext.getQueue(), scheduler, isRecovery, rmContext);
    } catch (InvalidResourceRequestException e) {
      LOG.warn("RM app submission failed in validating AM resource request"
          + " for application " + submissionContext.getApplicationId(), e);
      throw e;
    }

    SchedulerUtils.normalizeRequest(amReq, scheduler.getResourceCalculator(),
        scheduler.getClusterResource(),
        scheduler.getMinimumResourceCapability(),
        scheduler.getMaximumResourceCapability(),
        scheduler.getMinimumResourceCapability());
    return amReq;
  }
  
  return null;
}
 
Example 30
Source Project: hadoop   Source File: TestRMContainerImpl.java    License: Apache License 2.0
@Test
public void testReleaseWhileRunning() {

  DrainDispatcher drainDispatcher = new DrainDispatcher();
  EventHandler<RMAppAttemptEvent> appAttemptEventHandler = mock(EventHandler.class);
  EventHandler generic = mock(EventHandler.class);
  drainDispatcher.register(RMAppAttemptEventType.class,
      appAttemptEventHandler);
  drainDispatcher.register(RMNodeEventType.class, generic);
  drainDispatcher.init(new YarnConfiguration());
  drainDispatcher.start();
  NodeId nodeId = BuilderUtils.newNodeId("host", 3425);
  ApplicationId appId = BuilderUtils.newApplicationId(1, 1);
  ApplicationAttemptId appAttemptId = BuilderUtils.newApplicationAttemptId(
      appId, 1);
  ContainerId containerId = BuilderUtils.newContainerId(appAttemptId, 1);
  ContainerAllocationExpirer expirer = mock(ContainerAllocationExpirer.class);

  Resource resource = BuilderUtils.newResource(512, 1, 1);
  Priority priority = BuilderUtils.newPriority(5);

  Container container = BuilderUtils.newContainer(containerId, nodeId,
      "host:3465", resource, priority, null);
  ConcurrentMap<ApplicationId, RMApp> rmApps =
      spy(new ConcurrentHashMap<ApplicationId, RMApp>());
  RMApp rmApp = mock(RMApp.class);
  when(rmApp.getRMAppAttempt((ApplicationAttemptId)Matchers.any())).thenReturn(null);
  Mockito.doReturn(rmApp).when(rmApps).get((ApplicationId)Matchers.any());

  RMApplicationHistoryWriter writer = mock(RMApplicationHistoryWriter.class);
  SystemMetricsPublisher publisher = mock(SystemMetricsPublisher.class);
  RMContext rmContext = mock(RMContext.class);
  when(rmContext.getDispatcher()).thenReturn(drainDispatcher);
  when(rmContext.getContainerAllocationExpirer()).thenReturn(expirer);
  when(rmContext.getRMApplicationHistoryWriter()).thenReturn(writer);
  when(rmContext.getRMApps()).thenReturn(rmApps);
  when(rmContext.getSystemMetricsPublisher()).thenReturn(publisher);
  when(rmContext.getYarnConfiguration()).thenReturn(new YarnConfiguration());
  RMContainer rmContainer = new RMContainerImpl(container, appAttemptId,
      nodeId, "user", rmContext);

  assertEquals(RMContainerState.NEW, rmContainer.getState());
  assertEquals(resource, rmContainer.getAllocatedResource());
  assertEquals(nodeId, rmContainer.getAllocatedNode());
  assertEquals(priority, rmContainer.getAllocatedPriority());
  verify(writer).containerStarted(any(RMContainer.class));
  verify(publisher).containerCreated(any(RMContainer.class), anyLong());

  rmContainer.handle(new RMContainerEvent(containerId,
      RMContainerEventType.START));
  drainDispatcher.await();
  assertEquals(RMContainerState.ALLOCATED, rmContainer.getState());
  rmContainer.handle(new RMContainerEvent(containerId,
      RMContainerEventType.ACQUIRED));
  drainDispatcher.await();
  assertEquals(RMContainerState.ACQUIRED, rmContainer.getState());

  rmContainer.handle(new RMContainerEvent(containerId,
      RMContainerEventType.LAUNCHED));
  drainDispatcher.await();
  assertEquals(RMContainerState.RUNNING, rmContainer.getState());
  assertEquals("http://host:3465/node/containerlogs/container_1_0001_01_000001/user",
      rmContainer.getLogURL());

  // In RUNNING state. Verify RELEASED and associated actions.
  reset(appAttemptEventHandler);
  ContainerStatus containerStatus = SchedulerUtils
      .createAbnormalContainerStatus(containerId,
          SchedulerUtils.RELEASED_CONTAINER);
  rmContainer.handle(new RMContainerFinishedEvent(containerId,
      containerStatus, RMContainerEventType.RELEASED));
  drainDispatcher.await();
  assertEquals(RMContainerState.RELEASED, rmContainer.getState());
  assertEquals(SchedulerUtils.RELEASED_CONTAINER,
      rmContainer.getDiagnosticsInfo());
  assertEquals(ContainerExitStatus.ABORTED,
      rmContainer.getContainerExitStatus());
  assertEquals(ContainerState.COMPLETE, rmContainer.getContainerState());
  verify(writer).containerFinished(any(RMContainer.class));
  verify(publisher).containerFinished(any(RMContainer.class), anyLong());

  ArgumentCaptor<RMAppAttemptContainerFinishedEvent> captor = ArgumentCaptor
      .forClass(RMAppAttemptContainerFinishedEvent.class);
  verify(appAttemptEventHandler).handle(captor.capture());
  RMAppAttemptContainerFinishedEvent cfEvent = captor.getValue();
  assertEquals(appAttemptId, cfEvent.getApplicationAttemptId());
  assertEquals(containerStatus, cfEvent.getContainerStatus());
  assertEquals(RMAppAttemptEventType.CONTAINER_FINISHED, cfEvent.getType());
  
  // In RELEASED state. A FINISHED event may come in.
  rmContainer.handle(new RMContainerFinishedEvent(containerId, SchedulerUtils
      .createAbnormalContainerStatus(containerId, "FinishedContainer"),
      RMContainerEventType.FINISHED));
  assertEquals(RMContainerState.RELEASED, rmContainer.getState());
}