org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils Java Examples

The following examples show how to use org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerUtils. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: RMServerUtils.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Utility method to validate a list of resource requests, by ensuring that
 * the requested memory/vcore is non-negative and not greater than max.
 *
 * <p>The queue information is looked up once and then handed, together with
 * every request in turn, to
 * {@code SchedulerUtils.normalizeAndvalidateRequest}.
 *
 * @param ask resource requests to normalize and validate
 * @param maximumResource upper bound forwarded to the per-request check
 * @param queueName name of the queue the requests are made against
 * @param scheduler scheduler used to look up the queue information
 * @param rmContext resource manager context forwarded to the per-request check
 * @throws InvalidResourceRequestException declared for failures of the
 *     per-request check
 */
public static void normalizeAndValidateRequests(List<ResourceRequest> ask,
    Resource maximumResource, String queueName, YarnScheduler scheduler,
    RMContext rmContext)
    throws InvalidResourceRequestException {

  QueueInfo info;
  try {
    info = scheduler.getQueueInfo(queueName, false, false);
  } catch (IOException ignored) {
    // A failed lookup is tolerated: the requests are checked with no queue
    // information (null) instead.
    info = null;
  }

  for (ResourceRequest request : ask) {
    SchedulerUtils.normalizeAndvalidateRequest(
        request, maximumResource, queueName, scheduler, rmContext, info);
  }
}
 
Example #2
Source File: TestRMAppAttemptTransitions.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Sends a CONTAINER_FINISHED event while the attempt is in the SCHEDULED
 * state and verifies the resulting state transitions.
 */
@Test
public void testAMCrashAtScheduled() {
  scheduleApplicationAttempt();

  // Abnormal status for container #1 of this attempt, marked as lost.
  ContainerStatus lostStatus =
      SchedulerUtils.createAbnormalContainerStatus(
          BuilderUtils.newContainerId(
              applicationAttempt.getAppAttemptId(), 1),
          SchedulerUtils.LOST_CONTAINER);
  NodeId someNode = NodeId.newInstance("host", 1234);

  // Deliver CONTAINER_FINISHED at SCHEDULED state; the attempt should move
  // to FINAL_SAVING with previous state SCHEDULED.
  RMAppAttemptContainerFinishedEvent finishedEvent =
      new RMAppAttemptContainerFinishedEvent(
          applicationAttempt.getAppAttemptId(), lostStatus, someNode);
  applicationAttempt.handle(finishedEvent);

  // While in FINAL_SAVING, createApplicationAttemptState reports the
  // previous state (SCHEDULED).
  assertEquals(
      YarnApplicationAttemptState.SCHEDULED,
      applicationAttempt.createApplicationAttemptState());

  // Once ATTEMPT_UPDATE_SAVED arrives the attempt must end up FAILED.
  sendAttemptUpdateSavedEvent(applicationAttempt);
  assertEquals(
      RMAppAttemptState.FAILED,
      applicationAttempt.getAppAttemptState());
  verifyApplicationAttemptFinished(RMAppAttemptState.FAILED);
}
 
Example #3
Source File: FifoScheduler.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Removes the given node from this scheduler.
 *
 * <p>Each container still running on the node is completed with a
 * {@code LOST_CONTAINER} status and a KILL event. The node is then dropped
 * from the node map, the maximum allocation is updated, and the node's total
 * capability is subtracted from the cluster resource. Nothing happens when
 * the node is not known to the scheduler.
 *
 * @param nodeInfo the node being removed
 */
private synchronized void removeNode(RMNode nodeInfo) {
  FiCaSchedulerNode lostNode = getNode(nodeInfo.getNodeID());
  if (lostNode == null) {
    return;
  }

  // Kill whatever is still running on the node.
  for (RMContainer running : lostNode.getRunningContainers()) {
    completedContainer(
        running,
        SchedulerUtils.createAbnormalContainerStatus(
            running.getContainerId(), SchedulerUtils.LOST_CONTAINER),
        RMContainerEventType.KILL);
  }

  // Forget the node and update the maximum allocation.
  this.nodes.remove(nodeInfo.getNodeID());
  updateMaximumAllocation(lostNode, false);

  // Shrink the cluster resource by the node's total capability.
  Resources.subtractFrom(
      clusterResource, lostNode.getRMNode().getTotalCapability());
}
 
Example #4
Source File: RMServerUtils.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Utility method to validate a list of resource requests, by ensuring that
 * the requested memory/vcore is non-negative and not greater than max.
 *
 * <p>The queue information is looked up once and then handed, together with
 * every request in turn, to
 * {@code SchedulerUtils.normalizeAndvalidateRequest}.
 *
 * @param ask resource requests to normalize and validate
 * @param maximumResource upper bound forwarded to the per-request check
 * @param queueName name of the queue the requests are made against
 * @param scheduler scheduler used to look up the queue information
 * @param rmContext resource manager context forwarded to the per-request check
 * @throws InvalidResourceRequestException declared for failures of the
 *     per-request check
 */
public static void normalizeAndValidateRequests(List<ResourceRequest> ask,
    Resource maximumResource, String queueName, YarnScheduler scheduler,
    RMContext rmContext)
    throws InvalidResourceRequestException {

  QueueInfo info;
  try {
    info = scheduler.getQueueInfo(queueName, false, false);
  } catch (IOException ignored) {
    // A failed lookup is tolerated: the requests are checked with no queue
    // information (null) instead.
    info = null;
  }

  for (ResourceRequest request : ask) {
    SchedulerUtils.normalizeAndvalidateRequest(
        request, maximumResource, queueName, scheduler, rmContext, info);
  }
}
 
Example #5
Source File: TestRMAppAttemptTransitions.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Sends a CONTAINER_FINISHED event while the attempt is in the SCHEDULED
 * state and verifies the resulting state transitions.
 */
@Test
public void testAMCrashAtScheduled() {
  scheduleApplicationAttempt();

  // Abnormal status for container #1 of this attempt, marked as lost.
  ContainerStatus lostStatus =
      SchedulerUtils.createAbnormalContainerStatus(
          BuilderUtils.newContainerId(
              applicationAttempt.getAppAttemptId(), 1),
          SchedulerUtils.LOST_CONTAINER);
  NodeId someNode = NodeId.newInstance("host", 1234);

  // Deliver CONTAINER_FINISHED at SCHEDULED state; the attempt should move
  // to FINAL_SAVING with previous state SCHEDULED.
  RMAppAttemptContainerFinishedEvent finishedEvent =
      new RMAppAttemptContainerFinishedEvent(
          applicationAttempt.getAppAttemptId(), lostStatus, someNode);
  applicationAttempt.handle(finishedEvent);

  // While in FINAL_SAVING, createApplicationAttemptState reports the
  // previous state (SCHEDULED).
  assertEquals(
      YarnApplicationAttemptState.SCHEDULED,
      applicationAttempt.createApplicationAttemptState());

  // Once ATTEMPT_UPDATE_SAVED arrives the attempt must end up FAILED.
  sendAttemptUpdateSavedEvent(applicationAttempt);
  assertEquals(
      RMAppAttemptState.FAILED,
      applicationAttempt.getAppAttemptState());
  verifyApplicationAttemptFinished(RMAppAttemptState.FAILED);
}
 
Example #6
Source File: FifoScheduler.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Removes the given node from this scheduler.
 *
 * <p>Each container still running on the node is completed with a
 * {@code LOST_CONTAINER} status and a KILL event. The node is then dropped
 * from the node map, the maximum allocation is updated, and the node's total
 * capability is subtracted from the cluster resource. Nothing happens when
 * the node is not known to the scheduler.
 *
 * @param nodeInfo the node being removed
 */
private synchronized void removeNode(RMNode nodeInfo) {
  FiCaSchedulerNode lostNode = getNode(nodeInfo.getNodeID());
  if (lostNode == null) {
    return;
  }

  // Kill whatever is still running on the node.
  for (RMContainer running : lostNode.getRunningContainers()) {
    completedContainer(
        running,
        SchedulerUtils.createAbnormalContainerStatus(
            running.getContainerId(), SchedulerUtils.LOST_CONTAINER),
        RMContainerEventType.KILL);
  }

  // Forget the node and update the maximum allocation.
  this.nodes.remove(nodeInfo.getNodeID());
  updateMaximumAllocation(lostNode, false);

  // Shrink the cluster resource by the node's total capability.
  Resources.subtractFrom(
      clusterResource, lostNode.getRMNode().getTotalCapability());
}
 
Example #7
Source File: FairScheduler.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Removes the given node from this scheduler.
 *
 * <p>The node's total capability is subtracted from the cluster resource and
 * the root queue metrics are updated. Its running containers and its
 * reserved container (if any) are then completed with a
 * {@code LOST_CONTAINER} status and a KILL event. Finally the node is dropped
 * from the node map, the root queue's steady fair shares are recomputed, and
 * the maximum allocation is updated. Nothing happens when the node is not
 * known to the scheduler.
 *
 * @param rmNode the node being removed
 */
private synchronized void removeNode(RMNode rmNode) {
  FSSchedulerNode fsNode = getFSSchedulerNode(rmNode.getNodeID());
  if (fsNode == null) {
    // This can occur when an UNHEALTHY node reconnects
    return;
  }

  Resources.subtractFrom(clusterResource, rmNode.getTotalCapability());
  updateRootQueueMetrics();

  // Complete every container that is still running on the node.
  for (RMContainer running : fsNode.getRunningContainers()) {
    completedContainer(
        running,
        SchedulerUtils.createAbnormalContainerStatus(
            running.getContainerId(), SchedulerUtils.LOST_CONTAINER),
        RMContainerEventType.KILL);
  }

  // Complete the reservation held on the node, if there is one.
  RMContainer reserved = fsNode.getReservedContainer();
  if (reserved != null) {
    completedContainer(
        reserved,
        SchedulerUtils.createAbnormalContainerStatus(
            reserved.getContainerId(), SchedulerUtils.LOST_CONTAINER),
        RMContainerEventType.KILL);
  }

  nodes.remove(rmNode.getNodeID());
  queueMgr.getRootQueue().setSteadyFairShare(clusterResource);
  queueMgr.getRootQueue().recomputeSteadyShares();
  updateMaximumAllocation(fsNode, false);
  LOG.info("Removed node " + rmNode.getNodeAddress() +
      " cluster capacity: " + clusterResource);
}
 
Example #8
Source File: CapacityScheduler.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Kills the given container as a preempted container.
 *
 * <p>The container's resource request is recovered first; the container is
 * then completed with a preempted status ({@code PREEMPTED_CONTAINER}) and a
 * KILL event.
 *
 * @param cont the container to kill
 */
@Override
public void killContainer(RMContainer cont) {
  if (LOG.isDebugEnabled()) {
    LOG.debug("KILL_CONTAINER: container" + cont.toString());
  }

  recoverResourceRequestForContainer(cont);

  completedContainer(
      cont,
      SchedulerUtils.createPreemptedContainerStatus(
          cont.getContainerId(), SchedulerUtils.PREEMPTED_CONTAINER),
      RMContainerEventType.KILL);
}
 
Example #9
Source File: CapacityScheduler.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Drops the reservation represented by the given container.
 *
 * <p>The container is completed with an abnormal status carrying
 * {@code UNRESERVED_CONTAINER} and a KILL event.
 *
 * @param container the reserved container to drop
 */
@Override
public void dropContainerReservation(RMContainer container) {
  if (LOG.isDebugEnabled()) {
    LOG.debug("DROP_RESERVATION:" + container.toString());
  }

  completedContainer(
      container,
      SchedulerUtils.createAbnormalContainerStatus(
          container.getContainerId(), SchedulerUtils.UNRESERVED_CONTAINER),
      RMContainerEventType.KILL);
}
 
Example #10
Source File: CapacitySchedulerConfiguration.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Collects the access control lists of a queue.
 *
 * @param queue queue whose ACLs are looked up
 * @return a new map that stores, for every {@code QueueACL} value, the list
 *     returned by {@code getAcl} under the access type that
 *     {@code SchedulerUtils.toAccessType} maps the value to
 */
public Map<AccessType, AccessControlList> getAcls(String queue) {
  Map<AccessType, AccessControlList> aclsByType =
      new HashMap<AccessType, AccessControlList>();
  for (QueueACL queueAcl : QueueACL.values()) {
    AccessType type = SchedulerUtils.toAccessType(queueAcl);
    AccessControlList accessList = getAcl(queue, queueAcl);
    aclsByType.put(type, accessList);
  }
  return aclsByType;
}
 
Example #11
Source File: FairScheduler.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Removes the given node from this scheduler.
 *
 * <p>The node's total capability is subtracted from the cluster resource and
 * the root queue metrics are updated. Its running containers and its
 * reserved container (if any) are then completed with a
 * {@code LOST_CONTAINER} status and a KILL event. Finally the node is dropped
 * from the node map, the root queue's steady fair shares are recomputed, and
 * the maximum allocation is updated. Nothing happens when the node is not
 * known to the scheduler.
 *
 * @param rmNode the node being removed
 */
private synchronized void removeNode(RMNode rmNode) {
  FSSchedulerNode fsNode = getFSSchedulerNode(rmNode.getNodeID());
  if (fsNode == null) {
    // This can occur when an UNHEALTHY node reconnects
    return;
  }

  Resources.subtractFrom(clusterResource, rmNode.getTotalCapability());
  updateRootQueueMetrics();

  // Complete every container that is still running on the node.
  for (RMContainer running : fsNode.getRunningContainers()) {
    completedContainer(
        running,
        SchedulerUtils.createAbnormalContainerStatus(
            running.getContainerId(), SchedulerUtils.LOST_CONTAINER),
        RMContainerEventType.KILL);
  }

  // Complete the reservation held on the node, if there is one.
  RMContainer reserved = fsNode.getReservedContainer();
  if (reserved != null) {
    completedContainer(
        reserved,
        SchedulerUtils.createAbnormalContainerStatus(
            reserved.getContainerId(), SchedulerUtils.LOST_CONTAINER),
        RMContainerEventType.KILL);
  }

  nodes.remove(rmNode.getNodeID());
  queueMgr.getRootQueue().setSteadyFairShare(clusterResource);
  queueMgr.getRootQueue().recomputeSteadyShares();
  updateMaximumAllocation(fsNode, false);
  LOG.info("Removed node " + rmNode.getNodeAddress() +
      " cluster capacity: " + clusterResource);
}
 
Example #12
Source File: FairScheduler.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Records a preemption request for the container, or kills the container if
 * an earlier request has been pending for longer than
 * {@code waitTimeBeforeKill}.
 *
 * <p>When the owning attempt has no preemption time recorded for the
 * container, the current clock time is recorded and nothing else happens.
 * Otherwise, once the recorded time plus {@code waitTimeBeforeKill} lies in
 * the past, the container's resource request is recovered and the container
 * is completed with a preempted status and a KILL event.
 *
 * @param container the container selected for preemption
 */
protected void warnOrKillContainer(RMContainer container) {
  ApplicationAttemptId attemptId = container.getApplicationAttemptId();
  FSAppAttempt attempt = getSchedulerApp(attemptId);
  FSLeafQueue leafQueue = attempt.getQueue();
  LOG.info("Preempting container (prio=" + container.getContainer().getPriority() +
      "res=" + container.getContainer().getResource() +
      ") from queue " + leafQueue.getName());

  Long requestedAt = attempt.getContainerPreemptionTime(container);
  if (requestedAt == null) {
    // First request for this container: track it in the FSAppAttempt itself.
    attempt.addPreemption(container, getClock().getTime());
    return;
  }

  // Preemption was requested more than waitTimeBeforeKill ms ago: kill.
  if (requestedAt + waitTimeBeforeKill < getClock().getTime()) {
    ContainerStatus preempted =
        SchedulerUtils.createPreemptedContainerStatus(
            container.getContainerId(), SchedulerUtils.PREEMPTED_CONTAINER);

    recoverResourceRequestForContainer(container);
    // TODO: Not sure if this ever actually adds this to the list of cleanup
    // containers on the RMNode (see SchedulerNode.releaseContainer()).
    completedContainer(container, preempted, RMContainerEventType.KILL);
    LOG.info("Killing container" + container +
        " (after waiting for premption for " +
        (getClock().getTime() - requestedAt) + "ms)");
  }
}
 
Example #13
Source File: FifoScheduler.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Finishes an application attempt: completes its live containers and stops
 * the attempt.
 *
 * <p>Every live container is completed with an abnormal status carrying
 * {@code COMPLETED_APPLICATION} and a KILL event, except that containers in
 * the RUNNING state are left alone when {@code keepContainers} is set.
 *
 * @param applicationAttemptId id of the attempt that is done
 * @param rmAppAttemptFinalState final state passed to the attempt's
 *     {@code stop}
 * @param keepContainers whether RUNNING containers are spared
 * @throws IOException if the attempt or its application is unknown
 */
private synchronized void doneApplicationAttempt(
    ApplicationAttemptId applicationAttemptId,
    RMAppAttemptState rmAppAttemptFinalState, boolean keepContainers)
    throws IOException {
  FiCaSchedulerApp attempt = getApplicationAttempt(applicationAttemptId);
  SchedulerApplication<FiCaSchedulerApp> owner =
      applications.get(applicationAttemptId.getApplicationId());
  if (application(owner, attempt)) {
    throw new IOException("Unknown application " + applicationAttemptId + 
    " has completed!");
  }

  // Kill all 'live' containers
  for (RMContainer live : attempt.getLiveContainers()) {
    boolean preserve = keepContainers
        && live.getState().equals(RMContainerState.RUNNING);
    if (preserve) {
      // Work-preserving AM restart: leave the running container alone.
      LOG.info("Skip killing " + live.getContainerId());
    } else {
      completedContainer(
          live,
          SchedulerUtils.createAbnormalContainerStatus(
              live.getContainerId(), SchedulerUtils.COMPLETED_APPLICATION),
          RMContainerEventType.KILL);
    }
  }

  // Clean up pending requests, metrics etc.
  attempt.stop(rmAppAttemptFinalState);
}

/** Tells whether either the application entry or the attempt is missing. */
private static boolean application(Object owner, Object attempt) {
  return owner == null || attempt == null;
}
 
Example #14
Source File: FifoScheduler.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Finishes an application attempt: completes its live containers and stops
 * the attempt.
 *
 * <p>Every live container is completed with an abnormal status carrying
 * {@code COMPLETED_APPLICATION} and a KILL event, except that containers in
 * the RUNNING state are left alone when {@code keepContainers} is set.
 *
 * @param applicationAttemptId id of the attempt that is done
 * @param rmAppAttemptFinalState final state passed to the attempt's
 *     {@code stop}
 * @param keepContainers whether RUNNING containers are spared
 * @throws IOException if the attempt or its application is unknown
 */
private synchronized void doneApplicationAttempt(
    ApplicationAttemptId applicationAttemptId,
    RMAppAttemptState rmAppAttemptFinalState, boolean keepContainers)
    throws IOException {
  FiCaSchedulerApp attempt = getApplicationAttempt(applicationAttemptId);
  SchedulerApplication<FiCaSchedulerApp> owner =
      applications.get(applicationAttemptId.getApplicationId());
  if (owner == null || attempt == null) {
    throw new IOException("Unknown application " + applicationAttemptId + 
    " has completed!");
  }

  // Kill all 'live' containers
  for (RMContainer live : attempt.getLiveContainers()) {
    boolean preserve = keepContainers
        && live.getState().equals(RMContainerState.RUNNING);
    if (preserve) {
      // Work-preserving AM restart: leave the running container alone.
      LOG.info("Skip killing " + live.getContainerId());
    } else {
      completedContainer(
          live,
          SchedulerUtils.createAbnormalContainerStatus(
              live.getContainerId(), SchedulerUtils.COMPLETED_APPLICATION),
          RMContainerEventType.KILL);
    }
  }

  // Clean up pending requests, metrics etc.
  attempt.stop(rmAppAttemptFinalState);
}
 
Example #15
Source File: CapacityScheduler.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Kills the given container as a preempted container.
 *
 * <p>The container's resource request is recovered first; the container is
 * then completed with a preempted status ({@code PREEMPTED_CONTAINER}) and a
 * KILL event.
 *
 * @param cont the container to kill
 */
@Override
public void killContainer(RMContainer cont) {
  if (LOG.isDebugEnabled()) {
    LOG.debug("KILL_CONTAINER: container" + cont.toString());
  }

  recoverResourceRequestForContainer(cont);

  completedContainer(
      cont,
      SchedulerUtils.createPreemptedContainerStatus(
          cont.getContainerId(), SchedulerUtils.PREEMPTED_CONTAINER),
      RMContainerEventType.KILL);
}
 
Example #16
Source File: FairScheduler.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Records a preemption request for the container, or kills the container if
 * an earlier request has been pending for longer than
 * {@code waitTimeBeforeKill}.
 *
 * <p>When the owning attempt has no preemption time recorded for the
 * container, the current clock time is recorded and nothing else happens.
 * Otherwise, once the recorded time plus {@code waitTimeBeforeKill} lies in
 * the past, the container's resource request is recovered and the container
 * is completed with a preempted status and a KILL event.
 *
 * @param container the container selected for preemption
 */
protected void warnOrKillContainer(RMContainer container) {
  ApplicationAttemptId attemptId = container.getApplicationAttemptId();
  FSAppAttempt attempt = getSchedulerApp(attemptId);
  FSLeafQueue leafQueue = attempt.getQueue();
  LOG.info("Preempting container (prio=" + container.getContainer().getPriority() +
      "res=" + container.getContainer().getResource() +
      ") from queue " + leafQueue.getName());

  Long requestedAt = attempt.getContainerPreemptionTime(container);
  if (requestedAt == null) {
    // First request for this container: track it in the FSAppAttempt itself.
    attempt.addPreemption(container, getClock().getTime());
    return;
  }

  // Preemption was requested more than waitTimeBeforeKill ms ago: kill.
  if (requestedAt + waitTimeBeforeKill < getClock().getTime()) {
    ContainerStatus preempted =
        SchedulerUtils.createPreemptedContainerStatus(
            container.getContainerId(), SchedulerUtils.PREEMPTED_CONTAINER);

    recoverResourceRequestForContainer(container);
    // TODO: Not sure if this ever actually adds this to the list of cleanup
    // containers on the RMNode (see SchedulerNode.releaseContainer()).
    completedContainer(container, preempted, RMContainerEventType.KILL);
    LOG.info("Killing container" + container +
        " (after waiting for premption for " +
        (getClock().getTime() - requestedAt) + "ms)");
  }
}
 
Example #17
Source File: CapacitySchedulerConfiguration.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Collects the access control lists of a queue.
 *
 * @param queue queue whose ACLs are looked up
 * @return a new map that stores, for every {@code QueueACL} value, the list
 *     returned by {@code getAcl} under the access type that
 *     {@code SchedulerUtils.toAccessType} maps the value to
 */
public Map<AccessType, AccessControlList> getAcls(String queue) {
  Map<AccessType, AccessControlList> aclsByType =
      new HashMap<AccessType, AccessControlList>();
  for (QueueACL queueAcl : QueueACL.values()) {
    AccessType type = SchedulerUtils.toAccessType(queueAcl);
    AccessControlList accessList = getAcl(queue, queueAcl);
    aclsByType.put(type, accessList);
  }
  return aclsByType;
}
 
Example #18
Source File: CapacityScheduler.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Drops the reservation represented by the given container.
 *
 * <p>The container is completed with an abnormal status carrying
 * {@code UNRESERVED_CONTAINER} and a KILL event.
 *
 * @param container the reserved container to drop
 */
@Override
public void dropContainerReservation(RMContainer container) {
  if (LOG.isDebugEnabled()) {
    LOG.debug("DROP_RESERVATION:" + container.toString());
  }

  completedContainer(
      container,
      SchedulerUtils.createAbnormalContainerStatus(
          container.getContainerId(), SchedulerUtils.UNRESERVED_CONTAINER),
      RMContainerEventType.KILL);
}
 
Example #19
Source File: FairScheduler.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Handles an allocate call from an application attempt.
 *
 * <p>The requests are normalized, the listed containers are released, and
 * then, while holding the attempt's monitor, the attempt's outstanding
 * requests and blacklist are updated and its newly allocated containers are
 * pulled.
 *
 * @param appAttemptId attempt issuing the call
 * @param ask resource requests to normalize and record
 * @param release containers the attempt gives back
 * @param blacklistAdditions resources to add to the attempt's blacklist
 * @param blacklistRemovals resources to remove from the attempt's blacklist
 * @return the newly allocated containers with their NM tokens, the attempt's
 *     headroom and the ids of the containers marked for preemption, or
 *     {@code EMPTY_ALLOCATION} when the attempt is unknown
 */
@Override
public Allocation allocate(ApplicationAttemptId appAttemptId,
    List<ResourceRequest> ask, List<ContainerId> release,
    List<String> blacklistAdditions, List<String> blacklistRemovals) {

  // Bail out early when the attempt is not (or no longer) registered.
  FSAppAttempt attempt = getSchedulerApp(appAttemptId);
  if (attempt == null) {
    LOG.info("Calling allocate on removed " +
        "or non existant application " + appAttemptId);
    return EMPTY_ALLOCATION;
  }

  // Sanity check
  SchedulerUtils.normalizeRequests(ask, DOMINANT_RESOURCE_CALCULATOR,
      clusterResource, minimumAllocation, getMaximumResourceCapability(),
      incrAllocation);

  // A managed AM with no live containers and a single request: that request
  // is taken as the AM resource.
  boolean singleRequestForAm = !attempt.getUnmanagedAM() && ask.size() == 1
      && attempt.getLiveContainers().isEmpty();
  if (singleRequestForAm) {
    attempt.setAMResource(ask.get(0).getCapability());
  }

  // Release containers
  releaseContainers(release, attempt);

  synchronized (attempt) {
    if (!ask.isEmpty()) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("allocate: pre-update" +
            " applicationAttemptId=" + appAttemptId +
            " application=" + attempt.getApplicationId());
      }
      attempt.showRequests();

      // Update application requests
      attempt.updateResourceRequests(ask);

      attempt.showRequests();
    }

    if (LOG.isDebugEnabled()) {
      LOG.debug("allocate: post-update" +
          " applicationAttemptId=" + appAttemptId +
          " #ask=" + ask.size() +
          " reservation= " + attempt.getCurrentReservation());

      LOG.debug("Preempting " + attempt.getPreemptionContainers().size()
          + " container(s)");
    }

    // Ids of the containers currently marked for preemption.
    Set<ContainerId> toPreempt = new HashSet<ContainerId>();
    for (RMContainer marked : attempt.getPreemptionContainers()) {
      toPreempt.add(marked.getContainerId());
    }

    attempt.updateBlacklist(blacklistAdditions, blacklistRemovals);
    ContainersAndNMTokensAllocation granted =
        attempt.pullNewlyAllocatedContainersAndNMTokens();
    Resource headroom = attempt.getHeadroom();
    attempt.setApplicationHeadroomForMetrics(headroom);
    return new Allocation(granted.getContainerList(), headroom,
        toPreempt, null, null, granted.getNMTokenList());
  }
}
 
Example #20
Source File: FifoScheduler.java    From hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Handles an allocate call from an application attempt.
 *
 * <p>The requests are normalized, the listed containers are released, and
 * then, while holding the attempt's monitor, the attempt's outstanding
 * requests and blacklist are updated and its newly allocated containers are
 * pulled.
 *
 * @param applicationAttemptId attempt issuing the call
 * @param ask resource requests to normalize and record
 * @param release containers the attempt gives back
 * @param blacklistAdditions resources to add to the attempt's blacklist
 * @param blacklistRemovals resources to remove from the attempt's blacklist
 * @return the newly allocated containers with their NM tokens and the
 *     attempt's headroom, or {@code EMPTY_ALLOCATION} when the attempt is
 *     unknown or already stopped
 */
@Override
public Allocation allocate(
    ApplicationAttemptId applicationAttemptId, List<ResourceRequest> ask,
    List<ContainerId> release, List<String> blacklistAdditions, List<String> blacklistRemovals) {
  FiCaSchedulerApp application = getApplicationAttempt(applicationAttemptId);
  if (application == null) {
    LOG.error("Calling allocate on removed " +
        "or non existant application " + applicationAttemptId);
    return EMPTY_ALLOCATION;
  }

  // Sanity check
  SchedulerUtils.normalizeRequests(ask, resourceCalculator, 
      clusterResource, minimumAllocation, getMaximumResourceCapability());

  // Release containers
  releaseContainers(release, application);

  synchronized (application) {

    // make sure we aren't stopping/removing the application
    // when the allocate comes in
    if (application.isStopped()) {
      LOG.info("Calling allocate on a stopped " +
          "application " + applicationAttemptId);
      return EMPTY_ALLOCATION;
    }

    if (!ask.isEmpty()) {
      // The debug messages stringify the whole application; only build them
      // when debug logging is actually enabled.
      if (LOG.isDebugEnabled()) {
        LOG.debug("allocate: pre-update" +
            " applicationId=" + applicationAttemptId + 
            " application=" + application);
      }
      application.showRequests();

      // Update application requests
      application.updateResourceRequests(ask);

      if (LOG.isDebugEnabled()) {
        LOG.debug("allocate: post-update" +
            " applicationId=" + applicationAttemptId + 
            " application=" + application);
      }
      application.showRequests();

      if (LOG.isDebugEnabled()) {
        LOG.debug("allocate:" +
            " applicationId=" + applicationAttemptId + 
            " #ask=" + ask.size());
      }
    }

    application.updateBlacklist(blacklistAdditions, blacklistRemovals);
    ContainersAndNMTokensAllocation allocation =
        application.pullNewlyAllocatedContainersAndNMTokens();
    Resource headroom = application.getHeadroom();
    application.setApplicationHeadroomForMetrics(headroom);
    return new Allocation(allocation.getContainerList(), headroom, null,
        null, null, allocation.getNMTokenList());
  }
}
 
Example #21
Source File: TestRMContainerImpl.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Drives an {@code RMContainerImpl} from NEW to RUNNING and then sends it an
 * EXPIRE event, checking that the container stays RUNNING and that neither
 * the history writer nor the metrics publisher is told the container
 * finished.
 */
@Test
public void testExpireWhileRunning() {

  // Dispatcher with mocked handlers for app-attempt and node events.
  DrainDispatcher drainDispatcher = new DrainDispatcher();
  EventHandler<RMAppAttemptEvent> appAttemptEventHandler = mock(EventHandler.class);
  EventHandler generic = mock(EventHandler.class);
  drainDispatcher.register(RMAppAttemptEventType.class,
      appAttemptEventHandler);
  drainDispatcher.register(RMNodeEventType.class, generic);
  drainDispatcher.init(new YarnConfiguration());
  drainDispatcher.start();
  NodeId nodeId = BuilderUtils.newNodeId("host", 3425);
  ApplicationId appId = BuilderUtils.newApplicationId(1, 1);
  ApplicationAttemptId appAttemptId = BuilderUtils.newApplicationAttemptId(
      appId, 1);
  ContainerId containerId = BuilderUtils.newContainerId(appAttemptId, 1);
  ContainerAllocationExpirer expirer = mock(ContainerAllocationExpirer.class);

  Resource resource = BuilderUtils.newResource(512, 1);
  Priority priority = BuilderUtils.newPriority(5);

  Container container = BuilderUtils.newContainer(containerId, nodeId,
      "host:3465", resource, priority, null);

  // Mocked RM context wired to the dispatcher and the mocked collaborators.
  RMApplicationHistoryWriter writer = mock(RMApplicationHistoryWriter.class);
  SystemMetricsPublisher publisher = mock(SystemMetricsPublisher.class);
  RMContext rmContext = mock(RMContext.class);
  when(rmContext.getDispatcher()).thenReturn(drainDispatcher);
  when(rmContext.getContainerAllocationExpirer()).thenReturn(expirer);
  when(rmContext.getRMApplicationHistoryWriter()).thenReturn(writer);
  when(rmContext.getSystemMetricsPublisher()).thenReturn(publisher);
  when(rmContext.getYarnConfiguration()).thenReturn(new YarnConfiguration());
  RMContainer rmContainer = new RMContainerImpl(container, appAttemptId,
      nodeId, "user", rmContext);

  // A freshly created container is NEW and has already been reported as
  // started/created.
  assertEquals(RMContainerState.NEW, rmContainer.getState());
  assertEquals(resource, rmContainer.getAllocatedResource());
  assertEquals(nodeId, rmContainer.getAllocatedNode());
  assertEquals(priority, rmContainer.getAllocatedPriority());
  verify(writer).containerStarted(any(RMContainer.class));
  verify(publisher).containerCreated(any(RMContainer.class), anyLong());

  // START -> ALLOCATED
  rmContainer.handle(new RMContainerEvent(containerId,
      RMContainerEventType.START));
  drainDispatcher.await();
  assertEquals(RMContainerState.ALLOCATED, rmContainer.getState());

  // ACQUIRED -> ACQUIRED
  rmContainer.handle(new RMContainerEvent(containerId,
      RMContainerEventType.ACQUIRED));
  drainDispatcher.await();
  assertEquals(RMContainerState.ACQUIRED, rmContainer.getState());

  // LAUNCHED -> RUNNING, and the log URL becomes available.
  rmContainer.handle(new RMContainerEvent(containerId,
      RMContainerEventType.LAUNCHED));
  drainDispatcher.await();
  assertEquals(RMContainerState.RUNNING, rmContainer.getState());
  assertEquals("http://host:3465/node/containerlogs/container_1_0001_01_000001/user",
      rmContainer.getLogURL());

  // In RUNNING state. Verify EXPIRE and associated actions.
  reset(appAttemptEventHandler);
  ContainerStatus containerStatus = SchedulerUtils
      .createAbnormalContainerStatus(containerId,
          SchedulerUtils.EXPIRED_CONTAINER);
  rmContainer.handle(new RMContainerFinishedEvent(containerId,
      containerStatus, RMContainerEventType.EXPIRE));
  drainDispatcher.await();
  // The EXPIRE event must leave the container RUNNING and unreported.
  assertEquals(RMContainerState.RUNNING, rmContainer.getState());
  verify(writer, never()).containerFinished(any(RMContainer.class));
  verify(publisher, never()).containerFinished(any(RMContainer.class),
      anyLong());
}
 
Example #22
Source File: TestRMContainerImpl.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Drives an {@code RMContainerImpl} from NEW to RUNNING and then sends it a
 * RELEASED event, checking the resulting state, diagnostics, exit status,
 * reporting calls and the CONTAINER_FINISHED event delivered to the
 * app-attempt handler. A later FINISHED event must leave the container in
 * RELEASED.
 */
@Test
public void testReleaseWhileRunning() {

  // Dispatcher with mocked handlers for app-attempt and node events.
  DrainDispatcher drainDispatcher = new DrainDispatcher();
  EventHandler<RMAppAttemptEvent> appAttemptEventHandler = mock(EventHandler.class);
  EventHandler generic = mock(EventHandler.class);
  drainDispatcher.register(RMAppAttemptEventType.class,
      appAttemptEventHandler);
  drainDispatcher.register(RMNodeEventType.class, generic);
  drainDispatcher.init(new YarnConfiguration());
  drainDispatcher.start();
  NodeId nodeId = BuilderUtils.newNodeId("host", 3425);
  ApplicationId appId = BuilderUtils.newApplicationId(1, 1);
  ApplicationAttemptId appAttemptId = BuilderUtils.newApplicationAttemptId(
      appId, 1);
  ContainerId containerId = BuilderUtils.newContainerId(appAttemptId, 1);
  ContainerAllocationExpirer expirer = mock(ContainerAllocationExpirer.class);

  Resource resource = BuilderUtils.newResource(512, 1);
  Priority priority = BuilderUtils.newPriority(5);

  Container container = BuilderUtils.newContainer(containerId, nodeId,
      "host:3465", resource, priority, null);
  // Spied app map that returns a mocked RMApp for any application id; that
  // app in turn reports no attempt for any attempt id.
  ConcurrentMap<ApplicationId, RMApp> rmApps =
      spy(new ConcurrentHashMap<ApplicationId, RMApp>());
  RMApp rmApp = mock(RMApp.class);
  when(rmApp.getRMAppAttempt((ApplicationAttemptId)Matchers.any())).thenReturn(null);
  Mockito.doReturn(rmApp).when(rmApps).get((ApplicationId)Matchers.any());

  // Mocked RM context wired to the dispatcher and the mocked collaborators.
  RMApplicationHistoryWriter writer = mock(RMApplicationHistoryWriter.class);
  SystemMetricsPublisher publisher = mock(SystemMetricsPublisher.class);
  RMContext rmContext = mock(RMContext.class);
  when(rmContext.getDispatcher()).thenReturn(drainDispatcher);
  when(rmContext.getContainerAllocationExpirer()).thenReturn(expirer);
  when(rmContext.getRMApplicationHistoryWriter()).thenReturn(writer);
  when(rmContext.getRMApps()).thenReturn(rmApps);
  when(rmContext.getSystemMetricsPublisher()).thenReturn(publisher);
  when(rmContext.getYarnConfiguration()).thenReturn(new YarnConfiguration());
  RMContainer rmContainer = new RMContainerImpl(container, appAttemptId,
      nodeId, "user", rmContext);

  // A freshly created container is NEW and has already been reported as
  // started/created.
  assertEquals(RMContainerState.NEW, rmContainer.getState());
  assertEquals(resource, rmContainer.getAllocatedResource());
  assertEquals(nodeId, rmContainer.getAllocatedNode());
  assertEquals(priority, rmContainer.getAllocatedPriority());
  verify(writer).containerStarted(any(RMContainer.class));
  verify(publisher).containerCreated(any(RMContainer.class), anyLong());

  // START -> ALLOCATED
  rmContainer.handle(new RMContainerEvent(containerId,
      RMContainerEventType.START));
  drainDispatcher.await();
  assertEquals(RMContainerState.ALLOCATED, rmContainer.getState());
  // ACQUIRED -> ACQUIRED
  rmContainer.handle(new RMContainerEvent(containerId,
      RMContainerEventType.ACQUIRED));
  drainDispatcher.await();
  assertEquals(RMContainerState.ACQUIRED, rmContainer.getState());

  // LAUNCHED -> RUNNING, and the log URL becomes available.
  rmContainer.handle(new RMContainerEvent(containerId,
      RMContainerEventType.LAUNCHED));
  drainDispatcher.await();
  assertEquals(RMContainerState.RUNNING, rmContainer.getState());
  assertEquals("http://host:3465/node/containerlogs/container_1_0001_01_000001/user",
      rmContainer.getLogURL());

  // In RUNNING state. Verify RELEASED and associated actions.
  reset(appAttemptEventHandler);
  ContainerStatus containerStatus = SchedulerUtils
      .createAbnormalContainerStatus(containerId,
          SchedulerUtils.RELEASED_CONTAINER);
  rmContainer.handle(new RMContainerFinishedEvent(containerId,
      containerStatus, RMContainerEventType.RELEASED));
  drainDispatcher.await();
  assertEquals(RMContainerState.RELEASED, rmContainer.getState());
  assertEquals(SchedulerUtils.RELEASED_CONTAINER,
      rmContainer.getDiagnosticsInfo());
  assertEquals(ContainerExitStatus.ABORTED,
      rmContainer.getContainerExitStatus());
  assertEquals(ContainerState.COMPLETE, rmContainer.getContainerState());
  verify(writer).containerFinished(any(RMContainer.class));
  verify(publisher).containerFinished(any(RMContainer.class), anyLong());

  // The app-attempt handler must have received exactly one
  // CONTAINER_FINISHED event carrying the same attempt id and status.
  ArgumentCaptor<RMAppAttemptContainerFinishedEvent> captor = ArgumentCaptor
      .forClass(RMAppAttemptContainerFinishedEvent.class);
  verify(appAttemptEventHandler).handle(captor.capture());
  RMAppAttemptContainerFinishedEvent cfEvent = captor.getValue();
  assertEquals(appAttemptId, cfEvent.getApplicationAttemptId());
  assertEquals(containerStatus, cfEvent.getContainerStatus());
  assertEquals(RMAppAttemptEventType.CONTAINER_FINISHED, cfEvent.getType());
  
  // In RELEASED state. A FINISHED event may come in.
  rmContainer.handle(new RMContainerFinishedEvent(containerId, SchedulerUtils
      .createAbnormalContainerStatus(containerId, "FinishedContainer"),
      RMContainerEventType.FINISHED));
  assertEquals(RMContainerState.RELEASED, rmContainer.getState());
}
 
Example #23
Source File: RMAppManager.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Builds, validates and normalizes the AM container resource request of a
 * submission.
 *
 * <p>Validation of the ApplicationSubmissionContext needs to be completed
 * here. Only those fields that are dependent on RM's configuration are
 * checked here as they have to be validated whether they are part of new
 * submission or just being recovered.
 *
 * @param submissionContext the submission being checked
 * @param isRecovery forwarded to the request validation
 * @return the normalized AM request, or {@code null} for an unmanaged AM
 * @throws InvalidResourceRequestException if the AM request fails validation
 */
private ResourceRequest validateAndCreateResourceRequest(
    ApplicationSubmissionContext submissionContext, boolean isRecovery)
    throws InvalidResourceRequestException {
  // An unmanaged AM has no AM container request to check.
  if (submissionContext.getUnmanagedAM()) {
    return null;
  }

  // Use the request supplied with the submission, or build one from the
  // submission's resource when none was supplied.
  ResourceRequest amRequest =
      submissionContext.getAMContainerResourceRequest();
  if (amRequest == null) {
    amRequest = BuilderUtils
        .newResourceRequest(RMAppAttemptImpl.AM_CONTAINER_PRIORITY,
            ResourceRequest.ANY, submissionContext.getResource(), 1);
  }

  // set label expression for AM container
  if (amRequest.getNodeLabelExpression() == null) {
    amRequest.setNodeLabelExpression(
        submissionContext.getNodeLabelExpression());
  }

  // Check whether AM resource requirements are within required limits
  try {
    SchedulerUtils.normalizeAndValidateRequest(amRequest,
        scheduler.getMaximumResourceCapability(),
        submissionContext.getQueue(), scheduler, isRecovery, rmContext);
  } catch (InvalidResourceRequestException e) {
    LOG.warn("RM app submission failed in validating AM resource request"
        + " for application " + submissionContext.getApplicationId(), e);
    throw e;
  }

  SchedulerUtils.normalizeRequest(amRequest,
      scheduler.getResourceCalculator(),
      scheduler.getClusterResource(),
      scheduler.getMinimumResourceCapability(),
      scheduler.getMaximumResourceCapability(),
      scheduler.getMinimumResourceCapability());
  return amRequest;
}
 
Example #24
Source File: LeafQueue.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Tries to release one of the application's existing reservations so that a
 * container can be placed on {@code node} instead.
 *
 * @param clusterResource passed through to the reservation lookup and to
 *          completedContainer
 * @param node the node the caller wants to place a container on (only used
 *          for logging here)
 * @param application the application whose reservation may be released
 * @param priority priority used to look up a reservation to release
 * @param minimumUnreservedResource resource amount used to look up a
 *          reservation to release
 * @return true when a reservation was released; false when the application
 *         has no matching reservation or the reserved node is unknown to the
 *         scheduler
 */
@Private
protected boolean findNodeToUnreserve(Resource clusterResource,
    FiCaSchedulerNode node, FiCaSchedulerApp application, Priority priority,
    Resource minimumUnreservedResource) {
  // Another container has to be unreserved first; ask the application which
  // node holds a reservation that can be given up.
  final NodeId reservedNodeId = application.getNodeIdToUnreserve(
      priority, minimumUnreservedResource, resourceCalculator,
      clusterResource);
  if (reservedNodeId == null) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("checked to see if could unreserve for app but nothing "
          + "reserved that matches for this app");
    }
    return false;
  }

  final FiCaSchedulerNode reservedNode = scheduler.getNode(reservedNodeId);
  if (reservedNode == null) {
    LOG.error("node to unreserve doesn't exist, nodeid: " + reservedNodeId);
    return false;
  }

  LOG.info("unreserving for app: " + application.getApplicationId()
      + " on nodeId: " + reservedNodeId
      + " in order to replace reserved application and place it on node: "
      + node.getNodeID() + " needing: " + minimumUnreservedResource);

  // Add the reserved resource back to the application's headroom.
  Resources.addTo(application.getHeadroom(),
      reservedNode.getReservedContainer().getReservedResource());

  // completedContainer must not sort the queues here: we are already inside
  // an iterator loop over the queues and sorting would cause a concurrent
  // modification exception.
  completedContainer(clusterResource, application, reservedNode,
      reservedNode.getReservedContainer(),
      SchedulerUtils.createAbnormalContainerStatus(
          reservedNode.getReservedContainer().getContainerId(),
          SchedulerUtils.UNRESERVED_CONTAINER),
      RMContainerEventType.RELEASED, null, false);
  return true;
}
 
Example #25
Source File: CapacityScheduler.java    From big-c with Apache License 2.0 4 votes vote down vote up
@Override
public void suspendContianer(RMContainer cont, Resource toPreempt) {
  if (LOG.isDebugEnabled()) {
    LOG.debug("SUSPEND_CONTAINER: container" + cont.toString());
  }

  LOG.info("capacity scheduler try to preempt " + cont.getContainerId()
      + " resource: " + toPreempt);

  // Nothing to do when no resource was given, or when the given resource is
  // not greater than Resources.none().
  if (toPreempt == null) {
    LOG.info("preempted resource can not be null");
    return;
  }
  if (!Resources.greaterThan(getResourceCalculator(), clusterResource,
      toPreempt, Resources.none())) {
    LOG.info("preempted resource is none");
    return;
  }

  // Record the preempted resource on the container, then mark the container
  // as preempted through the SUSPEND event.
  cont.addPreemptedResource(toPreempt);
  completedContainer(cont,
      SchedulerUtils.createPreemptedContainerStatus(
          cont.getContainerId(), SchedulerUtils.PREEMPTED_CONTAINER),
      RMContainerEventType.SUSPEND);

  // Describe the container's resource usage as it stands after preemption.
  NodeId targetNode = cont.getContainer().getNodeId();
  ContainerId suspendedId = cont.getContainerId();
  Resource remaining = cont.getCurrentUsedResource();
  NodeContainerUpdate update = NodeContainerUpdate.newInstance(
      suspendedId, remaining.getMemory(), remaining.getVirtualCores(),
      true, false);

  LOG.info("get container   " + suspendedId + " to suspend " + " on host "
      + targetNode.getHost() + " currentresource:   " + remaining);

  // Queue the update under the container's node; presumably it is picked up
  // later and sent to the NodeManager -- confirm against the consumer of
  // nodeContainerUpdateMap.
  // NOTE(review): this get-then-put sequence is not atomic; confirm that
  // callers serialize access to nodeContainerUpdateMap.
  if (nodeContainerUpdateMap.get(targetNode) != null) {
    nodeContainerUpdateMap.get(targetNode).add(update);
  } else {
    ConcurrentLinkedQueue<NodeContainerUpdate> pending =
        new ConcurrentLinkedQueue<NodeContainerUpdate>();
    pending.add(update);
    nodeContainerUpdateMap.put(targetNode, pending);
  }
}
 
Example #26
Source File: FairScheduler.java    From hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Handles one allocate call for an application attempt: normalizes the
 * asks, releases the listed containers, updates the attempt's requests and
 * blacklist, and returns the containers newly allocated to it.
 *
 * @param appAttemptId the attempt making the call
 * @param ask resource requests to register for the attempt
 * @param release containers the attempt wants released
 * @param blacklistAdditions passed to the attempt's updateBlacklist
 * @param blacklistRemovals passed to the attempt's updateBlacklist
 * @return the allocation for the attempt, or EMPTY_ALLOCATION when the
 *         scheduler has no application for {@code appAttemptId}
 */
@Override
public Allocation allocate(ApplicationAttemptId appAttemptId,
    List<ResourceRequest> ask, List<ContainerId> release,
    List<String> blacklistAdditions, List<String> blacklistRemovals) {

  // The attempt must still be known to the scheduler.
  final FSAppAttempt attempt = getSchedulerApp(appAttemptId);
  if (null == attempt) {
    LOG.info("Calling allocate on removed " +
        "or non existant application " + appAttemptId);
    return EMPTY_ALLOCATION;
  }

  // Sanity check
  SchedulerUtils.normalizeRequests(ask, DOMINANT_RESOURCE_CALCULATOR,
      clusterResource, minimumAllocation, getMaximumResourceCapability(),
      incrAllocation);

  // A managed AM with exactly one ask and no live containers: record that
  // ask's capability as the attempt's AM resource.
  final boolean recordAmResource = !attempt.getUnmanagedAM()
      && ask.size() == 1
      && attempt.getLiveContainers().isEmpty();
  if (recordAmResource) {
    attempt.setAMResource(ask.get(0).getCapability());
  }

  // Release containers
  releaseContainers(release, attempt);

  synchronized (attempt) {
    if (!ask.isEmpty()) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("allocate: pre-update" +
            " applicationAttemptId=" + appAttemptId +
            " application=" + attempt.getApplicationId());
      }

      // Show the requests before and after applying the new asks.
      attempt.showRequests();
      attempt.updateResourceRequests(ask);
      attempt.showRequests();
    }

    if (LOG.isDebugEnabled()) {
      LOG.debug("allocate: post-update" +
          " applicationAttemptId=" + appAttemptId +
          " #ask=" + ask.size() +
          " reservation= " + attempt.getCurrentReservation());

      LOG.debug("Preempting " + attempt.getPreemptionContainers().size()
          + " container(s)");
    }

    // Collect the ids of the containers currently marked for preemption.
    Set<ContainerId> toPreempt = new HashSet<ContainerId>();
    for (RMContainer candidate : attempt.getPreemptionContainers()) {
      toPreempt.add(candidate.getContainerId());
    }

    attempt.updateBlacklist(blacklistAdditions, blacklistRemovals);

    ContainersAndNMTokensAllocation pulled =
        attempt.pullNewlyAllocatedContainersAndNMTokens();
    Resource headroom = attempt.getHeadroom();
    attempt.setApplicationHeadroomForMetrics(headroom);
    return new Allocation(pulled.getContainerList(), headroom,
        toPreempt, null, null, pulled.getNMTokenList());
  }
}
 
Example #27
Source File: CapacityScheduler.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Removes a node from the scheduler: deactivates it in the label manager,
 * subtracts its capability from the cluster resource, completes its running
 * and reserved containers as lost, and drops it from the node map.
 *
 * Only the label-manager deactivation happens when the node is not present
 * in the node map.
 *
 * @param nodeInfo the node being removed
 */
private synchronized void removeNode(RMNode nodeInfo) {
  // Let the node label manager (when there is one) know the node is gone.
  if (labelManager != null) {
    labelManager.deactivateNode(nodeInfo.getNodeID());
  }

  final FiCaSchedulerNode removed = nodes.get(nodeInfo.getNodeID());
  if (removed == null) {
    return;
  }

  // Shrink the cluster resource and propagate the new total from the root
  // queue.
  Resources.subtractFrom(clusterResource,
      removed.getRMNode().getTotalCapability());
  root.updateClusterResource(clusterResource,
      new ResourceLimits(clusterResource));

  // With asynchronous scheduling on and no node managers left, suspend the
  // async scheduler thread.
  final int remainingNodes = numNodeManagers.decrementAndGet();
  if (scheduleAsynchronously && remainingNodes == 0) {
    asyncSchedulerThread.suspendSchedule();
  }

  // Remove running containers
  for (RMContainer running : removed.getRunningContainers()) {
    completedContainer(running,
        SchedulerUtils.createAbnormalContainerStatus(
            running.getContainerId(), SchedulerUtils.LOST_CONTAINER),
        RMContainerEventType.KILL);
  }

  // Remove reservations, if any
  final RMContainer reserved = removed.getReservedContainer();
  if (reserved != null) {
    completedContainer(reserved,
        SchedulerUtils.createAbnormalContainerStatus(
            reserved.getContainerId(), SchedulerUtils.LOST_CONTAINER),
        RMContainerEventType.KILL);
  }

  nodes.remove(nodeInfo.getNodeID());
  updateMaximumAllocation(removed, false);

  LOG.info("Removed node " + nodeInfo.getNodeAddress() +
      " clusterResource: " + clusterResource);
}
 
Example #28
Source File: CapacityScheduler.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Handles one allocate call for an application attempt: normalizes the
 * asks, releases the listed containers, updates the attempt's requests and
 * blacklist, and returns the attempt's allocation.
 *
 * @param applicationAttemptId the attempt making the call
 * @param ask resource requests to register for the attempt
 * @param release containers the attempt wants released
 * @param blacklistAdditions passed to the attempt's updateBlacklist
 * @param blacklistRemovals passed to the attempt's updateBlacklist
 * @return the attempt's allocation, or EMPTY_ALLOCATION when the attempt is
 *         unknown to the scheduler or already stopped
 */
@Override
@Lock(Lock.NoLock.class)
public Allocation allocate(ApplicationAttemptId applicationAttemptId,
    List<ResourceRequest> ask, List<ContainerId> release,
    List<String> blacklistAdditions, List<String> blacklistRemovals) {

  final FiCaSchedulerApp attempt =
      getApplicationAttempt(applicationAttemptId);
  if (null == attempt) {
    LOG.info("Calling allocate on removed " +
        "or non existant application " + applicationAttemptId);
    return EMPTY_ALLOCATION;
  }

  // Sanity check
  SchedulerUtils.normalizeRequests(
      ask, getResourceCalculator(), getClusterResource(),
      getMinimumResourceCapability(), getMaximumResourceCapability());

  // Release containers
  releaseContainers(release, attempt);

  synchronized (attempt) {

    // The application may be in the middle of being stopped/removed while
    // this allocate call comes in.
    if (attempt.isStopped()) {
      LOG.info("Calling allocate on a stopped " +
          "application " + applicationAttemptId);
      return EMPTY_ALLOCATION;
    }

    if (!ask.isEmpty()) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("allocate: pre-update" +
            " applicationAttemptId=" + applicationAttemptId +
            " application=" + attempt);
      }
      attempt.showRequests();

      // Update application requests
      attempt.updateResourceRequests(ask);

      LOG.debug("allocate: post-update");
      attempt.showRequests();
    }

    if (LOG.isDebugEnabled()) {
      LOG.debug("allocate:" +
          " applicationAttemptId=" + applicationAttemptId +
          " #ask=" + ask.size());
    }

    attempt.updateBlacklist(blacklistAdditions, blacklistRemovals);

    return attempt.getAllocation(getResourceCalculator(),
        clusterResource, getMinimumResourceCapability());
  }
}
 
Example #29
Source File: FairScheduler.java    From hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Dispatches a scheduler event to the matching scheduler operation.
 *
 * Each case first checks that the event object is of the class expected for
 * its type and throws a RuntimeException when it is not. Event types without
 * a case are logged as errors and otherwise ignored.
 *
 * @param event the event to handle
 */
@Override
public void handle(SchedulerEvent event) {
  switch (event.getType()) {
  case NODE_ADDED: {
    if (!(event instanceof NodeAddedSchedulerEvent)) {
      throw new RuntimeException("Unexpected event type: " + event);
    }
    NodeAddedSchedulerEvent added = (NodeAddedSchedulerEvent) event;
    addNode(added.getAddedRMNode());
    recoverContainersOnNode(added.getContainerReports(),
        added.getAddedRMNode());
    break;
  }
  case NODE_REMOVED: {
    if (!(event instanceof NodeRemovedSchedulerEvent)) {
      throw new RuntimeException("Unexpected event type: " + event);
    }
    NodeRemovedSchedulerEvent removed = (NodeRemovedSchedulerEvent) event;
    removeNode(removed.getRemovedRMNode());
    break;
  }
  case NODE_UPDATE: {
    if (!(event instanceof NodeUpdateSchedulerEvent)) {
      throw new RuntimeException("Unexpected event type: " + event);
    }
    NodeUpdateSchedulerEvent updated = (NodeUpdateSchedulerEvent) event;
    nodeUpdate(updated.getRMNode());
    break;
  }
  case APP_ADDED: {
    if (!(event instanceof AppAddedSchedulerEvent)) {
      throw new RuntimeException("Unexpected event type: " + event);
    }
    AppAddedSchedulerEvent appAdded = (AppAddedSchedulerEvent) event;
    String targetQueue = resolveReservationQueueName(
        appAdded.getQueue(),
        appAdded.getApplicationId(),
        appAdded.getReservationID());
    // The application is only added when a queue name was resolved.
    if (targetQueue != null) {
      addApplication(appAdded.getApplicationId(),
          targetQueue, appAdded.getUser(),
          appAdded.getIsAppRecovering());
    }
    break;
  }
  case APP_REMOVED: {
    if (!(event instanceof AppRemovedSchedulerEvent)) {
      throw new RuntimeException("Unexpected event type: " + event);
    }
    AppRemovedSchedulerEvent appRemoved = (AppRemovedSchedulerEvent) event;
    removeApplication(appRemoved.getApplicationID(),
        appRemoved.getFinalState());
    break;
  }
  case NODE_RESOURCE_UPDATE: {
    if (!(event instanceof NodeResourceUpdateSchedulerEvent)) {
      throw new RuntimeException("Unexpected event type: " + event);
    }
    NodeResourceUpdateSchedulerEvent resourceUpdated =
        (NodeResourceUpdateSchedulerEvent) event;
    updateNodeResource(resourceUpdated.getRMNode(),
        resourceUpdated.getResourceOption());
    break;
  }
  case APP_ATTEMPT_ADDED: {
    if (!(event instanceof AppAttemptAddedSchedulerEvent)) {
      throw new RuntimeException("Unexpected event type: " + event);
    }
    AppAttemptAddedSchedulerEvent attemptAdded =
        (AppAttemptAddedSchedulerEvent) event;
    addApplicationAttempt(attemptAdded.getApplicationAttemptId(),
        attemptAdded.getTransferStateFromPreviousAttempt(),
        attemptAdded.getIsAttemptRecovering());
    break;
  }
  case APP_ATTEMPT_REMOVED: {
    if (!(event instanceof AppAttemptRemovedSchedulerEvent)) {
      throw new RuntimeException("Unexpected event type: " + event);
    }
    AppAttemptRemovedSchedulerEvent attemptRemoved =
        (AppAttemptRemovedSchedulerEvent) event;
    removeApplicationAttempt(
        attemptRemoved.getApplicationAttemptID(),
        attemptRemoved.getFinalAttemptState(),
        attemptRemoved.getKeepContainersAcrossAppAttempts());
    break;
  }
  case CONTAINER_EXPIRED: {
    if (!(event instanceof ContainerExpiredSchedulerEvent)) {
      throw new RuntimeException("Unexpected event type: " + event);
    }
    ContainerExpiredSchedulerEvent expired =
        (ContainerExpiredSchedulerEvent) event;
    ContainerId expiredId = expired.getContainerId();
    // Complete the expired container with an abnormal EXPIRED_CONTAINER
    // status.
    completedContainer(getRMContainer(expiredId),
        SchedulerUtils.createAbnormalContainerStatus(
            expiredId, SchedulerUtils.EXPIRED_CONTAINER),
        RMContainerEventType.EXPIRE);
    break;
  }
  default:
    LOG.error("Unknown event arrived at FairScheduler: " + event.toString());
  }
}
 
Example #30
Source File: AbstractCSQueue.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Checks whether {@code user} is granted {@code acl} on this queue.
 *
 * The queue ACL is converted with SchedulerUtils.toAccessType and the
 * decision is delegated to the authorizer, using this queue's entity.
 *
 * @param acl the queue ACL to check
 * @param user the user whose permission is checked
 * @return the result of the authorizer's checkPermission call
 */
@Override
public boolean hasAccess(QueueACL acl, UserGroupInformation user) {
  return authorizer.checkPermission(SchedulerUtils.toAccessType(acl),
    queueEntity, user);
}