org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent Java Examples

The following examples show how to use org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.NodeRemovedSchedulerEvent. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: RMNodeImpl.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Deactivates {@code rmNode}: notifies the scheduler (when still needed) and
 * the nodes-list manager, moves the node from the active to the inactive
 * node map, and updates the deactivation metrics.
 */
@Override
public void transition(RMNodeImpl rmNode, RMNodeEvent event) {
  // Discard whatever is still queued in the node's update queue.
  rmNode.nodeUpdateQueue.clear();

  // Remember the state we are leaving; it drives both the scheduler
  // notification and the metrics update at the end.
  NodeState initialState = rmNode.getState();
  boolean wasUnhealthy = initialState.equals(NodeState.UNHEALTHY);

  // An UNHEALTHY node has already been removed from the scheduler, so the
  // removal event is only sent for the other states.
  if (!wasUnhealthy) {
    rmNode.context.getDispatcher().getEventHandler().handle(
        new NodeRemovedSchedulerEvent(rmNode));
  }

  // The nodes-list manager is always told that the node is unusable.
  rmNode.context.getDispatcher().getEventHandler().handle(
      new NodesListManagerEvent(
          NodesListManagerEventType.NODE_UNUSABLE, rmNode));

  // Move the node from the active map to the inactive map (keyed by host).
  rmNode.context.getRMNodes().remove(rmNode.nodeId);
  LOG.info("Deactivating Node " + rmNode.nodeId + " as it is now "
      + finalState);
  rmNode.context.getInactiveRMNodes().put(rmNode.nodeId.getHost(), rmNode);

  // Account for the state change in the metrics.
  rmNode.updateMetricsForDeactivatedNode(initialState, finalState);
}
 
Example #2
Source File: LeastAMNodesFirstPolicy.java    From incubator-myriad with Apache License 2.0 6 votes vote down vote up
/**
 * Routes a scheduler event to the matching node handler of this policy.
 *
 * <p>{@code NODE_UPDATE} events go to {@code onNodeUpdated} and
 * {@code NODE_REMOVED} events go to {@code onNodeRemoved}; every other event
 * type is ignored. A {@link ClassCastException} raised while dispatching is
 * logged and not rethrown.
 *
 * @param event the scheduler event to inspect
 */
@Override
public void afterSchedulerEventHandled(SchedulerEvent event) {

  try {
    switch (event.getType()) {
      case NODE_UPDATE:
        onNodeUpdated((NodeUpdateSchedulerEvent) event);
        break;

      case NODE_REMOVED:
        onNodeRemoved((NodeRemovedSchedulerEvent) event);
        break;

      default:
        // Other event types are not relevant here.
        break;
    }
  } catch (ClassCastException e) {
    // The event type did not match the event's concrete class (or a handler
    // hit a bad cast); log it instead of propagating.
    LOGGER.error("incorrect event object", e);
  }
}
 
Example #3
Source File: TestFairScheduler.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that the scheduler's cluster memory follows node additions and
 * removals: 1024 after the first node, 1536 after the second, and 512 once
 * the first node is removed again.
 */
@Test
public void testAggregateCapacityTracking() throws Exception {
  scheduler.init(conf);
  scheduler.start();
  scheduler.reinitialize(conf, resourceManager.getRMContext());

  // First node: 1024 of memory.
  RMNode firstNode = MockNodes.newNodeInfo(
      1, Resources.createResource(1024), 1, "127.0.0.1");
  scheduler.handle(new NodeAddedSchedulerEvent(firstNode));
  assertEquals(1024, scheduler.getClusterResource().getMemory());

  // Second node: 512 more, for 1536 in total.
  RMNode secondNode = MockNodes.newNodeInfo(
      1, Resources.createResource(512), 2, "127.0.0.2");
  scheduler.handle(new NodeAddedSchedulerEvent(secondNode));
  assertEquals(1536, scheduler.getClusterResource().getMemory());

  // Removing the first node leaves only the second node's 512.
  scheduler.handle(new NodeRemovedSchedulerEvent(firstNode));
  assertEquals(512, scheduler.getClusterResource().getMemory());
}
 
Example #4
Source File: RMNodeImpl.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Deactivates {@code rmNode}: notifies the scheduler (when still needed) and
 * the nodes-list manager, moves the node from the active to the inactive
 * node map, and updates the deactivation metrics.
 */
@Override
public void transition(RMNodeImpl rmNode, RMNodeEvent event) {
  // Discard whatever is still queued in the node's update queue.
  rmNode.nodeUpdateQueue.clear();

  // Remember the state we are leaving; it drives both the scheduler
  // notification and the metrics update at the end.
  NodeState initialState = rmNode.getState();
  boolean wasUnhealthy = initialState.equals(NodeState.UNHEALTHY);

  // An UNHEALTHY node has already been removed from the scheduler, so the
  // removal event is only sent for the other states.
  if (!wasUnhealthy) {
    rmNode.context.getDispatcher().getEventHandler().handle(
        new NodeRemovedSchedulerEvent(rmNode));
  }

  // The nodes-list manager is always told that the node is unusable.
  rmNode.context.getDispatcher().getEventHandler().handle(
      new NodesListManagerEvent(
          NodesListManagerEventType.NODE_UNUSABLE, rmNode));

  // Move the node from the active map to the inactive map (keyed by host).
  rmNode.context.getRMNodes().remove(rmNode.nodeId);
  LOG.info("Deactivating Node " + rmNode.nodeId + " as it is now "
      + finalState);
  rmNode.context.getInactiveRMNodes().put(rmNode.nodeId.getHost(), rmNode);

  // Account for the state change in the metrics.
  rmNode.updateMetricsForDeactivatedNode(initialState, finalState);
}
 
Example #5
Source File: TestFairScheduler.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that the scheduler's cluster memory follows node additions and
 * removals: 1024 after the first node, 1536 after the second, and 512 once
 * the first node is removed again.
 */
@Test
public void testAggregateCapacityTracking() throws Exception {
  scheduler.init(conf);
  scheduler.start();
  scheduler.reinitialize(conf, resourceManager.getRMContext());

  // First node: 1024 of memory.
  RMNode firstNode = MockNodes.newNodeInfo(
      1, Resources.createResource(1024), 1, "127.0.0.1");
  scheduler.handle(new NodeAddedSchedulerEvent(firstNode));
  assertEquals(1024, scheduler.getClusterResource().getMemory());

  // Second node: 512 more, for 1536 in total.
  RMNode secondNode = MockNodes.newNodeInfo(
      1, Resources.createResource(512), 2, "127.0.0.2");
  scheduler.handle(new NodeAddedSchedulerEvent(secondNode));
  assertEquals(1536, scheduler.getClusterResource().getMemory());

  // Removing the first node leaves only the second node's 512.
  scheduler.handle(new NodeRemovedSchedulerEvent(firstNode));
  assertEquals(512, scheduler.getClusterResource().getMemory());
}
 
Example #6
Source File: TestRMNodeTransitions.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that expiring an unhealthy node moves it to LOST without sending
 * another node-removed event to the scheduler: the verified count is 2 both
 * before and after the EXPIRE event.
 */
@Test
public void testUnhealthyExpireForSchedulerRemove() {
  RMNodeImpl unhealthyNode = getUnhealthyNode();

  // Removal events already seen by the scheduler before the node expires.
  verify(scheduler, times(2)).handle(any(NodeRemovedSchedulerEvent.class));

  RMNodeEvent expireEvent =
      new RMNodeEvent(unhealthyNode.getNodeID(), RMNodeEventType.EXPIRE);
  unhealthyNode.handle(expireEvent);

  // The count must be unchanged after the expiry.
  verify(scheduler, times(2)).handle(any(NodeRemovedSchedulerEvent.class));
  Assert.assertEquals(NodeState.LOST, unhealthyNode.getState());
}
 
Example #7
Source File: TestFairScheduler.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a manually triggered continuous scheduling attempt does not
 * throw after one of two registered nodes has been removed.
 *
 * @throws Exception if scheduler setup fails
 */
@Test
public void testContinuousSchedulingWithNodeRemoved() throws Exception {
  // Continuous scheduling is expected to be off for this configuration; the
  // test invokes a single continuous scheduling attempt manually instead.
  scheduler.init(conf);
  scheduler.start();
  Assert.assertFalse("Continuous scheduling should be disabled.",
      scheduler.isContinuousSchedulingEnabled());

  // Add two nodes
  RMNode node1 =
      MockNodes.newNodeInfo(1, Resources.createResource(8 * 1024, 8), 1,
          "127.0.0.1");
  NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1);
  scheduler.handle(nodeEvent1);
  RMNode node2 =
      MockNodes.newNodeInfo(1, Resources.createResource(8 * 1024, 8), 2,
          "127.0.0.2");
  NodeAddedSchedulerEvent nodeEvent2 = new NodeAddedSchedulerEvent(node2);
  scheduler.handle(nodeEvent2);
  Assert.assertEquals("We should have two alive nodes.",
      2, scheduler.getNumClusterNodes());

  // Remove one node
  NodeRemovedSchedulerEvent removeNode1 = new NodeRemovedSchedulerEvent(node1);
  scheduler.handle(removeNode1);
  Assert.assertEquals("We should only have one alive node.",
      1, scheduler.getNumClusterNodes());

  // Invoke the continuous scheduling once
  try {
    scheduler.continuousSchedulingAttempt();
  } catch (Exception e) {
    // Fail with the same message as before, but keep the original exception
    // as the cause so its stack trace shows up in the test report.
    throw new AssertionError(
        "Exception happened when doing continuous scheduling. "
            + e.toString(), e);
  }
}
 
Example #8
Source File: TestFairScheduler.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the root queue metrics report a node's memory and virtual
 * cores as available after it is added, report zero again after it is
 * removed, and are not altered by {@code scheduler.update()} in either case.
 */
@Test
public void testRemoveNodeUpdatesRootQueueMetrics() throws IOException {
  scheduler.init(conf);
  scheduler.start();
  scheduler.reinitialize(conf, resourceManager.getRMContext());

  // Nothing is available before any node is registered.
  assertEquals(0, scheduler.getRootQueueMetrics().getAvailableMB());
  assertEquals(0, scheduler.getRootQueueMetrics().getAvailableVirtualCores());

  RMNode onlyNode = MockNodes.newNodeInfo(
      1, Resources.createResource(1024, 4), 1, "127.0.0.1");
  scheduler.handle(new NodeAddedSchedulerEvent(onlyNode));

  // The node's resources show up, and an update leaves them untouched.
  assertEquals(1024, scheduler.getRootQueueMetrics().getAvailableMB());
  assertEquals(4, scheduler.getRootQueueMetrics().getAvailableVirtualCores());
  scheduler.update(); // update shouldn't change things
  assertEquals(1024, scheduler.getRootQueueMetrics().getAvailableMB());
  assertEquals(4, scheduler.getRootQueueMetrics().getAvailableVirtualCores());

  scheduler.handle(new NodeRemovedSchedulerEvent(onlyNode));

  // After removal everything is back to zero, again stable across update.
  assertEquals(0, scheduler.getRootQueueMetrics().getAvailableMB());
  assertEquals(0, scheduler.getRootQueueMetrics().getAvailableVirtualCores());
  scheduler.update(); // update shouldn't change things
  assertEquals(0, scheduler.getRootQueueMetrics().getAvailableMB());
  assertEquals(0, scheduler.getRootQueueMetrics().getAvailableVirtualCores());
}
 
Example #9
Source File: TestFifoScheduler.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the FIFO scheduler's available memory reflects a node that
 * reconnects with less memory: 6 GB with nodes of 4 GB and 2 GB, then 4 GB
 * after the first node is removed and re-added with 2 GB.
 *
 * @throws Exception if scheduler setup fails
 */
@Test (timeout = 50000)
public void testReconnectedNode() throws Exception {
  CapacitySchedulerConfiguration conf = new CapacitySchedulerConfiguration();
  conf.setQueues("default", new String[] {"default"});
  conf.setCapacity("default", 100);
  FifoScheduler fs = new FifoScheduler();
  fs.init(conf);
  fs.start();
  try {
    // mock rmContext to avoid NPE.
    RMContext context = mock(RMContext.class);
    fs.reinitialize(conf, null);
    fs.setRMContext(context);

    RMNode n1 =
        MockNodes.newNodeInfo(0, MockNodes.newResource(4 * GB), 1, "127.0.0.2");
    RMNode n2 =
        MockNodes.newNodeInfo(0, MockNodes.newResource(2 * GB), 2, "127.0.0.3");

    fs.handle(new NodeAddedSchedulerEvent(n1));
    fs.handle(new NodeAddedSchedulerEvent(n2));
    fs.handle(new NodeUpdateSchedulerEvent(n1));
    Assert.assertEquals(6 * GB, fs.getRootQueueMetrics().getAvailableMB());

    // reconnect n1 with downgraded memory
    n1 =
        MockNodes.newNodeInfo(0, MockNodes.newResource(2 * GB), 1, "127.0.0.2");
    fs.handle(new NodeRemovedSchedulerEvent(n1));
    fs.handle(new NodeAddedSchedulerEvent(n1));
    fs.handle(new NodeUpdateSchedulerEvent(n1));

    Assert.assertEquals(4 * GB, fs.getRootQueueMetrics().getAvailableMB());
  } finally {
    // Stop the started scheduler even when an assertion above fails, so a
    // failing run does not leave it running for later tests.
    fs.stop();
  }
}
 
Example #10
Source File: TestFifoScheduler.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the FIFO scheduler's available memory reflects a node that
 * reconnects with less memory: 6 GB with nodes of 4 GB and 2 GB, then 4 GB
 * after the first node is removed and re-added with 2 GB.
 *
 * @throws Exception if scheduler setup fails
 */
@Test (timeout = 50000)
public void testReconnectedNode() throws Exception {
  CapacitySchedulerConfiguration conf = new CapacitySchedulerConfiguration();
  conf.setQueues("default", new String[] {"default"});
  conf.setCapacity("default", 100);
  FifoScheduler fs = new FifoScheduler();
  fs.init(conf);
  fs.start();
  try {
    // mock rmContext to avoid NPE.
    RMContext context = mock(RMContext.class);
    fs.reinitialize(conf, null);
    fs.setRMContext(context);

    RMNode n1 =
        MockNodes.newNodeInfo(0, MockNodes.newResource(4 * GB), 1, "127.0.0.2");
    RMNode n2 =
        MockNodes.newNodeInfo(0, MockNodes.newResource(2 * GB), 2, "127.0.0.3");

    fs.handle(new NodeAddedSchedulerEvent(n1));
    fs.handle(new NodeAddedSchedulerEvent(n2));
    fs.handle(new NodeUpdateSchedulerEvent(n1));
    Assert.assertEquals(6 * GB, fs.getRootQueueMetrics().getAvailableMB());

    // reconnect n1 with downgraded memory
    n1 =
        MockNodes.newNodeInfo(0, MockNodes.newResource(2 * GB), 1, "127.0.0.2");
    fs.handle(new NodeRemovedSchedulerEvent(n1));
    fs.handle(new NodeAddedSchedulerEvent(n1));
    fs.handle(new NodeUpdateSchedulerEvent(n1));

    Assert.assertEquals(4 * GB, fs.getRootQueueMetrics().getAvailableMB());
  } finally {
    // Stop the started scheduler even when an assertion above fails, so a
    // failing run does not leave it running for later tests.
    fs.stop();
  }
}
 
Example #11
Source File: TestCapacityScheduler.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the capacity scheduler's cluster memory reflects a node that
 * reconnects with less memory: 6 GB with nodes of 4 GB and 2 GB, then 4 GB
 * after the first node is removed and re-added with 2 GB.
 *
 * @throws Exception if scheduler setup fails
 */
@Test
public void testReconnectedNode() throws Exception {
  CapacitySchedulerConfiguration csConf =
      new CapacitySchedulerConfiguration();
  setupQueueConfiguration(csConf);
  CapacityScheduler cs = new CapacityScheduler();
  cs.setConf(new YarnConfiguration());
  cs.setRMContext(resourceManager.getRMContext());
  cs.init(csConf);
  cs.start();
  try {
    cs.reinitialize(csConf, new RMContextImpl(null, null, null, null,
      null, null, new RMContainerTokenSecretManager(csConf),
      new NMTokenSecretManagerInRM(csConf),
      new ClientToAMTokenSecretManagerInRM(), null));

    RMNode n1 = MockNodes.newNodeInfo(0, MockNodes.newResource(4 * GB), 1);
    RMNode n2 = MockNodes.newNodeInfo(0, MockNodes.newResource(2 * GB), 2);

    cs.handle(new NodeAddedSchedulerEvent(n1));
    cs.handle(new NodeAddedSchedulerEvent(n2));

    Assert.assertEquals(6 * GB, cs.getClusterResource().getMemory());

    // reconnect n1 with downgraded memory
    n1 = MockNodes.newNodeInfo(0, MockNodes.newResource(2 * GB), 1);
    cs.handle(new NodeRemovedSchedulerEvent(n1));
    cs.handle(new NodeAddedSchedulerEvent(n1));

    Assert.assertEquals(4 * GB, cs.getClusterResource().getMemory());
  } finally {
    // Stop the started scheduler even when an assertion above fails, so a
    // failing run does not leave it running for later tests.
    cs.stop();
  }
}
 
Example #12
Source File: TestCapacityScheduler.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that {@code getNumClusterNodes()} tracks node-added and
 * node-removed events: 0 initially, 2 after adding two nodes, 1 and back to
 * 2 after removing and re-adding one, and 0 after removing both.
 *
 * @throws Exception if scheduler setup fails
 */
@Test
public void testNumClusterNodes() throws Exception {
  YarnConfiguration conf = new YarnConfiguration();
  CapacityScheduler cs = new CapacityScheduler();
  cs.setConf(conf);
  RMContext rmContext = TestUtils.getMockRMContext();
  cs.setRMContext(rmContext);
  CapacitySchedulerConfiguration csConf =
      new CapacitySchedulerConfiguration();
  setupQueueConfiguration(csConf);
  cs.init(csConf);
  cs.start();
  try {
    assertEquals(0, cs.getNumClusterNodes());

    RMNode n1 = MockNodes.newNodeInfo(0, MockNodes.newResource(4 * GB), 1);
    RMNode n2 = MockNodes.newNodeInfo(0, MockNodes.newResource(2 * GB), 2);
    cs.handle(new NodeAddedSchedulerEvent(n1));
    cs.handle(new NodeAddedSchedulerEvent(n2));
    assertEquals(2, cs.getNumClusterNodes());

    cs.handle(new NodeRemovedSchedulerEvent(n1));
    assertEquals(1, cs.getNumClusterNodes());
    cs.handle(new NodeAddedSchedulerEvent(n1));
    assertEquals(2, cs.getNumClusterNodes());
    cs.handle(new NodeRemovedSchedulerEvent(n2));
    cs.handle(new NodeRemovedSchedulerEvent(n1));
    assertEquals(0, cs.getNumClusterNodes());
  } finally {
    // Stop the started scheduler even when an assertion above fails, so a
    // failing run does not leave it running for later tests.
    cs.stop();
  }
}
 
Example #13
Source File: TestRMNodeTransitions.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that expiring an unhealthy node moves it to LOST without sending
 * another node-removed event to the scheduler: the verified count is 2 both
 * before and after the EXPIRE event.
 */
@Test
public void testUnhealthyExpireForSchedulerRemove() {
  RMNodeImpl unhealthyNode = getUnhealthyNode();

  // Removal events already seen by the scheduler before the node expires.
  verify(scheduler, times(2)).handle(any(NodeRemovedSchedulerEvent.class));

  RMNodeEvent expireEvent =
      new RMNodeEvent(unhealthyNode.getNodeID(), RMNodeEventType.EXPIRE);
  unhealthyNode.handle(expireEvent);

  // The count must be unchanged after the expiry.
  verify(scheduler, times(2)).handle(any(NodeRemovedSchedulerEvent.class));
  Assert.assertEquals(NodeState.LOST, unhealthyNode.getState());
}
 
Example #14
Source File: TestCapacityScheduler.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that {@code getNumClusterNodes()} tracks node-added and
 * node-removed events: 0 initially, 2 after adding two nodes, 1 and back to
 * 2 after removing and re-adding one, and 0 after removing both.
 *
 * @throws Exception if scheduler setup fails
 */
@Test
public void testNumClusterNodes() throws Exception {
  YarnConfiguration conf = new YarnConfiguration();
  CapacityScheduler cs = new CapacityScheduler();
  cs.setConf(conf);
  RMContext rmContext = TestUtils.getMockRMContext();
  cs.setRMContext(rmContext);
  CapacitySchedulerConfiguration csConf =
      new CapacitySchedulerConfiguration();
  setupQueueConfiguration(csConf);
  cs.init(csConf);
  cs.start();
  try {
    assertEquals(0, cs.getNumClusterNodes());

    RMNode n1 = MockNodes.newNodeInfo(0, MockNodes.newResource(4 * GB), 1);
    RMNode n2 = MockNodes.newNodeInfo(0, MockNodes.newResource(2 * GB), 2);
    cs.handle(new NodeAddedSchedulerEvent(n1));
    cs.handle(new NodeAddedSchedulerEvent(n2));
    assertEquals(2, cs.getNumClusterNodes());

    cs.handle(new NodeRemovedSchedulerEvent(n1));
    assertEquals(1, cs.getNumClusterNodes());
    cs.handle(new NodeAddedSchedulerEvent(n1));
    assertEquals(2, cs.getNumClusterNodes());
    cs.handle(new NodeRemovedSchedulerEvent(n2));
    cs.handle(new NodeRemovedSchedulerEvent(n1));
    assertEquals(0, cs.getNumClusterNodes());
  } finally {
    // Stop the started scheduler even when an assertion above fails, so a
    // failing run does not leave it running for later tests.
    cs.stop();
  }
}
 
Example #15
Source File: TestCapacityScheduler.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the capacity scheduler's cluster memory reflects a node that
 * reconnects with less memory: 6 GB with nodes of 4 GB and 2 GB, then 4 GB
 * after the first node is removed and re-added with 2 GB.
 *
 * @throws Exception if scheduler setup fails
 */
@Test
public void testReconnectedNode() throws Exception {
  CapacitySchedulerConfiguration csConf =
      new CapacitySchedulerConfiguration();
  setupQueueConfiguration(csConf);
  CapacityScheduler cs = new CapacityScheduler();
  cs.setConf(new YarnConfiguration());
  cs.setRMContext(resourceManager.getRMContext());
  cs.init(csConf);
  cs.start();
  try {
    cs.reinitialize(csConf, new RMContextImpl(null, null, null, null,
      null, null, new RMContainerTokenSecretManager(csConf),
      new NMTokenSecretManagerInRM(csConf),
      new ClientToAMTokenSecretManagerInRM(), null));

    RMNode n1 = MockNodes.newNodeInfo(0, MockNodes.newResource(4 * GB), 1);
    RMNode n2 = MockNodes.newNodeInfo(0, MockNodes.newResource(2 * GB), 2);

    cs.handle(new NodeAddedSchedulerEvent(n1));
    cs.handle(new NodeAddedSchedulerEvent(n2));

    Assert.assertEquals(6 * GB, cs.getClusterResource().getMemory());

    // reconnect n1 with downgraded memory
    n1 = MockNodes.newNodeInfo(0, MockNodes.newResource(2 * GB), 1);
    cs.handle(new NodeRemovedSchedulerEvent(n1));
    cs.handle(new NodeAddedSchedulerEvent(n1));

    Assert.assertEquals(4 * GB, cs.getClusterResource().getMemory());
  } finally {
    // Stop the started scheduler even when an assertion above fails, so a
    // failing run does not leave it running for later tests.
    cs.stop();
  }
}
 
Example #16
Source File: TestFairScheduler.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a manually triggered continuous scheduling attempt does not
 * throw after one of two registered nodes has been removed.
 *
 * @throws Exception if scheduler setup fails
 */
@Test
public void testContinuousSchedulingWithNodeRemoved() throws Exception {
  // Continuous scheduling is expected to be off for this configuration; the
  // test invokes a single continuous scheduling attempt manually instead.
  scheduler.init(conf);
  scheduler.start();
  Assert.assertFalse("Continuous scheduling should be disabled.",
      scheduler.isContinuousSchedulingEnabled());

  // Add two nodes
  RMNode node1 =
      MockNodes.newNodeInfo(1, Resources.createResource(8 * 1024, 8, 8), 1,
          "127.0.0.1");
  NodeAddedSchedulerEvent nodeEvent1 = new NodeAddedSchedulerEvent(node1);
  scheduler.handle(nodeEvent1);
  RMNode node2 =
      MockNodes.newNodeInfo(1, Resources.createResource(8 * 1024, 8, 8), 2,
          "127.0.0.2");
  NodeAddedSchedulerEvent nodeEvent2 = new NodeAddedSchedulerEvent(node2);
  scheduler.handle(nodeEvent2);
  Assert.assertEquals("We should have two alive nodes.",
      2, scheduler.getNumClusterNodes());

  // Remove one node
  NodeRemovedSchedulerEvent removeNode1 = new NodeRemovedSchedulerEvent(node1);
  scheduler.handle(removeNode1);
  Assert.assertEquals("We should only have one alive node.",
      1, scheduler.getNumClusterNodes());

  // Invoke the continuous scheduling once
  try {
    scheduler.continuousSchedulingAttempt();
  } catch (Exception e) {
    // Fail with the same message as before, but keep the original exception
    // as the cause so its stack trace shows up in the test report.
    throw new AssertionError(
        "Exception happened when doing continuous scheduling. "
            + e.toString(), e);
  }
}
 
Example #17
Source File: TestFairScheduler.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the root queue metrics report a node's memory and virtual
 * cores as available after it is added, report zero again after it is
 * removed, and are not altered by {@code scheduler.update()} in either case.
 */
@Test
public void testRemoveNodeUpdatesRootQueueMetrics() throws IOException {
  scheduler.init(conf);
  scheduler.start();
  scheduler.reinitialize(conf, resourceManager.getRMContext());

  // Nothing is available before any node is registered.
  assertEquals(0, scheduler.getRootQueueMetrics().getAvailableMB());
  assertEquals(0, scheduler.getRootQueueMetrics().getAvailableVirtualCores());

  RMNode onlyNode = MockNodes.newNodeInfo(
      1, Resources.createResource(1024, 4, 4), 1, "127.0.0.1");
  scheduler.handle(new NodeAddedSchedulerEvent(onlyNode));

  // The node's resources show up, and an update leaves them untouched.
  assertEquals(1024, scheduler.getRootQueueMetrics().getAvailableMB());
  assertEquals(4, scheduler.getRootQueueMetrics().getAvailableVirtualCores());
  scheduler.update(); // update shouldn't change things
  assertEquals(1024, scheduler.getRootQueueMetrics().getAvailableMB());
  assertEquals(4, scheduler.getRootQueueMetrics().getAvailableVirtualCores());

  scheduler.handle(new NodeRemovedSchedulerEvent(onlyNode));

  // After removal everything is back to zero, again stable across update.
  assertEquals(0, scheduler.getRootQueueMetrics().getAvailableMB());
  assertEquals(0, scheduler.getRootQueueMetrics().getAvailableVirtualCores());
  scheduler.update(); // update shouldn't change things
  assertEquals(0, scheduler.getRootQueueMetrics().getAvailableMB());
  assertEquals(0, scheduler.getRootQueueMetrics().getAvailableVirtualCores());
}
 
Example #18
Source File: CompositeInterceptor.java    From incubator-myriad with Apache License 2.0 5 votes vote down vote up
/**
 * Extracts the node id carried by a node-related scheduler event.
 *
 * @param event the scheduler event to inspect
 * @return the id of the node the event refers to, or {@code null} when the
 *         event type is not one of the node event types handled here
 */
private NodeId getNodeIdForSchedulerEvent(SchedulerEvent event) {
  switch (event.getType()) {
    case NODE_ADDED: {
      NodeAddedSchedulerEvent added = (NodeAddedSchedulerEvent) event;
      return added.getAddedRMNode().getNodeID();
    }
    case NODE_REMOVED: {
      NodeRemovedSchedulerEvent removed = (NodeRemovedSchedulerEvent) event;
      return removed.getRemovedRMNode().getNodeID();
    }
    case NODE_UPDATE: {
      NodeUpdateSchedulerEvent updated = (NodeUpdateSchedulerEvent) event;
      return updated.getRMNode().getNodeID();
    }
    case NODE_RESOURCE_UPDATE: {
      NodeResourceUpdateSchedulerEvent resized =
          (NodeResourceUpdateSchedulerEvent) event;
      return resized.getRMNode().getNodeID();
    }
    default:
      // Not a node event: there is no node id to report.
      return null;
  }
}
 
Example #19
Source File: RMNodeImpl.java    From hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Handles a node that reconnects to the resource manager.
 *
 * <p>When the reconnect event reports no running applications, the old node
 * is removed from the scheduler and then either re-added (same HTTP port,
 * and only if the old state is RUNNING) or replaced by the reconnected node
 * (different HTTP port). When applications are still running, the existing
 * node is updated in place: HTTP port/address, total capability, container
 * statuses and running applications; a capability change on a RUNNING node
 * is forwarded to the scheduler.
 *
 * @param rmNode the node currently known to the resource manager
 * @param event the reconnect event; must be an {@code RMNodeReconnectEvent}
 */
@Override
public void transition(RMNodeImpl rmNode, RMNodeEvent event) {
  RMNodeReconnectEvent reconnectEvent = (RMNodeReconnectEvent) event;
  RMNode newNode = reconnectEvent.getReconnectedNode();
  rmNode.nodeManagerVersion = newNode.getNodeManagerVersion();
  List<ApplicationId> runningApps = reconnectEvent.getRunningApplications();
  boolean noRunningApps = (runningApps == null) || runningApps.isEmpty();

  // No application running on the node, so send node-removal event with
  // cleaning up old container info.
  if (noRunningApps) {
    rmNode.nodeUpdateQueue.clear();
    rmNode.context.getDispatcher().getEventHandler().handle(
        new NodeRemovedSchedulerEvent(rmNode));

    if (rmNode.getHttpPort() == newNode.getHttpPort()) {
      // Reset heartbeat ID since node just restarted.
      rmNode.getLastNodeHeartBeatResponse().setResponseId(0);
      if (rmNode.getState().equals(NodeState.RUNNING)) {
        // Only add new node if old state is RUNNING
        rmNode.context.getDispatcher().getEventHandler().handle(
            new NodeAddedSchedulerEvent(newNode));
      }
    } else {
      // Reconnected node differs, so replace old node and start new node
      switch (rmNode.getState()) {
        case RUNNING:
          ClusterMetrics.getMetrics().decrNumActiveNodes();
          break;
        case UNHEALTHY:
          ClusterMetrics.getMetrics().decrNumUnhealthyNMs();
          break;
        default:
          LOG.debug("Unexpected Rmnode state");
      }
      rmNode.context.getRMNodes().put(newNode.getNodeID(), newNode);
      rmNode.context.getDispatcher().getEventHandler().handle(
          new RMNodeStartedEvent(newNode.getNodeID(), null, null));
    }
  } else {
    rmNode.httpPort = newNode.getHttpPort();
    rmNode.httpAddress = newNode.getHttpAddress();
    boolean isCapabilityChanged = false;
    // Compare capabilities by value. A reference comparison (!=) reports a
    // change whenever the two nodes hold distinct objects, even if the
    // resources they describe are equal.
    if (!rmNode.getTotalCapability().equals(newNode.getTotalCapability())) {
      rmNode.totalCapability = newNode.getTotalCapability();
      isCapabilityChanged = true;
    }

    handleNMContainerStatus(reconnectEvent.getNMContainerStatuses(), rmNode);

    // Reset heartbeat ID since node just restarted.
    rmNode.getLastNodeHeartBeatResponse().setResponseId(0);

    for (ApplicationId appId : reconnectEvent.getRunningApplications()) {
      handleRunningAppOnNode(rmNode, rmNode.context, appId, rmNode.nodeId);
    }

    if (isCapabilityChanged
        && rmNode.getState().equals(NodeState.RUNNING)) {
      // Update scheduler node's capacity for reconnect node.
      rmNode.context
          .getDispatcher()
          .getEventHandler()
          .handle(
              new NodeResourceUpdateSchedulerEvent(rmNode, ResourceOption
                  .newInstance(newNode.getTotalCapability(), -1)));
    }
  }
}
 
Example #20
Source File: FairScheduler.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Dispatches a scheduler event to the matching scheduler operation.
 *
 * <p>For each known event type the event's concrete class is checked first;
 * a mismatch raises a {@link RuntimeException}. Unknown event types are
 * logged as errors and otherwise ignored.
 *
 * @param event the scheduler event to process
 */
@Override
public void handle(SchedulerEvent event) {
  switch (event.getType()) {
  case NODE_ADDED: {
    if (!(event instanceof NodeAddedSchedulerEvent)) {
      throw new RuntimeException("Unexpected event type: " + event);
    }
    NodeAddedSchedulerEvent added = (NodeAddedSchedulerEvent) event;
    addNode(added.getAddedRMNode());
    recoverContainersOnNode(added.getContainerReports(),
        added.getAddedRMNode());
    break;
  }
  case NODE_REMOVED: {
    if (!(event instanceof NodeRemovedSchedulerEvent)) {
      throw new RuntimeException("Unexpected event type: " + event);
    }
    NodeRemovedSchedulerEvent removed = (NodeRemovedSchedulerEvent) event;
    removeNode(removed.getRemovedRMNode());
    break;
  }
  case NODE_UPDATE: {
    if (!(event instanceof NodeUpdateSchedulerEvent)) {
      throw new RuntimeException("Unexpected event type: " + event);
    }
    NodeUpdateSchedulerEvent updated = (NodeUpdateSchedulerEvent) event;
    nodeUpdate(updated.getRMNode());
    break;
  }
  case APP_ADDED: {
    if (!(event instanceof AppAddedSchedulerEvent)) {
      throw new RuntimeException("Unexpected event type: " + event);
    }
    AppAddedSchedulerEvent appAdded = (AppAddedSchedulerEvent) event;
    String targetQueue = resolveReservationQueueName(
        appAdded.getQueue(),
        appAdded.getApplicationId(),
        appAdded.getReservationID());
    // The application is only added when a queue name could be resolved.
    if (targetQueue != null) {
      addApplication(appAdded.getApplicationId(), targetQueue,
          appAdded.getUser(), appAdded.getIsAppRecovering());
    }
    break;
  }
  case APP_REMOVED: {
    if (!(event instanceof AppRemovedSchedulerEvent)) {
      throw new RuntimeException("Unexpected event type: " + event);
    }
    AppRemovedSchedulerEvent appRemoved = (AppRemovedSchedulerEvent) event;
    removeApplication(appRemoved.getApplicationID(),
        appRemoved.getFinalState());
    break;
  }
  case NODE_RESOURCE_UPDATE: {
    if (!(event instanceof NodeResourceUpdateSchedulerEvent)) {
      throw new RuntimeException("Unexpected event type: " + event);
    }
    NodeResourceUpdateSchedulerEvent resized =
        (NodeResourceUpdateSchedulerEvent) event;
    updateNodeResource(resized.getRMNode(), resized.getResourceOption());
    break;
  }
  case APP_ATTEMPT_ADDED: {
    if (!(event instanceof AppAttemptAddedSchedulerEvent)) {
      throw new RuntimeException("Unexpected event type: " + event);
    }
    AppAttemptAddedSchedulerEvent attemptAdded =
        (AppAttemptAddedSchedulerEvent) event;
    addApplicationAttempt(
        attemptAdded.getApplicationAttemptId(),
        attemptAdded.getTransferStateFromPreviousAttempt(),
        attemptAdded.getIsAttemptRecovering());
    break;
  }
  case APP_ATTEMPT_REMOVED: {
    if (!(event instanceof AppAttemptRemovedSchedulerEvent)) {
      throw new RuntimeException("Unexpected event type: " + event);
    }
    AppAttemptRemovedSchedulerEvent attemptRemoved =
        (AppAttemptRemovedSchedulerEvent) event;
    removeApplicationAttempt(
        attemptRemoved.getApplicationAttemptID(),
        attemptRemoved.getFinalAttemptState(),
        attemptRemoved.getKeepContainersAcrossAppAttempts());
    break;
  }
  case CONTAINER_EXPIRED: {
    if (!(event instanceof ContainerExpiredSchedulerEvent)) {
      throw new RuntimeException("Unexpected event type: " + event);
    }
    ContainerExpiredSchedulerEvent expired =
        (ContainerExpiredSchedulerEvent) event;
    ContainerId expiredId = expired.getContainerId();
    // Complete the expired container with an abnormal, EXPIRED status.
    completedContainer(getRMContainer(expiredId),
        SchedulerUtils.createAbnormalContainerStatus(
            expiredId, SchedulerUtils.EXPIRED_CONTAINER),
        RMContainerEventType.EXPIRE);
    break;
  }
  default:
    LOG.error("Unknown event arrived at FairScheduler: " + event.toString());
  }
}
 
Example #21
Source File: TestAbstractYarnScheduler.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Walks a scheduler through a fixed add/remove sequence of three nodes and
 * checks the maximum allocatable virtual cores after each step.
 *
 * @param scheduler the scheduler under test; must start with no nodes
 * @param node1MaxVCores virtual cores of the first node
 * @param node2MaxVCores virtual cores of the second node
 * @param node3MaxVCores virtual cores of the third node
 * @param expectedMaxVCores exactly six expected maxima, one per checkpoint
 */
private void testMaximumAllocationVCoresHelper(
    AbstractYarnScheduler scheduler,
    final int node1MaxVCores, final int node2MaxVCores,
    final int node3MaxVCores, final int... expectedMaxVCores)
    throws Exception {
  Assert.assertEquals(6, expectedMaxVCores.length);

  // Checkpoint 0: empty cluster.
  Assert.assertEquals(0, scheduler.getNumClusterNodes());
  Assert.assertEquals(expectedMaxVCores[0],
      scheduler.getMaximumResourceCapability().getVirtualCores());

  // Checkpoint 1: node1 added.
  RMNode node1 = MockNodes.newNodeInfo(
      0, Resources.createResource(1024, node1MaxVCores), 1, "127.0.0.2");
  scheduler.handle(new NodeAddedSchedulerEvent(node1));
  Assert.assertEquals(1, scheduler.getNumClusterNodes());
  Assert.assertEquals(expectedMaxVCores[1],
      scheduler.getMaximumResourceCapability().getVirtualCores());

  // Checkpoint 2: node1 removed again.
  scheduler.handle(new NodeRemovedSchedulerEvent(node1));
  Assert.assertEquals(0, scheduler.getNumClusterNodes());
  Assert.assertEquals(expectedMaxVCores[2],
      scheduler.getMaximumResourceCapability().getVirtualCores());

  // Checkpoint 3: node2 added.
  RMNode node2 = MockNodes.newNodeInfo(
      0, Resources.createResource(1024, node2MaxVCores), 2, "127.0.0.3");
  scheduler.handle(new NodeAddedSchedulerEvent(node2));
  Assert.assertEquals(1, scheduler.getNumClusterNodes());
  Assert.assertEquals(expectedMaxVCores[3],
      scheduler.getMaximumResourceCapability().getVirtualCores());

  // Checkpoint 4: node3 added next to node2.
  RMNode node3 = MockNodes.newNodeInfo(
      0, Resources.createResource(1024, node3MaxVCores), 3, "127.0.0.4");
  scheduler.handle(new NodeAddedSchedulerEvent(node3));
  Assert.assertEquals(2, scheduler.getNumClusterNodes());
  Assert.assertEquals(expectedMaxVCores[4],
      scheduler.getMaximumResourceCapability().getVirtualCores());

  // Checkpoint 5: node3 removed, node2 remains.
  scheduler.handle(new NodeRemovedSchedulerEvent(node3));
  Assert.assertEquals(1, scheduler.getNumClusterNodes());
  Assert.assertEquals(expectedMaxVCores[5],
      scheduler.getMaximumResourceCapability().getVirtualCores());

  // Leave the scheduler empty again.
  scheduler.handle(new NodeRemovedSchedulerEvent(node2));
  Assert.assertEquals(0, scheduler.getNumClusterNodes());
}
 
Example #22
Source File: TestAbstractYarnScheduler.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Walks a scheduler through a fixed add/remove sequence of three nodes and
 * checks the maximum allocatable memory after each step.
 *
 * @param scheduler the scheduler under test; must start with no nodes
 * @param node1MaxMemory memory of the first node
 * @param node2MaxMemory memory of the second node
 * @param node3MaxMemory memory of the third node
 * @param expectedMaxMemory exactly six expected maxima, one per checkpoint
 */
private void testMaximumAllocationMemoryHelper(
     AbstractYarnScheduler scheduler,
     final int node1MaxMemory, final int node2MaxMemory,
     final int node3MaxMemory, final int... expectedMaxMemory)
     throws Exception {
  Assert.assertEquals(6, expectedMaxMemory.length);

  // Checkpoint 0: empty cluster.
  Assert.assertEquals(0, scheduler.getNumClusterNodes());
  Assert.assertEquals(expectedMaxMemory[0],
      scheduler.getMaximumResourceCapability().getMemory());

  // Checkpoint 1: node1 added.
  RMNode node1 = MockNodes.newNodeInfo(
      0, Resources.createResource(node1MaxMemory), 1, "127.0.0.2");
  scheduler.handle(new NodeAddedSchedulerEvent(node1));
  Assert.assertEquals(1, scheduler.getNumClusterNodes());
  Assert.assertEquals(expectedMaxMemory[1],
      scheduler.getMaximumResourceCapability().getMemory());

  // Checkpoint 2: node1 removed again.
  scheduler.handle(new NodeRemovedSchedulerEvent(node1));
  Assert.assertEquals(0, scheduler.getNumClusterNodes());
  Assert.assertEquals(expectedMaxMemory[2],
      scheduler.getMaximumResourceCapability().getMemory());

  // Checkpoint 3: node2 added.
  RMNode node2 = MockNodes.newNodeInfo(
      0, Resources.createResource(node2MaxMemory), 2, "127.0.0.3");
  scheduler.handle(new NodeAddedSchedulerEvent(node2));
  Assert.assertEquals(1, scheduler.getNumClusterNodes());
  Assert.assertEquals(expectedMaxMemory[3],
      scheduler.getMaximumResourceCapability().getMemory());

  // Checkpoint 4: node3 added next to node2.
  RMNode node3 = MockNodes.newNodeInfo(
      0, Resources.createResource(node3MaxMemory), 3, "127.0.0.4");
  scheduler.handle(new NodeAddedSchedulerEvent(node3));
  Assert.assertEquals(2, scheduler.getNumClusterNodes());
  Assert.assertEquals(expectedMaxMemory[4],
      scheduler.getMaximumResourceCapability().getMemory());

  // Checkpoint 5: node3 removed, node2 remains.
  scheduler.handle(new NodeRemovedSchedulerEvent(node3));
  Assert.assertEquals(1, scheduler.getNumClusterNodes());
  Assert.assertEquals(expectedMaxMemory[5],
      scheduler.getMaximumResourceCapability().getMemory());

  // Leave the scheduler empty again.
  scheduler.handle(new NodeRemovedSchedulerEvent(node2));
  Assert.assertEquals(0, scheduler.getNumClusterNodes());
}
 
Example #23
Source File: LeastAMNodesFirstPolicy.java    From incubator-myriad with Apache License 2.0 4 votes vote down vote up
/**
 * Stops tracking the scheduler node that corresponds to a removed RM node.
 *
 * <p>The tracked node is looked up by the removed node's host; it is only
 * dropped when its node id equals the id of the node carried by the event.
 *
 * @param event the node-removed scheduler event
 */
private void onNodeRemoved(NodeRemovedSchedulerEvent event) {
  SchedulerNode tracked =
      schedulerNodes.get(event.getRemovedRMNode().getNodeID().getHost());
  if (tracked == null) {
    // Nothing is tracked for this host.
    return;
  }
  if (!tracked.getNodeID().equals(event.getRemovedRMNode().getNodeID())) {
    // The host entry belongs to a different node id; leave it alone.
    return;
  }
  schedulerNodes.remove(tracked.getNodeID().getHost());
}
 
Example #24
Source File: TestFairScheduler.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Runs a manual scheduling attempt against a scheduler node whose RM node has
 * already been removed. The test makes no assertion on the outcome of that
 * attempt; it passes as long as no exception escapes.
 */
@Test
public void testSchedulingOnRemovedNode() throws Exception {
  // Continuous scheduling must be off; the scheduling pass is invoked manually.
  scheduler.init(conf);
  scheduler.start();
  Assert.assertFalse("Continuous scheduling should be disabled.",
      scheduler.isContinuousSchedulingEnabled());

  // Register one application attempt in root.queue1.
  ApplicationAttemptId attemptId = createAppAttemptId(1, 1);
  createMockRMApp(attemptId);
  scheduler.addApplication(
      attemptId.getApplicationId(), "root.queue1", "user1", false);
  scheduler.addApplicationAttempt(attemptId, false, false);

  // Submit a single outstanding request for that attempt.
  List<ResourceRequest> asks = new ArrayList<>();
  asks.add(createResourceRequest(1024, 8, ResourceRequest.ANY, 1, 1, true));
  scheduler.allocate(attemptId, asks, new ArrayList<ContainerId>(), null, null);

  // Add a node and keep the scheduler-side handle obtained before removal.
  RMNode rmNode = MockNodes.newNodeInfo(
      1, Resources.createResource(8 * 1024, 8), 1, "127.0.0.1");
  scheduler.handle(new NodeAddedSchedulerEvent(rmNode));
  FSSchedulerNode staleNode =
      (FSSchedulerNode) scheduler.getSchedulerNode(rmNode.getNodeID());

  // Remove the node, then try to schedule on the now-stale handle.
  scheduler.handle(new NodeRemovedSchedulerEvent(rmNode));
  scheduler.attemptScheduling(staleNode);

  // Finish the attempt.
  scheduler.handle(new AppAttemptRemovedSchedulerEvent(
      attemptId, RMAppAttemptState.FINISHED, false));
}
 
Example #25
Source File: RMNodeImpl.java    From hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Processes a status event for a node and decides its next state.
 *
 * <p>The latest heartbeat response and the reported health details are always
 * recorded on the node. If the node reports itself unhealthy, its pending
 * update queue is cleared, the scheduler and the nodes-list manager are
 * notified, and metrics are updated. Otherwise container statuses are
 * handled and, if flagged, a node-update event is sent to the scheduler.
 *
 * @param rmNode the node the status event applies to
 * @param event the status event; cast to {@code RMNodeStatusEvent}
 * @return {@code NodeState.UNHEALTHY} when the node reports itself
 *         unhealthy, {@code NodeState.RUNNING} otherwise
 */
@Override
public NodeState transition(RMNodeImpl rmNode, RMNodeEvent event) {
  RMNodeStatusEvent heartbeat = (RMNodeStatusEvent) event;

  // Remember the most recent heartbeat response.
  rmNode.latestNodeHeartBeatResponse = heartbeat.getLatestResponse();

  // Record the health details the node reported.
  NodeHealthStatus health = heartbeat.getNodeHealthStatus();
  rmNode.setHealthReport(health.getHealthReport());
  rmNode.setLastHealthReportTime(health.getLastHealthReportTime());

  if (health.getIsNodeHealthy()) {
    rmNode.handleContainerStatus(heartbeat.getContainers());

    if (rmNode.nextHeartBeat) {
      rmNode.nextHeartBeat = false;
      rmNode.context.getDispatcher().getEventHandler().handle(
          new NodeUpdateSchedulerEvent(rmNode));
    }

    // Update DTRenewer in secure mode to keep these apps alive. Today this
    // is needed for log-aggregation to finish long after the apps are gone.
    if (UserGroupInformation.isSecurityEnabled()) {
      rmNode.context.getDelegationTokenRenewer().updateKeepAliveApplications(
          heartbeat.getKeepAliveAppIds());
    }

    return NodeState.RUNNING;
  }

  // Unhealthy path: drop pending updates and tell everyone the node is gone.
  LOG.info("Node " + rmNode.nodeId + " reported UNHEALTHY with details: "
      + health.getHealthReport());
  rmNode.nodeUpdateQueue.clear();
  rmNode.context.getDispatcher().getEventHandler().handle(
      new NodeRemovedSchedulerEvent(rmNode));
  rmNode.context.getDispatcher().getEventHandler().handle(
      new NodesListManagerEvent(
          NodesListManagerEventType.NODE_UNUSABLE, rmNode));
  rmNode.updateMetricsForDeactivatedNode(
      rmNode.getState(), NodeState.UNHEALTHY);
  return NodeState.UNHEALTHY;
}
 
Example #26
Source File: FifoScheduler.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Dispatches a scheduler event to the handler for its type.
 *
 * <p>Each case casts the event to its concrete class and forwards the
 * relevant fields. Event types without a case are logged as errors and
 * otherwise ignored.
 *
 * @param event the scheduler event to process
 */
@Override
public void handle(SchedulerEvent event) {
  switch (event.getType()) {
    case NODE_ADDED: {
      NodeAddedSchedulerEvent added = (NodeAddedSchedulerEvent) event;
      addNode(added.getAddedRMNode());
      recoverContainersOnNode(
          added.getContainerReports(), added.getAddedRMNode());
      break;
    }
    case NODE_REMOVED: {
      NodeRemovedSchedulerEvent removed = (NodeRemovedSchedulerEvent) event;
      removeNode(removed.getRemovedRMNode());
      break;
    }
    case NODE_RESOURCE_UPDATE: {
      NodeResourceUpdateSchedulerEvent resourceUpdate =
          (NodeResourceUpdateSchedulerEvent) event;
      updateNodeResource(
          resourceUpdate.getRMNode(), resourceUpdate.getResourceOption());
      break;
    }
    case NODE_UPDATE: {
      NodeUpdateSchedulerEvent updated = (NodeUpdateSchedulerEvent) event;
      nodeUpdate(updated.getRMNode());
      break;
    }
    case APP_ADDED: {
      AppAddedSchedulerEvent appAdded = (AppAddedSchedulerEvent) event;
      addApplication(
          appAdded.getApplicationId(),
          appAdded.getQueue(),
          appAdded.getUser(),
          appAdded.getIsAppRecovering());
      break;
    }
    case APP_REMOVED: {
      AppRemovedSchedulerEvent appRemoved = (AppRemovedSchedulerEvent) event;
      doneApplication(
          appRemoved.getApplicationID(), appRemoved.getFinalState());
      break;
    }
    case APP_ATTEMPT_ADDED: {
      AppAttemptAddedSchedulerEvent attemptAdded =
          (AppAttemptAddedSchedulerEvent) event;
      addApplicationAttempt(
          attemptAdded.getApplicationAttemptId(),
          attemptAdded.getTransferStateFromPreviousAttempt(),
          attemptAdded.getIsAttemptRecovering());
      break;
    }
    case APP_ATTEMPT_REMOVED: {
      AppAttemptRemovedSchedulerEvent attemptRemoved =
          (AppAttemptRemovedSchedulerEvent) event;
      try {
        doneApplicationAttempt(
            attemptRemoved.getApplicationAttemptID(),
            attemptRemoved.getFinalAttemptState(),
            attemptRemoved.getKeepContainersAcrossAppAttempts());
      } catch (IOException ie) {
        // Failure is logged and the event is otherwise dropped.
        LOG.error("Unable to remove application "
            + attemptRemoved.getApplicationAttemptID(), ie);
      }
      break;
    }
    case CONTAINER_EXPIRED: {
      ContainerExpiredSchedulerEvent expired =
          (ContainerExpiredSchedulerEvent) event;
      ContainerId expiredId = expired.getContainerId();
      // Complete the container with an abnormal "expired" status.
      completedContainer(
          getRMContainer(expiredId),
          SchedulerUtils.createAbnormalContainerStatus(
              expiredId, SchedulerUtils.EXPIRED_CONTAINER),
          RMContainerEventType.EXPIRE);
      break;
    }
    default:
      LOG.error("Invalid eventtype " + event.getType() + ". Ignoring!");
  }
}
 
Example #27
Source File: RMNodeImpl.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Applies a node status event and returns the state the node should move to.
 *
 * <p>The last heartbeat response, health report and health-report time are
 * stored on the node first. An unhealthy report clears the node's update
 * queue, sends a node-removed event to the scheduler and a node-unusable
 * event to the nodes-list manager, and updates the deactivation metrics. A
 * healthy report has its container statuses handled and may trigger a
 * node-update event for the scheduler.
 *
 * @param rmNode the node receiving the status event
 * @param event the incoming event; expected to be an {@code RMNodeStatusEvent}
 * @return {@code NodeState.UNHEALTHY} for an unhealthy report, otherwise
 *         {@code NodeState.RUNNING}
 */
@Override
public NodeState transition(RMNodeImpl rmNode, RMNodeEvent event) {
  RMNodeStatusEvent status = (RMNodeStatusEvent) event;

  // Keep the latest heartbeat response on the node.
  rmNode.latestNodeHeartBeatResponse = status.getLatestResponse();

  NodeHealthStatus reportedHealth = status.getNodeHealthStatus();
  rmNode.setHealthReport(reportedHealth.getHealthReport());
  rmNode.setLastHealthReportTime(reportedHealth.getLastHealthReportTime());

  boolean healthy = reportedHealth.getIsNodeHealthy();
  if (!healthy) {
    LOG.info("Node " + rmNode.nodeId + " reported UNHEALTHY with details: "
        + reportedHealth.getHealthReport());
    rmNode.nodeUpdateQueue.clear();

    // Tell the scheduler the node is gone.
    rmNode.context.getDispatcher().getEventHandler()
        .handle(new NodeRemovedSchedulerEvent(rmNode));
    // Tell the nodes-list manager the node is unusable.
    rmNode.context.getDispatcher().getEventHandler()
        .handle(new NodesListManagerEvent(
            NodesListManagerEventType.NODE_UNUSABLE, rmNode));

    rmNode.updateMetricsForDeactivatedNode(
        rmNode.getState(), NodeState.UNHEALTHY);
    return NodeState.UNHEALTHY;
  }

  rmNode.handleContainerStatus(status.getContainers());

  if (rmNode.nextHeartBeat) {
    rmNode.nextHeartBeat = false;
    rmNode.context.getDispatcher().getEventHandler()
        .handle(new NodeUpdateSchedulerEvent(rmNode));
  }

  // Update DTRenewer in secure mode to keep these apps alive. Today this is
  // needed for log-aggregation to finish long after the apps are gone.
  if (UserGroupInformation.isSecurityEnabled()) {
    rmNode.context.getDelegationTokenRenewer()
        .updateKeepAliveApplications(status.getKeepAliveAppIds());
  }

  return NodeState.RUNNING;
}
 
Example #28
Source File: FifoScheduler.java    From hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Routes a scheduler event to the matching scheduler operation.
 *
 * <p>The event type selects the case; the event is then cast to its concrete
 * class and its fields are passed on. An event type with no matching case is
 * reported through an error log entry and ignored.
 *
 * @param event the scheduler event to handle
 */
@Override
public void handle(SchedulerEvent event) {
  switch (event.getType()) {
    case NODE_ADDED: {
      NodeAddedSchedulerEvent nodeAdded = (NodeAddedSchedulerEvent) event;
      addNode(nodeAdded.getAddedRMNode());
      recoverContainersOnNode(
          nodeAdded.getContainerReports(), nodeAdded.getAddedRMNode());
      break;
    }
    case NODE_REMOVED: {
      NodeRemovedSchedulerEvent nodeRemoved =
          (NodeRemovedSchedulerEvent) event;
      removeNode(nodeRemoved.getRemovedRMNode());
      break;
    }
    case NODE_RESOURCE_UPDATE: {
      NodeResourceUpdateSchedulerEvent nodeResource =
          (NodeResourceUpdateSchedulerEvent) event;
      updateNodeResource(
          nodeResource.getRMNode(), nodeResource.getResourceOption());
      break;
    }
    case NODE_UPDATE: {
      NodeUpdateSchedulerEvent nodeUpdated = (NodeUpdateSchedulerEvent) event;
      nodeUpdate(nodeUpdated.getRMNode());
      break;
    }
    case APP_ADDED: {
      AppAddedSchedulerEvent app = (AppAddedSchedulerEvent) event;
      addApplication(
          app.getApplicationId(),
          app.getQueue(),
          app.getUser(),
          app.getIsAppRecovering());
      break;
    }
    case APP_REMOVED: {
      AppRemovedSchedulerEvent app = (AppRemovedSchedulerEvent) event;
      doneApplication(app.getApplicationID(), app.getFinalState());
      break;
    }
    case APP_ATTEMPT_ADDED: {
      AppAttemptAddedSchedulerEvent attempt =
          (AppAttemptAddedSchedulerEvent) event;
      addApplicationAttempt(
          attempt.getApplicationAttemptId(),
          attempt.getTransferStateFromPreviousAttempt(),
          attempt.getIsAttemptRecovering());
      break;
    }
    case APP_ATTEMPT_REMOVED: {
      AppAttemptRemovedSchedulerEvent attempt =
          (AppAttemptRemovedSchedulerEvent) event;
      try {
        doneApplicationAttempt(
            attempt.getApplicationAttemptID(),
            attempt.getFinalAttemptState(),
            attempt.getKeepContainersAcrossAppAttempts());
      } catch (IOException ie) {
        // The failure is only logged; handling continues normally.
        LOG.error("Unable to remove application "
            + attempt.getApplicationAttemptID(), ie);
      }
      break;
    }
    case CONTAINER_EXPIRED: {
      ContainerExpiredSchedulerEvent containerExpired =
          (ContainerExpiredSchedulerEvent) event;
      ContainerId id = containerExpired.getContainerId();
      // Mark the container completed with an abnormal "expired" status.
      completedContainer(
          getRMContainer(id),
          SchedulerUtils.createAbnormalContainerStatus(
              id, SchedulerUtils.EXPIRED_CONTAINER),
          RMContainerEventType.EXPIRE);
      break;
    }
    default:
      LOG.error("Invalid eventtype " + event.getType() + ". Ignoring!");
  }
}
 
Example #29
Source File: RMNodeImpl.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Handles a reconnect event for a node.
 *
 * <p>The node manager version is always refreshed from the reconnected node.
 * What happens next depends on whether the reconnect event reports running
 * applications:
 * <ul>
 *   <li>No running applications: the pending update queue is cleared and a
 *       node-removed event is sent to the scheduler. If the HTTP port is
 *       unchanged, the heartbeat response id is reset and, when the node is
 *       RUNNING, a node-added event is sent for the new node. Otherwise the
 *       active/unhealthy node metric is decremented according to the current
 *       state, the new node replaces the old one in the RM node map, and a
 *       node-started event is sent for it.</li>
 *   <li>Running applications present: HTTP port/address and (if changed)
 *       total capability are copied from the new node, reported container
 *       statuses and running applications are processed, and a
 *       node-resource-update event is sent to the scheduler when the
 *       capability changed and the node is RUNNING.</li>
 * </ul>
 *
 * @param rmNode the existing node that is reconnecting
 * @param event the reconnect event; cast to {@code RMNodeReconnectEvent}
 */
@Override
public void transition(RMNodeImpl rmNode, RMNodeEvent event) {
  RMNodeReconnectEvent reconnectEvent = (RMNodeReconnectEvent) event;
  RMNode newNode = reconnectEvent.getReconnectedNode();
  rmNode.nodeManagerVersion = newNode.getNodeManagerVersion();
  List<ApplicationId> runningApps = reconnectEvent.getRunningApplications();
  boolean noRunningApps = (runningApps == null) || runningApps.isEmpty();

  // No application running on the node, so send node-removal event with
  // cleaning up old container info.
  if (noRunningApps) {
    rmNode.nodeUpdateQueue.clear();
    rmNode.context.getDispatcher().getEventHandler().handle(
        new NodeRemovedSchedulerEvent(rmNode));

    if (rmNode.getHttpPort() == newNode.getHttpPort()) {
      // Reset heartbeat ID since node just restarted.
      rmNode.getLastNodeHeartBeatResponse().setResponseId(0);
      if (rmNode.getState().equals(NodeState.RUNNING)) {
        // Only add new node if old state is RUNNING
        rmNode.context.getDispatcher().getEventHandler().handle(
            new NodeAddedSchedulerEvent(newNode));
      }
    } else {
      // Reconnected node differs, so replace old node and start new node
      switch (rmNode.getState()) {
        case RUNNING:
          ClusterMetrics.getMetrics().decrNumActiveNodes();
          break;
        case UNHEALTHY:
          ClusterMetrics.getMetrics().decrNumUnhealthyNMs();
          break;
        default:
          LOG.debug("Unexpected Rmnode state");
      }
      rmNode.context.getRMNodes().put(newNode.getNodeID(), newNode);
      rmNode.context.getDispatcher().getEventHandler().handle(
          new RMNodeStartedEvent(newNode.getNodeID(), null, null));
    }
  } else {
    rmNode.httpPort = newNode.getHttpPort();
    rmNode.httpAddress = newNode.getHttpAddress();

    // Compare capabilities by value, not by reference: the reconnecting node
    // carries its own capability object, so a reference comparison would
    // report a change (and trigger a scheduler resource update) even when
    // the values are equal. The null check keeps the comparison null-safe.
    boolean isCapabilityChanged =
        rmNode.getTotalCapability() == null
            ? newNode.getTotalCapability() != null
            : !rmNode.getTotalCapability().equals(
                newNode.getTotalCapability());
    if (isCapabilityChanged) {
      rmNode.totalCapability = newNode.getTotalCapability();
    }

    handleNMContainerStatus(reconnectEvent.getNMContainerStatuses(), rmNode);

    // Reset heartbeat ID since node just restarted.
    rmNode.getLastNodeHeartBeatResponse().setResponseId(0);

    // runningApps is non-null and non-empty on this branch.
    for (ApplicationId appId : runningApps) {
      handleRunningAppOnNode(rmNode, rmNode.context, appId, rmNode.nodeId);
    }

    if (isCapabilityChanged
        && rmNode.getState().equals(NodeState.RUNNING)) {
      // Update scheduler node's capacity for reconnect node.
      rmNode.context
          .getDispatcher()
          .getEventHandler()
          .handle(
              new NodeResourceUpdateSchedulerEvent(rmNode, ResourceOption
                  .newInstance(newNode.getTotalCapability(), -1)));
    }
  }
}
 
Example #30
Source File: TestAbstractYarnScheduler.java    From hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Walks a scheduler through a fixed add/remove sequence of three nodes and
 * checks the reported maximum virtual cores after each step.
 *
 * <p>Sequence: empty cluster, add node1, remove node1, add node2, add node3,
 * remove node3, remove node2. The first six states are compared against
 * {@code expectedMaxVCores[0..5]}; the node count is checked at every step.
 *
 * @param scheduler the scheduler under test; expected to start with no nodes
 * @param node1MaxVCores vcore value used to build node1's resource
 * @param node2MaxVCores vcore value used to build node2's resource
 * @param node3MaxVCores vcore value used to build node3's resource
 * @param expectedMaxVCores exactly six expected maximum-vcore values, one
 *        per checked state in the order listed above
 * @throws Exception if any scheduler call fails
 */
private void testMaximumAllocationVCoresHelper(
    AbstractYarnScheduler scheduler,
    final int node1MaxVCores, final int node2MaxVCores,
    final int node3MaxVCores, final int... expectedMaxVCores)
    throws Exception {
  Assert.assertEquals(6, expectedMaxVCores.length);

  // State 0: empty cluster.
  Assert.assertEquals(0, scheduler.getNumClusterNodes());
  Assert.assertEquals(expectedMaxVCores[0],
      scheduler.getMaximumResourceCapability().getVirtualCores());

  // State 1: node1 added.
  RMNode firstNode = MockNodes.newNodeInfo(
      0,
      Resources.createResource(1024, node1MaxVCores, node1MaxVCores),
      1,
      "127.0.0.2");
  scheduler.handle(new NodeAddedSchedulerEvent(firstNode));
  Assert.assertEquals(1, scheduler.getNumClusterNodes());
  Assert.assertEquals(expectedMaxVCores[1],
      scheduler.getMaximumResourceCapability().getVirtualCores());

  // State 2: node1 removed again.
  scheduler.handle(new NodeRemovedSchedulerEvent(firstNode));
  Assert.assertEquals(0, scheduler.getNumClusterNodes());
  Assert.assertEquals(expectedMaxVCores[2],
      scheduler.getMaximumResourceCapability().getVirtualCores());

  // State 3: node2 added.
  RMNode secondNode = MockNodes.newNodeInfo(
      0,
      Resources.createResource(1024, node2MaxVCores, node2MaxVCores),
      2,
      "127.0.0.3");
  scheduler.handle(new NodeAddedSchedulerEvent(secondNode));
  Assert.assertEquals(1, scheduler.getNumClusterNodes());
  Assert.assertEquals(expectedMaxVCores[3],
      scheduler.getMaximumResourceCapability().getVirtualCores());

  // State 4: node3 added alongside node2.
  RMNode thirdNode = MockNodes.newNodeInfo(
      0,
      Resources.createResource(1024, node3MaxVCores, node3MaxVCores),
      3,
      "127.0.0.4");
  scheduler.handle(new NodeAddedSchedulerEvent(thirdNode));
  Assert.assertEquals(2, scheduler.getNumClusterNodes());
  Assert.assertEquals(expectedMaxVCores[4],
      scheduler.getMaximumResourceCapability().getVirtualCores());

  // State 5: node3 removed, node2 remains.
  scheduler.handle(new NodeRemovedSchedulerEvent(thirdNode));
  Assert.assertEquals(1, scheduler.getNumClusterNodes());
  Assert.assertEquals(expectedMaxVCores[5],
      scheduler.getMaximumResourceCapability().getVirtualCores());

  // Final cleanup: node2 removed, cluster is empty again.
  scheduler.handle(new NodeRemovedSchedulerEvent(secondNode));
  Assert.assertEquals(0, scheduler.getNumClusterNodes());
}