Java Code Examples for org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse#getContainersToCleanup()

The following examples show how to use org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse#getContainersToCleanup(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestApplicationCleanup.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Polls heartbeat responses until at least one container shows up in the
 * cleanup list, then asserts that exactly one container was reported.
 *
 * Each round drains the dispatcher, adds the size of the current response's
 * cleanup list to a running total and, if the total is still zero, sleeps
 * 100 ms and sends another heartbeat through {@code nm}. At most 201 rounds
 * are performed.
 *
 * @param dispatcher dispatcher that is awaited before each response is read
 * @param nm         mock node manager used to send follow-up heartbeats
 * @param resp       first heartbeat response to inspect
 * @throws Exception if awaiting, sleeping or heartbeating fails
 */
protected void waitForContainerCleanup(DrainDispatcher dispatcher, MockNM nm,
    NodeHeartbeatResponse resp) throws Exception {
  int attempts = 0;
  int totalCleaned = 0;
  List<ContainerId> pendingCleanup;
  while (true) {
    dispatcher.await();
    pendingCleanup = resp.getContainersToCleanup();
    totalCleaned += pendingCleanup.size();
    if (totalCleaned >= 1) {
      break;
    }
    Thread.sleep(100);
    resp = nm.nodeHeartbeat(true);
    // give up once the retry budget is exhausted
    if (attempts++ >= 200) {
      break;
    }
  }

  if (!pendingCleanup.isEmpty()) {
    LOG.info("Got cleanup for " + pendingCleanup.get(0));
  } else {
    LOG.error("Failed to get any containers to cleanup");
  }
  Assert.assertEquals(1, totalCleaned);
}
 
Example 2
Source File: TestApplicationCleanup.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Polls heartbeat responses until at least one container shows up in the
 * cleanup list, then asserts that exactly one container was reported.
 *
 * Each round drains the dispatcher, adds the size of the current response's
 * cleanup list to a running total and, if the total is still zero, sleeps
 * 100 ms and sends another heartbeat through {@code nm}. At most 201 rounds
 * are performed.
 *
 * @param dispatcher dispatcher that is awaited before each response is read
 * @param nm         mock node manager used to send follow-up heartbeats
 * @param resp       first heartbeat response to inspect
 * @throws Exception if awaiting, sleeping or heartbeating fails
 */
protected void waitForContainerCleanup(DrainDispatcher dispatcher, MockNM nm,
    NodeHeartbeatResponse resp) throws Exception {
  int attempts = 0;
  int totalCleaned = 0;
  List<ContainerId> pendingCleanup;
  while (true) {
    dispatcher.await();
    pendingCleanup = resp.getContainersToCleanup();
    totalCleaned += pendingCleanup.size();
    if (totalCleaned >= 1) {
      break;
    }
    Thread.sleep(100);
    resp = nm.nodeHeartbeat(true);
    // give up once the retry budget is exhausted
    if (attempts++ >= 200) {
      break;
    }
  }

  if (!pendingCleanup.isEmpty()) {
    LOG.info("Got cleanup for " + pendingCleanup.get(0));
  } else {
    LOG.error("Failed to get any containers to cleanup");
  }
  Assert.assertEquals(1, totalCleaned);
}
 
Example 3
Source File: TestRMApplicationHistoryWriter.java    From hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Runs one application with 10000 requested containers through the given
 * resource manager and checks that every allocated container is later
 * reported for cleanup.
 *
 * The method starts {@code rm}, registers a single node, submits an
 * application, polls for allocations (up to 200 retries, 300 ms apart),
 * unregisters the attempt, then polls heartbeat responses for containers to
 * clean up with the same retry budget. {@code rm} is stopped before
 * returning, including when an assertion or wait fails.
 *
 * @param rm the mock resource manager to start, exercise and stop
 * @throws Exception if any step of the scenario fails
 */
private void testRMWritingMassiveHistory(MockRM rm) throws Exception {
  rm.start();
  try {
    MockNM nm = rm.registerNode("127.0.0.1:1234", 1024 * 10100);

    RMApp app = rm.submitApp(1024);
    nm.nodeHeartbeat(true);
    RMAppAttempt attempt = app.getCurrentAppAttempt();
    MockAM am = rm.sendAMLaunched(attempt.getAppAttemptId());
    am.registerAppAttempt();

    int request = 10000;
    am.allocate("127.0.0.1", 1024, request, new ArrayList<ContainerId>());
    nm.nodeHeartbeat(true);
    List<Container> allocated =
        am.allocate(new ArrayList<ResourceRequest>(),
          new ArrayList<ContainerId>()).getAllocatedContainers();
    int waitCount = 0;
    int allocatedSize = allocated.size();
    // keep asking until all requested containers have been handed out
    while (allocatedSize < request && waitCount++ < 200) {
      Thread.sleep(300);
      allocated =
          am.allocate(new ArrayList<ResourceRequest>(),
            new ArrayList<ContainerId>()).getAllocatedContainers();
      allocatedSize += allocated.size();
      nm.nodeHeartbeat(true);
    }
    Assert.assertEquals(request, allocatedSize);

    am.unregisterAppAttempt();
    am.waitForState(RMAppAttemptState.FINISHING);
    nm.nodeHeartbeat(am.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
    am.waitForState(RMAppAttemptState.FINISHED);

    NodeHeartbeatResponse resp = nm.nodeHeartbeat(true);
    List<ContainerId> cleaned = resp.getContainersToCleanup();
    int cleanedSize = cleaned.size();
    waitCount = 0;
    // each response only carries a delta, so sizes are accumulated
    while (cleanedSize < allocatedSize && waitCount++ < 200) {
      Thread.sleep(300);
      resp = nm.nodeHeartbeat(true);
      cleaned = resp.getContainersToCleanup();
      cleanedSize += cleaned.size();
    }
    Assert.assertEquals(allocatedSize, cleanedSize);
    rm.waitForState(app.getApplicationId(), RMAppState.FINISHED);
  } finally {
    // stop the RM on failure paths too, so a failed assertion does not
    // leave a started resource manager behind
    rm.stop();
  }
}
 
Example 4
Source File: TestApplicationCleanup.java    From hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies that after an application attempt finishes, heartbeat responses
 * report two containers and one application for cleanup.
 *
 * The test starts a {@link MockRM}, registers one node, submits an
 * application, obtains two containers, unregisters the attempt and then
 * polls heartbeat responses (up to 200 retries, 100 ms apart), accumulating
 * the containers and applications listed for cleanup. The resource manager
 * is stopped in a {@code finally} block so it is shut down even when an
 * assertion fails.
 *
 * @throws Exception if any step of the scenario fails
 */
@SuppressWarnings("resource")
@Test
public void testAppCleanup() throws Exception {
  Logger rootLogger = LogManager.getRootLogger();
  rootLogger.setLevel(Level.DEBUG);
  MockRM rm = new MockRM();
  rm.start();
  try {
    MockNM nm1 = rm.registerNode("127.0.0.1:1234", 5000);

    RMApp app = rm.submitApp(2000);

    //kick the scheduling
    nm1.nodeHeartbeat(true);

    RMAppAttempt attempt = app.getCurrentAppAttempt();
    MockAM am = rm.sendAMLaunched(attempt.getAppAttemptId());
    am.registerAppAttempt();

    //request for containers
    int request = 2;
    am.allocate("127.0.0.1" , 1000, request,
        new ArrayList<ContainerId>());

    //kick the scheduler
    nm1.nodeHeartbeat(true);
    List<Container> conts = am.allocate(new ArrayList<ResourceRequest>(),
        new ArrayList<ContainerId>()).getAllocatedContainers();
    int contReceived = conts.size();
    int waitCount = 0;
    while (contReceived < request && waitCount++ < 200) {
      LOG.info("Got " + contReceived + " containers. Waiting to get "
               + request);
      Thread.sleep(100);
      conts = am.allocate(new ArrayList<ResourceRequest>(),
          new ArrayList<ContainerId>()).getAllocatedContainers();
      contReceived += conts.size();
      nm1.nodeHeartbeat(true);
    }
    Assert.assertEquals(request, contReceived);

    am.unregisterAppAttempt();
    // the response to this heartbeat is not inspected
    nm1.nodeHeartbeat(attempt.getAppAttemptId(), 1, ContainerState.COMPLETE);
    am.waitForState(RMAppAttemptState.FINISHED);

    //currently only containers are cleaned via this
    //AM container is cleaned via container launcher
    NodeHeartbeatResponse resp = nm1.nodeHeartbeat(true);
    // copy into local lists so that accumulating later deltas does not
    // modify the collections owned by the heartbeat response
    List<ContainerId> containersToCleanup =
        new ArrayList<ContainerId>(resp.getContainersToCleanup());
    List<ApplicationId> appsToCleanup =
        new ArrayList<ApplicationId>(resp.getApplicationsToCleanup());
    int numCleanedContainers = containersToCleanup.size();
    int numCleanedApps = appsToCleanup.size();
    waitCount = 0;
    while ((numCleanedContainers < 2 || numCleanedApps < 1)
        && waitCount++ < 200) {
      LOG.info("Waiting to get cleanup events.. cleanedConts: "
          + numCleanedContainers + " cleanedApps: " + numCleanedApps);
      Thread.sleep(100);
      resp = nm1.nodeHeartbeat(true);
      List<ContainerId> deltaContainersToCleanup =
          resp.getContainersToCleanup();
      List<ApplicationId> deltaAppsToCleanup = resp.getApplicationsToCleanup();
      // Add the deltas to the global list
      containersToCleanup.addAll(deltaContainersToCleanup);
      appsToCleanup.addAll(deltaAppsToCleanup);
      // Update counts now
      numCleanedContainers = containersToCleanup.size();
      numCleanedApps = appsToCleanup.size();
    }

    Assert.assertEquals(1, appsToCleanup.size());
    Assert.assertEquals(app.getApplicationId(), appsToCleanup.get(0));
    Assert.assertEquals(1, numCleanedApps);
    Assert.assertEquals(2, numCleanedContainers);
  } finally {
    // stop the RM even when an assertion above fails
    rm.stop();
  }
}
 
Example 5
Source File: NMSimulator.java    From hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Performs one simulation step for this node manager.
 *
 * First every container that {@code containerQueue.poll()} returns is
 * removed from the running set and recorded as completed. Then a heartbeat
 * carrying the current container statuses is sent to the resource manager.
 * Containers listed for cleanup in the response are either dropped from the
 * AM container list or removed from the running set and queue and recorded
 * as released. If the response's node action is {@code SHUTDOWN},
 * {@code lastStep()} is invoked.
 *
 * @throws Exception if the heartbeat or the shutdown step fails
 */
@Override
public void middleStep() throws Exception {
  // move every container handed back by the queue to the completed list
  synchronized(completedContainerList) {
    for (ContainerSimulator finished = containerQueue.poll();
         finished != null;
         finished = containerQueue.poll()) {
      runningContainers.remove(finished.getId());
      completedContainerList.add(finished.getId());
      LOG.debug(MessageFormat.format("Container {0} has completed",
          finished.getId()));
    }
  }

  // build and send the heartbeat
  NodeHeartbeatRequest heartbeat =
      Records.newRecord(NodeHeartbeatRequest.class);
  heartbeat.setLastKnownNMTokenMasterKey(masterKey);

  NodeStatus status = Records.newRecord(NodeStatus.class);
  status.setContainersStatuses(generateContainerStatusList());
  status.setNodeId(node.getNodeID());
  status.setKeepAliveApplications(new ArrayList<ApplicationId>());
  status.setResponseId(RESPONSE_ID ++);
  status.setNodeHealthStatus(NodeHealthStatus.newInstance(true, "", 0));
  heartbeat.setNodeStatus(status);

  NodeHeartbeatResponse response =
      rm.getResourceTrackerService().nodeHeartbeat(heartbeat);

  if (! response.getContainersToCleanup().isEmpty()) {
    // act on the containers the RM asked us to clean up
    synchronized(releasedContainerList) {
      for (ContainerId containerId : response.getContainersToCleanup()) {
        if (! amContainerList.contains(containerId)) {
          // ordinary container: stop tracking it and record the release
          ContainerSimulator released = runningContainers.remove(containerId);
          containerQueue.remove(released);
          releasedContainerList.add(containerId);
          LOG.debug(MessageFormat.format(
              "NodeManager {0} releases a container ({1}).",
              node.getNodeID(), containerId));
        } else {
          // AM container (not killed?, only release)
          synchronized(amContainerList) {
            amContainerList.remove(containerId);
          }
          LOG.debug(MessageFormat.format(
              "NodeManager {0} releases an AM ({1}).",
              node.getNodeID(), containerId));
        }
      }
    }
  }

  if (response.getNodeAction() == NodeAction.SHUTDOWN) {
    lastStep();
  }
}
 
Example 6
Source File: TestRMApplicationHistoryWriter.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Runs one application with 10000 requested containers through the given
 * resource manager and checks that every allocated container is later
 * reported for cleanup.
 *
 * The method starts {@code rm}, registers a single node, submits an
 * application, polls for allocations (up to 200 retries, 300 ms apart),
 * unregisters the attempt, then polls heartbeat responses for containers to
 * clean up with the same retry budget. {@code rm} is stopped before
 * returning, including when an assertion or wait fails.
 *
 * @param rm the mock resource manager to start, exercise and stop
 * @throws Exception if any step of the scenario fails
 */
private void testRMWritingMassiveHistory(MockRM rm) throws Exception {
  rm.start();
  try {
    MockNM nm = rm.registerNode("127.0.0.1:1234", 1024 * 10100);

    RMApp app = rm.submitApp(1024);
    nm.nodeHeartbeat(true);
    RMAppAttempt attempt = app.getCurrentAppAttempt();
    MockAM am = rm.sendAMLaunched(attempt.getAppAttemptId());
    am.registerAppAttempt();

    int request = 10000;
    am.allocate("127.0.0.1", 1024, request, new ArrayList<ContainerId>());
    nm.nodeHeartbeat(true);
    List<Container> allocated =
        am.allocate(new ArrayList<ResourceRequest>(),
          new ArrayList<ContainerId>()).getAllocatedContainers();
    int waitCount = 0;
    int allocatedSize = allocated.size();
    // keep asking until all requested containers have been handed out
    while (allocatedSize < request && waitCount++ < 200) {
      Thread.sleep(300);
      allocated =
          am.allocate(new ArrayList<ResourceRequest>(),
            new ArrayList<ContainerId>()).getAllocatedContainers();
      allocatedSize += allocated.size();
      nm.nodeHeartbeat(true);
    }
    Assert.assertEquals(request, allocatedSize);

    am.unregisterAppAttempt();
    am.waitForState(RMAppAttemptState.FINISHING);
    nm.nodeHeartbeat(am.getApplicationAttemptId(), 1, ContainerState.COMPLETE);
    am.waitForState(RMAppAttemptState.FINISHED);

    NodeHeartbeatResponse resp = nm.nodeHeartbeat(true);
    List<ContainerId> cleaned = resp.getContainersToCleanup();
    int cleanedSize = cleaned.size();
    waitCount = 0;
    // each response only carries a delta, so sizes are accumulated
    while (cleanedSize < allocatedSize && waitCount++ < 200) {
      Thread.sleep(300);
      resp = nm.nodeHeartbeat(true);
      cleaned = resp.getContainersToCleanup();
      cleanedSize += cleaned.size();
    }
    Assert.assertEquals(allocatedSize, cleanedSize);
    rm.waitForState(app.getApplicationId(), RMAppState.FINISHED);
  } finally {
    // stop the RM on failure paths too, so a failed assertion does not
    // leave a started resource manager behind
    rm.stop();
  }
}
 
Example 7
Source File: TestApplicationCleanup.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies that after an application attempt finishes, heartbeat responses
 * report two containers and one application for cleanup.
 *
 * The test starts a {@link MockRM}, registers one node, submits an
 * application, obtains two containers, unregisters the attempt and then
 * polls heartbeat responses (up to 200 retries, 100 ms apart), accumulating
 * the containers and applications listed for cleanup. The resource manager
 * is stopped in a {@code finally} block so it is shut down even when an
 * assertion fails.
 *
 * @throws Exception if any step of the scenario fails
 */
@SuppressWarnings("resource")
@Test
public void testAppCleanup() throws Exception {
  Logger rootLogger = LogManager.getRootLogger();
  rootLogger.setLevel(Level.DEBUG);
  MockRM rm = new MockRM();
  rm.start();
  try {
    MockNM nm1 = rm.registerNode("127.0.0.1:1234", 5000);

    RMApp app = rm.submitApp(2000);

    //kick the scheduling
    nm1.nodeHeartbeat(true);

    RMAppAttempt attempt = app.getCurrentAppAttempt();
    MockAM am = rm.sendAMLaunched(attempt.getAppAttemptId());
    am.registerAppAttempt();

    //request for containers
    int request = 2;
    am.allocate("127.0.0.1" , 1000, request,
        new ArrayList<ContainerId>());

    //kick the scheduler
    nm1.nodeHeartbeat(true);
    List<Container> conts = am.allocate(new ArrayList<ResourceRequest>(),
        new ArrayList<ContainerId>()).getAllocatedContainers();
    int contReceived = conts.size();
    int waitCount = 0;
    while (contReceived < request && waitCount++ < 200) {
      LOG.info("Got " + contReceived + " containers. Waiting to get "
               + request);
      Thread.sleep(100);
      conts = am.allocate(new ArrayList<ResourceRequest>(),
          new ArrayList<ContainerId>()).getAllocatedContainers();
      contReceived += conts.size();
      nm1.nodeHeartbeat(true);
    }
    Assert.assertEquals(request, contReceived);

    am.unregisterAppAttempt();
    // the response to this heartbeat is not inspected
    nm1.nodeHeartbeat(attempt.getAppAttemptId(), 1, ContainerState.COMPLETE);
    am.waitForState(RMAppAttemptState.FINISHED);

    //currently only containers are cleaned via this
    //AM container is cleaned via container launcher
    NodeHeartbeatResponse resp = nm1.nodeHeartbeat(true);
    // copy into local lists so that accumulating later deltas does not
    // modify the collections owned by the heartbeat response
    List<ContainerId> containersToCleanup =
        new ArrayList<ContainerId>(resp.getContainersToCleanup());
    List<ApplicationId> appsToCleanup =
        new ArrayList<ApplicationId>(resp.getApplicationsToCleanup());
    int numCleanedContainers = containersToCleanup.size();
    int numCleanedApps = appsToCleanup.size();
    waitCount = 0;
    while ((numCleanedContainers < 2 || numCleanedApps < 1)
        && waitCount++ < 200) {
      LOG.info("Waiting to get cleanup events.. cleanedConts: "
          + numCleanedContainers + " cleanedApps: " + numCleanedApps);
      Thread.sleep(100);
      resp = nm1.nodeHeartbeat(true);
      List<ContainerId> deltaContainersToCleanup =
          resp.getContainersToCleanup();
      List<ApplicationId> deltaAppsToCleanup = resp.getApplicationsToCleanup();
      // Add the deltas to the global list
      containersToCleanup.addAll(deltaContainersToCleanup);
      appsToCleanup.addAll(deltaAppsToCleanup);
      // Update counts now
      numCleanedContainers = containersToCleanup.size();
      numCleanedApps = appsToCleanup.size();
    }

    Assert.assertEquals(1, appsToCleanup.size());
    Assert.assertEquals(app.getApplicationId(), appsToCleanup.get(0));
    Assert.assertEquals(1, numCleanedApps);
    Assert.assertEquals(2, numCleanedContainers);
  } finally {
    // stop the RM even when an assertion above fails
    rm.stop();
  }
}
 
Example 8
Source File: NMSimulator.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Performs one simulation step for this node manager.
 *
 * First every container that {@code containerQueue.poll()} returns is
 * removed from the running set and recorded as completed. Then a heartbeat
 * carrying the current container statuses is sent to the resource manager.
 * Containers listed for cleanup in the response are either dropped from the
 * AM container list or removed from the running set and queue and recorded
 * as released. If the response's node action is {@code SHUTDOWN},
 * {@code lastStep()} is invoked.
 *
 * @throws Exception if the heartbeat or the shutdown step fails
 */
@Override
public void middleStep() throws Exception {
  // move every container handed back by the queue to the completed list
  synchronized(completedContainerList) {
    for (ContainerSimulator finished = containerQueue.poll();
         finished != null;
         finished = containerQueue.poll()) {
      runningContainers.remove(finished.getId());
      completedContainerList.add(finished.getId());
      LOG.debug(MessageFormat.format("Container {0} has completed",
          finished.getId()));
    }
  }

  // build and send the heartbeat
  NodeHeartbeatRequest heartbeat =
      Records.newRecord(NodeHeartbeatRequest.class);
  heartbeat.setLastKnownNMTokenMasterKey(masterKey);

  NodeStatus status = Records.newRecord(NodeStatus.class);
  status.setContainersStatuses(generateContainerStatusList());
  status.setNodeId(node.getNodeID());
  status.setKeepAliveApplications(new ArrayList<ApplicationId>());
  status.setResponseId(RESPONSE_ID ++);
  status.setNodeHealthStatus(NodeHealthStatus.newInstance(true, "", 0));
  heartbeat.setNodeStatus(status);

  NodeHeartbeatResponse response =
      rm.getResourceTrackerService().nodeHeartbeat(heartbeat);

  if (! response.getContainersToCleanup().isEmpty()) {
    // act on the containers the RM asked us to clean up
    synchronized(releasedContainerList) {
      for (ContainerId containerId : response.getContainersToCleanup()) {
        if (! amContainerList.contains(containerId)) {
          // ordinary container: stop tracking it and record the release
          ContainerSimulator released = runningContainers.remove(containerId);
          containerQueue.remove(released);
          releasedContainerList.add(containerId);
          LOG.debug(MessageFormat.format(
              "NodeManager {0} releases a container ({1}).",
              node.getNodeID(), containerId));
        } else {
          // AM container (not killed?, only release)
          synchronized(amContainerList) {
            amContainerList.remove(containerId);
          }
          LOG.debug(MessageFormat.format(
              "NodeManager {0} releases an AM ({1}).",
              node.getNodeID(), containerId));
        }
      }
    }
  }

  if (response.getNodeAction() == NodeAction.SHUTDOWN) {
    lastStep();
  }
}