Java Code Examples for org.apache.hadoop.yarn.api.records.NodeState#UNHEALTHY
The following examples show how to use
org.apache.hadoop.yarn.api.records.NodeState#UNHEALTHY.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: RMNodeImpl.java From hadoop with Apache License 2.0 | 6 votes |
@Override public NodeState transition(RMNodeImpl rmNode, RMNodeEvent event) { RMNodeStatusEvent statusEvent = (RMNodeStatusEvent) event; // Switch the last heartbeatresponse. rmNode.latestNodeHeartBeatResponse = statusEvent.getLatestResponse(); NodeHealthStatus remoteNodeHealthStatus = statusEvent.getNodeHealthStatus(); rmNode.setHealthReport(remoteNodeHealthStatus.getHealthReport()); rmNode.setLastHealthReportTime( remoteNodeHealthStatus.getLastHealthReportTime()); if (remoteNodeHealthStatus.getIsNodeHealthy()) { rmNode.context.getDispatcher().getEventHandler().handle( new NodeAddedSchedulerEvent(rmNode)); rmNode.context.getDispatcher().getEventHandler().handle( new NodesListManagerEvent( NodesListManagerEventType.NODE_USABLE, rmNode)); // ??? how about updating metrics before notifying to ensure that // notifiers get update metadata because they will very likely query it // upon notification // Update metrics rmNode.updateMetricsForRejoinedNode(NodeState.UNHEALTHY); return NodeState.RUNNING; } return NodeState.UNHEALTHY; }
Example 2
Source File: MockNodes.java From hadoop with Apache License 2.0 | 6 votes |
/**
 * Builds a {@code MockRMNodeImpl} from the given attributes.
 *
 * <p>If {@code hostName} is {@code null}, the name {@code "host" + hostnum}
 * is used instead. The default is resolved <em>before</em> the node address
 * is assembled, so the address is {@code "host<hostnum>:<hostnum>"} rather
 * than the literal {@code "null:<hostnum>"}, and it agrees with the host
 * name used for the {@code NodeId}.
 *
 * @param rack rack number; the rack name becomes {@code "rack" + rack}
 * @param perNode resource passed through to the mock node
 * @param state node state; an {@code UNHEALTHY} node gets a {@code null}
 *        health report, any other state gets {@code "HealthyMe"}
 * @param httpAddr HTTP address passed through to the mock node
 * @param hostnum host number, used for the default host name and as the
 *        numeric suffix of the node address
 * @param hostName host name, or {@code null} to derive one from
 *        {@code hostnum}
 * @param port port used for the {@code NodeId}
 * @param labels node labels passed through to the mock node
 * @return the newly created mock node
 */
private static RMNode buildRMNode(int rack, final Resource perNode,
    NodeState state, String httpAddr, int hostnum, String hostName, int port,
    Set<String> labels) {
  // Resolve the default first so every derived value sees the same name.
  final String resolvedHostName =
      (hostName == null) ? "host" + hostnum : hostName;

  final String rackName = "rack" + rack;
  final String nodeAddr = resolvedHostName + ":" + hostnum;
  final NodeId nodeID = NodeId.newInstance(resolvedHostName, port);
  final String healthReport =
      (state == NodeState.UNHEALTHY) ? null : "HealthyMe";

  return new MockRMNodeImpl(nodeID, nodeAddr, httpAddr, perNode, rackName,
      healthReport, 0, hostnum, resolvedHostName, state, labels);
}
Example 3
Source File: TestResourceTrackerService.java From hadoop with Apache License 2.0 | 6 votes |
/**
 * Polls the state of {@code nm1} in the resource manager's node map, then
 * asserts on the node state and on the cluster's unhealthy-node metric.
 *
 * <p>Polling continues while {@code (state != UNHEALTHY) == health} holds,
 * waiting 100 ms on this object's monitor between polls, for at most 20
 * waits. Afterwards the method asserts that the condition no longer holds
 * and that {@code ClusterMetrics.getMetrics().getUnhealthyNMs()} equals
 * {@code count}.
 *
 * @param rm resource manager whose node map is inspected
 * @param nm1 node manager whose state is polled
 * @param health polling stops once "node is not UNHEALTHY" differs from
 *        this flag
 * @param count expected number of unhealthy node managers
 * @throws Exception if the wait is interrupted
 */
private void checkUnealthyNMCount(MockRM rm, MockNM nm1, boolean health,
    int count) throws Exception {
  int polls = 0;
  for (;;) {
    NodeState observed =
        rm.getRMContext().getRMNodes().get(nm1.getNodeId()).getState();
    boolean keepWaiting = (observed != NodeState.UNHEALTHY) == health;
    // The counter is only consumed while the state has not flipped yet.
    if (!keepWaiting || polls++ >= 20) {
      break;
    }
    synchronized (this) {
      wait(100);
    }
  }

  NodeState finalState =
      rm.getRMContext().getRMNodes().get(nm1.getNodeId()).getState();
  Assert.assertFalse((finalState != NodeState.UNHEALTHY) == health);
  Assert.assertEquals("Unhealthy metrics not incremented", count,
      ClusterMetrics.getMetrics().getUnhealthyNMs());
}
Example 4
Source File: RMNodeImpl.java From big-c with Apache License 2.0 | 6 votes |
@Override public NodeState transition(RMNodeImpl rmNode, RMNodeEvent event) { RMNodeStatusEvent statusEvent = (RMNodeStatusEvent) event; // Switch the last heartbeatresponse. rmNode.latestNodeHeartBeatResponse = statusEvent.getLatestResponse(); NodeHealthStatus remoteNodeHealthStatus = statusEvent.getNodeHealthStatus(); rmNode.setHealthReport(remoteNodeHealthStatus.getHealthReport()); rmNode.setLastHealthReportTime( remoteNodeHealthStatus.getLastHealthReportTime()); if (remoteNodeHealthStatus.getIsNodeHealthy()) { rmNode.context.getDispatcher().getEventHandler().handle( new NodeAddedSchedulerEvent(rmNode)); rmNode.context.getDispatcher().getEventHandler().handle( new NodesListManagerEvent( NodesListManagerEventType.NODE_USABLE, rmNode)); // ??? how about updating metrics before notifying to ensure that // notifiers get update metadata because they will very likely query it // upon notification // Update metrics rmNode.updateMetricsForRejoinedNode(NodeState.UNHEALTHY); return NodeState.RUNNING; } return NodeState.UNHEALTHY; }
Example 5
Source File: MockNodes.java From big-c with Apache License 2.0 | 6 votes |
/**
 * Builds a {@code MockRMNodeImpl} from the given attributes.
 *
 * <p>If {@code hostName} is {@code null}, the name {@code "host" + hostnum}
 * is used instead. The default is resolved <em>before</em> the node address
 * is assembled, so the address is {@code "host<hostnum>:<hostnum>"} rather
 * than the literal {@code "null:<hostnum>"}, and it agrees with the host
 * name used for the {@code NodeId}.
 *
 * @param rack rack number; the rack name becomes {@code "rack" + rack}
 * @param perNode resource passed through to the mock node
 * @param state node state; an {@code UNHEALTHY} node gets a {@code null}
 *        health report, any other state gets {@code "HealthyMe"}
 * @param httpAddr HTTP address passed through to the mock node
 * @param hostnum host number, used for the default host name and as the
 *        numeric suffix of the node address
 * @param hostName host name, or {@code null} to derive one from
 *        {@code hostnum}
 * @param port port used for the {@code NodeId}
 * @param labels node labels passed through to the mock node
 * @return the newly created mock node
 */
private static RMNode buildRMNode(int rack, final Resource perNode,
    NodeState state, String httpAddr, int hostnum, String hostName, int port,
    Set<String> labels) {
  // Resolve the default first so every derived value sees the same name.
  final String resolvedHostName =
      (hostName == null) ? "host" + hostnum : hostName;

  final String rackName = "rack" + rack;
  final String nodeAddr = resolvedHostName + ":" + hostnum;
  final NodeId nodeID = NodeId.newInstance(resolvedHostName, port);
  final String healthReport =
      (state == NodeState.UNHEALTHY) ? null : "HealthyMe";

  return new MockRMNodeImpl(nodeID, nodeAddr, httpAddr, perNode, rackName,
      healthReport, 0, hostnum, resolvedHostName, state, labels);
}
Example 6
Source File: TestResourceTrackerService.java From big-c with Apache License 2.0 | 6 votes |
/**
 * Polls the state of {@code nm1} in the resource manager's node map, then
 * asserts on the node state and on the cluster's unhealthy-node metric.
 *
 * <p>Polling continues while {@code (state != UNHEALTHY) == health} holds,
 * waiting 100 ms on this object's monitor between polls, for at most 20
 * waits. Afterwards the method asserts that the condition no longer holds
 * and that {@code ClusterMetrics.getMetrics().getUnhealthyNMs()} equals
 * {@code count}.
 *
 * @param rm resource manager whose node map is inspected
 * @param nm1 node manager whose state is polled
 * @param health polling stops once "node is not UNHEALTHY" differs from
 *        this flag
 * @param count expected number of unhealthy node managers
 * @throws Exception if the wait is interrupted
 */
private void checkUnealthyNMCount(MockRM rm, MockNM nm1, boolean health,
    int count) throws Exception {
  int polls = 0;
  for (;;) {
    NodeState observed =
        rm.getRMContext().getRMNodes().get(nm1.getNodeId()).getState();
    boolean keepWaiting = (observed != NodeState.UNHEALTHY) == health;
    // The counter is only consumed while the state has not flipped yet.
    if (!keepWaiting || polls++ >= 20) {
      break;
    }
    synchronized (this) {
      wait(100);
    }
  }

  NodeState finalState =
      rm.getRMContext().getRMNodes().get(nm1.getNodeId()).getState();
  Assert.assertFalse((finalState != NodeState.UNHEALTHY) == health);
  Assert.assertEquals("Unhealthy metrics not incremented", count,
      ClusterMetrics.getMetrics().getUnhealthyNMs());
}
Example 7
Source File: NodesPage.java From hadoop with Apache License 2.0 | 4 votes |
@Override protected void render(Block html) { html._(MetricsOverviewTable.class); ResourceScheduler sched = rm.getResourceScheduler(); String type = $(NODE_STATE); String labelFilter = $(NODE_LABEL, CommonNodeLabelsManager.ANY).trim(); TBODY<TABLE<Hamlet>> tbody = html.table("#nodes").thead().tr() .th(".nodelabels", "Node Labels") .th(".rack", "Rack") .th(".state", "Node State") .th(".nodeaddress", "Node Address") .th(".nodehttpaddress", "Node HTTP Address") .th(".lastHealthUpdate", "Last health-update") .th(".healthReport", "Health-report") .th(".containers", "Containers") .th(".mem", "Mem Used") .th(".mem", "Mem Avail") .th(".vcores", "VCores Used") .th(".vcores", "VCores Avail") .th(".gcores", "GCores Used") .th(".gcores", "GCores Avail") .th(".nodeManagerVersion", "Version")._()._().tbody(); NodeState stateFilter = null; if (type != null && !type.isEmpty()) { stateFilter = NodeState.valueOf(StringUtils.toUpperCase(type)); } Collection<RMNode> rmNodes = this.rm.getRMContext().getRMNodes().values(); boolean isInactive = false; if (stateFilter != null) { switch (stateFilter) { case DECOMMISSIONED: case LOST: case REBOOTED: rmNodes = this.rm.getRMContext().getInactiveRMNodes().values(); isInactive = true; break; default: LOG.debug("Unexpected state filter for inactive RM node"); } } for (RMNode ni : rmNodes) { if (stateFilter != null) { NodeState state = ni.getState(); if (!stateFilter.equals(state)) { continue; } } else { // No filter. User is asking for all nodes. Make sure you skip the // unhealthy nodes. if (ni.getState() == NodeState.UNHEALTHY) { continue; } } // Besides state, we need to filter label as well. if (!labelFilter.equals(RMNodeLabelsManager.ANY)) { if (labelFilter.isEmpty()) { // Empty label filter means only shows nodes without label if (!ni.getNodeLabels().isEmpty()) { continue; } } else if (!ni.getNodeLabels().contains(labelFilter)) { // Only nodes have given label can show on web page. 
continue; } } NodeInfo info = new NodeInfo(ni, sched); int usedMemory = (int) info.getUsedMemory(); int availableMemory = (int) info.getAvailableMemory(); TR<TBODY<TABLE<Hamlet>>> row = tbody.tr().td(StringUtils.join(",", info.getNodeLabels())) .td(info.getRack()).td(info.getState()).td(info.getNodeId()); if (isInactive) { row.td()._("N/A")._(); } else { String httpAddress = info.getNodeHTTPAddress(); row.td().a("//" + httpAddress, httpAddress)._(); } row.td().br().$title(String.valueOf(info.getLastHealthUpdate()))._() ._(Times.format(info.getLastHealthUpdate()))._() .td(info.getHealthReport()) .td(String.valueOf(info.getNumContainers())).td().br() .$title(String.valueOf(usedMemory))._() ._(StringUtils.byteDesc(usedMemory * BYTES_IN_MB))._().td().br() .$title(String.valueOf(availableMemory))._() ._(StringUtils.byteDesc(availableMemory * BYTES_IN_MB))._() .td(String.valueOf(info.getUsedVirtualCores())) .td(String.valueOf(info.getAvailableVirtualCores())) .td(String.valueOf(info.getUsedGpuCores())) .td(String.valueOf(info.getAvailableGpuCores())) .td(ni.getNodeManagerVersion())._(); } tbody._()._(); }
Example 8
Source File: RMNodeImpl.java From hadoop with Apache License 2.0 | 4 votes |
@Override public NodeState transition(RMNodeImpl rmNode, RMNodeEvent event) { RMNodeStatusEvent statusEvent = (RMNodeStatusEvent) event; // Switch the last heartbeatresponse. rmNode.latestNodeHeartBeatResponse = statusEvent.getLatestResponse(); NodeHealthStatus remoteNodeHealthStatus = statusEvent.getNodeHealthStatus(); rmNode.setHealthReport(remoteNodeHealthStatus.getHealthReport()); rmNode.setLastHealthReportTime( remoteNodeHealthStatus.getLastHealthReportTime()); if (!remoteNodeHealthStatus.getIsNodeHealthy()) { LOG.info("Node " + rmNode.nodeId + " reported UNHEALTHY with details: " + remoteNodeHealthStatus.getHealthReport()); rmNode.nodeUpdateQueue.clear(); // Inform the scheduler rmNode.context.getDispatcher().getEventHandler().handle( new NodeRemovedSchedulerEvent(rmNode)); rmNode.context.getDispatcher().getEventHandler().handle( new NodesListManagerEvent( NodesListManagerEventType.NODE_UNUSABLE, rmNode)); // Update metrics rmNode.updateMetricsForDeactivatedNode(rmNode.getState(), NodeState.UNHEALTHY); return NodeState.UNHEALTHY; } rmNode.handleContainerStatus(statusEvent.getContainers()); if(rmNode.nextHeartBeat) { rmNode.nextHeartBeat = false; rmNode.context.getDispatcher().getEventHandler().handle( new NodeUpdateSchedulerEvent(rmNode)); } // Update DTRenewer in secure mode to keep these apps alive. Today this is // needed for log-aggregation to finish long after the apps are gone. if (UserGroupInformation.isSecurityEnabled()) { rmNode.context.getDelegationTokenRenewer().updateKeepAliveApplications( statusEvent.getKeepAliveAppIds()); } return NodeState.RUNNING; }
Example 9
Source File: NodesPage.java From big-c with Apache License 2.0 | 4 votes |
@Override protected void render(Block html) { html._(MetricsOverviewTable.class); ResourceScheduler sched = rm.getResourceScheduler(); String type = $(NODE_STATE); String labelFilter = $(NODE_LABEL, CommonNodeLabelsManager.ANY).trim(); TBODY<TABLE<Hamlet>> tbody = html.table("#nodes").thead().tr() .th(".nodelabels", "Node Labels") .th(".rack", "Rack") .th(".state", "Node State") .th(".nodeaddress", "Node Address") .th(".nodehttpaddress", "Node HTTP Address") .th(".lastHealthUpdate", "Last health-update") .th(".healthReport", "Health-report") .th(".containers", "Containers") .th(".mem", "Mem Used") .th(".mem", "Mem Avail") .th(".vcores", "VCores Used") .th(".vcores", "VCores Avail") .th(".nodeManagerVersion", "Version")._()._().tbody(); NodeState stateFilter = null; if (type != null && !type.isEmpty()) { stateFilter = NodeState.valueOf(StringUtils.toUpperCase(type)); } Collection<RMNode> rmNodes = this.rm.getRMContext().getRMNodes().values(); boolean isInactive = false; if (stateFilter != null) { switch (stateFilter) { case DECOMMISSIONED: case LOST: case REBOOTED: rmNodes = this.rm.getRMContext().getInactiveRMNodes().values(); isInactive = true; break; default: LOG.debug("Unexpected state filter for inactive RM node"); } } for (RMNode ni : rmNodes) { if (stateFilter != null) { NodeState state = ni.getState(); if (!stateFilter.equals(state)) { continue; } } else { // No filter. User is asking for all nodes. Make sure you skip the // unhealthy nodes. if (ni.getState() == NodeState.UNHEALTHY) { continue; } } // Besides state, we need to filter label as well. if (!labelFilter.equals(RMNodeLabelsManager.ANY)) { if (labelFilter.isEmpty()) { // Empty label filter means only shows nodes without label if (!ni.getNodeLabels().isEmpty()) { continue; } } else if (!ni.getNodeLabels().contains(labelFilter)) { // Only nodes have given label can show on web page. 
continue; } } NodeInfo info = new NodeInfo(ni, sched); int usedMemory = (int) info.getUsedMemory(); int availableMemory = (int) info.getAvailableMemory(); TR<TBODY<TABLE<Hamlet>>> row = tbody.tr().td(StringUtils.join(",", info.getNodeLabels())) .td(info.getRack()).td(info.getState()).td(info.getNodeId()); if (isInactive) { row.td()._("N/A")._(); } else { String httpAddress = info.getNodeHTTPAddress(); row.td().a("//" + httpAddress, httpAddress)._(); } row.td().br().$title(String.valueOf(info.getLastHealthUpdate()))._() ._(Times.format(info.getLastHealthUpdate()))._() .td(info.getHealthReport()) .td(String.valueOf(info.getNumContainers())).td().br() .$title(String.valueOf(usedMemory))._() ._(StringUtils.byteDesc(usedMemory * BYTES_IN_MB))._().td().br() .$title(String.valueOf(availableMemory))._() ._(StringUtils.byteDesc(availableMemory * BYTES_IN_MB))._() .td(String.valueOf(info.getUsedVirtualCores())) .td(String.valueOf(info.getAvailableVirtualCores())) .td(ni.getNodeManagerVersion())._(); } tbody._()._(); }
Example 10
Source File: RMNodeImpl.java From big-c with Apache License 2.0 | 4 votes |
@Override public NodeState transition(RMNodeImpl rmNode, RMNodeEvent event) { RMNodeStatusEvent statusEvent = (RMNodeStatusEvent) event; // Switch the last heartbeatresponse. rmNode.latestNodeHeartBeatResponse = statusEvent.getLatestResponse(); NodeHealthStatus remoteNodeHealthStatus = statusEvent.getNodeHealthStatus(); rmNode.setHealthReport(remoteNodeHealthStatus.getHealthReport()); rmNode.setLastHealthReportTime( remoteNodeHealthStatus.getLastHealthReportTime()); if (!remoteNodeHealthStatus.getIsNodeHealthy()) { LOG.info("Node " + rmNode.nodeId + " reported UNHEALTHY with details: " + remoteNodeHealthStatus.getHealthReport()); rmNode.nodeUpdateQueue.clear(); // Inform the scheduler rmNode.context.getDispatcher().getEventHandler().handle( new NodeRemovedSchedulerEvent(rmNode)); rmNode.context.getDispatcher().getEventHandler().handle( new NodesListManagerEvent( NodesListManagerEventType.NODE_UNUSABLE, rmNode)); // Update metrics rmNode.updateMetricsForDeactivatedNode(rmNode.getState(), NodeState.UNHEALTHY); return NodeState.UNHEALTHY; } rmNode.handleContainerStatus(statusEvent.getContainers()); if(rmNode.nextHeartBeat) { rmNode.nextHeartBeat = false; rmNode.context.getDispatcher().getEventHandler().handle( new NodeUpdateSchedulerEvent(rmNode)); } // Update DTRenewer in secure mode to keep these apps alive. Today this is // needed for log-aggregation to finish long after the apps are gone. if (UserGroupInformation.isSecurityEnabled()) { rmNode.context.getDelegationTokenRenewer().updateKeepAliveApplications( statusEvent.getKeepAliveAppIds()); } return NodeState.RUNNING; }