Java Code Examples for org.apache.hadoop.hdfs.protocol.DatanodeID#getXferAddr()

The following examples show how to use org.apache.hadoop.hdfs.protocol.DatanodeID#getXferAddr() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestDecommissioningStatus.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Verify the support for decommissioning a datanode that is already dead.
 * Under this scenario the datanode should immediately be marked as
 * DECOMMISSIONED
 */
@Test(timeout=120000)
public void testDecommissionDeadDN() throws Exception {
  Logger log = Logger.getLogger(DecommissionManager.class);
  log.setLevel(Level.DEBUG);
  DatanodeID dnID = cluster.getDataNodes().get(0).getDatanodeId();
  String dnName = dnID.getXferAddr();
  DataNodeProperties stoppedDN = cluster.stopDataNode(0);
  DFSTestUtil.waitForDatanodeState(cluster, dnID.getDatanodeUuid(),
      false, 30000);
  FSNamesystem fsn = cluster.getNamesystem();
  final DatanodeManager dm = fsn.getBlockManager().getDatanodeManager();
  DatanodeDescriptor dnDescriptor = dm.getDatanode(dnID);
  decommissionNode(fsn, localFileSys, dnName);
  dm.refreshNodes(conf);
  BlockManagerTestUtil.recheckDecommissionState(dm);
  assertTrue(dnDescriptor.isDecommissioned());

  // Add the node back
  cluster.restartDataNode(stoppedDN, true);
  cluster.waitActive();

  // Call refreshNodes on FSNamesystem with empty exclude file to remove the
  // datanode from decommissioning list and make it available again.
  writeConfigFile(localFileSys, excludeFile, null);
  dm.refreshNodes(conf);
}
 
Example 2
Source File: TestDecommissioningStatus.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Verify the support for decommissioning a datanode that is already dead.
 * Under this scenario the datanode should immediately be marked as
 * DECOMMISSIONED
 */
@Test(timeout=120000)
public void testDecommissionDeadDN() throws Exception {
  Logger log = Logger.getLogger(DecommissionManager.class);
  log.setLevel(Level.DEBUG);
  DatanodeID dnID = cluster.getDataNodes().get(0).getDatanodeId();
  String dnName = dnID.getXferAddr();
  DataNodeProperties stoppedDN = cluster.stopDataNode(0);
  DFSTestUtil.waitForDatanodeState(cluster, dnID.getDatanodeUuid(),
      false, 30000);
  FSNamesystem fsn = cluster.getNamesystem();
  final DatanodeManager dm = fsn.getBlockManager().getDatanodeManager();
  DatanodeDescriptor dnDescriptor = dm.getDatanode(dnID);
  decommissionNode(fsn, localFileSys, dnName);
  dm.refreshNodes(conf);
  BlockManagerTestUtil.recheckDecommissionState(dm);
  assertTrue(dnDescriptor.isDecommissioned());

  // Add the node back
  cluster.restartDataNode(stoppedDN, true);
  cluster.waitActive();

  // Call refreshNodes on FSNamesystem with empty exclude file to remove the
  // datanode from decommissioning list and make it available again.
  writeConfigFile(localFileSys, excludeFile, null);
  dm.refreshNodes(conf);
}
 
Example 3
Source File: TestOverReplicatedBlocks.java    From hadoop with Apache License 2.0 4 votes vote down vote up
/** Test processOverReplicatedBlock can handle corrupt replicas fine.
 * It make sure that it won't treat corrupt replicas as valid ones 
 * thus prevents NN deleting valid replicas but keeping
 * corrupt ones.
 */
@Test
public void testProcesOverReplicateBlock() throws Exception {
  Configuration conf = new HdfsConfiguration();
  conf.setLong(DFSConfigKeys.DFS_DATANODE_SCAN_PERIOD_HOURS_KEY, 100L);
  conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L);
  conf.set(
      DFSConfigKeys.DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY,
      Integer.toString(2));
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
  FileSystem fs = cluster.getFileSystem();

  try {
    final Path fileName = new Path("/foo1");
    DFSTestUtil.createFile(fs, fileName, 2, (short)3, 0L);
    DFSTestUtil.waitReplication(fs, fileName, (short)3);
    
    // corrupt the block on datanode 0
    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, fileName);
    assertTrue(cluster.corruptReplica(0, block));
    DataNodeProperties dnProps = cluster.stopDataNode(0);
    // remove block scanner log to trigger block scanning
    File scanCursor = new File(new File(MiniDFSCluster.getFinalizedDir(
        cluster.getInstanceStorageDir(0, 0),
        cluster.getNamesystem().getBlockPoolId()).getParent()).getParent(),
        "scanner.cursor");
    //wait for one minute for deletion to succeed;
    for(int i = 0; !scanCursor.delete(); i++) {
      assertTrue("Could not delete " + scanCursor.getAbsolutePath() +
          " in one minute", i < 60);
      try {
        Thread.sleep(1000);
      } catch (InterruptedException ignored) {}
    }
    
    // restart the datanode so the corrupt replica will be detected
    cluster.restartDataNode(dnProps);
    DFSTestUtil.waitReplication(fs, fileName, (short)2);
    
    String blockPoolId = cluster.getNamesystem().getBlockPoolId();
    final DatanodeID corruptDataNode = 
      DataNodeTestUtils.getDNRegistrationForBP(
          cluster.getDataNodes().get(2), blockPoolId);
       
    final FSNamesystem namesystem = cluster.getNamesystem();
    final BlockManager bm = namesystem.getBlockManager();
    final HeartbeatManager hm = bm.getDatanodeManager().getHeartbeatManager();
    try {
      namesystem.writeLock();
      synchronized(hm) {
        // set live datanode's remaining space to be 0 
        // so they will be chosen to be deleted when over-replication occurs
        String corruptMachineName = corruptDataNode.getXferAddr();
        for (DatanodeDescriptor datanode : hm.getDatanodes()) {
          if (!corruptMachineName.equals(datanode.getXferAddr())) {
            datanode.getStorageInfos()[0].setUtilizationForTesting(100L, 100L, 0, 100L);
            datanode.updateHeartbeat(
                BlockManagerTestUtil.getStorageReportsForDatanode(datanode),
                0L, 0L, 0, 0, null);
          }
        }

        // decrease the replication factor to 1; 
        NameNodeAdapter.setReplication(namesystem, fileName.toString(), (short)1);

        // corrupt one won't be chosen to be excess one
        // without 4910 the number of live replicas would be 0: block gets lost
        assertEquals(1, bm.countNodes(block.getLocalBlock()).liveReplicas());
      }
    } finally {
      namesystem.writeUnlock();
    }
    
  } finally {
    cluster.shutdown();
  }
}
 
Example 4
Source File: TestOverReplicatedBlocks.java    From big-c with Apache License 2.0 4 votes vote down vote up
/** Test processOverReplicatedBlock can handle corrupt replicas fine.
 * It make sure that it won't treat corrupt replicas as valid ones 
 * thus prevents NN deleting valid replicas but keeping
 * corrupt ones.
 */
@Test
public void testProcesOverReplicateBlock() throws Exception {
  Configuration conf = new HdfsConfiguration();
  conf.setLong(DFSConfigKeys.DFS_DATANODE_SCAN_PERIOD_HOURS_KEY, 100L);
  conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L);
  conf.set(
      DFSConfigKeys.DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY,
      Integer.toString(2));
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
  FileSystem fs = cluster.getFileSystem();

  try {
    final Path fileName = new Path("/foo1");
    DFSTestUtil.createFile(fs, fileName, 2, (short)3, 0L);
    DFSTestUtil.waitReplication(fs, fileName, (short)3);
    
    // corrupt the block on datanode 0
    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, fileName);
    assertTrue(cluster.corruptReplica(0, block));
    DataNodeProperties dnProps = cluster.stopDataNode(0);
    // remove block scanner log to trigger block scanning
    File scanCursor = new File(new File(MiniDFSCluster.getFinalizedDir(
        cluster.getInstanceStorageDir(0, 0),
        cluster.getNamesystem().getBlockPoolId()).getParent()).getParent(),
        "scanner.cursor");
    //wait for one minute for deletion to succeed;
    for(int i = 0; !scanCursor.delete(); i++) {
      assertTrue("Could not delete " + scanCursor.getAbsolutePath() +
          " in one minute", i < 60);
      try {
        Thread.sleep(1000);
      } catch (InterruptedException ignored) {}
    }
    
    // restart the datanode so the corrupt replica will be detected
    cluster.restartDataNode(dnProps);
    DFSTestUtil.waitReplication(fs, fileName, (short)2);
    
    String blockPoolId = cluster.getNamesystem().getBlockPoolId();
    final DatanodeID corruptDataNode = 
      DataNodeTestUtils.getDNRegistrationForBP(
          cluster.getDataNodes().get(2), blockPoolId);
       
    final FSNamesystem namesystem = cluster.getNamesystem();
    final BlockManager bm = namesystem.getBlockManager();
    final HeartbeatManager hm = bm.getDatanodeManager().getHeartbeatManager();
    try {
      namesystem.writeLock();
      synchronized(hm) {
        // set live datanode's remaining space to be 0 
        // so they will be chosen to be deleted when over-replication occurs
        String corruptMachineName = corruptDataNode.getXferAddr();
        for (DatanodeDescriptor datanode : hm.getDatanodes()) {
          if (!corruptMachineName.equals(datanode.getXferAddr())) {
            datanode.getStorageInfos()[0].setUtilizationForTesting(100L, 100L, 0, 100L);
            datanode.updateHeartbeat(
                BlockManagerTestUtil.getStorageReportsForDatanode(datanode),
                0L, 0L, 0, 0, null);
          }
        }

        // decrease the replication factor to 1; 
        NameNodeAdapter.setReplication(namesystem, fileName.toString(), (short)1);

        // corrupt one won't be chosen to be excess one
        // without 4910 the number of live replicas would be 0: block gets lost
        assertEquals(1, bm.countNodes(block.getLocalBlock()).liveReplicas());
      }
    } finally {
      namesystem.writeUnlock();
    }
    
  } finally {
    cluster.shutdown();
  }
}