Java Code Examples for org.apache.hadoop.hdfs.server.namenode.FSNamesystem#writeUnlock()

The following examples show how to use org.apache.hadoop.hdfs.server.namenode.FSNamesystem#writeUnlock(). Each example notes the source file and project it was taken from.
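All of the examples below follow the same basic shape: take the FSNamesystem write lock, mutate NameNode state, and release the lock in a finally block so it is never leaked on an exception. A minimal sketch of that pattern, assuming a running MiniDFSCluster named cluster as in the tests below:

FSNamesystem namesystem = cluster.getNamesystem();
namesystem.writeLock();
try {
  // ... mutate namespace or block manager state under the write lock ...
} finally {
  namesystem.writeUnlock();
}
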
Example 1
Source File: BlockManagerTestUtil.java    From hadoop with Apache License 2.0
/**
 * Ensure that the given NameNode marks the specified DataNode as
 * entirely dead/expired.
 * @param nn the NameNode to manipulate
 * @param dnName the name of the DataNode
 */
public static void noticeDeadDatanode(NameNode nn, String dnName) {
  FSNamesystem namesystem = nn.getNamesystem();
  namesystem.writeLock();
  try {
    DatanodeManager dnm = namesystem.getBlockManager().getDatanodeManager();
    HeartbeatManager hbm = dnm.getHeartbeatManager();
    DatanodeDescriptor[] dnds = hbm.getDatanodes();
    DatanodeDescriptor theDND = null;
    for (DatanodeDescriptor dnd : dnds) {
      if (dnd.getXferAddr().equals(dnName)) {
        theDND = dnd;
      }
    }
    Assert.assertNotNull("Could not find DN with name: " + dnName, theDND);
    
    synchronized (hbm) {
      DFSTestUtil.setDatanodeDead(theDND);
      hbm.heartbeatCheck();
    }
  } finally {
    namesystem.writeUnlock();
  }
}
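
A hedged sketch of how a test might invoke this helper. The MiniDFSCluster setup and the DataNode accessors below are assumptions based on typical Hadoop test code, not part of the original example:

MiniDFSCluster cluster = new MiniDFSCluster.Builder(new HdfsConfiguration())
    .numDataNodes(2).build();
try {
  cluster.waitActive();
  DataNode dn = cluster.getDataNodes().get(0);
  // The helper matches datanodes on the transfer address, so pass that here.
  String dnName = dn.getDatanodeId().getXferAddr();
  BlockManagerTestUtil.noticeDeadDatanode(cluster.getNameNode(), dnName);
  // The NameNode should now report one fewer live datanode.
} finally {
  cluster.shutdown();
}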
 
Example 2
Source File: BlockManagerTestUtil.java    From hadoop with Apache License 2.0
/**
 * Call the heartbeat check function of the HeartbeatManager and get the
 * under-replicated blocks count within the write lock, to make sure
 * computeDatanodeWork doesn't interfere.
 * @param namesystem the FSNamesystem
 * @param bm the BlockManager to manipulate
 * @return the number of under replicated blocks
 */
public static int checkHeartbeatAndGetUnderReplicatedBlocksCount(
    FSNamesystem namesystem, BlockManager bm) {
  namesystem.writeLock();
  try {
    bm.getDatanodeManager().getHeartbeatManager().heartbeatCheck();
    return bm.getUnderReplicatedNotMissingBlocks();
  } finally {
    namesystem.writeUnlock();
  }
}
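
A possible call site, sketched under the assumption of a MiniDFSCluster named cluster already running in the test:

FSNamesystem ns = cluster.getNamesystem();
BlockManager bm = ns.getBlockManager();
// Triggers a heartbeat check and reads the count atomically under the write lock.
int underReplicated =
    BlockManagerTestUtil.checkHeartbeatAndGetUnderReplicatedBlocksCount(ns, bm);
assertEquals(0, underReplicated);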
 
Example 3
Source File: TestFavoredNodes.java    From RDFS with Apache License 2.0
/**
 * Performs a number of hacks on the namenode and datanode data structures so
 * that datanodes are identified by machine name rather than by IP address.
 * This is done because a unit test can give each datanode a different
 * hostname but not a different IP address.
 * 
 * @param cluster
 *          the {@link MiniDFSCluster} to operate on
 * @throws Exception
 */
private static void updateDatanodeMap(MiniDFSCluster cluster)
    throws Exception {
  FSNamesystem namesystem = cluster.getNameNode().namesystem;
  for (DataNode node : cluster.getDataNodes()) {
    // Get old descriptor.
    DatanodeID dnId = createDataNodeID(node);
    DatanodeDescriptor dnDs = namesystem.getDatanode(dnId);

    // Create new id and descriptor.
    DatanodeID newId = new DatanodeID(node.getMachineName(),
        dnDs.getStorageID(), dnDs.getInfoPort(), dnDs.getIpcPort());
    DatanodeDescriptor newDS = new DatanodeDescriptor(newId,
        dnDs.getNetworkLocation(), dnDs.getHostName(), dnDs.getCapacity(),
        dnDs.getDfsUsed(), dnDs.getRemaining(), dnDs.getNamespaceUsed(),
        dnDs.getXceiverCount());
    
    newDS.isAlive = true;
    // Overwrite NN maps with new descriptor.
    namesystem.writeLock();
    namesystem.clusterMap.remove(dnDs);
    namesystem.resolveNetworkLocation(newDS);
    namesystem.unprotectedAddDatanode(newDS);
    namesystem.clusterMap.add(newDS);
    namesystem.writeUnlock();
    // Overwrite DN map with new registration.
    node.setRegistrationName(node.getMachineName());
  }
}
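
Since this is a private helper of TestFavoredNodes, it would be called from the test's own setup. A rough sketch follows; the cluster construction is an assumption, as the exact MiniDFSCluster API differs in the RDFS branch:

Configuration conf = new Configuration();
MiniDFSCluster cluster = new MiniDFSCluster(conf, 3, true, null);
cluster.waitActive();
// Re-key the NameNode's datanode map by hostname instead of IP address
// before exercising favored-node placement.
updateDatanodeMap(cluster);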
 
Example 4
Source File: TestFileCorruption.java    From hadoop with Apache License 2.0
/** Test the case where a replica is reported corrupt while it is not
 * in blocksMap. Make sure that an ArrayIndexOutOfBoundsException is not thrown.
 * See HADOOP-4351.
 */
@Test
public void testArrayOutOfBoundsException() throws Exception {
  MiniDFSCluster cluster = null;
  try {
    Configuration conf = new HdfsConfiguration();
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
    cluster.waitActive();
    
    FileSystem fs = cluster.getFileSystem();
    final Path FILE_PATH = new Path("/tmp.txt");
    final long FILE_LEN = 1L;
    DFSTestUtil.createFile(fs, FILE_PATH, FILE_LEN, (short)2, 1L);
    
    // get the block
    final String bpid = cluster.getNamesystem().getBlockPoolId();
    File storageDir = cluster.getInstanceStorageDir(0, 0);
    File dataDir = MiniDFSCluster.getFinalizedDir(storageDir, bpid);
    assertTrue("Data directory does not exist", dataDir.exists());
    ExtendedBlock blk = getBlock(bpid, dataDir);
    if (blk == null) {
      storageDir = cluster.getInstanceStorageDir(0, 1);
      dataDir = MiniDFSCluster.getFinalizedDir(storageDir, bpid);
      blk = getBlock(bpid, dataDir);
    }
    assertFalse("Data directory does not contain any blocks or there was an "
        + "IO error", blk==null);

    // start a third datanode
    cluster.startDataNodes(conf, 1, true, null, null);
    ArrayList<DataNode> datanodes = cluster.getDataNodes();
    assertEquals(3, datanodes.size());
    DataNode dataNode = datanodes.get(2);
    
    // report corrupted block by the third datanode
    DatanodeRegistration dnR = 
      DataNodeTestUtils.getDNRegistrationForBP(dataNode, blk.getBlockPoolId());
    FSNamesystem ns = cluster.getNamesystem();
    ns.writeLock();
    try {
      cluster.getNamesystem().getBlockManager().findAndMarkBlockAsCorrupt(
          blk, new DatanodeInfo(dnR), "TEST", "STORAGE_ID");
    } finally {
      ns.writeUnlock();
    }
    
    // open the file
    fs.open(FILE_PATH);
    
    //clean up
    fs.delete(FILE_PATH, false);
  } finally {
    if (cluster != null) { cluster.shutdown(); }
  }
  
}
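
The test depends on a getBlock(bpid, dataDir) helper that is not reproduced above. A simplified, hypothetical sketch of what such a helper could look like; the real one also recovers the block length and generation stamp from the on-disk files:

private static ExtendedBlock getBlock(String bpid, File dataDir) {
  File[] files = dataDir.listFiles();
  if (files == null) {
    return null;
  }
  for (File f : files) {
    String name = f.getName();
    // Block data files are named "blk_<blockId>"; skip the ".meta" companions.
    if (name.startsWith("blk_") && !name.endsWith(".meta")) {
      long blockId = Long.parseLong(name.substring("blk_".length()));
      return new ExtendedBlock(bpid, blockId);
    }
  }
  return null;
}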
 
Example 5
Source File: TestHeartbeatHandling.java    From hadoop with Apache License 2.0
/**
 * Test that
 * {@link FSNamesystem#handleHeartbeat}
 * can pick up replication and/or invalidation requests and that it observes
 * the max limit.
 */
@Test
public void testHeartbeat() throws Exception {
  final Configuration conf = new HdfsConfiguration();
  final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
  try {
    cluster.waitActive();
    final FSNamesystem namesystem = cluster.getNamesystem();
    final HeartbeatManager hm = namesystem.getBlockManager(
        ).getDatanodeManager().getHeartbeatManager();
    final String poolId = namesystem.getBlockPoolId();
    final DatanodeRegistration nodeReg =
      DataNodeTestUtils.getDNRegistrationForBP(cluster.getDataNodes().get(0), poolId);
    final DatanodeDescriptor dd = NameNodeAdapter.getDatanode(namesystem, nodeReg);
    final String storageID = DatanodeStorage.generateUuid();
    dd.updateStorage(new DatanodeStorage(storageID));

    final int REMAINING_BLOCKS = 1;
    final int MAX_REPLICATE_LIMIT =
      conf.getInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MAX_STREAMS_KEY, 2);
    final int MAX_INVALIDATE_LIMIT = DFSConfigKeys.DFS_BLOCK_INVALIDATE_LIMIT_DEFAULT;
    final int MAX_INVALIDATE_BLOCKS = 2*MAX_INVALIDATE_LIMIT+REMAINING_BLOCKS;
    final int MAX_REPLICATE_BLOCKS = 2*MAX_REPLICATE_LIMIT+REMAINING_BLOCKS;
    final DatanodeStorageInfo[] ONE_TARGET = {dd.getStorageInfo(storageID)};

    try {
      namesystem.writeLock();
      synchronized(hm) {
        for (int i=0; i<MAX_REPLICATE_BLOCKS; i++) {
          dd.addBlockToBeReplicated(
              new Block(i, 0, GenerationStamp.LAST_RESERVED_STAMP),
              ONE_TARGET);
        }
        DatanodeCommand[] cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd,
            namesystem).getCommands();
        assertEquals(1, cmds.length);
        assertEquals(DatanodeProtocol.DNA_TRANSFER, cmds[0].getAction());
        assertEquals(MAX_REPLICATE_LIMIT, ((BlockCommand)cmds[0]).getBlocks().length);

        ArrayList<Block> blockList = new ArrayList<Block>(MAX_INVALIDATE_BLOCKS);
        for (int i=0; i<MAX_INVALIDATE_BLOCKS; i++) {
          blockList.add(new Block(i, 0, GenerationStamp.LAST_RESERVED_STAMP));
        }
        dd.addBlocksToBeInvalidated(blockList);
        cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem)
            .getCommands();
        assertEquals(2, cmds.length);
        assertEquals(DatanodeProtocol.DNA_TRANSFER, cmds[0].getAction());
        assertEquals(MAX_REPLICATE_LIMIT, ((BlockCommand)cmds[0]).getBlocks().length);
        assertEquals(DatanodeProtocol.DNA_INVALIDATE, cmds[1].getAction());
        assertEquals(MAX_INVALIDATE_LIMIT, ((BlockCommand)cmds[1]).getBlocks().length);
        
        cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem)
            .getCommands();
        assertEquals(2, cmds.length);
        assertEquals(DatanodeProtocol.DNA_TRANSFER, cmds[0].getAction());
        assertEquals(REMAINING_BLOCKS, ((BlockCommand)cmds[0]).getBlocks().length);
        assertEquals(DatanodeProtocol.DNA_INVALIDATE, cmds[1].getAction());
        assertEquals(MAX_INVALIDATE_LIMIT, ((BlockCommand)cmds[1]).getBlocks().length);
        
        cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem)
            .getCommands();
        assertEquals(1, cmds.length);
        assertEquals(DatanodeProtocol.DNA_INVALIDATE, cmds[0].getAction());
        assertEquals(REMAINING_BLOCKS, ((BlockCommand)cmds[0]).getBlocks().length);

        cmds = NameNodeAdapter.sendHeartBeat(nodeReg, dd, namesystem)
            .getCommands();
        assertEquals(0, cmds.length);
      }
    } finally {
      namesystem.writeUnlock();
    }
  } finally {
    cluster.shutdown();
  }
}
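
The assertion pattern above follows from simple batching arithmetic: with 2 * limit + 1 items queued, each heartbeat drains at most limit of them, so the replication commands come back as limit, limit, then the single remainder. Illustrated with the test's own replication constants (the test reads the max-streams setting with a fallback of 2):

int limit  = 2;              // MAX_REPLICATE_LIMIT in the test
int queued = 2 * limit + 1;  // MAX_REPLICATE_BLOCKS
int first  = Math.min(limit, queued);          // 2 blocks in heartbeat 1
int second = Math.min(limit, queued - first);  // 2 blocks in heartbeat 2
int third  = queued - first - second;          // 1 block in heartbeat 3 (REMAINING_BLOCKS)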
 
Example 6
Source File: TestOverReplicatedBlocks.java    From hadoop with Apache License 2.0
/** Test that processOverReplicatedBlock can handle corrupt replicas correctly.
 * It makes sure that corrupt replicas are not treated as valid ones, which
 * would otherwise lead the NN to delete valid replicas while keeping
 * corrupt ones.
 */
@Test
public void testProcesOverReplicateBlock() throws Exception {
  Configuration conf = new HdfsConfiguration();
  conf.setLong(DFSConfigKeys.DFS_DATANODE_SCAN_PERIOD_HOURS_KEY, 100L);
  conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L);
  conf.set(
      DFSConfigKeys.DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY,
      Integer.toString(2));
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
  FileSystem fs = cluster.getFileSystem();

  try {
    final Path fileName = new Path("/foo1");
    DFSTestUtil.createFile(fs, fileName, 2, (short)3, 0L);
    DFSTestUtil.waitReplication(fs, fileName, (short)3);
    
    // corrupt the block on datanode 0
    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, fileName);
    assertTrue(cluster.corruptReplica(0, block));
    DataNodeProperties dnProps = cluster.stopDataNode(0);
    // remove block scanner log to trigger block scanning
    File scanCursor = new File(new File(MiniDFSCluster.getFinalizedDir(
        cluster.getInstanceStorageDir(0, 0),
        cluster.getNamesystem().getBlockPoolId()).getParent()).getParent(),
        "scanner.cursor");
    //wait for one minute for deletion to succeed;
    for(int i = 0; !scanCursor.delete(); i++) {
      assertTrue("Could not delete " + scanCursor.getAbsolutePath() +
          " in one minute", i < 60);
      try {
        Thread.sleep(1000);
      } catch (InterruptedException ignored) {}
    }
    
    // restart the datanode so the corrupt replica will be detected
    cluster.restartDataNode(dnProps);
    DFSTestUtil.waitReplication(fs, fileName, (short)2);
    
    String blockPoolId = cluster.getNamesystem().getBlockPoolId();
    final DatanodeID corruptDataNode = 
      DataNodeTestUtils.getDNRegistrationForBP(
          cluster.getDataNodes().get(2), blockPoolId);
       
    final FSNamesystem namesystem = cluster.getNamesystem();
    final BlockManager bm = namesystem.getBlockManager();
    final HeartbeatManager hm = bm.getDatanodeManager().getHeartbeatManager();
    try {
      namesystem.writeLock();
      synchronized(hm) {
        // set live datanode's remaining space to be 0 
        // so they will be chosen to be deleted when over-replication occurs
        String corruptMachineName = corruptDataNode.getXferAddr();
        for (DatanodeDescriptor datanode : hm.getDatanodes()) {
          if (!corruptMachineName.equals(datanode.getXferAddr())) {
            datanode.getStorageInfos()[0].setUtilizationForTesting(100L, 100L, 0, 100L);
            datanode.updateHeartbeat(
                BlockManagerTestUtil.getStorageReportsForDatanode(datanode),
                0L, 0L, 0, 0, null);
          }
        }

        // decrease the replication factor to 1; 
        NameNodeAdapter.setReplication(namesystem, fileName.toString(), (short)1);

        // corrupt one won't be chosen to be excess one
        // without 4910 the number of live replicas would be 0: block gets lost
        assertEquals(1, bm.countNodes(block.getLocalBlock()).liveReplicas());
      }
    } finally {
      namesystem.writeUnlock();
    }
    
  } finally {
    cluster.shutdown();
  }
}
 