Java Code Examples for org.apache.hadoop.hdfs.server.namenode.FSNamesystem#getBlockManager()

The following examples show how to use org.apache.hadoop.hdfs.server.namenode.FSNamesystem#getBlockManager(). Each example is taken from the source file and project named in its heading.
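Across the examples, the common pattern is to obtain an FSNamesystem, typically from a MiniDFSCluster in a test or via NameNodeAdapter.getNamesystem(NameNode), and then call getBlockManager() on it. The snippet below is a minimal sketch of that pattern, assuming a MiniDFSCluster-based test context like the ones that follow; it is not a complete test.

// Minimal sketch of the common access pattern; assumes a test context with
// three datanodes, as in Example 1. The body comment is a placeholder.
Configuration conf = new HdfsConfiguration();
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
try {
  cluster.waitActive();
  FSNamesystem namesystem = cluster.getNamesystem();  // NameNode-internal namesystem
  BlockManager bm = namesystem.getBlockManager();     // block and replica bookkeeping
  // ... inspect replica counts, datanode state, etc. through bm ...
} finally {
  cluster.shutdown();
}
 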
Example 1
Source File: TestOverReplicatedBlocks.java    From hadoop with Apache License 2.0
/**
 * Test that an over-replicated block gets invalidated when the replication
 * factor is decreased for a partially written block.
 */
@Test
public void testInvalidateOverReplicatedBlock() throws Exception {
  Configuration conf = new HdfsConfiguration();
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3)
      .build();
  try {
    final FSNamesystem namesystem = cluster.getNamesystem();
    final BlockManager bm = namesystem.getBlockManager();
    FileSystem fs = cluster.getFileSystem();
    Path p = new Path(MiniDFSCluster.getBaseDirectory(), "/foo1");
    FSDataOutputStream out = fs.create(p, (short) 2);
    out.writeBytes("HDFS-3119: " + p);
    out.hsync();
    fs.setReplication(p, (short) 1);
    out.close();
    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, p);
    assertEquals("Expected only one live replica for the block", 1, bm
        .countNodes(block.getLocalBlock()).liveReplicas());
  } finally {
    cluster.shutdown();
  }
}
 
Example 2
Source File: CacheReplicationMonitor.java    From hadoop with Apache License 2.0
public CacheReplicationMonitor(FSNamesystem namesystem,
    CacheManager cacheManager, long intervalMs, ReentrantLock lock) {
  this.namesystem = namesystem;
  this.blockManager = namesystem.getBlockManager();
  this.cacheManager = cacheManager;
  this.cachedBlocks = cacheManager.getCachedBlocks();
  this.intervalMs = intervalMs;
  this.lock = lock;
  this.doRescan = this.lock.newCondition();
  this.scanFinished = this.lock.newCondition();
}
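In Hadoop itself this constructor is called from the CacheManager when the NameNode starts the cache replication monitor thread. The sketch below is a rough, hypothetical picture of that call site; the interval value, the lock, and the start() call are assumptions for illustration, not taken from the example above.

// Hypothetical construction of the monitor (in Hadoop this happens inside
// CacheManager); the 30-second interval and the lock are illustrative.
ReentrantLock crmLock = new ReentrantLock();
CacheReplicationMonitor monitor =
    new CacheReplicationMonitor(namesystem, cacheManager, 30000L, crmLock);
monitor.start();  // assumption: the monitor runs as its own thread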
 
Example 3
Source File: BlockManagerTestUtil.java    From hadoop with Apache License 2.0
/**
 * @return a tuple of the replica state (number of racks, number of live
 * replicas, and whether the block is still in the needed-replications queue)
 * for the given block.
 */
public static int[] getReplicaInfo(final FSNamesystem namesystem, final Block b) {
  final BlockManager bm = namesystem.getBlockManager();
  namesystem.readLock();
  try {
    return new int[]{getNumberOfRacks(bm, b),
        bm.countNodes(b).liveReplicas(),
        bm.neededReplications.contains(b) ? 1 : 0};
  } finally {
    namesystem.readUnlock();
  }
}
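A hypothetical caller could unpack the returned tuple as shown below; the cluster and block variables are illustrative and assumed to be set up as in the MiniDFSCluster-based tests elsewhere on this page.

// Illustrative use of getReplicaInfo; "cluster" and "block" are assumptions.
int[] info = BlockManagerTestUtil.getReplicaInfo(cluster.getNamesystem(),
    block.getLocalBlock());
int numRacks = info[0];
int liveReplicas = info[1];
boolean needsReplication = (info[2] == 1);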
 
Example 4
Source File: TestNodeCount.java    From hadoop with Apache License 2.0
@Test
public void testNodeCount() throws Exception {
  // start a mini dfs cluster of 2 nodes
  final Configuration conf = new HdfsConfiguration();
  final MiniDFSCluster cluster = 
    new MiniDFSCluster.Builder(conf).numDataNodes(REPLICATION_FACTOR).build();
  try {
    final FSNamesystem namesystem = cluster.getNamesystem();
    final BlockManager bm = namesystem.getBlockManager();
    final HeartbeatManager hm = bm.getDatanodeManager().getHeartbeatManager();
    final FileSystem fs = cluster.getFileSystem();
    
    // populate the cluster with a one block file
    final Path FILE_PATH = new Path("/testfile");
    DFSTestUtil.createFile(fs, FILE_PATH, 1L, REPLICATION_FACTOR, 1L);
    DFSTestUtil.waitReplication(fs, FILE_PATH, REPLICATION_FACTOR);
    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, FILE_PATH);

    // keep a copy of all datanode descriptors
    final DatanodeDescriptor[] datanodes = hm.getDatanodes();
    
    // start two new nodes
    cluster.startDataNodes(conf, 2, true, null, null);
    cluster.waitActive();
    
    // bring down first datanode
    DatanodeDescriptor datanode = datanodes[0];
    DataNodeProperties dnprop = cluster.stopDataNode(datanode.getXferAddr());
    
    // make sure that NN detects that the datanode is down
    BlockManagerTestUtil.noticeDeadDatanode(
        cluster.getNameNode(), datanode.getXferAddr());
    
    // the block will be replicated
    DFSTestUtil.waitReplication(fs, FILE_PATH, REPLICATION_FACTOR);

    // restart the first datanode
    cluster.restartDataNode(dnprop);
    cluster.waitActive();
    
    // check if excessive replica is detected (transient)
    initializeTimeout(TIMEOUT);
    while (countNodes(block.getLocalBlock(), namesystem).excessReplicas() == 0) {
      checkTimeout("excess replicas not detected");
    }
    
    // find out a non-excess node
    DatanodeDescriptor nonExcessDN = null;
    for(DatanodeStorageInfo storage : bm.blocksMap.getStorages(block.getLocalBlock())) {
      final DatanodeDescriptor dn = storage.getDatanodeDescriptor();
      Collection<Block> blocks = bm.excessReplicateMap.get(dn.getDatanodeUuid());
      if (blocks == null || !blocks.contains(block.getLocalBlock()) ) {
        nonExcessDN = dn;
        break;
      }
    }
    assertTrue(nonExcessDN!=null);
    
    // bring down a non-excess datanode
    dnprop = cluster.stopDataNode(nonExcessDN.getXferAddr());
    // make sure that NN detects that the datanode is down
    BlockManagerTestUtil.noticeDeadDatanode(
        cluster.getNameNode(), nonExcessDN.getXferAddr());

    // The block should be replicated
    initializeTimeout(TIMEOUT);
    while (countNodes(block.getLocalBlock(), namesystem).liveReplicas() != REPLICATION_FACTOR) {
      checkTimeout("live replica count not correct", 1000);
    }

    // restart the first datanode
    cluster.restartDataNode(dnprop);
    cluster.waitActive();

    // check if excessive replica is detected (transient)
    initializeTimeout(TIMEOUT);
    while (countNodes(block.getLocalBlock(), namesystem).excessReplicas() != 2) {
      checkTimeout("excess replica count not equal to 2");
    }

  } finally {
    cluster.shutdown();
  }
}
 
Example 5
Source File: TestOverReplicatedBlocks.java    From hadoop with Apache License 2.0
/**
 * Test that processOverReplicatedBlock can handle corrupt replicas correctly.
 * It makes sure that corrupt replicas are not treated as valid ones, which
 * would otherwise cause the NN to delete valid replicas while keeping the
 * corrupt ones.
 */
@Test
public void testProcesOverReplicateBlock() throws Exception {
  Configuration conf = new HdfsConfiguration();
  conf.setLong(DFSConfigKeys.DFS_DATANODE_SCAN_PERIOD_HOURS_KEY, 100L);
  conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L);
  conf.set(
      DFSConfigKeys.DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY,
      Integer.toString(2));
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
  FileSystem fs = cluster.getFileSystem();

  try {
    final Path fileName = new Path("/foo1");
    DFSTestUtil.createFile(fs, fileName, 2, (short)3, 0L);
    DFSTestUtil.waitReplication(fs, fileName, (short)3);
    
    // corrupt the block on datanode 0
    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, fileName);
    assertTrue(cluster.corruptReplica(0, block));
    DataNodeProperties dnProps = cluster.stopDataNode(0);
    // remove block scanner log to trigger block scanning
    File scanCursor = new File(new File(MiniDFSCluster.getFinalizedDir(
        cluster.getInstanceStorageDir(0, 0),
        cluster.getNamesystem().getBlockPoolId()).getParent()).getParent(),
        "scanner.cursor");
    // wait up to one minute for the deletion to succeed
    for(int i = 0; !scanCursor.delete(); i++) {
      assertTrue("Could not delete " + scanCursor.getAbsolutePath() +
          " in one minute", i < 60);
      try {
        Thread.sleep(1000);
      } catch (InterruptedException ignored) {}
    }
    
    // restart the datanode so the corrupt replica will be detected
    cluster.restartDataNode(dnProps);
    DFSTestUtil.waitReplication(fs, fileName, (short)2);
    
    String blockPoolId = cluster.getNamesystem().getBlockPoolId();
    final DatanodeID corruptDataNode = 
      DataNodeTestUtils.getDNRegistrationForBP(
          cluster.getDataNodes().get(2), blockPoolId);
       
    final FSNamesystem namesystem = cluster.getNamesystem();
    final BlockManager bm = namesystem.getBlockManager();
    final HeartbeatManager hm = bm.getDatanodeManager().getHeartbeatManager();
    try {
      namesystem.writeLock();
      synchronized(hm) {
        // set live datanode's remaining space to be 0 
        // so they will be chosen to be deleted when over-replication occurs
        String corruptMachineName = corruptDataNode.getXferAddr();
        for (DatanodeDescriptor datanode : hm.getDatanodes()) {
          if (!corruptMachineName.equals(datanode.getXferAddr())) {
            datanode.getStorageInfos()[0].setUtilizationForTesting(100L, 100L, 0, 100L);
            datanode.updateHeartbeat(
                BlockManagerTestUtil.getStorageReportsForDatanode(datanode),
                0L, 0L, 0, 0, null);
          }
        }

        // decrease the replication factor to 1; 
        NameNodeAdapter.setReplication(namesystem, fileName.toString(), (short)1);

        // the corrupt replica won't be chosen as the excess one;
        // without HDFS-4910 the number of live replicas would be 0 and the block would be lost
        assertEquals(1, bm.countNodes(block.getLocalBlock()).liveReplicas());
      }
    } finally {
      namesystem.writeUnlock();
    }
    
  } finally {
    cluster.shutdown();
  }
}
 
Example 6
Source File: TestPendingReplication.java    From hadoop with Apache License 2.0
/**
 * Test that the BlockManager correctly removes the corresponding pending
 * replication records when a file is deleted.
 *
 * @throws Exception
 */
@Test
public void testPendingAndInvalidate() throws Exception {
  final Configuration CONF = new HdfsConfiguration();
  CONF.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 1024);
  CONF.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY,
      DFS_REPLICATION_INTERVAL);
  CONF.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_INTERVAL_KEY, 
      DFS_REPLICATION_INTERVAL);
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(CONF).numDataNodes(
      DATANODE_COUNT).build();
  cluster.waitActive();
  
  FSNamesystem namesystem = cluster.getNamesystem();
  BlockManager bm = namesystem.getBlockManager();
  DistributedFileSystem fs = cluster.getFileSystem();
  try {
    // 1. create a file
    Path filePath = new Path("/tmp.txt");
    DFSTestUtil.createFile(fs, filePath, 1024, (short) 3, 0L);
    
    // 2. disable the heartbeats
    for (DataNode dn : cluster.getDataNodes()) {
      DataNodeTestUtils.setHeartbeatsDisabledForTests(dn, true);
    }
    
    // 3. mark a couple of blocks as corrupt
    LocatedBlock block = NameNodeAdapter.getBlockLocations(
        cluster.getNameNode(), filePath.toString(), 0, 1).get(0);
    cluster.getNamesystem().writeLock();
    try {
      bm.findAndMarkBlockAsCorrupt(block.getBlock(), block.getLocations()[0],
          "STORAGE_ID", "TEST");
      bm.findAndMarkBlockAsCorrupt(block.getBlock(), block.getLocations()[1],
          "STORAGE_ID", "TEST");
    } finally {
      cluster.getNamesystem().writeUnlock();
    }
    BlockManagerTestUtil.computeAllPendingWork(bm);
    BlockManagerTestUtil.updateState(bm);
    assertEquals(bm.getPendingReplicationBlocksCount(), 1L);
    assertEquals(bm.pendingReplications.getNumReplicas(block.getBlock()
        .getLocalBlock()), 2);
    
    // 4. delete the file
    fs.delete(filePath, true);
    // retry at most 10 times, each time sleep for 1s. Note that 10s is much
    // less than the default pending record timeout (5~10min)
    int retries = 10; 
    long pendingNum = bm.getPendingReplicationBlocksCount();
    while (pendingNum != 0 && retries-- > 0) {
      Thread.sleep(1000);  // let NN do the deletion
      BlockManagerTestUtil.updateState(bm);
      pendingNum = bm.getPendingReplicationBlocksCount();
    }
    assertEquals(pendingNum, 0L);
  } finally {
    cluster.shutdown();
  }
}
 
Example 7
Source File: TestFileCreation.java    From hadoop with Apache License 2.0
/**
 * 1. Check that the blocks of the old file are cleaned up after the file is
 *    re-created with overwrite
 * 2. Restart the NN and check the file
 * 3. Save a new checkpoint, restart the NN, and check the file
 */
@Test(timeout = 120000)
public void testFileCreationWithOverwrite() throws Exception {
  Configuration conf = new Configuration();
  conf.setInt("dfs.blocksize", blockSize);
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).
      numDataNodes(3).build();
  DistributedFileSystem dfs = cluster.getFileSystem();
  try {
    dfs.mkdirs(new Path("/foo/dir"));
    String file = "/foo/dir/file";
    Path filePath = new Path(file);
    
    // Case 1: Create file with overwrite, check the blocks of old file
    // are cleaned after creating with overwrite
    NameNode nn = cluster.getNameNode();
    FSNamesystem fsn = NameNodeAdapter.getNamesystem(nn);
    BlockManager bm = fsn.getBlockManager();
    
    FSDataOutputStream out = dfs.create(filePath);
    byte[] oldData = AppendTestUtil.randomBytes(seed, fileSize);
    try {
      out.write(oldData);
    } finally {
      out.close();
    }
    
    LocatedBlocks oldBlocks = NameNodeAdapter.getBlockLocations(
        nn, file, 0, fileSize);
    assertBlocks(bm, oldBlocks, true);
    
    out = dfs.create(filePath, true);
    byte[] newData = AppendTestUtil.randomBytes(seed, fileSize);
    try {
      out.write(newData);
    } finally {
      out.close();
    }
    dfs.deleteOnExit(filePath);
    
    LocatedBlocks newBlocks = NameNodeAdapter.getBlockLocations(
        nn, file, 0, fileSize);
    assertBlocks(bm, newBlocks, true);
    assertBlocks(bm, oldBlocks, false);
    
    FSDataInputStream in = dfs.open(filePath);
    byte[] result = null;
    try {
      result = readAll(in);
    } finally {
      in.close();
    }
    Assert.assertArrayEquals(newData, result);
    
    // Case 2: Restart NN, check the file
    cluster.restartNameNode();
    nn = cluster.getNameNode();
    in = dfs.open(filePath);
    try {
      result = readAll(in);
    } finally {
      in.close();
    }
    Assert.assertArrayEquals(newData, result);
    
    // Case 3: Save new checkpoint and restart NN, check the file
    NameNodeAdapter.enterSafeMode(nn, false);
    NameNodeAdapter.saveNamespace(nn);
    cluster.restartNameNode();
    nn = cluster.getNameNode();
    
    in = dfs.open(filePath);
    try {
      result = readAll(in);
    } finally {
      in.close();
    }
    Assert.assertArrayEquals(newData, result);
  } finally {
    if (dfs != null) {
      dfs.close();
    }
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
 