Java Code Examples for org.apache.hadoop.hdfs.DFSTestUtil#getFirstBlock()

The following examples show how to use org.apache.hadoop.hdfs.DFSTestUtil#getFirstBlock(). Each example is taken from an open-source project; the source file and project it comes from are noted above the example.
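All of the examples follow the same pattern: write a small file, then call getFirstBlock() to get a handle on its first block for replication, corruption, or recovery checks. For orientation, here is a minimal sketch of how such a helper can be written against the Hadoop 2.x client API (an assumption inferred from the call sites below, not code copied from the project):

// Sketch only. Assumes org.apache.hadoop.hdfs.client.HdfsDataInputStream,
// which exposes the block the stream is currently reading.
public static ExtendedBlock getFirstBlock(FileSystem fs, Path path)
    throws IOException {
  HdfsDataInputStream in = (HdfsDataInputStream) fs.open(path);
  try {
    in.readByte(); // force the client to open the first block
    return in.getCurrentBlock();
  } finally {
    in.close();
  }
}

The oldest examples on this page (from hadoop-gpu and RDFS) use a pre-0.21 variant of the helper that returns a plain Block rather than an ExtendedBlock.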
Example 1
Source File: TestBlocksWithNotEnoughRacks.java    From hadoop with Apache License 2.0
@Test
public void testSufficientlySingleReplBlockUsesNewRack() throws Exception {
  Configuration conf = getConf();
  short REPLICATION_FACTOR = 1;
  final Path filePath = new Path("/testFile");

  String racks[] = {"/rack1", "/rack1", "/rack1", "/rack2"};
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .numDataNodes(racks.length).racks(racks).build();
  final FSNamesystem ns = cluster.getNameNode().getNamesystem();

  try {
    // Create a file with one block with a replication factor of 1
    final FileSystem fs = cluster.getFileSystem();
    DFSTestUtil.createFile(fs, filePath, 1L, REPLICATION_FACTOR, 1L);
    ExtendedBlock b = DFSTestUtil.getFirstBlock(fs, filePath);
    DFSTestUtil.waitForReplication(cluster, b, 1, REPLICATION_FACTOR, 0);

    REPLICATION_FACTOR = 2;
    NameNodeAdapter.setReplication(ns, "/testFile", REPLICATION_FACTOR);
    DFSTestUtil.waitForReplication(cluster, b, 2, REPLICATION_FACTOR, 0);
  } finally {
    cluster.shutdown();
  }
}
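The five-argument DFSTestUtil.waitForReplication used throughout these rack-policy tests blocks until the block reaches a given placement. Inferred from the call sites (an assumption, not a quote of the declaration), its parameters read as:

// Assumed shape of the helper, inferred from usage in these examples:
// wait until block b lives on `racks` distinct racks with `replicas` live
// replicas, while `neededReplicas` replicas are still outstanding.
public static void waitForReplication(MiniDFSCluster cluster, ExtendedBlock b,
    int racks, int replicas, int neededReplicas)

So the test above first waits for 1 rack and 1 replica, then, after raising the replication factor, for 2 racks and 2 replicas, i.e. the new replica must land on the new rack.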
 
Example 2
Source File: TestProcessCorruptBlocks.java    From big-c with Apache License 2.0
/**
 * The corrupt block has to be removed when the number of valid replicas
 * matches replication factor for the file. In this test, the above 
 * condition is achieved by increasing the number of good replicas by 
 * replicating on a new Datanode. 
 * The test strategy : 
 *   Bring up Cluster with 3 DataNodes
 *   Create a file  of replication factor 3
 *   Corrupt one replica of a block of the file 
 *   Verify that there are still 2 good replicas and 1 corrupt replica 
 *     (corrupt replica should not be removed since number of good replicas
 *      (2) is less  than replication factor (3)) 
 *   Start a new data node 
 *   Verify that a new replica is created and the corrupt replica is
 *   removed.
 * 
 */
@Test
public void testByAddingAnExtraDataNode() throws Exception {
  Configuration conf = new HdfsConfiguration();
  conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L);
  conf.set(DFSConfigKeys.DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY, Integer.toString(2));
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(4).build();
  FileSystem fs = cluster.getFileSystem();
  final FSNamesystem namesystem = cluster.getNamesystem();
  DataNodeProperties dnPropsFourth = cluster.stopDataNode(3);

  try {
    final Path fileName = new Path("/foo1");
    DFSTestUtil.createFile(fs, fileName, 2, (short) 3, 0L);
    DFSTestUtil.waitReplication(fs, fileName, (short) 3);

    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, fileName);
    corruptBlock(cluster, fs, fileName, 0, block);

    DFSTestUtil.waitReplication(fs, fileName, (short) 2);

    assertEquals(2, countReplicas(namesystem, block).liveReplicas());
    assertEquals(1, countReplicas(namesystem, block).corruptReplicas());

    cluster.restartDataNode(dnPropsFourth);

    DFSTestUtil.waitReplication(fs, fileName, (short) 3);

    assertEquals(3, countReplicas(namesystem, block).liveReplicas());
    assertEquals(0, countReplicas(namesystem, block).corruptReplicas());
  } finally {
    cluster.shutdown();
  }
}
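corruptBlock(...) and countReplicas(...) are private helpers of TestProcessCorruptBlocks that this page does not show. A rough sketch of countReplicas (an assumption based on how it is called, not the project's code) is a thin wrapper over the block manager:

// Sketch only: delegate replica counting to the BlockManager.
private static NumberReplicas countReplicas(final FSNamesystem namesystem,
    ExtendedBlock block) {
  return namesystem.getBlockManager().countNodes(block.getLocalBlock());
}

corruptBlock(cluster, fs, fileName, dnIndex, block) can be assumed to damage the on-disk replica on datanode dnIndex (for example via cluster.corruptReplica) and then force the corruption to be noticed, e.g. by restarting that datanode so the replica is re-scanned and reported to the namenode.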
 
Example 3
Source File: TestUnderReplicatedBlocks.java    From hadoop-gpu with Apache License 2.0
public void testSetrepIncWithUnderReplicatedBlocks() throws Exception {
  Configuration conf = new Configuration();
  final short REPLICATION_FACTOR = 2;
  final String FILE_NAME = "/testFile";
  final Path FILE_PATH = new Path(FILE_NAME);
  MiniDFSCluster cluster = new MiniDFSCluster(conf, REPLICATION_FACTOR+1, true, null);
  try {
    // create a file with one block with a replication factor of 2
    final FileSystem fs = cluster.getFileSystem();
    DFSTestUtil.createFile(fs, FILE_PATH, 1L, REPLICATION_FACTOR, 1L);
    DFSTestUtil.waitReplication(fs, FILE_PATH, REPLICATION_FACTOR);
    
    // remove one replica from the blocksMap so block becomes under-replicated
    // but the block does not get put into the under-replicated blocks queue
    FSNamesystem namesystem = cluster.getNameNode().namesystem;
    Block b = DFSTestUtil.getFirstBlock(fs, FILE_PATH);
    DatanodeDescriptor dn = namesystem.blocksMap.nodeIterator(b).next();
    namesystem.addToInvalidates(b, dn);
    namesystem.blocksMap.removeNode(b, dn);
    
    // increment this file's replication factor
    FsShell shell = new FsShell(conf);
    assertEquals(0, shell.run(new String[]{
        "-setrep", "-w", Integer.toString(1+REPLICATION_FACTOR), FILE_NAME}));
  } finally {
    cluster.shutdown();
  }
}
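Note that this hadoop-gpu example uses the pre-0.21 API: getFirstBlock() returns a plain Block, and the test reaches into namesystem.blocksMap directly. In the Hadoop 2.x examples on this page the helper returns an ExtendedBlock, and the equivalent lookup goes through the BlockManager, along these lines (mirroring the big-c TestUnderReplicatedBlocks example further down):

BlockManager bm = cluster.getNamesystem().getBlockManager();
ExtendedBlock b = DFSTestUtil.getFirstBlock(fs, FILE_PATH);
DatanodeDescriptor dn = bm.blocksMap.getStorages(b.getLocalBlock())
    .iterator().next().getDatanodeDescriptor();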
 
Example 4
Source File: TestBlocksWithNotEnoughRacks.java    From big-c with Apache License 2.0
@Test
public void testReduceReplFactorRespectsRackPolicy() throws Exception {
  Configuration conf = getConf();
  short REPLICATION_FACTOR = 3;
  final Path filePath = new Path("/testFile");
  String racks[] = {"/rack1", "/rack1", "/rack2", "/rack2"};
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .numDataNodes(racks.length).racks(racks).build();
  final FSNamesystem ns = cluster.getNameNode().getNamesystem();

  try {
    // Create a file with one block
    final FileSystem fs = cluster.getFileSystem();
    DFSTestUtil.createFile(fs, filePath, 1L, REPLICATION_FACTOR, 1L);
    ExtendedBlock b = DFSTestUtil.getFirstBlock(fs, filePath);
    DFSTestUtil.waitForReplication(cluster, b, 2, REPLICATION_FACTOR, 0);

    // Decrease the replication factor, make sure the deleted replica
    // was not the one that lived on the rack with only one replica,
    // ie we should still have 2 racks after reducing the repl factor.
    REPLICATION_FACTOR = 2;
    NameNodeAdapter.setReplication(ns, "/testFile", REPLICATION_FACTOR); 

    DFSTestUtil.waitForReplication(cluster, b, 2, REPLICATION_FACTOR, 0);
  } finally {
    cluster.shutdown();
  }
}
 
Example 5
Source File: TestBlocksWithNotEnoughRacks.java    From hadoop with Apache License 2.0
@Test
public void testSufficientlyReplBlocksUsesNewRack() throws Exception {
  Configuration conf = getConf();
  final short REPLICATION_FACTOR = 3;
  final Path filePath = new Path("/testFile");
  // All datanodes are on the same rack
  String racks[] = {"/rack1", "/rack1", "/rack1"};
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .numDataNodes(racks.length).racks(racks).build();

  try {
    // Create a file with one block with a replication factor of 3
    final FileSystem fs = cluster.getFileSystem();
    DFSTestUtil.createFile(fs, filePath, 1L, REPLICATION_FACTOR, 1L);
    ExtendedBlock b = DFSTestUtil.getFirstBlock(fs, filePath);
    DFSTestUtil.waitForReplication(cluster, b, 1, REPLICATION_FACTOR, 0);

    // Add a new datanode on a different rack
    String newRacks[] = {"/rack2"};
    cluster.startDataNodes(conf, 1, true, null, newRacks);
    cluster.waitActive();

    DFSTestUtil.waitForReplication(cluster, b, 2, REPLICATION_FACTOR, 0);
  } finally {
    cluster.shutdown();
  }
}
 
Example 6
Source File: TestBlocksWithNotEnoughRacks.java    From hadoop with Apache License 2.0
@Test
public void testNodeDecomissionRespectsRackPolicy() throws Exception {
  Configuration conf = getConf();
  short REPLICATION_FACTOR = 2;
  final Path filePath = new Path("/testFile");

  // Configure an excludes file
  FileSystem localFileSys = FileSystem.getLocal(conf);
  Path workingDir = localFileSys.getWorkingDirectory();
  Path dir = new Path(workingDir, "build/test/data/temp/decommission");
  Path excludeFile = new Path(dir, "exclude");
  Path includeFile = new Path(dir, "include");
  assertTrue(localFileSys.mkdirs(dir));
  DFSTestUtil.writeFile(localFileSys, excludeFile, "");
  DFSTestUtil.writeFile(localFileSys, includeFile, "");
  conf.set(DFSConfigKeys.DFS_HOSTS_EXCLUDE, excludeFile.toUri().getPath());
  conf.set(DFSConfigKeys.DFS_HOSTS, includeFile.toUri().getPath());

  // Four datanodes on two racks
  String racks[] = {"/rack1", "/rack1", "/rack2", "/rack2"};
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .numDataNodes(racks.length).racks(racks).build();
  final FSNamesystem ns = cluster.getNameNode().getNamesystem();

  try {
    // Create a file with one block
    final FileSystem fs = cluster.getFileSystem();
    DFSTestUtil.createFile(fs, filePath, 1L, REPLICATION_FACTOR, 1L);
    ExtendedBlock b = DFSTestUtil.getFirstBlock(fs, filePath);
    DFSTestUtil.waitForReplication(cluster, b, 2, REPLICATION_FACTOR, 0);

    // Decommission one of the hosts with the block, this should cause 
    // the block to get replicated to another host on the same rack,
    // otherwise the rack policy is violated.
    BlockLocation locs[] = fs.getFileBlockLocations(
        fs.getFileStatus(filePath), 0, Long.MAX_VALUE);
    String name = locs[0].getNames()[0];
    DFSTestUtil.writeFile(localFileSys, excludeFile, name);
    ns.getBlockManager().getDatanodeManager().refreshNodes(conf);
    DFSTestUtil.waitForDecommission(fs, name);

    // Check the block still has sufficient # replicas across racks
    DFSTestUtil.waitForReplication(cluster, b, 2, REPLICATION_FACTOR, 0);
  } finally {
    cluster.shutdown();
  }
}
 
Example 7
Source File: TestPipelinesFailover.java    From hadoop with Apache License 2.0
/**
 * Test the scenario where the NN fails over after issuing a block
 * synchronization request, but before it is committed. The
 * DN running the recovery should then fail to commit the synchronization
 * and a later retry will succeed.
 */
@Test(timeout=30000)
public void testFailoverRightBeforeCommitSynchronization() throws Exception {
  final Configuration conf = new Configuration();
  // Disable permissions so that another user can recover the lease.
  conf.setBoolean(DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY, false);
  conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
  
  FSDataOutputStream stm = null;
  final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .nnTopology(MiniDFSNNTopology.simpleHATopology())
    .numDataNodes(3)
    .build();
  try {
    cluster.waitActive();
    cluster.transitionToActive(0);
    Thread.sleep(500);

    LOG.info("Starting with NN 0 active");
    FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
    stm = fs.create(TEST_PATH);
    
    // write a half block
    AppendTestUtil.write(stm, 0, BLOCK_SIZE / 2);
    stm.hflush();
    
    // Look into the block manager on the active node for the block
    // under construction.
    
    NameNode nn0 = cluster.getNameNode(0);
    ExtendedBlock blk = DFSTestUtil.getFirstBlock(fs, TEST_PATH);
    DatanodeDescriptor expectedPrimary =
        DFSTestUtil.getExpectedPrimaryNode(nn0, blk);
    LOG.info("Expecting block recovery to be triggered on DN " +
        expectedPrimary);
    
    // Find the corresponding DN daemon, and spy on its connection to the
    // active.
    DataNode primaryDN = cluster.getDataNode(expectedPrimary.getIpcPort());
    DatanodeProtocolClientSideTranslatorPB nnSpy =
        DataNodeTestUtils.spyOnBposToNN(primaryDN, nn0);
    
    // Delay the commitBlockSynchronization call
    DelayAnswer delayer = new DelayAnswer(LOG);
    Mockito.doAnswer(delayer).when(nnSpy).commitBlockSynchronization(
        Mockito.eq(blk),
        Mockito.anyInt(), // new genstamp
        Mockito.anyLong(), // new length
        Mockito.eq(true), // close file
        Mockito.eq(false), // delete block
        (DatanodeID[]) Mockito.anyObject(), // new targets
        (String[]) Mockito.anyObject()); // new target storages

    DistributedFileSystem fsOtherUser = createFsAsOtherUser(cluster, conf);
    assertFalse(fsOtherUser.recoverLease(TEST_PATH));
    
    LOG.info("Waiting for commitBlockSynchronization call from primary");
    delayer.waitForCall();

    LOG.info("Failing over to NN 1");
    
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);
    
    // Let the commitBlockSynchronization call go through, and check that
    // it failed with the correct exception.
    delayer.proceed();
    delayer.waitForResult();
    Throwable t = delayer.getThrown();
    if (t == null) {
      fail("commitBlockSynchronization call did not fail on standby");
    }
    GenericTestUtils.assertExceptionContains(
        "Operation category WRITE is not supported",
        t);
    
    // Now, if we try again to recover the block, it should succeed on the new
    // active.
    loopRecoverLease(fsOtherUser, TEST_PATH);
    
    AppendTestUtil.check(fs, TEST_PATH, BLOCK_SIZE/2);
  } finally {
    IOUtils.closeStream(stm);
    cluster.shutdown();
  }
}
 
Example 8
Source File: TestBlocksWithNotEnoughRacks.java    From hadoop with Apache License 2.0
@Test
public void testReduceReplFactorDueToRejoinRespectsRackPolicy() 
    throws Exception {
  Configuration conf = getConf();
  short REPLICATION_FACTOR = 2;
  final Path filePath = new Path("/testFile");
  // Last datanode is on a different rack
  String racks[] = {"/rack1", "/rack1", "/rack2"};
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .numDataNodes(racks.length).racks(racks).build();
  final FSNamesystem ns = cluster.getNameNode().getNamesystem();
  final DatanodeManager dm = ns.getBlockManager().getDatanodeManager();

  try {
    // Create a file with one block
    final FileSystem fs = cluster.getFileSystem();
    DFSTestUtil.createFile(fs, filePath, 1L, REPLICATION_FACTOR, 1L);
    ExtendedBlock b = DFSTestUtil.getFirstBlock(fs, filePath);
    DFSTestUtil.waitForReplication(cluster, b, 2, REPLICATION_FACTOR, 0);

    // Make the last (cross rack) datanode look like it failed
    // to heartbeat by stopping it and calling removeDatanode.
    ArrayList<DataNode> datanodes = cluster.getDataNodes();
    assertEquals(3, datanodes.size());
    DataNode dataNode = datanodes.get(2);
    DatanodeID dnId = dataNode.getDatanodeId();
    cluster.stopDataNode(2);
    dm.removeDatanode(dnId);

    // The block gets re-replicated to another datanode so it has a 
    // sufficient # replicas, but not across racks, so there should
    // be 1 rack, and 1 needed replica (even though there are 2 hosts 
    // available and only 2 replicas required).
    DFSTestUtil.waitForReplication(cluster, b, 1, REPLICATION_FACTOR, 1);

    // Start the "failed" datanode, which has a replica so the block is
    // now over-replicated and therefore a replica should be removed but
    // not on the restarted datanode as that would violate the rack policy.
    String rack2[] = {"/rack2"};
    cluster.startDataNodes(conf, 1, true, null, rack2);
    cluster.waitActive();      
    
    // The block now has sufficient # replicas, across racks
    DFSTestUtil.waitForReplication(cluster, b, 2, REPLICATION_FACTOR, 0);
  } finally {
    cluster.shutdown();
  }
}
 
Example 9
Source File: TestBlocksWithNotEnoughRacks.java    From hadoop with Apache License 2.0
@Test
public void testReplDueToNodeFailRespectsRackPolicy() throws Exception {
  Configuration conf = getConf();
  short REPLICATION_FACTOR = 3;
  final Path filePath = new Path("/testFile");
  // Last datanode is on a different rack
  String racks[] = {"/rack1", "/rack1", "/rack1", "/rack2", "/rack2"};
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .numDataNodes(racks.length).racks(racks).build();
  final FSNamesystem ns = cluster.getNameNode().getNamesystem();
  final DatanodeManager dm = ns.getBlockManager().getDatanodeManager();

  try {
    // Create a file with one block with a replication factor of 3
    final FileSystem fs = cluster.getFileSystem();
    DFSTestUtil.createFile(fs, filePath, 1L, REPLICATION_FACTOR, 1L);
    ExtendedBlock b = DFSTestUtil.getFirstBlock(fs, filePath);
    DFSTestUtil.waitForReplication(cluster, b, 2, REPLICATION_FACTOR, 0);

    // Make the last datanode look like it failed to heartbeat by 
    // calling removeDatanode and stopping it.
    ArrayList<DataNode> datanodes = cluster.getDataNodes();
    int idx = datanodes.size() - 1;
    DataNode dataNode = datanodes.get(idx);
    DatanodeID dnId = dataNode.getDatanodeId();
    cluster.stopDataNode(idx);
    dm.removeDatanode(dnId);

    // The block should still have sufficient # replicas, across racks.
    // The last node may not have contained a replica, but if it did
    // it should have been replicated within the same rack.
    DFSTestUtil.waitForReplication(cluster, b, 2, REPLICATION_FACTOR, 0);
    
    // Fail the last datanode again, it's also on rack2 so there is
    // only 1 rack for all the replicas
    datanodes = cluster.getDataNodes();
    idx = datanodes.size() - 1;
    dataNode = datanodes.get(idx);
    dnId = dataNode.getDatanodeId();
    cluster.stopDataNode(idx);
    dm.removeDatanode(dnId);

    // Make sure we have enough live replicas even though we are
    // short one rack and therefore need one replica
    DFSTestUtil.waitForReplication(cluster, b, 1, REPLICATION_FACTOR, 1);
  } finally {
    cluster.shutdown();
  }
}
 
Example 10
Source File: TestBlocksWithNotEnoughRacks.java    From big-c with Apache License 2.0
@Test
public void testCorruptBlockRereplicatedAcrossRacks() throws Exception {
  Configuration conf = getConf();
  short REPLICATION_FACTOR = 2;
  int fileLen = 512;
  final Path filePath = new Path("/testFile");
  // Datanodes are spread across two racks
  String racks[] = {"/rack1", "/rack1", "/rack2", "/rack2"};
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .numDataNodes(racks.length).racks(racks).build();
  final FSNamesystem ns = cluster.getNameNode().getNamesystem();

  try {
    // Create a file with one block with a replication factor of 2
    final FileSystem fs = cluster.getFileSystem();
    
    DFSTestUtil.createFile(fs, filePath, fileLen, REPLICATION_FACTOR, 1L);
    final String fileContent = DFSTestUtil.readFile(fs, filePath);

    ExtendedBlock b = DFSTestUtil.getFirstBlock(fs, filePath);
    DFSTestUtil.waitForReplication(cluster, b, 2, REPLICATION_FACTOR, 0);

    // Corrupt a replica of the block
    int dnToCorrupt = DFSTestUtil.firstDnWithBlock(cluster, b);
    assertTrue(cluster.corruptReplica(dnToCorrupt, b));

    // Restart the datanode so blocks are re-scanned, and the corrupt
    // block is detected.
    cluster.restartDataNode(dnToCorrupt);

    // Wait for the namenode to notice the corrupt replica
    DFSTestUtil.waitCorruptReplicas(fs, ns, filePath, b, 1);

    // The rack policy is still respected
    DFSTestUtil.waitForReplication(cluster, b, 2, REPLICATION_FACTOR, 0);

    // Ensure all replicas are valid (the corrupt replica may not
    // have been cleaned up yet).
    for (int i = 0; i < racks.length; i++) {
      String blockContent = cluster.readBlockOnDataNode(i, b);
      if (blockContent != null && i != dnToCorrupt) {
        assertEquals("Corrupt replica", fileContent, blockContent);
      }
    }
  } finally {
    cluster.shutdown();
  }
}
 
Example 11
Source File: TestUnderReplicatedBlocks.java    From big-c with Apache License 2.0
/**
 * The test verifies the number of outstanding replication requests for a
 * given DN shouldn't exceed the limit set by configuration property
 * dfs.namenode.replication.max-streams-hard-limit.
 * The test does the following:
 * 1. Create a mini cluster with 2 DNs. Set large heartbeat interval so that
 *    replication requests won't be picked by any DN right away.
 * 2. Create a file with 10 blocks and replication factor 2. Thus each
 *    of the 2 DNs have one replica of each block.
 * 3. Add a DN to the cluster for later replication.
 * 4. Remove a DN that has data.
 * 5. Ask BlockManager to compute the replication work. This will assign
 *    replication requests to the only DN that has data.
 * 6. Make sure the number of pending replication requests of that DN doesn't
 *    exceed the limit.
 * @throws Exception
 */
@Test(timeout=60000) // 1 min timeout
public void testNumberOfBlocksToBeReplicated() throws Exception {
  Configuration conf = new HdfsConfiguration();

  conf.setLong(DFSConfigKeys.DFS_NAMENODE_MIN_BLOCK_SIZE_KEY, 0);
  conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 1);
  conf.setInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, 1);

  // Large value to make sure the pending replication request can stay in
  // DatanodeDescriptor.replicateBlocks before test timeout.
  conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 100);

  // Make sure BlockManager can pull all blocks from UnderReplicatedBlocks via
  // chooseUnderReplicatedBlocks at once.
  conf.setInt(
      DFSConfigKeys.DFS_NAMENODE_REPLICATION_WORK_MULTIPLIER_PER_ITERATION, 5);

  int NUM_OF_BLOCKS = 10;
  final short REP_FACTOR = 2;
  final String FILE_NAME = "/testFile";
  final Path FILE_PATH = new Path(FILE_NAME);
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(
          REP_FACTOR).build();
  try {
    // create a file with 10 blocks with a replication factor of 2
    final FileSystem fs = cluster.getFileSystem();
    DFSTestUtil.createFile(fs, FILE_PATH, NUM_OF_BLOCKS, REP_FACTOR, 1L);
    DFSTestUtil.waitReplication(fs, FILE_PATH, REP_FACTOR);

    cluster.startDataNodes(conf, 1, true, null, null, null, null);

    final BlockManager bm = cluster.getNamesystem().getBlockManager();
    ExtendedBlock b = DFSTestUtil.getFirstBlock(fs, FILE_PATH);
    Iterator<DatanodeStorageInfo> storageInfos =
        bm.blocksMap.getStorages(b.getLocalBlock())
        .iterator();
    DatanodeDescriptor firstDn = storageInfos.next().getDatanodeDescriptor();
    DatanodeDescriptor secondDn = storageInfos.next().getDatanodeDescriptor();

    bm.getDatanodeManager().removeDatanode(firstDn);

    assertEquals(NUM_OF_BLOCKS, bm.getUnderReplicatedNotMissingBlocks());
    bm.computeDatanodeWork();

    assertTrue("The number of blocks to be replicated should be less than "
        + "or equal to " + bm.replicationStreamsHardLimit,
        secondDn.getNumberOfBlocksToBeReplicated()
        <= bm.replicationStreamsHardLimit);
  } finally {
    cluster.shutdown();
  }
}
 
Example 12
Source File: TestProcessCorruptBlocks.java    From big-c with Apache License 2.0
/**
 * The corrupt block has to be removed when the number of valid replicas
 * matches replication factor for the file. The above condition should hold
 * true as long as there is one good replica. This test verifies that.
 * 
 * The test strategy : 
 *   Bring up Cluster with 2 DataNodes
 *   Create a file of replication factor 2 
 *   Corrupt one replica of a block of the file 
 *   Verify that there is one good replica and 1 corrupt replica 
 *     (corrupt replica should not be removed since number of good 
 *     replicas (1) is less than replication factor (2)).
 *   Set the replication factor to 1 
 *   Verify that the corrupt replica is removed. 
 *     (corrupt replica should  be removed since number of good
 *      replicas (1) is equal to replication factor (1))
 */
@Test(timeout=20000)
public void testWithReplicationFactorAsOne() throws Exception {
  Configuration conf = new HdfsConfiguration();
  conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L);
  conf.set(DFSConfigKeys.DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY, Integer.toString(2));
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build();
  FileSystem fs = cluster.getFileSystem();
  final FSNamesystem namesystem = cluster.getNamesystem();

  try {
    final Path fileName = new Path("/foo1");
    DFSTestUtil.createFile(fs, fileName, 2, (short) 2, 0L);
    DFSTestUtil.waitReplication(fs, fileName, (short) 2);

    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, fileName);
    corruptBlock(cluster, fs, fileName, 0, block);

    DFSTestUtil.waitReplication(fs, fileName, (short) 1);

    assertEquals(1, countReplicas(namesystem, block).liveReplicas());
    assertEquals(1, countReplicas(namesystem, block).corruptReplicas());

    namesystem.setReplication(fileName.toString(), (short) 1);

    // wait up to 10 seconds for all block reports to be processed.
    for (int i = 0; i < 10; i++) {
      try {
        Thread.sleep(1000);
      } catch (InterruptedException ignored) {
      }
      if (countReplicas(namesystem, block).corruptReplicas() == 0) {
        break;
      }
    }

    assertEquals(1, countReplicas(namesystem, block).liveReplicas());
    assertEquals(0, countReplicas(namesystem, block).corruptReplicas());

  } finally {
    cluster.shutdown();
  }
}
 
Example 13
Source File: TestFsck.java    From big-c with Apache License 2.0
/**
 * Test for blockIdCK with block corruption
 */
@Test
public void testBlockIdCKCorruption() throws Exception {
  short NUM_DN = 1;
  final long blockSize = 512;
  Random random = new Random();
  DFSClient dfsClient;
  LocatedBlocks blocks;
  ExtendedBlock block;
  short repFactor = 1;
  String [] racks = {"/rack1"};
  String [] hosts = {"host1"};

  Configuration conf = new Configuration();
  conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000);
  // Set short retry timeouts so this test runs faster
  conf.setInt(DFSConfigKeys.DFS_CLIENT_RETRY_WINDOW_BASE, 10);
  conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
  conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, 1);

  MiniDFSCluster cluster = null;
  DistributedFileSystem dfs = null;
  try {
    cluster =
        new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DN).hosts(hosts)
            .racks(racks).build();

    assertNotNull("Failed Cluster Creation", cluster);
    cluster.waitClusterUp();
    dfs = cluster.getFileSystem();
    assertNotNull("Failed to get FileSystem", dfs);

    DFSTestUtil util = new DFSTestUtil.Builder().
      setName(getClass().getSimpleName()).setNumFiles(1).build();
    //create files
    final String pathString = "/testfile";
    final Path path = new Path(pathString);
    util.createFile(dfs, path, 1024, repFactor, 1000L);
    util.waitReplication(dfs, path, repFactor);
    StringBuilder sb = new StringBuilder();
    for (LocatedBlock lb: util.getAllBlocks(dfs, path)){
      sb.append(lb.getBlock().getLocalBlock().getBlockName()+" ");
    }
    String[] bIds = sb.toString().split(" ");

    //make sure block is healthy before we corrupt it
    String outStr = runFsck(conf, 0, true, "/", "-blockId", bIds[0]);
    System.out.println(outStr);
    assertTrue(outStr.contains(NamenodeFsck.HEALTHY_STATUS));

    // corrupt replicas
    block = DFSTestUtil.getFirstBlock(dfs, path);
    File blockFile = cluster.getBlockFile(0, block);
    if (blockFile != null && blockFile.exists()) {
      RandomAccessFile raFile = new RandomAccessFile(blockFile, "rw");
      FileChannel channel = raFile.getChannel();
      String badString = "BADBAD";
      int rand = random.nextInt((int) channel.size()/2);
      raFile.seek(rand);
      raFile.write(badString.getBytes());
      raFile.close();
    }

    util.waitCorruptReplicas(dfs, cluster.getNamesystem(), path, block, 1);

    outStr = runFsck(conf, 1, false, "/", "-blockId", block.getBlockName());
    System.out.println(outStr);
    assertTrue(outStr.contains(NamenodeFsck.CORRUPT_STATUS));
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
 
Example 14
Source File: TestBlocksWithNotEnoughRacks.java    From hadoop with Apache License 2.0
@Test
public void testNodeDecomissionWithOverreplicationRespectsRackPolicy() 
    throws Exception {
  Configuration conf = getConf();
  short REPLICATION_FACTOR = 5;
  final Path filePath = new Path("/testFile");

  // Configure an excludes file
  FileSystem localFileSys = FileSystem.getLocal(conf);
  Path workingDir = localFileSys.getWorkingDirectory();
  Path dir = new Path(workingDir, "build/test/data/temp/decommission");
  Path excludeFile = new Path(dir, "exclude");
  Path includeFile = new Path(dir, "include");
  assertTrue(localFileSys.mkdirs(dir));
  DFSTestUtil.writeFile(localFileSys, excludeFile, "");
  DFSTestUtil.writeFile(localFileSys, includeFile, "");
  conf.set(DFSConfigKeys.DFS_HOSTS, includeFile.toUri().getPath());
  conf.set(DFSConfigKeys.DFS_HOSTS_EXCLUDE, excludeFile.toUri().getPath());

  // All hosts are on two racks, only one host on /rack2
  String racks[] = {"/rack1", "/rack2", "/rack1", "/rack1", "/rack1"};
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .numDataNodes(racks.length).racks(racks).build();
  final FSNamesystem ns = cluster.getNameNode().getNamesystem();

  try {
    final FileSystem fs = cluster.getFileSystem();
    DFSTestUtil.createFile(fs, filePath, 1L, REPLICATION_FACTOR, 1L);
    ExtendedBlock b = DFSTestUtil.getFirstBlock(fs, filePath);
    DFSTestUtil.waitForReplication(cluster, b, 2, REPLICATION_FACTOR, 0);

    // Lower the replication factor so the blocks are over replicated
    REPLICATION_FACTOR = 2;
    fs.setReplication(filePath, REPLICATION_FACTOR);

    // Decommission one of the hosts with the block that is not on
    // the lone host on rack2 (if we decommission that host it would
    // be impossible to respect the rack policy).
    BlockLocation locs[] = fs.getFileBlockLocations(
        fs.getFileStatus(filePath), 0, Long.MAX_VALUE);
    for (String top : locs[0].getTopologyPaths()) {
      if (!top.startsWith("/rack2")) {
        String name = top.substring("/rack1".length()+1);
        DFSTestUtil.writeFile(localFileSys, excludeFile, name);
        ns.getBlockManager().getDatanodeManager().refreshNodes(conf);
        DFSTestUtil.waitForDecommission(fs, name);
        break;
      }
    }

    // Check the block still has sufficient # replicas across racks,
    // ie we didn't remove the replica on the host on /rack1.
    DFSTestUtil.waitForReplication(cluster, b, 2, REPLICATION_FACTOR, 0);
  } finally {
    cluster.shutdown();
  }
}
 
Example 15
Source File: TestUnderReplicatedBlocks.java    From RDFS with Apache License 2.0
public void testUnderReplicationWithDecommissionDataNode() throws Exception {
  final Configuration conf = new Configuration();
  final short REPLICATION_FACTOR = (short)1;
  File f = new File(HOST_FILE_PATH);
  if (f.exists()) {
    f.delete();
  }
  conf.set("dfs.hosts.exclude", HOST_FILE_PATH);
  LOG.info("Start the cluster");
  final MiniDFSCluster cluster = 
    new MiniDFSCluster(conf, REPLICATION_FACTOR, true, null);
  try {
    final FSNamesystem namesystem = cluster.getNameNode().namesystem;
    final FileSystem fs = cluster.getFileSystem();
    DatanodeDescriptor[] datanodes = (DatanodeDescriptor[])
          namesystem.heartbeats.toArray(
              new DatanodeDescriptor[REPLICATION_FACTOR]);
    assertEquals(1, datanodes.length);
    // populate the cluster with a one block file
    final Path FILE_PATH = new Path("/testfile2");
    DFSTestUtil.createFile(fs, FILE_PATH, 1L, REPLICATION_FACTOR, 1L);
    DFSTestUtil.waitReplication(fs, FILE_PATH, REPLICATION_FACTOR);
    Block block = DFSTestUtil.getFirstBlock(fs, FILE_PATH);

    // shutdown the datanode
    DataNodeProperties dnprop = shutdownDataNode(cluster, datanodes[0]);
    assertEquals(1, namesystem.getMissingBlocksCount()); // one missing block
    assertEquals(0, namesystem.getNonCorruptUnderReplicatedBlocks());

    // Make the only datanode to be decommissioned
    LOG.info("Decommission the datanode " + dnprop);
    addToExcludeFile(namesystem.getConf(), datanodes);
    namesystem.refreshNodes(namesystem.getConf());      
    
    // bring up the datanode
    cluster.restartDataNode(dnprop);

    // Wait for block report
    LOG.info("wait for its block report to come in");
    NumberReplicas num;
    long startTime = System.currentTimeMillis();
    do {
      namesystem.readLock();
      try {
        num = namesystem.countNodes(block);
      } finally {
        namesystem.readUnlock();
      }
      Thread.sleep(1000);
      LOG.info("live: " + num.liveReplicas()
          + " Decom: " + num.decommissionedReplicas());
    } while (num.decommissionedReplicas() != 1 &&
        System.currentTimeMillis() - startTime < 30000);
    assertEquals("Decommissioning Replicas doesn't reach 1", 
        1, num.decommissionedReplicas());
    assertEquals(1, namesystem.getNonCorruptUnderReplicatedBlocks());
    assertEquals(0, namesystem.getMissingBlocksCount());
  } finally {
    cluster.shutdown();
  }
}
 
Example 16
Source File: TestPendingCorruptDnMessages.java    From hadoop with Apache License 2.0
@Test
public void testChangedStorageId() throws IOException, URISyntaxException,
    InterruptedException {
  HdfsConfiguration conf = new HdfsConfiguration();
  conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .numDataNodes(1)
      .nnTopology(MiniDFSNNTopology.simpleHATopology())
      .build();
  
  try {
    cluster.transitionToActive(0);
    
    FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
    OutputStream out = fs.create(filePath);
    out.write("foo bar baz".getBytes());
    out.close();
    
    HATestUtil.waitForStandbyToCatchUp(cluster.getNameNode(0),
        cluster.getNameNode(1));
    
    // Change the gen stamp of the block on datanode to go back in time (gen
    // stamps start at 1000)
    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, filePath);
    assertTrue(cluster.changeGenStampOfBlock(0, block, 900));
    
    // Stop the DN so the replica with the changed gen stamp will be reported
    // when this DN starts up.
    DataNodeProperties dnProps = cluster.stopDataNode(0);
    
    // Restart the namenode so that when the DN comes up it will see an initial
    // block report.
    cluster.restartNameNode(1, false);
    assertTrue(cluster.restartDataNode(dnProps, true));
    
    // Wait until the standby NN queues up the corrupt block in the pending DN
    // message queue.
    while (cluster.getNamesystem(1).getBlockManager()
        .getPendingDataNodeMessageCount() < 1) {
      ThreadUtil.sleepAtLeastIgnoreInterrupts(1000);
    }
    
    assertEquals(1, cluster.getNamesystem(1).getBlockManager()
        .getPendingDataNodeMessageCount());
    String oldStorageId = getRegisteredDatanodeUid(cluster, 1);
    
    // Reformat/restart the DN.
    assertTrue(wipeAndRestartDn(cluster, 0));
    
    // Give the DN time to start up and register, which will cause the
    // DatanodeManager to dissociate the old storage ID from the DN xfer addr.
    String newStorageId = "";
    do {
      ThreadUtil.sleepAtLeastIgnoreInterrupts(1000);
      newStorageId = getRegisteredDatanodeUid(cluster, 1);
      System.out.println("====> oldStorageId: " + oldStorageId +
          " newStorageId: " + newStorageId);
    } while (newStorageId.equals(oldStorageId));
    
    assertEquals(0, cluster.getNamesystem(1).getBlockManager()
        .getPendingDataNodeMessageCount());
    
    // Now try to fail over.
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);
  } finally {
    cluster.shutdown();
  }
}
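getRegisteredDatanodeUid and wipeAndRestartDn are private helpers of TestPendingCorruptDnMessages that are not shown here. A hypothetical sketch of the first (the method and report-type names are assumptions inferred from the call sites):

// Hypothetical: return the UUID of the single datanode currently
// registered with namenode nnIndex.
private static String getRegisteredDatanodeUid(MiniDFSCluster cluster,
    int nnIndex) {
  List<DatanodeDescriptor> dns = cluster.getNamesystem(nnIndex)
      .getBlockManager().getDatanodeManager()
      .getDatanodeListForReport(HdfsConstants.DatanodeReportType.ALL);
  return dns.get(0).getDatanodeUuid();
}

wipeAndRestartDn presumably reformats the datanode's storage directories before restarting it, so the node re-registers with a fresh storage ID.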