Java Code Examples for org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties

The following examples show how to use org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties. These examples are extracted from open source projects.
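Most of the examples below follow the same pattern: MiniDFSCluster.stopDataNode() returns a DataNodeProperties handle that captures the stopped datanode and its startup configuration, and MiniDFSCluster.restartDataNode() uses that handle to bring the same datanode back, optionally keeping its transfer port. As a minimal sketch of that pattern (not taken from any of the projects below; it assumes only the MiniDFSCluster builder and restart APIs that the examples themselves use, and the class name is illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.MiniDFSCluster.DataNodeProperties;

public class DataNodeRestartSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new HdfsConfiguration();
    MiniDFSCluster cluster =
        new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
    try {
      cluster.waitActive();
      // Stop the first datanode; the returned DataNodeProperties keeps the
      // datanode's configuration and startup arguments so it can be revived.
      DataNodeProperties dnProps = cluster.stopDataNode(0);
      // ... exercise the cluster while the datanode is down ...
      // Restart the same datanode; the boolean asks the cluster to keep the
      // datanode's original transfer port.
      cluster.restartDataNode(dnProps, true);
      cluster.waitActive();
    } finally {
      cluster.shutdown();
    }
  }
}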
Example 1
Source Project: hbase   Source File: TestAsyncLogRolling.java    License: Apache License 2.0
@Test
public void testLogRollOnDatanodeDeath() throws IOException, InterruptedException {
  dfsCluster.startDataNodes(TEST_UTIL.getConfiguration(), 3, true, null, null);
  tableName = getName();
  Table table = createTestTable(tableName);
  TEST_UTIL.waitUntilAllRegionsAssigned(table.getName());
  doPut(table, 1);
  server = TEST_UTIL.getRSForFirstRegionInTable(table.getName());
  RegionInfo hri = server.getRegions(table.getName()).get(0).getRegionInfo();
  AsyncFSWAL wal = (AsyncFSWAL) server.getWAL(hri);
  int numRolledLogFiles = AsyncFSWALProvider.getNumRolledLogFiles(wal);
  DatanodeInfo[] dnInfos = wal.getPipeline();
  DataNodeProperties dnProp = TEST_UTIL.getDFSCluster().stopDataNode(dnInfos[0].getName());
  TEST_UTIL.getDFSCluster().restartDataNode(dnProp);
  doPut(table, 2);
  assertEquals(numRolledLogFiles + 1, AsyncFSWALProvider.getNumRolledLogFiles(wal));
}
 
Example 2
Source Project: hbase   Source File: TestFanOutOneBlockAsyncDFSOutput.java    License: Apache License 2.0
@Test
public void testConnectToDatanodeFailed()
  throws IOException, ClassNotFoundException, NoSuchMethodException, IllegalAccessException,
  InvocationTargetException, InterruptedException, NoSuchFieldException {
  Field xceiverServerDaemonField = DataNode.class.getDeclaredField("dataXceiverServer");
  xceiverServerDaemonField.setAccessible(true);
  Class<?> xceiverServerClass =
    Class.forName("org.apache.hadoop.hdfs.server.datanode.DataXceiverServer");
  Method numPeersMethod = xceiverServerClass.getDeclaredMethod("getNumPeers");
  numPeersMethod.setAccessible(true);
  // make one datanode broken
  DataNodeProperties dnProp = CLUSTER.stopDataNode(0);
  Path f = new Path("/test");
  EventLoop eventLoop = EVENT_LOOP_GROUP.next();
  try (FanOutOneBlockAsyncDFSOutput output = FanOutOneBlockAsyncDFSOutputHelper.createOutput(FS,
    f, true, false, (short) 3, FS.getDefaultBlockSize(), eventLoop, CHANNEL_CLASS)) {
    // should exclude the dead dn when retry so here we only have 2 DNs in pipeline
    assertEquals(2, output.getPipeline().length);
  } finally {
    CLUSTER.restartDataNode(dnProp);
  }
}
 
Example 3
Source Project: hadoop   Source File: TestRollingUpgrade.java    License: Apache License 2.0
private static void rollbackRollingUpgrade(Path foo, Path bar,
    Path file, byte[] data,
    MiniDFSCluster cluster) throws IOException {
  final DataNodeProperties dnprop = cluster.stopDataNode(0);
  cluster.restartNameNode("-rollingUpgrade", "rollback");
  cluster.restartDataNode(dnprop, true);

  final DistributedFileSystem dfs = cluster.getFileSystem();
  Assert.assertTrue(dfs.exists(foo));
  Assert.assertFalse(dfs.exists(bar));
  AppendTestUtil.checkFullFile(dfs, file, data.length, data);
}
 
Example 4
Source Project: hadoop   Source File: TestPendingCorruptDnMessages.java    License: Apache License 2.0
private static boolean wipeAndRestartDn(MiniDFSCluster cluster, int dnIndex)
    throws IOException {
  // stop the DN, reformat it, then start it again with the same xfer port.
  DataNodeProperties dnProps = cluster.stopDataNode(dnIndex);
  cluster.formatDataNodeDirs();
  return cluster.restartDataNode(dnProps, true);
}
 
Example 5
Source Project: hadoop   Source File: TestProcessCorruptBlocks.java    License: Apache License 2.0
/**
 * The corrupt block has to be removed when the number of valid replicas
 * matches the replication factor for the file. In this test, that
 * condition is achieved by increasing the number of good replicas by
 * replicating on a new datanode.
 * The test strategy:
 *   Bring up a cluster with 3 DataNodes
 *   Create a file with replication factor 3
 *   Corrupt one replica of a block of the file
 *   Verify that there are still 2 good replicas and 1 corrupt replica
 *     (the corrupt replica should not be removed since the number of good
 *      replicas (2) is less than the replication factor (3))
 *   Start a new datanode
 *   Verify that a new replica is created and the corrupt replica is
 *   removed.
 */
@Test
public void testByAddingAnExtraDataNode() throws Exception {
  Configuration conf = new HdfsConfiguration();
  conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L);
  conf.set(DFSConfigKeys.DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY, Integer.toString(2));
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(4).build();
  FileSystem fs = cluster.getFileSystem();
  final FSNamesystem namesystem = cluster.getNamesystem();
  DataNodeProperties dnPropsFourth = cluster.stopDataNode(3);

  try {
    final Path fileName = new Path("/foo1");
    DFSTestUtil.createFile(fs, fileName, 2, (short) 3, 0L);
    DFSTestUtil.waitReplication(fs, fileName, (short) 3);

    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, fileName);
    corruptBlock(cluster, fs, fileName, 0, block);

    DFSTestUtil.waitReplication(fs, fileName, (short) 2);

    assertEquals(2, countReplicas(namesystem, block).liveReplicas());
    assertEquals(1, countReplicas(namesystem, block).corruptReplicas());

    cluster.restartDataNode(dnPropsFourth);

    DFSTestUtil.waitReplication(fs, fileName, (short) 3);

    assertEquals(3, countReplicas(namesystem, block).liveReplicas());
    assertEquals(0, countReplicas(namesystem, block).corruptReplicas());
  } finally {
    cluster.shutdown();
  }
}
 
Example 6
Source Project: hadoop   Source File: TestProcessCorruptBlocks.java    License: Apache License 2.0
private void corruptBlock(MiniDFSCluster cluster, FileSystem fs, final Path fileName,
    int dnIndex, ExtendedBlock block) throws IOException {
  // Corrupt the block on datanode dnIndex. The indexes change once the
  // nodes are restarted, but the data directory will not change.
  assertTrue(cluster.corruptReplica(dnIndex, block));

  DataNodeProperties dnProps = cluster.stopDataNode(0);

  // Each datanode has multiple data dirs, check each
  for (int dirIndex = 0; dirIndex < 2; dirIndex++) {
    final String bpid = cluster.getNamesystem().getBlockPoolId();
    File storageDir = cluster.getStorageDir(dnIndex, dirIndex);
    File dataDir = MiniDFSCluster.getFinalizedDir(storageDir, bpid);
    File scanLogFile = new File(dataDir, "dncp_block_verification.log.curr");
    if (scanLogFile.exists()) {
      // wait up to one minute for the deletion to succeed
      for (int i = 0; !scanLogFile.delete(); i++) {
        assertTrue("Could not delete log file in one minute", i < 60);
        try {
          Thread.sleep(1000);
        } catch (InterruptedException ignored) {
        }
      }
    }
  }

  // restart the datanode so the corrupt replica will be detected
  cluster.restartDataNode(dnProps);
}
 
Example 7
Source Project: hadoop   Source File: TestDecommissioningStatus.java    License: Apache License 2.0
/**
 * Verify the support for decommissioning a datanode that is already dead.
 * Under this scenario the datanode should immediately be marked as
 * DECOMMISSIONED.
 */
@Test(timeout=120000)
public void testDecommissionDeadDN() throws Exception {
  Logger log = Logger.getLogger(DecommissionManager.class);
  log.setLevel(Level.DEBUG);
  DatanodeID dnID = cluster.getDataNodes().get(0).getDatanodeId();
  String dnName = dnID.getXferAddr();
  DataNodeProperties stoppedDN = cluster.stopDataNode(0);
  DFSTestUtil.waitForDatanodeState(cluster, dnID.getDatanodeUuid(),
      false, 30000);
  FSNamesystem fsn = cluster.getNamesystem();
  final DatanodeManager dm = fsn.getBlockManager().getDatanodeManager();
  DatanodeDescriptor dnDescriptor = dm.getDatanode(dnID);
  decommissionNode(fsn, localFileSys, dnName);
  dm.refreshNodes(conf);
  BlockManagerTestUtil.recheckDecommissionState(dm);
  assertTrue(dnDescriptor.isDecommissioned());

  // Add the node back
  cluster.restartDataNode(stoppedDN, true);
  cluster.waitActive();

  // Call refreshNodes on FSNamesystem with empty exclude file to remove the
  // datanode from decommissioning list and make it available again.
  writeConfigFile(localFileSys, excludeFile, null);
  dm.refreshNodes(conf);
}
 
Example 8
Source Project: hadoop   Source File: TestDataTransferKeepalive.java    License: Apache License 2.0
/**
 * Test for the case where the client begins to read a long block, but doesn't
 * read bytes off the stream quickly. The datanode should time out sending the
 * chunks and the transceiver should die, even if it has a long keepalive.
 */
@Test(timeout=300000)
public void testSlowReader() throws Exception {
  // Set a client socket cache expiry time much longer than 
  // the datanode-side expiration time.
  final long CLIENT_EXPIRY_MS = 600000L;
  Configuration clientConf = new Configuration(conf);
  clientConf.setLong(DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_KEY, CLIENT_EXPIRY_MS);
  clientConf.set(DFS_CLIENT_CONTEXT, "testSlowReader");
  DistributedFileSystem fs =
      (DistributedFileSystem)FileSystem.get(cluster.getURI(),
          clientConf);
  // Restart the DN with a shorter write timeout.
  DataNodeProperties props = cluster.stopDataNode(0);
  props.conf.setInt(DFS_DATANODE_SOCKET_WRITE_TIMEOUT_KEY,
      WRITE_TIMEOUT);
  props.conf.setInt(DFS_DATANODE_SOCKET_REUSE_KEEPALIVE_KEY,
      120000);
  assertTrue(cluster.restartDataNode(props, true));
  dn = cluster.getDataNodes().get(0);
  // Wait for heartbeats to avoid a startup race where we
  // try to write the block while the DN is still starting.
  cluster.triggerHeartbeats();
  
  DFSTestUtil.createFile(fs, TEST_FILE, 1024*1024*8L, (short)1, 0L);
  FSDataInputStream stm = fs.open(TEST_FILE);
  stm.read();
  assertXceiverCount(1);

  GenericTestUtils.waitFor(new Supplier<Boolean>() {
    public Boolean get() {
      // DN should time out in sendChunks, and this should force
      // the xceiver to exit.
      return getXceiverCountWithoutServer() == 0;
    }
  }, 500, 50000);

  IOUtils.closeStream(stm);
}
 
Example 9
Source Project: big-c   Source File: TestRollingUpgrade.java    License: Apache License 2.0
private static void rollbackRollingUpgrade(Path foo, Path bar,
    Path file, byte[] data,
    MiniDFSCluster cluster) throws IOException {
  final DataNodeProperties dnprop = cluster.stopDataNode(0);
  cluster.restartNameNode("-rollingUpgrade", "rollback");
  cluster.restartDataNode(dnprop, true);

  final DistributedFileSystem dfs = cluster.getFileSystem();
  Assert.assertTrue(dfs.exists(foo));
  Assert.assertFalse(dfs.exists(bar));
  AppendTestUtil.checkFullFile(dfs, file, data.length, data);
}
 
Example 10
Source Project: big-c   Source File: TestPendingCorruptDnMessages.java    License: Apache License 2.0
private static boolean wipeAndRestartDn(MiniDFSCluster cluster, int dnIndex)
    throws IOException {
  // stop the DN, reformat it, then start it again with the same xfer port.
  DataNodeProperties dnProps = cluster.stopDataNode(dnIndex);
  cluster.formatDataNodeDirs();
  return cluster.restartDataNode(dnProps, true);
}
 
Example 11
Source Project: big-c   Source File: TestProcessCorruptBlocks.java    License: Apache License 2.0
/**
 * The corrupt block has to be removed when the number of valid replicas
 * matches the replication factor for the file. In this test, that
 * condition is achieved by increasing the number of good replicas by
 * replicating on a new datanode.
 * The test strategy:
 *   Bring up a cluster with 3 DataNodes
 *   Create a file with replication factor 3
 *   Corrupt one replica of a block of the file
 *   Verify that there are still 2 good replicas and 1 corrupt replica
 *     (the corrupt replica should not be removed since the number of good
 *      replicas (2) is less than the replication factor (3))
 *   Start a new datanode
 *   Verify that a new replica is created and the corrupt replica is
 *   removed.
 */
@Test
public void testByAddingAnExtraDataNode() throws Exception {
  Configuration conf = new HdfsConfiguration();
  conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L);
  conf.set(DFSConfigKeys.DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY, Integer.toString(2));
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(4).build();
  FileSystem fs = cluster.getFileSystem();
  final FSNamesystem namesystem = cluster.getNamesystem();
  DataNodeProperties dnPropsFourth = cluster.stopDataNode(3);

  try {
    final Path fileName = new Path("/foo1");
    DFSTestUtil.createFile(fs, fileName, 2, (short) 3, 0L);
    DFSTestUtil.waitReplication(fs, fileName, (short) 3);

    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, fileName);
    corruptBlock(cluster, fs, fileName, 0, block);

    DFSTestUtil.waitReplication(fs, fileName, (short) 2);

    assertEquals(2, countReplicas(namesystem, block).liveReplicas());
    assertEquals(1, countReplicas(namesystem, block).corruptReplicas());

    cluster.restartDataNode(dnPropsFourth);

    DFSTestUtil.waitReplication(fs, fileName, (short) 3);

    assertEquals(3, countReplicas(namesystem, block).liveReplicas());
    assertEquals(0, countReplicas(namesystem, block).corruptReplicas());
  } finally {
    cluster.shutdown();
  }
}
 
Example 12
Source Project: big-c   Source File: TestProcessCorruptBlocks.java    License: Apache License 2.0
private void corruptBlock(MiniDFSCluster cluster, FileSystem fs, final Path fileName,
    int dnIndex, ExtendedBlock block) throws IOException {
  // Corrupt the block on datanode dnIndex. The indexes change once the
  // nodes are restarted, but the data directory will not change.
  assertTrue(cluster.corruptReplica(dnIndex, block));

  DataNodeProperties dnProps = cluster.stopDataNode(0);

  // Each datanode has multiple data dirs, check each
  for (int dirIndex = 0; dirIndex < 2; dirIndex++) {
    final String bpid = cluster.getNamesystem().getBlockPoolId();
    File storageDir = cluster.getStorageDir(dnIndex, dirIndex);
    File dataDir = MiniDFSCluster.getFinalizedDir(storageDir, bpid);
    File scanLogFile = new File(dataDir, "dncp_block_verification.log.curr");
    if (scanLogFile.exists()) {
      // wait up to one minute for the deletion to succeed
      for (int i = 0; !scanLogFile.delete(); i++) {
        assertTrue("Could not delete log file in one minute", i < 60);
        try {
          Thread.sleep(1000);
        } catch (InterruptedException ignored) {
        }
      }
    }
  }

  // restart the datanode so the corrupt replica will be detected
  cluster.restartDataNode(dnProps);
}
 
Example 13
Source Project: big-c   Source File: TestDecommissioningStatus.java    License: Apache License 2.0
/**
 * Verify the support for decommissioning a datanode that is already dead.
 * Under this scenario the datanode should immediately be marked as
 * DECOMMISSIONED.
 */
@Test(timeout=120000)
public void testDecommissionDeadDN() throws Exception {
  Logger log = Logger.getLogger(DecommissionManager.class);
  log.setLevel(Level.DEBUG);
  DatanodeID dnID = cluster.getDataNodes().get(0).getDatanodeId();
  String dnName = dnID.getXferAddr();
  DataNodeProperties stoppedDN = cluster.stopDataNode(0);
  DFSTestUtil.waitForDatanodeState(cluster, dnID.getDatanodeUuid(),
      false, 30000);
  FSNamesystem fsn = cluster.getNamesystem();
  final DatanodeManager dm = fsn.getBlockManager().getDatanodeManager();
  DatanodeDescriptor dnDescriptor = dm.getDatanode(dnID);
  decommissionNode(fsn, localFileSys, dnName);
  dm.refreshNodes(conf);
  BlockManagerTestUtil.recheckDecommissionState(dm);
  assertTrue(dnDescriptor.isDecommissioned());

  // Add the node back
  cluster.restartDataNode(stoppedDN, true);
  cluster.waitActive();

  // Call refreshNodes on FSNamesystem with empty exclude file to remove the
  // datanode from decommissioning list and make it available again.
  writeConfigFile(localFileSys, excludeFile, null);
  dm.refreshNodes(conf);
}
 
Example 14
Source Project: big-c   Source File: TestDataTransferKeepalive.java    License: Apache License 2.0
/**
 * Test for the case where the client begins to read a long block, but doesn't
 * read bytes off the stream quickly. The datanode should time out sending the
 * chunks and the transceiver should die, even if it has a long keepalive.
 */
@Test(timeout=300000)
public void testSlowReader() throws Exception {
  // Set a client socket cache expiry time much longer than 
  // the datanode-side expiration time.
  final long CLIENT_EXPIRY_MS = 600000L;
  Configuration clientConf = new Configuration(conf);
  clientConf.setLong(DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_KEY, CLIENT_EXPIRY_MS);
  clientConf.set(DFS_CLIENT_CONTEXT, "testSlowReader");
  DistributedFileSystem fs =
      (DistributedFileSystem)FileSystem.get(cluster.getURI(),
          clientConf);
  // Restart the DN with a shorter write timeout.
  DataNodeProperties props = cluster.stopDataNode(0);
  props.conf.setInt(DFS_DATANODE_SOCKET_WRITE_TIMEOUT_KEY,
      WRITE_TIMEOUT);
  props.conf.setInt(DFS_DATANODE_SOCKET_REUSE_KEEPALIVE_KEY,
      120000);
  assertTrue(cluster.restartDataNode(props, true));
  dn = cluster.getDataNodes().get(0);
  // Wait for heartbeats to avoid a startup race where we
  // try to write the block while the DN is still starting.
  cluster.triggerHeartbeats();
  
  DFSTestUtil.createFile(fs, TEST_FILE, 1024*1024*8L, (short)1, 0L);
  FSDataInputStream stm = fs.open(TEST_FILE);
  stm.read();
  assertXceiverCount(1);

  GenericTestUtils.waitFor(new Supplier<Boolean>() {
    public Boolean get() {
      // DN should time out in sendChunks, and this should force
      // the xceiver to exit.
      return getXceiverCountWithoutServer() == 0;
    }
  }, 500, 50000);

  IOUtils.closeStream(stm);
}
 
Example 15
Source Project: RDFS   Source File: TestFileAppend4.java    License: Apache License 2.0
private void runDNRestartCorruptType(CorruptionType corrupt) throws Exception {
  cluster = new MiniDFSCluster(conf, 3, true, null);
  FileSystem fs1 = cluster.getFileSystem();
  try {
    short rep = 3; // replication
    assertTrue(BLOCK_SIZE%4 == 0);

    file1 = new Path("/dnDeath.dat");

    // write 1/2 block & close
    stm = fs1.create(file1, true, 1024, rep, 4096);
    AppendTestUtil.write(stm, 0, 1024);
    stm.sync();
    loseLeases(fs1);

    DFSOutputStream dfso = (DFSOutputStream)stm.getWrappedStream();
    dfso.abortForTests();

    // close the primary DN
    DataNodeProperties badDN = cluster.stopDataNode(0);

    // Truncate the block on the primary DN
    corruptDataNode(0, corrupt);

    // Start the DN back up
    cluster.restartDataNode(badDN);

    // Recover the lease
    FileSystem fs2 = AppendTestUtil.createHdfsWithDifferentUsername(fs1.getConf());
    recoverFile(fs2);

    assertFileSize(fs2, 1024);
    checkFile(fs2, 1024);
  } finally {
    // explicitly do not shut down fs1, since it's been frozen up by
    // killing the DataStreamer and not allowing recovery
    cluster.shutdown();
  }
}
 
Example 16
Source Project: RDFS   Source File: TestFileAppend4.java    License: Apache License 2.0
/**
 * Test that the restart of a DN and the subsequent pipeline recovery do not cause
 * a file to become prematurely considered "complete", when it's a fresh file
 * with no .append() called.
 */
public void testNotPrematurelyCompleteWithFailureNotReopened() throws Exception {
  LOG.info("START");
  cluster = new MiniDFSCluster(conf, 3, true, null);
  NameNode nn = cluster.getNameNode();
  FileSystem fs1 = cluster.getFileSystem();
  try {
    short rep = 3; // replication

    file1 = new Path("/delayedReceiveBlock");

    stm = fs1.create(file1, true, (int)BLOCK_SIZE*2, rep, 64*1024*1024);
    LOG.info("======== Writing");
    AppendTestUtil.write(stm, 0, 1024*1024);

    LOG.info("======== Waiting for a block allocation");
    waitForBlockReplication(fs1, "/delayedReceiveBlock", 0, 3000);

    LOG.info("======== Checking not complete");
    assertFalse(NameNodeAdapter.checkFileProgress(nn.namesystem, "/delayedReceiveBlock", true));

    // Stop one of the DNs, don't restart
    MiniDFSCluster.DataNodeProperties dnprops = cluster.stopDataNode(0);

    // Write some more data
    AppendTestUtil.write(stm, 0, 1024*1024);

    // Make sure we don't see the file as complete
    LOG.info("======== Checking progress");
    assertFalse(NameNodeAdapter.checkFileProgress(nn.namesystem, "/delayedReceiveBlock", true));
    LOG.info("======== Closing");
    stm.close();

  } finally {
    LOG.info("======== Cleaning up");
    fs1.close();
    cluster.shutdown();
  }
}
 
Example 17
Source Project: RDFS   Source File: TestUnderReplicatedBlocks.java    License: Apache License 2.0
private DataNodeProperties shutdownDataNode(MiniDFSCluster cluster, DatanodeDescriptor datanode) {
  LOG.info("shutdown datanode: " + datanode.getName());
  DataNodeProperties dnprop = cluster.stopDataNode(datanode.getName());
  FSNamesystem namesystem = cluster.getNameNode().namesystem;
  // make sure that NN detects that the datanode is down
  synchronized (namesystem.heartbeats) {
    datanode.setLastUpdate(0); // mark it dead
    namesystem.heartbeatCheck();
  }
  return dnprop;
}
 
Example 18
Source Project: hadoop   Source File: TestLeaseRecovery.java    License: Apache License 2.0
/**
 * Block recovery when the meta file does not have CRCs for all chunks in
 * the block file.
 */
@Test
public void testBlockRecoveryWithLessMetafile() throws Exception {
  Configuration conf = new Configuration();
  conf.set(DFSConfigKeys.DFS_BLOCK_LOCAL_PATH_ACCESS_USER_KEY,
      UserGroupInformation.getCurrentUser().getShortUserName());
  cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
  Path file = new Path("/testRecoveryFile");
  DistributedFileSystem dfs = cluster.getFileSystem();
  FSDataOutputStream out = dfs.create(file);
  int count = 0;
  while (count < 2 * 1024 * 1024) {
    out.writeBytes("Data");
    count += 4;
  }
  out.hsync();
  // abort the original stream
  ((DFSOutputStream) out.getWrappedStream()).abort();

  LocatedBlocks locations = cluster.getNameNodeRpc().getBlockLocations(
      file.toString(), 0, count);
  ExtendedBlock block = locations.get(0).getBlock();
  DataNode dn = cluster.getDataNodes().get(0);
  BlockLocalPathInfo localPathInfo = dn.getBlockLocalPathInfo(block, null);
  File metafile = new File(localPathInfo.getMetaPath());
  assertTrue(metafile.exists());

  // reduce the block meta file size
  RandomAccessFile raf = new RandomAccessFile(metafile, "rw");
  raf.setLength(metafile.length() - 20);
  raf.close();

  // restart the DN so the replica becomes RWR (ReplicaWaitingToBeRecovered)
  DataNodeProperties dnProp = cluster.stopDataNode(0);
  cluster.restartDataNode(dnProp, true);

  // try to recover the lease
  DistributedFileSystem newdfs = (DistributedFileSystem) FileSystem
      .newInstance(cluster.getConfiguration(0));
  count = 0;
  while (++count < 10 && !newdfs.recoverLease(file)) {
    Thread.sleep(1000);
  }
  assertTrue("File should be closed", newdfs.recoverLease(file));

}
 
Example 19
Source Project: hadoop   Source File: TestFileAppend.java    License: Apache License 2.0
/**
 * Old replica of the block should not be accepted as valid for append/read
 */
@Test
public void testFailedAppendBlockRejection() throws Exception {
  Configuration conf = new HdfsConfiguration();
  conf.set("dfs.client.block.write.replace-datanode-on-failure.enable",
      "false");
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3)
      .build();
  DistributedFileSystem fs = null;
  try {
    fs = cluster.getFileSystem();
    Path path = new Path("/test");
    FSDataOutputStream out = fs.create(path);
    out.writeBytes("hello\n");
    out.close();

    // stop one datanode
    DataNodeProperties dnProp = cluster.stopDataNode(0);
    String dnAddress = dnProp.datanode.getXferAddress().toString();
    if (dnAddress.startsWith("/")) {
      dnAddress = dnAddress.substring(1);
    }

    // append again to bump genstamps
    for (int i = 0; i < 2; i++) {
      out = fs.append(path);
      out.writeBytes("helloagain\n");
      out.close();
    }

    // re-open to make the block state under construction
    out = fs.append(path);
    cluster.restartDataNode(dnProp, true);
    // wait till the block report comes
    Thread.sleep(2000);
    // check the block locations; they should not contain the restarted datanode
    BlockLocation[] locations = fs.getFileBlockLocations(path, 0,
        Long.MAX_VALUE);
    String[] names = locations[0].getNames();
    for (String node : names) {
      if (node.equals(dnAddress)) {
        fail("Failed append should not be present in latest block locations.");
      }
    }
    out.close();
  } finally {
    IOUtils.closeStream(fs);
    cluster.shutdown();
  }
}
 
Example 20
Source Project: hadoop   Source File: TestDFSClientExcludedNodes.java    License: Apache License 2.0
@Test(timeout=60000)
public void testExcludedNodesForgiveness() throws IOException {
  // Forgive nodes in under 2.5s for this test case.
  conf.setLong(
      DFSConfigKeys.DFS_CLIENT_WRITE_EXCLUDE_NODES_CACHE_EXPIRY_INTERVAL,
      2500);
  // We'll be using a 512-byte block size just for these tests,
  // so make sure the bytes-per-checksum setting matches it.
  conf.setInt("io.bytes.per.checksum", 512);
  cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
  List<DataNodeProperties> props = cluster.dataNodes;
  FileSystem fs = cluster.getFileSystem();
  Path filePath = new Path("/testForgivingExcludedNodes");

  // 256 bytes data chunk for writes
  byte[] bytes = new byte[256];
  for (int index=0; index<bytes.length; index++) {
    bytes[index] = '0';
  }

  // File with a 512 bytes block size
  FSDataOutputStream out = fs.create(filePath, true, 4096, (short) 3, 512);

  // Write a block to all 3 DNs (2x256bytes).
  out.write(bytes);
  out.write(bytes);
  out.hflush();

  // Remove two DNs, to put them into the exclude list.
  DataNodeProperties two = cluster.stopDataNode(2);
  DataNodeProperties one = cluster.stopDataNode(1);

  // Write another block.
  // At this point, we have two nodes already in excluded list.
  out.write(bytes);
  out.write(bytes);
  out.hflush();

  // Bring back the older DNs, since they are going to be forgiven only
  // after this previous block write.
  Assert.assertEquals(true, cluster.restartDataNode(one, true));
  Assert.assertEquals(true, cluster.restartDataNode(two, true));
  cluster.waitActive();

  // Sleep for 5s to let the excluded nodes expire from the excludes
  // list (i.e. be forgiven after the configured wait period).
  // [Sleeping just in case the restart of the DNs completed in under 5s,
  // since otherwise we'd end up quickly excluding those again.]
  ThreadUtil.sleepAtLeastIgnoreInterrupts(5000);

  // Terminate the last good DN, so that the write below can succeed only
  // if the other two DNs have been forgiven by now.
  cluster.stopDataNode(0);

  try {
    // Attempt writing another block, which should still pass
    // because the previous two DNs should have been forgiven by now,
    // while the last good DN gets added to the excludes list this time.
    out.write(bytes);
    out.hflush();
    out.close();
  } catch (Exception e) {
    fail("Excluded DataNodes should be forgiven after a while and " +
         "not cause file writing exception of: '" + e.getMessage() + "'");
  }
}
 
Example 21
Source Project: hadoop   Source File: TestSafeMode.java    License: Apache License 2.0
/**
 * Test that the NN initializes its under-replicated blocks queue
 * before it is ready to exit safemode (HDFS-1476)
 */
@Test(timeout=45000)
public void testInitializeReplQueuesEarly() throws Exception {
  LOG.info("Starting testInitializeReplQueuesEarly");
  // Spray the blocks around the cluster when we add DNs instead of
  // concentrating all blocks on the first node.
  BlockManagerTestUtil.setWritingPrefersLocalNode(
      cluster.getNamesystem().getBlockManager(), false);
  
  cluster.startDataNodes(conf, 2, true, StartupOption.REGULAR, null);
  cluster.waitActive();

  LOG.info("Creating files");
  DFSTestUtil.createFile(fs, TEST_PATH, 15*BLOCK_SIZE, (short)1, 1L);
  
  LOG.info("Stopping all DataNodes");
  List<DataNodeProperties> dnprops = Lists.newLinkedList();
  dnprops.add(cluster.stopDataNode(0));
  dnprops.add(cluster.stopDataNode(0));
  dnprops.add(cluster.stopDataNode(0));
  
  cluster.getConfiguration(0).setFloat(
      DFSConfigKeys.DFS_NAMENODE_REPL_QUEUE_THRESHOLD_PCT_KEY, 1f/15f);
  
  LOG.info("Restarting NameNode");
  cluster.restartNameNode();
  final NameNode nn = cluster.getNameNode();
  
  String status = nn.getNamesystem().getSafemode();
  assertEquals("Safe mode is ON. The reported blocks 0 needs additional " +
      "15 blocks to reach the threshold 0.9990 of total blocks 15." + NEWLINE +
      "The number of live datanodes 0 has reached the minimum number 0. " +
      "Safe mode will be turned off automatically once the thresholds " +
      "have been reached.", status);
  assertFalse("Mis-replicated block queues should not be initialized " +
      "until threshold is crossed",
      NameNodeAdapter.safeModeInitializedReplQueues(nn));
  
  LOG.info("Restarting one DataNode");
  cluster.restartDataNode(dnprops.remove(0));

  // Wait for block reports from all attached storages of
  // the restarted DN to come in.
  GenericTestUtils.waitFor(new Supplier<Boolean>() {
    @Override
    public Boolean get() {
      return getLongCounter("StorageBlockReportOps", getMetrics(NN_METRICS)) ==
          cluster.getStoragesPerDatanode();
    }
  }, 10, 10000);

  final int safe = NameNodeAdapter.getSafeModeSafeBlocks(nn);
  assertTrue("Expected first block report to make some blocks safe.", safe > 0);
  assertTrue("Did not expect first block report to make all blocks safe.", safe < 15);

  assertTrue(NameNodeAdapter.safeModeInitializedReplQueues(nn));

  // Ensure that UnderReplicatedBlocks goes up to 15 - safe. Misreplicated
  // blocks are processed asynchronously so this may take a few seconds.
  // Failure here will manifest as a test timeout.
  BlockManagerTestUtil.updateState(nn.getNamesystem().getBlockManager());
  long underReplicatedBlocks = nn.getNamesystem().getUnderReplicatedBlocks();
  while (underReplicatedBlocks != (15 - safe)) {
    LOG.info("UnderReplicatedBlocks expected=" + (15 - safe) +
             ", actual=" + underReplicatedBlocks);
    Thread.sleep(100);
    BlockManagerTestUtil.updateState(nn.getNamesystem().getBlockManager());
    underReplicatedBlocks = nn.getNamesystem().getUnderReplicatedBlocks();
  }
  
  cluster.restartDataNodes();
}
 
Example 22
Source Project: hadoop   Source File: TestStandbyIsHot.java    License: Apache License 2.0
/**
 * Regression test for HDFS-2795:
 *  - Start an HA cluster with a DN.
 *  - Write several blocks to the FS with replication 1.
 *  - Shutdown the DN
 *  - Wait for the NNs to declare the DN dead. All blocks will be under-replicated.
 *  - Restart the DN.
 * In the bug, the standby node would only very slowly notice the blocks returning
 * to the cluster.
 */
@Test(timeout=60000)
public void testDatanodeRestarts() throws Exception {
  Configuration conf = new Configuration();
  conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 1024);
  // We read from the standby to watch block locations
  HAUtil.setAllowStandbyReads(conf, true);
  conf.setLong(DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY, 0);
  conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .nnTopology(MiniDFSNNTopology.simpleHATopology())
    .numDataNodes(1)
    .build();
  try {
    NameNode nn0 = cluster.getNameNode(0);
    NameNode nn1 = cluster.getNameNode(1);

    cluster.transitionToActive(0);
    
    // Create 5 blocks.
    DFSTestUtil.createFile(cluster.getFileSystem(0), 
        TEST_FILE_PATH, 5*1024, (short)1, 1L);
    
    HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
    
    // Stop the DN.
    DataNode dn = cluster.getDataNodes().get(0);
    String dnName = dn.getDatanodeId().getXferAddr(); 
    DataNodeProperties dnProps = cluster.stopDataNode(0);
    
    // Make sure both NNs register it as dead.
    BlockManagerTestUtil.noticeDeadDatanode(nn0, dnName);
    BlockManagerTestUtil.noticeDeadDatanode(nn1, dnName);
    
    BlockManagerTestUtil.updateState(nn0.getNamesystem().getBlockManager());
    BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager());
    assertEquals(5, nn0.getNamesystem().getUnderReplicatedBlocks());
    
    // The SBN will not have any blocks in its neededReplication queue
    // since the SBN doesn't process replication.
    assertEquals(0, nn1.getNamesystem().getUnderReplicatedBlocks());
    
    LocatedBlocks locs = nn1.getRpcServer().getBlockLocations(
        TEST_FILE, 0, 1);
    assertEquals("Standby should have registered that the block has no replicas",
        0, locs.get(0).getLocations().length);
    
    cluster.restartDataNode(dnProps);
    // Wait for both NNs to re-register the DN.
    cluster.waitActive(0);
    cluster.waitActive(1);
    
    BlockManagerTestUtil.updateState(nn0.getNamesystem().getBlockManager());
    BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager());
    assertEquals(0, nn0.getNamesystem().getUnderReplicatedBlocks());
    assertEquals(0, nn1.getNamesystem().getUnderReplicatedBlocks());
    
    locs = nn1.getRpcServer().getBlockLocations(
        TEST_FILE, 0, 1);
    assertEquals("Standby should have registered that the block has replicas again",
        1, locs.get(0).getLocations().length);
  } finally {
    cluster.shutdown();
  }
}
 
Example 23
Source Project: hadoop   Source File: TestPendingCorruptDnMessages.java    License: Apache License 2.0
@Test
public void testChangedStorageId() throws IOException, URISyntaxException,
    InterruptedException {
  HdfsConfiguration conf = new HdfsConfiguration();
  conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .numDataNodes(1)
      .nnTopology(MiniDFSNNTopology.simpleHATopology())
      .build();
  
  try {
    cluster.transitionToActive(0);
    
    FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
    OutputStream out = fs.create(filePath);
    out.write("foo bar baz".getBytes());
    out.close();
    
    HATestUtil.waitForStandbyToCatchUp(cluster.getNameNode(0),
        cluster.getNameNode(1));
    
    // Change the gen stamp of the block on datanode to go back in time (gen
    // stamps start at 1000)
    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, filePath);
    assertTrue(cluster.changeGenStampOfBlock(0, block, 900));
    
    // Stop the DN so the replica with the changed gen stamp will be reported
    // when this DN starts up.
    DataNodeProperties dnProps = cluster.stopDataNode(0);
    
    // Restart the namenode so that when the DN comes up it will see an initial
    // block report.
    cluster.restartNameNode(1, false);
    assertTrue(cluster.restartDataNode(dnProps, true));
    
    // Wait until the standby NN queues up the corrupt block in the pending DN
    // message queue.
    while (cluster.getNamesystem(1).getBlockManager()
        .getPendingDataNodeMessageCount() < 1) {
      ThreadUtil.sleepAtLeastIgnoreInterrupts(1000);
    }
    
    assertEquals(1, cluster.getNamesystem(1).getBlockManager()
        .getPendingDataNodeMessageCount());
    String oldStorageId = getRegisteredDatanodeUid(cluster, 1);
    
    // Reformat/restart the DN.
    assertTrue(wipeAndRestartDn(cluster, 0));
    
    // Give the DN time to start up and register, which will cause the
    // DatanodeManager to dissociate the old storage ID from the DN xfer addr.
    String newStorageId = "";
    do {
      ThreadUtil.sleepAtLeastIgnoreInterrupts(1000);
      newStorageId = getRegisteredDatanodeUid(cluster, 1);
      System.out.println("====> oldStorageId: " + oldStorageId +
          " newStorageId: " + newStorageId);
    } while (newStorageId.equals(oldStorageId));
    
    assertEquals(0, cluster.getNamesystem(1).getBlockManager()
        .getPendingDataNodeMessageCount());
    
    // Now try to fail over.
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);
  } finally {
    cluster.shutdown();
  }
}
 
Example 24
Source Project: hadoop   Source File: TestDecommissioningStatus.java    License: Apache License 2.0
/**
 * Verify a DN remains in DECOMMISSION_INPROGRESS state if it is marked
 * as dead before decommission has completed. That will allow the DN to resume
 * the replication process after it rejoins the cluster.
 */
@Test(timeout=120000)
public void testDecommissionStatusAfterDNRestart() throws Exception {
  DistributedFileSystem fileSys =
      (DistributedFileSystem)cluster.getFileSystem();

  // Create a file with one block. That block has one replica.
  Path f = new Path("decommission.dat");
  DFSTestUtil.createFile(fileSys, f, fileSize, fileSize, fileSize,
      (short)1, seed);

  // Find the DN that owns the only replica.
  RemoteIterator<LocatedFileStatus> fileList = fileSys.listLocatedStatus(f);
  BlockLocation[] blockLocations = fileList.next().getBlockLocations();
  String dnName = blockLocations[0].getNames()[0];

  // Decommission the DN.
  FSNamesystem fsn = cluster.getNamesystem();
  final DatanodeManager dm = fsn.getBlockManager().getDatanodeManager();
  decommissionNode(fsn, localFileSys, dnName);
  dm.refreshNodes(conf);

  // Stop the DN when decommission is in progress.
  // Given that DFS_DATANODE_BALANCE_BANDWIDTHPERSEC_KEY is set to 1 and the
  // size of the block, the decommission will take much longer than the test
  // timeout value to complete. So when stopDataNode is called,
  // decommission should be in progress.
  DataNodeProperties dataNodeProperties = cluster.stopDataNode(dnName);
  final List<DatanodeDescriptor> dead = new ArrayList<DatanodeDescriptor>();
  while (true) {
    dm.fetchDatanodes(null, dead, false);
    if (dead.size() == 1) {
      break;
    }
    Thread.sleep(1000);
  }

  // Force removal of the dead node's blocks.
  BlockManagerTestUtil.checkHeartbeat(fsn.getBlockManager());

  // Force DatanodeManager to check decommission state.
  BlockManagerTestUtil.recheckDecommissionState(dm);

  // Verify that the DN remains in DECOMMISSION_INPROGRESS state.
  assertTrue("the node should be DECOMMISSION_IN_PROGRESSS",
      dead.get(0).isDecommissionInProgress());

  // Check DatanodeManager#getDecommissioningNodes, make sure it returns
  // the node as decommissioning, even if it's dead
  List<DatanodeDescriptor> decomlist = dm.getDecommissioningNodes();
  assertTrue("The node should be be decommissioning", decomlist.size() == 1);
  
  // Delete the under-replicated file, which should let the 
  // DECOMMISSION_IN_PROGRESS node become DECOMMISSIONED
  cleanupFile(fileSys, f);
  BlockManagerTestUtil.recheckDecommissionState(dm);
  assertTrue("the node should be decommissioned",
      dead.get(0).isDecommissioned());

  // Add the node back
  cluster.restartDataNode(dataNodeProperties, true);
  cluster.waitActive();

  // Call refreshNodes on FSNamesystem with empty exclude file.
  // This will remove the datanodes from decommissioning list and
  // make them available again.
  writeConfigFile(localFileSys, excludeFile, null);
  dm.refreshNodes(conf);
}
 
Example 25
Source Project: hadoop   Source File: TestNodeCount.java    License: Apache License 2.0
@Test
public void testNodeCount() throws Exception {
  // start a mini dfs cluster of 2 nodes
  final Configuration conf = new HdfsConfiguration();
  final MiniDFSCluster cluster = 
    new MiniDFSCluster.Builder(conf).numDataNodes(REPLICATION_FACTOR).build();
  try {
    final FSNamesystem namesystem = cluster.getNamesystem();
    final BlockManager bm = namesystem.getBlockManager();
    final HeartbeatManager hm = bm.getDatanodeManager().getHeartbeatManager();
    final FileSystem fs = cluster.getFileSystem();
    
    // populate the cluster with a one block file
    final Path FILE_PATH = new Path("/testfile");
    DFSTestUtil.createFile(fs, FILE_PATH, 1L, REPLICATION_FACTOR, 1L);
    DFSTestUtil.waitReplication(fs, FILE_PATH, REPLICATION_FACTOR);
    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, FILE_PATH);

    // keep a copy of all datanode descriptors
    final DatanodeDescriptor[] datanodes = hm.getDatanodes();
    
    // start two new nodes
    cluster.startDataNodes(conf, 2, true, null, null);
    cluster.waitActive();
    
    // bring down first datanode
    DatanodeDescriptor datanode = datanodes[0];
    DataNodeProperties dnprop = cluster.stopDataNode(datanode.getXferAddr());
    
    // make sure that NN detects that the datanode is down
    BlockManagerTestUtil.noticeDeadDatanode(
        cluster.getNameNode(), datanode.getXferAddr());
    
    // the block will be replicated
    DFSTestUtil.waitReplication(fs, FILE_PATH, REPLICATION_FACTOR);

    // restart the first datanode
    cluster.restartDataNode(dnprop);
    cluster.waitActive();
    
    // check if an excess replica is detected (transient)
    initializeTimeout(TIMEOUT);
    while (countNodes(block.getLocalBlock(), namesystem).excessReplicas() == 0) {
      checkTimeout("excess replicas not detected");
    }
    
    // find out a non-excess node
    DatanodeDescriptor nonExcessDN = null;
    for(DatanodeStorageInfo storage : bm.blocksMap.getStorages(block.getLocalBlock())) {
      final DatanodeDescriptor dn = storage.getDatanodeDescriptor();
      Collection<Block> blocks = bm.excessReplicateMap.get(dn.getDatanodeUuid());
      if (blocks == null || !blocks.contains(block.getLocalBlock()) ) {
        nonExcessDN = dn;
        break;
      }
    }
    assertTrue(nonExcessDN!=null);
    
    // bring down a non-excess datanode
    dnprop = cluster.stopDataNode(nonExcessDN.getXferAddr());
    // make sure that NN detects that the datanode is down
    BlockManagerTestUtil.noticeDeadDatanode(
        cluster.getNameNode(), nonExcessDN.getXferAddr());

    // The block should be replicated
    initializeTimeout(TIMEOUT);
    while (countNodes(block.getLocalBlock(), namesystem).liveReplicas() != REPLICATION_FACTOR) {
      checkTimeout("live replica count not correct", 1000);
    }

    // restart the first datanode
    cluster.restartDataNode(dnprop);
    cluster.waitActive();

    // check if an excess replica is detected (transient)
    initializeTimeout(TIMEOUT);
    while (countNodes(block.getLocalBlock(), namesystem).excessReplicas() != 2) {
      checkTimeout("excess replica count not equal to 2");
    }

  } finally {
    cluster.shutdown();
  }
}
 
Example 26
Source Project: hadoop   Source File: TestOverReplicatedBlocks.java    License: Apache License 2.0
/** Test that processOverReplicatedBlock can handle corrupt replicas fine.
 * It makes sure that corrupt replicas won't be treated as valid ones,
 * which would otherwise let the NN delete valid replicas while keeping
 * corrupt ones.
 */
@Test
public void testProcesOverReplicateBlock() throws Exception {
  Configuration conf = new HdfsConfiguration();
  conf.setLong(DFSConfigKeys.DFS_DATANODE_SCAN_PERIOD_HOURS_KEY, 100L);
  conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L);
  conf.set(
      DFSConfigKeys.DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY,
      Integer.toString(2));
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
  FileSystem fs = cluster.getFileSystem();

  try {
    final Path fileName = new Path("/foo1");
    DFSTestUtil.createFile(fs, fileName, 2, (short)3, 0L);
    DFSTestUtil.waitReplication(fs, fileName, (short)3);
    
    // corrupt the block on datanode 0
    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, fileName);
    assertTrue(cluster.corruptReplica(0, block));
    DataNodeProperties dnProps = cluster.stopDataNode(0);
    // remove block scanner log to trigger block scanning
    File scanCursor = new File(new File(MiniDFSCluster.getFinalizedDir(
        cluster.getInstanceStorageDir(0, 0),
        cluster.getNamesystem().getBlockPoolId()).getParent()).getParent(),
        "scanner.cursor");
    // wait up to one minute for the deletion to succeed
    for(int i = 0; !scanCursor.delete(); i++) {
      assertTrue("Could not delete " + scanCursor.getAbsolutePath() +
          " in one minute", i < 60);
      try {
        Thread.sleep(1000);
      } catch (InterruptedException ignored) {}
    }
    
    // restart the datanode so the corrupt replica will be detected
    cluster.restartDataNode(dnProps);
    DFSTestUtil.waitReplication(fs, fileName, (short)2);
    
    String blockPoolId = cluster.getNamesystem().getBlockPoolId();
    final DatanodeID corruptDataNode = 
      DataNodeTestUtils.getDNRegistrationForBP(
          cluster.getDataNodes().get(2), blockPoolId);
       
    final FSNamesystem namesystem = cluster.getNamesystem();
    final BlockManager bm = namesystem.getBlockManager();
    final HeartbeatManager hm = bm.getDatanodeManager().getHeartbeatManager();
    try {
      namesystem.writeLock();
      synchronized(hm) {
        // set live datanodes' remaining space to 0
        // so they will be chosen for deletion when over-replication occurs
        String corruptMachineName = corruptDataNode.getXferAddr();
        for (DatanodeDescriptor datanode : hm.getDatanodes()) {
          if (!corruptMachineName.equals(datanode.getXferAddr())) {
            datanode.getStorageInfos()[0].setUtilizationForTesting(100L, 100L, 0, 100L);
            datanode.updateHeartbeat(
                BlockManagerTestUtil.getStorageReportsForDatanode(datanode),
                0L, 0L, 0, 0, null);
          }
        }

        // decrease the replication factor to 1; 
        NameNodeAdapter.setReplication(namesystem, fileName.toString(), (short)1);

        // the corrupt replica won't be chosen to be the excess one;
        // without 4910 the number of live replicas would be 0: the block would be lost
        assertEquals(1, bm.countNodes(block.getLocalBlock()).liveReplicas());
      }
    } finally {
      namesystem.writeUnlock();
    }
    
  } finally {
    cluster.shutdown();
  }
}
 
Example 27
Source Project: hadoop   Source File: TestPendingInvalidateBlock.java    License: Apache License 2.0
/**
 * Test whether we can delay the deletion of unknown blocks in DataNode's
 * first several block reports.
 */
@Test
public void testPendingDeleteUnknownBlocks() throws Exception {
  final int fileNum = 5; // 5 files
  final Path[] files = new Path[fileNum];
  final DataNodeProperties[] dnprops = new DataNodeProperties[REPLICATION];
  // create a group of files, each file contains 1 block
  for (int i = 0; i < fileNum; i++) {
    files[i] = new Path("/file" + i);
    DFSTestUtil.createFile(dfs, files[i], BLOCKSIZE, REPLICATION, i);
  }
  // wait until all DataNodes have replicas
  waitForReplication();
  for (int i = REPLICATION - 1; i >= 0; i--) {
    dnprops[i] = cluster.stopDataNode(i);
  }
  Thread.sleep(2000);
  // delete 2 files, we still have 3 files remaining so that we can cover
  // every DN storage
  for (int i = 0; i < 2; i++) {
    dfs.delete(files[i], true);
  }

  // restart NameNode
  cluster.restartNameNode(false);
  InvalidateBlocks invalidateBlocks = (InvalidateBlocks) Whitebox
      .getInternalState(cluster.getNamesystem().getBlockManager(),
          "invalidateBlocks");
  InvalidateBlocks mockIb = Mockito.spy(invalidateBlocks);
  Mockito.doReturn(1L).when(mockIb).getInvalidationDelay();
  Whitebox.setInternalState(cluster.getNamesystem().getBlockManager(),
      "invalidateBlocks", mockIb);

  Assert.assertEquals(0L, cluster.getNamesystem().getPendingDeletionBlocks());
  // restart DataNodes
  for (int i = 0; i < REPLICATION; i++) {
    cluster.restartDataNode(dnprops[i], true);
  }
  cluster.waitActive();

  for (int i = 0; i < REPLICATION; i++) {
    DataNodeTestUtils.triggerBlockReport(cluster.getDataNodes().get(i));
  }
  Thread.sleep(2000);
  // make sure we have received block reports by checking the total block #
  Assert.assertEquals(3, cluster.getNamesystem().getBlocksTotal());
  Assert.assertEquals(4, cluster.getNamesystem().getPendingDeletionBlocks());

  cluster.restartNameNode(true);
  Thread.sleep(6000);
  Assert.assertEquals(3, cluster.getNamesystem().getBlocksTotal());
  Assert.assertEquals(0, cluster.getNamesystem().getPendingDeletionBlocks());
}
 
Example 28
Source Project: hadoop   Source File: TestDataNodeMultipleRegistrations.java    License: Apache License 2.0
@Test
public void testDNWithInvalidStorageWithHA() throws Exception {
  MiniDFSNNTopology top = new MiniDFSNNTopology()
    .addNameservice(new MiniDFSNNTopology.NSConf("ns1")
      .addNN(new MiniDFSNNTopology.NNConf("nn0").setClusterId("cluster-1"))
      .addNN(new MiniDFSNNTopology.NNConf("nn1").setClusterId("cluster-1")));

  top.setFederation(true);

  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).nnTopology(top)
      .numDataNodes(0).build();
  try {
    cluster.startDataNodes(conf, 1, true, null, null);
    // wait for the initialization to complete
    Thread.sleep(10000);
    DataNode dn = cluster.getDataNodes().get(0);
    assertTrue("Datanode should be running", dn.isDatanodeUp());
    assertEquals("BPOfferService should be running", 1,
        dn.getAllBpOs().length);
    DataNodeProperties dnProp = cluster.stopDataNode(0);

    cluster.getNameNode(0).stop();
    cluster.getNameNode(1).stop();
    Configuration nn1 = cluster.getConfiguration(0);
    Configuration nn2 = cluster.getConfiguration(1);
    // setting up invalid cluster
    StartupOption.FORMAT.setClusterId("cluster-2");
    DFSTestUtil.formatNameNode(nn1);
    MiniDFSCluster.copyNameDirs(FSNamesystem.getNamespaceDirs(nn1),
        FSNamesystem.getNamespaceDirs(nn2), nn2);
    cluster.restartNameNode(0, false);
    cluster.restartNameNode(1, false);
    cluster.restartDataNode(dnProp);
    
    // wait for the initialization to complete
    Thread.sleep(10000);
    dn = cluster.getDataNodes().get(0);
    assertFalse("Datanode should have shutdown as only service failed",
        dn.isDatanodeUp());
  } finally {
    cluster.shutdown();
  }
}
 
Example 29
Source Project: big-c   Source File: TestLeaseRecovery.java    License: Apache License 2.0
/**
 * Block recovery when the meta file does not have CRCs for all chunks in
 * the block file.
 */
@Test
public void testBlockRecoveryWithLessMetafile() throws Exception {
  Configuration conf = new Configuration();
  conf.set(DFSConfigKeys.DFS_BLOCK_LOCAL_PATH_ACCESS_USER_KEY,
      UserGroupInformation.getCurrentUser().getShortUserName());
  cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
  Path file = new Path("/testRecoveryFile");
  DistributedFileSystem dfs = cluster.getFileSystem();
  FSDataOutputStream out = dfs.create(file);
  int count = 0;
  while (count < 2 * 1024 * 1024) {
    out.writeBytes("Data");
    count += 4;
  }
  out.hsync();
  // abort the original stream
  ((DFSOutputStream) out.getWrappedStream()).abort();

  LocatedBlocks locations = cluster.getNameNodeRpc().getBlockLocations(
      file.toString(), 0, count);
  ExtendedBlock block = locations.get(0).getBlock();
  DataNode dn = cluster.getDataNodes().get(0);
  BlockLocalPathInfo localPathInfo = dn.getBlockLocalPathInfo(block, null);
  File metafile = new File(localPathInfo.getMetaPath());
  assertTrue(metafile.exists());

  // reduce the block meta file size
  RandomAccessFile raf = new RandomAccessFile(metafile, "rw");
  raf.setLength(metafile.length() - 20);
  raf.close();

  // restart the DN so the replica becomes RWR (ReplicaWaitingToBeRecovered)
  DataNodeProperties dnProp = cluster.stopDataNode(0);
  cluster.restartDataNode(dnProp, true);

  // try to recover the lease
  DistributedFileSystem newdfs = (DistributedFileSystem) FileSystem
      .newInstance(cluster.getConfiguration(0));
  count = 0;
  while (++count < 10 && !newdfs.recoverLease(file)) {
    Thread.sleep(1000);
  }
  assertTrue("File should be closed", newdfs.recoverLease(file));

}
 
Example 30
Source Project: big-c   Source File: TestFileAppend.java    License: Apache License 2.0
/**
 * Old replica of the block should not be accepted as valid for append/read
 */
@Test
public void testFailedAppendBlockRejection() throws Exception {
  Configuration conf = new HdfsConfiguration();
  conf.set("dfs.client.block.write.replace-datanode-on-failure.enable",
      "false");
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3)
      .build();
  DistributedFileSystem fs = null;
  try {
    fs = cluster.getFileSystem();
    Path path = new Path("/test");
    FSDataOutputStream out = fs.create(path);
    out.writeBytes("hello\n");
    out.close();

    // stop one datanode
    DataNodeProperties dnProp = cluster.stopDataNode(0);
    String dnAddress = dnProp.datanode.getXferAddress().toString();
    if (dnAddress.startsWith("/")) {
      dnAddress = dnAddress.substring(1);
    }

    // append again to bump genstamps
    for (int i = 0; i < 2; i++) {
      out = fs.append(path);
      out.writeBytes("helloagain\n");
      out.close();
    }

    // re-open to make the block state under construction
    out = fs.append(path);
    cluster.restartDataNode(dnProp, true);
    // wait till the block report comes
    Thread.sleep(2000);
    // check the block locations; they should not contain the restarted datanode
    BlockLocation[] locations = fs.getFileBlockLocations(path, 0,
        Long.MAX_VALUE);
    String[] names = locations[0].getNames();
    for (String node : names) {
      if (node.equals(dnAddress)) {
        fail("Failed append should not be present in latest block locations.");
      }
    }
    out.close();
  } finally {
    IOUtils.closeStream(fs);
    cluster.shutdown();
  }
}