Java Code Examples for org.apache.hadoop.hdfs.DFSTestUtil#getLiveDatanodeCapacity()

The following examples show how to use org.apache.hadoop.hdfs.DFSTestUtil#getLiveDatanodeCapacity(). All of them come from the HDFS test suite, where the method is used to record a cluster's aggregate live-datanode capacity before and after injecting volume failures. The source file named above each example identifies the original project.
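The method returns the summed capacity of every datanode that the NameNode currently considers live. The recurring pattern in the examples below is to record this baseline, inject a storage failure, and then call DFSTestUtil.waitForDatanodeStatus(...) to block until the NameNode's view reflects the expected loss. A minimal sketch of that pattern (assumptions: a running 3-node MiniDFSCluster in cluster, the default two volumes per datanode, and a local timeout constant WAIT_FOR_HEARTBEATS):

final DatanodeManager dm =
    cluster.getNamesystem().getBlockManager().getDatanodeManager();

// Aggregate capacity over all live datanodes, and one datanode's share of it.
long origCapacity = DFSTestUtil.getLiveDatanodeCapacity(dm);
long dnCapacity = DFSTestUtil.getDatanodeCapacity(dm, 0);

// ... inject a volume failure on one datanode here ...

// Wait until the NN reports 3 live nodes, 0 dead nodes, and 1 failed volume;
// with two volumes per DN, one failed volume costs half a datanode's capacity.
DFSTestUtil.waitForDatanodeStatus(dm, 3, 0, 1,
    origCapacity - dnCapacity / 2, WAIT_FOR_HEARTBEATS);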
Example 1
Source File: TestDataNodeVolumeFailureToleration.java    From hadoop with Apache License 2.0
/**
 * Test that a volume that is considered failed on startup is seen as
 *  a failed volume by the NN.
 */
@Test
public void testFailedVolumeOnStartupIsCounted() throws Exception {
  assumeTrue(!System.getProperty("os.name").startsWith("Windows"));
  final DatanodeManager dm =
      cluster.getNamesystem().getBlockManager().getDatanodeManager();
  long origCapacity = DFSTestUtil.getLiveDatanodeCapacity(dm);
  File dir = new File(cluster.getInstanceStorageDir(0, 0), "current");

  try {
    prepareDirToFail(dir);
    restartDatanodes(1, false);
    // The cluster should come up...
    assertTrue(cluster.getDataNodes().get(0)
        .isBPServiceAlive(cluster.getNamesystem().getBlockPoolId()));
    // ...but with a single failed volume, which halves the reported capacity
    DFSTestUtil.waitForDatanodeStatus(dm, 1, 0, 1,
        origCapacity / 2, WAIT_FOR_HEARTBEATS);
  } finally {
    FileUtil.chmod(dir.toString(), "755");
  }
}
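Example 1 calls a prepareDirToFail helper defined elsewhere in the same test class. A hedged sketch of what such a helper has to do, inferred from the finally block above restoring mode 755: create the directory, then make it unreadable so the DN treats the volume as failed on startup.

// Hypothetical sketch of the helper used in Example 1.
private void prepareDirToFail(File dir) throws Exception {
  dir.mkdirs();
  // chmod to 000 so the volume fails its disk check when the DN starts.
  assertEquals("Couldn't chmod local vol", 0,
      FileUtil.chmod(dir.toString(), "000"));
}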
 
Example 2
Source File: TestDataNodeVolumeFailureReporting.java    From hadoop with Apache License 2.0
/**
 * Test that the NN re-learns of volume failures after restart.
 */
@Test
public void testVolFailureStatsPreservedOnNNRestart() throws Exception {
  // Bring up two more datanodes that can tolerate 1 failure
  cluster.startDataNodes(conf, 2, true, null, null);
  cluster.waitActive();

  final DatanodeManager dm =
      cluster.getNamesystem().getBlockManager().getDatanodeManager();
  long origCapacity = DFSTestUtil.getLiveDatanodeCapacity(dm);
  long dnCapacity = DFSTestUtil.getDatanodeCapacity(dm, 0);

  // Fail the first volume on both datanodes (the third DN stays healthy so
  // the write pipeline below never loses all of its nodes).
  // In MiniDFSCluster, DN i's volumes are dataDir/data(2*i+1) and data(2*i+2).
  File dn1Vol1 = new File(dataDir, "data"+(2*0+1));
  File dn2Vol1 = new File(dataDir, "data"+(2*1+1));
  DataNodeTestUtils.injectDataDirFailure(dn1Vol1, dn2Vol1);

  Path file1 = new Path("/test1");
  DFSTestUtil.createFile(fs, file1, 1024, (short)2, 1L);
  DFSTestUtil.waitReplication(fs, file1, (short)2);
  ArrayList<DataNode> dns = cluster.getDataNodes();

  // The NN reports two volume failures; each DN lost one of its two volumes,
  // so the aggregate capacity drops by one full datanode's worth.
  DFSTestUtil.waitForDatanodeStatus(dm, 3, 0, 2,
      origCapacity - (1*dnCapacity), WAIT_FOR_HEARTBEATS);
  checkAggregateFailuresAtNameNode(true, 2);
  checkFailuresAtNameNode(dm, dns.get(0), true, dn1Vol1.getAbsolutePath());
  checkFailuresAtNameNode(dm, dns.get(1), true, dn2Vol1.getAbsolutePath());

  // After restarting, the NN still sees the two failures
  cluster.restartNameNode(0);
  cluster.waitActive();
  DFSTestUtil.waitForDatanodeStatus(dm, 3, 0, 2,
      origCapacity - (1*dnCapacity), WAIT_FOR_HEARTBEATS);
  checkAggregateFailuresAtNameNode(true, 2);
  checkFailuresAtNameNode(dm, dns.get(0), true, dn1Vol1.getAbsolutePath());
  checkFailuresAtNameNode(dm, dns.get(1), true, dn2Vol1.getAbsolutePath());
}
 
Example 3
Source File: TestDataNodeVolumeFailureToleration.java    From hadoop with Apache License 2.0
/**
 * Test the DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY configuration
 * option, i.e. that the DN shuts itself down when the number of
 * volume failures exceeds the tolerated amount.
 */
@Test
public void testConfigureMinValidVolumes() throws Exception {
  assumeTrue(!System.getProperty("os.name").startsWith("Windows"));

  // Bring up two additional datanodes that need both of their volumes
  // functioning in order to stay up.
  conf.setInt(DFSConfigKeys.DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY, 0);
  cluster.startDataNodes(conf, 2, true, null, null);
  cluster.waitActive();
  final DatanodeManager dm =
      cluster.getNamesystem().getBlockManager().getDatanodeManager();
  long origCapacity = DFSTestUtil.getLiveDatanodeCapacity(dm);
  long dnCapacity = DFSTestUtil.getDatanodeCapacity(dm, 0);

  // Fail a volume on the 2nd DN (its first volume is dataDir/data(2*1+1))
  File dn2Vol1 = new File(dataDir, "data"+(2*1+1));
  DataNodeTestUtils.injectDataDirFailure(dn2Vol1);

  // Should only get two replicas (the first DN and the 3rd)
  Path file1 = new Path("/test1");
  DFSTestUtil.createFile(fs, file1, 1024, (short)3, 1L);
  DFSTestUtil.waitReplication(fs, file1, (short)2);

  // Check that this single failure caused a DN to die.
  DFSTestUtil.waitForDatanodeStatus(dm, 2, 1, 0, 
      origCapacity - (1*dnCapacity), WAIT_FOR_HEARTBEATS);

  // If we restore the volume we should still only be able to get
  // two replicas since the DN is still considered dead.
  DataNodeTestUtils.restoreDataDirFromFailure(dn2Vol1);
  Path file2 = new Path("/test2");
  DFSTestUtil.createFile(fs, file2, 1024, (short)3, 1L);
  DFSTestUtil.waitReplication(fs, file2, (short)2);
}
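For reference, dfs.datanode.failed.volumes.tolerated (the key behind DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY) defaults to 0, so any volume failure shuts the datanode down; a value of n lets it keep serving with up to n failed volumes. A minimal sketch (the value 1 is illustrative, not taken from the example above):

Configuration conf = new HdfsConfiguration();
// Tolerate one failed volume; a second failure shuts the DN down.
conf.setInt(DFSConfigKeys.DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY, 1);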
 