Java Code Examples for org.apache.hadoop.hdfs.DFSTestUtil#waitReplication()

The following examples show how to use org.apache.hadoop.hdfs.DFSTestUtil#waitReplication(). This helper blocks until every block of the given file has reached the requested replication factor, which makes it a common building block in HDFS tests that first create a file and then need its replicas to be in a known state before asserting anything. The originating project and source file are noted above each example.
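
Before the project-specific examples, here is a minimal sketch of the typical create-then-wait pattern. It is a hypothetical test written purely for illustration (the class and test names are made up), assuming a JUnit/MiniDFSCluster-based setup like the ones used in the examples below:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.junit.Test;

// Hypothetical test class, not taken from any of the projects below.
public class WaitReplicationSketch {
  @Test
  public void testFileReachesTargetReplication() throws Exception {
    Configuration conf = new HdfsConfiguration();
    // Start a 3-datanode mini cluster so a replication factor of 3 can be satisfied.
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
    try {
      cluster.waitActive();
      FileSystem fs = cluster.getFileSystem();
      Path file = new Path("/waitReplicationDemo");
      // Write a small file with replication factor 3, then block until the
      // NameNode reports that all of its blocks have that many replicas.
      DFSTestUtil.createFile(fs, file, 1024L, (short) 3, 0L);
      DFSTestUtil.waitReplication(fs, file, (short) 3);
    } finally {
      cluster.shutdown();
    }
  }
}

Every example below is a variation of this pattern, usually with some fault injection (failed volumes, corrupted replicas, stopped datanodes) between creating the file and waiting for, or re-checking, its replication.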
Example 1
Source File: TestUnderReplicatedBlocks.java    From RDFS with Apache License 2.0
public void testSetrepIncWithUnderReplicatedBlocks() throws Exception {
  Configuration conf = new Configuration();
  final short REPLICATION_FACTOR = 2;
  final String FILE_NAME = "/testFile";
  final Path FILE_PATH = new Path(FILE_NAME);
  MiniDFSCluster cluster = new MiniDFSCluster(conf, REPLICATION_FACTOR+1, true, null);
  try {
    // create a file with one block with a replication factor of 2
    final FileSystem fs = cluster.getFileSystem();
    DFSTestUtil.createFile(fs, FILE_PATH, 1L, REPLICATION_FACTOR, 1L);
    DFSTestUtil.waitReplication(fs, FILE_PATH, REPLICATION_FACTOR);
    
    // remove one replica from the blocksMap so block becomes under-replicated
    // but the block does not get put into the under-replicated blocks queue
    FSNamesystem namesystem = cluster.getNameNode().namesystem;
    Block b = DFSTestUtil.getFirstBlock(fs, FILE_PATH);
    DatanodeDescriptor dn = namesystem.blocksMap.nodeIterator(b).next();
    namesystem.addToInvalidates(b, dn, true);
    namesystem.blocksMap.removeNode(b, dn);
    
    // increment this file's replication factor
    FsShell shell = new FsShell(conf);
    assertEquals(0, shell.run(new String[]{
        "-setrep", "-w", Integer.toString(1+REPLICATION_FACTOR), FILE_NAME}));
  } finally {
    cluster.shutdown();
  }
  
}
 
Example 2
Source File: TestBalancer.java    From hadoop-gpu with Apache License 2.0
private void createFile(long fileLen, short replicationFactor)
throws IOException {
  FileSystem fs = cluster.getFileSystem();
  DFSTestUtil.createFile(fs, filePath, fileLen, 
      replicationFactor, r.nextLong());
  DFSTestUtil.waitReplication(fs, filePath, replicationFactor);
}
 
Example 3
Source File: TestDataNodeVolumeFailureToleration.java    From RDFS with Apache License 2.0
/**
 * Test the dfs.datanode.failed.volumes.tolerated configuration
 * option, i.e. the DN shuts itself down when the number of volume
 * failures it experiences exceeds the tolerated amount.
 */
@Test
public void testConfigureMinValidVolumes() throws Exception {
  assumeTrue(!System.getProperty("os.name").startsWith("Windows"));

  // Bring up two additional datanodes that need both of their volumes
  // functioning in order to stay up
  conf.setInt("dfs.datanode.failed.volumes.tolerated", 0);
  cluster.startDataNodes(conf, 2, true, null, null);
  cluster.waitActive();

  // Fail a volume on the 2nd DN
  File dn2Vol1 = new File(new File(dataDir, "data"+(2*1+1)), "current");
  try {
    assertTrue("Couldn't chmod local vol", dn2Vol1.setExecutable(false));

    // Should only get two replicas (the first DN and the 3rd)
    Path file1 = new Path("/test1");
    DFSTestUtil.createFile(fs, file1, 1024, (short)3, 1L);
    DFSTestUtil.waitReplication(fs, file1, (short)2);
  
    assertFalse("2nd DN should be dead", cluster.getDataNodes().get(1).isDatanodeUp());
  
    // If we restore the volume we should still only be able to get
    // two replicas since the DN is still considered dead.
    assertTrue("Couldn't chmod local vol", dn2Vol1.setExecutable(true));
    Path file2 = new Path("/test2");
    DFSTestUtil.createFile(fs, file2, 1024, (short)3, 1L);
    DFSTestUtil.waitReplication(fs, file2, (short)2);

    assertFalse("2nd DN should be dead", cluster.getDataNodes().get(1).isDatanodeUp());
  }
  finally {
    dn2Vol1.setExecutable(true);
  }
}
 
Example 4
Source File: TestDataNodeVolumeFailure.java    From big-c with Apache License 2.0
/**
 * Test that there are under-replicated blocks after volume failures.
 */
@Test
public void testUnderReplicationAfterVolFailure() throws Exception {
  // This test relies on denying access to data volumes to simulate data volume
  // failure.  This doesn't work on Windows, because an owner of an object
  // always has the ability to read and change permissions on the object.
  assumeTrue(!Path.WINDOWS);

  // Bring up one more datanode
  cluster.startDataNodes(conf, 1, true, null, null);
  cluster.waitActive();

  final BlockManager bm = cluster.getNamesystem().getBlockManager();

  Path file1 = new Path("/test1");
  DFSTestUtil.createFile(fs, file1, 1024, (short)3, 1L);
  DFSTestUtil.waitReplication(fs, file1, (short)3);

  // Fail the first volume on both datanodes
  File dn1Vol1 = new File(dataDir, "data"+(2*0+1));
  File dn2Vol1 = new File(dataDir, "data"+(2*1+1));
  DataNodeTestUtils.injectDataDirFailure(dn1Vol1, dn2Vol1);

  Path file2 = new Path("/test2");
  DFSTestUtil.createFile(fs, file2, 1024, (short)3, 1L);
  DFSTestUtil.waitReplication(fs, file2, (short)3);

  // underReplicatedBlocks are due to failed volumes
  int underReplicatedBlocks =
      BlockManagerTestUtil.checkHeartbeatAndGetUnderReplicatedBlocksCount(
          cluster.getNamesystem(), bm);
  assertTrue("There is no under replicated block after volume failure",
      underReplicatedBlocks > 0);
}
 
Example 5
Source File: TestDataNodeVolumeFailureToleration.java    From big-c with Apache License 2.0
/**
 * Test the DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY configuration
 * option, i.e. the DN shuts itself down when the number of volume
 * failures it experiences exceeds the tolerated amount.
 */
@Test
public void testConfigureMinValidVolumes() throws Exception {
  assumeTrue(!System.getProperty("os.name").startsWith("Windows"));

  // Bring up two additional datanodes that need both of their volumes
  // functioning in order to stay up.
  conf.setInt(DFSConfigKeys.DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY, 0);
  cluster.startDataNodes(conf, 2, true, null, null);
  cluster.waitActive();
  final DatanodeManager dm = cluster.getNamesystem().getBlockManager(
      ).getDatanodeManager();
  long origCapacity = DFSTestUtil.getLiveDatanodeCapacity(dm);
  long dnCapacity = DFSTestUtil.getDatanodeCapacity(dm, 0);

  // Fail a volume on the 2nd DN
  File dn2Vol1 = new File(dataDir, "data"+(2*1+1));
  DataNodeTestUtils.injectDataDirFailure(dn2Vol1);

  // Should only get two replicas (the first DN and the 3rd)
  Path file1 = new Path("/test1");
  DFSTestUtil.createFile(fs, file1, 1024, (short)3, 1L);
  DFSTestUtil.waitReplication(fs, file1, (short)2);

  // Check that this single failure caused a DN to die.
  DFSTestUtil.waitForDatanodeStatus(dm, 2, 1, 0, 
      origCapacity - (1*dnCapacity), WAIT_FOR_HEARTBEATS);

  // If we restore the volume we should still only be able to get
  // two replicas since the DN is still considered dead.
  DataNodeTestUtils.restoreDataDirFromFailure(dn2Vol1);
  Path file2 = new Path("/test2");
  DFSTestUtil.createFile(fs, file2, 1024, (short)3, 1L);
  DFSTestUtil.waitReplication(fs, file2, (short)2);
}
 
Example 6
Source File: TestDataNodeVolumeFailureReporting.java    From big-c with Apache License 2.0
/**
 * Test that the NN re-learns of volume failures after restart.
 */
@Test
public void testVolFailureStatsPreservedOnNNRestart() throws Exception {
  // Bring up two more datanodes that can tolerate 1 failure
  cluster.startDataNodes(conf, 2, true, null, null);
  cluster.waitActive();

  final DatanodeManager dm = cluster.getNamesystem().getBlockManager(
      ).getDatanodeManager();
  long origCapacity = DFSTestUtil.getLiveDatanodeCapacity(dm);
  long dnCapacity = DFSTestUtil.getDatanodeCapacity(dm, 0);

  // Fail the first volume on both datanodes (we have to keep the 
  // third healthy so one node in the pipeline will not fail). 
  File dn1Vol1 = new File(dataDir, "data"+(2*0+1));
  File dn2Vol1 = new File(dataDir, "data"+(2*1+1));
  DataNodeTestUtils.injectDataDirFailure(dn1Vol1, dn2Vol1);

  Path file1 = new Path("/test1");
  DFSTestUtil.createFile(fs, file1, 1024, (short)2, 1L);
  DFSTestUtil.waitReplication(fs, file1, (short)2);
  ArrayList<DataNode> dns = cluster.getDataNodes();

  // The NN reports two volume failures
  DFSTestUtil.waitForDatanodeStatus(dm, 3, 0, 2, 
      origCapacity - (1*dnCapacity), WAIT_FOR_HEARTBEATS);
  checkAggregateFailuresAtNameNode(true, 2);
  checkFailuresAtNameNode(dm, dns.get(0), true, dn1Vol1.getAbsolutePath());
  checkFailuresAtNameNode(dm, dns.get(1), true, dn2Vol1.getAbsolutePath());

  // After restarting the NN it should still see the two failures
  cluster.restartNameNode(0);
  cluster.waitActive();
  DFSTestUtil.waitForDatanodeStatus(dm, 3, 0, 2,
      origCapacity - (1*dnCapacity), WAIT_FOR_HEARTBEATS);
  checkAggregateFailuresAtNameNode(true, 2);
  checkFailuresAtNameNode(dm, dns.get(0), true, dn1Vol1.getAbsolutePath());
  checkFailuresAtNameNode(dm, dns.get(1), true, dn2Vol1.getAbsolutePath());
}
 
Example 7
Source File: TestFsckWithMultipleNameNodes.java    From big-c with Apache License 2.0
/** create a file with a length of <code>len</code> */
private void createFile(int index, long len
    ) throws IOException, InterruptedException, TimeoutException {
  final FileSystem fs = cluster.getFileSystem(index);
  DFSTestUtil.createFile(fs, FILE_PATH, len, replication, RANDOM.nextLong());
  DFSTestUtil.waitReplication(fs, FILE_PATH, replication);
}
 
Example 8
Source File: TestUnderReplicatedBlocks.java    From hadoop with Apache License 2.0
@Test(timeout=60000) // 1 min timeout
public void testSetrepIncWithUnderReplicatedBlocks() throws Exception {
  Configuration conf = new HdfsConfiguration();
  final short REPLICATION_FACTOR = 2;
  final String FILE_NAME = "/testFile";
  final Path FILE_PATH = new Path(FILE_NAME);
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(REPLICATION_FACTOR + 1).build();
  try {
    // create a file with one block with a replication factor of 2
    final FileSystem fs = cluster.getFileSystem();
    DFSTestUtil.createFile(fs, FILE_PATH, 1L, REPLICATION_FACTOR, 1L);
    DFSTestUtil.waitReplication(fs, FILE_PATH, REPLICATION_FACTOR);
    
    // remove one replica from the blocksMap so block becomes under-replicated
    // but the block does not get put into the under-replicated blocks queue
    final BlockManager bm = cluster.getNamesystem().getBlockManager();
    ExtendedBlock b = DFSTestUtil.getFirstBlock(fs, FILE_PATH);
    DatanodeDescriptor dn = bm.blocksMap.getStorages(b.getLocalBlock())
        .iterator().next().getDatanodeDescriptor();
    bm.addToInvalidates(b.getLocalBlock(), dn);
    Thread.sleep(5000);
    bm.blocksMap.removeNode(b.getLocalBlock(), dn);
    
    // increment this file's replication factor
    FsShell shell = new FsShell(conf);
    assertEquals(0, shell.run(new String[]{
        "-setrep", "-w", Integer.toString(1+REPLICATION_FACTOR), FILE_NAME}));
  } finally {
    cluster.shutdown();
  }
  
}
 
Example 9
Source File: UtilsForTests.java    From RDFS with Apache License 2.0
static void writeFile(NameNode namenode, Configuration conf, Path name, 
    short replication) throws IOException {
  FileSystem fileSys = FileSystem.get(conf);
  SequenceFile.Writer writer = 
    SequenceFile.createWriter(fileSys, conf, name, 
                              BytesWritable.class, BytesWritable.class,
                              CompressionType.NONE);
  writer.append(new BytesWritable(), new BytesWritable());
  writer.close();
  fileSys.setReplication(name, replication);
  DFSTestUtil.waitReplication(fileSys, name, replication);
}
 
Example 10
Source File: TestDiskError.java    From big-c with Apache License 2.0
/**
 * Test to check that a DN goes down when all its volumes have failed.
 */
@Test
public void testShutdown() throws Exception {
  if (System.getProperty("os.name").startsWith("Windows")) {
    /**
     * This test depends on OS not allowing file creations on a directory
     * that does not have write permissions for the user. Apparently it is 
     * not the case on Windows (at least under Cygwin), and possibly AIX.
     * This is disabled on Windows.
     */
    return;
  }
  // Bring up two more datanodes
  cluster.startDataNodes(conf, 2, true, null, null);
  cluster.waitActive();
  final int dnIndex = 0;
  String bpid = cluster.getNamesystem().getBlockPoolId();
  File storageDir = cluster.getInstanceStorageDir(dnIndex, 0);
  File dir1 = MiniDFSCluster.getRbwDir(storageDir, bpid);
  storageDir = cluster.getInstanceStorageDir(dnIndex, 1);
  File dir2 = MiniDFSCluster.getRbwDir(storageDir, bpid);
  try {
    // make the data directories of the first datanode read-only
    assertTrue("Couldn't chmod local vol", dir1.setReadOnly());
    assertTrue("Couldn't chmod local vol", dir2.setReadOnly());

    // keep creating files until the first datanode goes down
    DataNode dn = cluster.getDataNodes().get(dnIndex);
    for (int i=0; dn.isDatanodeUp(); i++) {
      Path fileName = new Path("/test.txt"+i);
      DFSTestUtil.createFile(fs, fileName, 1024, (short)2, 1L);
      DFSTestUtil.waitReplication(fs, fileName, (short)2);
      fs.delete(fileName, true);
    }
  } finally {
    // restore its old permission
    FileUtil.setWritable(dir1, true);
    FileUtil.setWritable(dir2, true);
  }
}
 
Example 11
Source File: TestFSEditLogLoader.java    From big-c with Apache License 2.0
/**
 * Test that, if the NN restarts with a new minimum replication,
 * any files created with the old replication count will get
 * automatically bumped up to the new minimum upon restart.
 */
@Test
public void testReplicationAdjusted() throws Exception {
  // start a cluster 
  Configuration conf = new HdfsConfiguration();
  // Replicate and heartbeat fast to shave a few seconds off test
  conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_INTERVAL_KEY, 1);
  conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1);

  MiniDFSCluster cluster = null;
  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2)
        .build();
    cluster.waitActive();
    FileSystem fs = cluster.getFileSystem();

    // Create a file with replication count 1
    Path p = new Path("/testfile");
    DFSTestUtil.createFile(fs, p, 10, /*repl*/ (short)1, 1);
    DFSTestUtil.waitReplication(fs, p, (short)1);

    // Shut down and restart cluster with new minimum replication of 2
    cluster.shutdown();
    cluster = null;
    
    conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_KEY, 2);

    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2)
      .format(false).build();
    cluster.waitActive();
    fs = cluster.getFileSystem();
    
    // The file should get adjusted to replication 2 when
    // the edit log is replayed.
    DFSTestUtil.waitReplication(fs, p, (short)2);
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
 
Example 12
Source File: TestOverReplicatedBlocks.java    From hadoop with Apache License 2.0
/** Test that processOverReplicatedBlock can handle corrupt replicas correctly.
 * It makes sure that corrupt replicas are not treated as valid ones,
 * which prevents the NN from deleting valid replicas while keeping
 * corrupt ones.
 */
@Test
public void testProcesOverReplicateBlock() throws Exception {
  Configuration conf = new HdfsConfiguration();
  conf.setLong(DFSConfigKeys.DFS_DATANODE_SCAN_PERIOD_HOURS_KEY, 100L);
  conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L);
  conf.set(
      DFSConfigKeys.DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY,
      Integer.toString(2));
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
  FileSystem fs = cluster.getFileSystem();

  try {
    final Path fileName = new Path("/foo1");
    DFSTestUtil.createFile(fs, fileName, 2, (short)3, 0L);
    DFSTestUtil.waitReplication(fs, fileName, (short)3);
    
    // corrupt the block on datanode 0
    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, fileName);
    assertTrue(cluster.corruptReplica(0, block));
    DataNodeProperties dnProps = cluster.stopDataNode(0);
    // remove block scanner log to trigger block scanning
    File scanCursor = new File(new File(MiniDFSCluster.getFinalizedDir(
        cluster.getInstanceStorageDir(0, 0),
        cluster.getNamesystem().getBlockPoolId()).getParent()).getParent(),
        "scanner.cursor");
    // wait up to one minute for the deletion to succeed
    for(int i = 0; !scanCursor.delete(); i++) {
      assertTrue("Could not delete " + scanCursor.getAbsolutePath() +
          " in one minute", i < 60);
      try {
        Thread.sleep(1000);
      } catch (InterruptedException ignored) {}
    }
    
    // restart the datanode so the corrupt replica will be detected
    cluster.restartDataNode(dnProps);
    DFSTestUtil.waitReplication(fs, fileName, (short)2);
    
    String blockPoolId = cluster.getNamesystem().getBlockPoolId();
    final DatanodeID corruptDataNode = 
      DataNodeTestUtils.getDNRegistrationForBP(
          cluster.getDataNodes().get(2), blockPoolId);
       
    final FSNamesystem namesystem = cluster.getNamesystem();
    final BlockManager bm = namesystem.getBlockManager();
    final HeartbeatManager hm = bm.getDatanodeManager().getHeartbeatManager();
    try {
      namesystem.writeLock();
      synchronized(hm) {
        // set live datanode's remaining space to be 0 
        // so they will be chosen to be deleted when over-replication occurs
        String corruptMachineName = corruptDataNode.getXferAddr();
        for (DatanodeDescriptor datanode : hm.getDatanodes()) {
          if (!corruptMachineName.equals(datanode.getXferAddr())) {
            datanode.getStorageInfos()[0].setUtilizationForTesting(100L, 100L, 0, 100L);
            datanode.updateHeartbeat(
                BlockManagerTestUtil.getStorageReportsForDatanode(datanode),
                0L, 0L, 0, 0, null);
          }
        }

        // decrease the replication factor to 1; 
        NameNodeAdapter.setReplication(namesystem, fileName.toString(), (short)1);

        // corrupt one won't be chosen to be excess one
        // without 4910 the number of live replicas would be 0: block gets lost
        assertEquals(1, bm.countNodes(block.getLocalBlock()).liveReplicas());
      }
    } finally {
      namesystem.writeUnlock();
    }
    
  } finally {
    cluster.shutdown();
  }
}
 
Example 13
Source File: TestFsck.java    From hadoop with Apache License 2.0
/** check if the -list-corruptfileblocks option of the fsck command works properly */
@Test
public void testFsckListCorruptFilesBlocks() throws Exception {
  Configuration conf = new Configuration();
  conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000);
  conf.setInt(DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_INTERVAL_KEY, 1);
  FileSystem fs = null;

  MiniDFSCluster cluster = null;
  try {
    cluster = new MiniDFSCluster.Builder(conf).build();
    cluster.waitActive();
    fs = cluster.getFileSystem();
    DFSTestUtil util = new DFSTestUtil.Builder().
        setName("testGetCorruptFiles").setNumFiles(3).setMaxLevels(1).
        setMaxSize(1024).build();
    util.createFiles(fs, "/corruptData", (short) 1);
    util.waitReplication(fs, "/corruptData", (short) 1);

    // String outStr = runFsck(conf, 0, true, "/corruptData", "-list-corruptfileblocks");
    String outStr = runFsck(conf, 0, false, "/corruptData", "-list-corruptfileblocks");
    System.out.println("1. good fsck out: " + outStr);
    assertTrue(outStr.contains("has 0 CORRUPT files"));
    // delete the blocks
    final String bpid = cluster.getNamesystem().getBlockPoolId();
    for (int i=0; i<4; i++) {
      for (int j=0; j<=1; j++) {
        File storageDir = cluster.getInstanceStorageDir(i, j);
        File data_dir = MiniDFSCluster.getFinalizedDir(storageDir, bpid);
        List<File> metadataFiles = MiniDFSCluster.getAllBlockMetadataFiles(
            data_dir);
        if (metadataFiles == null)
          continue;
        for (File metadataFile : metadataFiles) {
          File blockFile = Block.metaToBlockFile(metadataFile);
          assertTrue("Cannot remove file.", blockFile.delete());
          assertTrue("Cannot remove file.", metadataFile.delete());
        }
      }
    }

    // wait for the namenode to see the corruption
    final NamenodeProtocols namenode = cluster.getNameNodeRpc();
    CorruptFileBlocks corruptFileBlocks = namenode
        .listCorruptFileBlocks("/corruptData", null);
    int numCorrupt = corruptFileBlocks.getFiles().length;
    while (numCorrupt == 0) {
      Thread.sleep(1000);
      corruptFileBlocks = namenode
          .listCorruptFileBlocks("/corruptData", null);
      numCorrupt = corruptFileBlocks.getFiles().length;
    }
    outStr = runFsck(conf, -1, true, "/corruptData", "-list-corruptfileblocks");
    System.out.println("2. bad fsck out: " + outStr);
    assertTrue(outStr.contains("has 3 CORRUPT files"));

    // Do a listing on a dir which doesn't have any corrupt blocks and validate
    util.createFiles(fs, "/goodData");
    outStr = runFsck(conf, 0, true, "/goodData", "-list-corruptfileblocks");
    System.out.println("3. good fsck out: " + outStr);
    assertTrue(outStr.contains("has 0 CORRUPT files"));
    util.cleanup(fs,"/corruptData");
    util.cleanup(fs, "/goodData");
  } finally {
    if (cluster != null) {cluster.shutdown();}
  }
}
 
Example 14
Source File: TestNameNodeReconfigure.java    From RDFS with Apache License 2.0
/**
 * Test that we can change the block placement policy through the
 * reconfigurable API.
 */
@Test
public void testChangeBlockPlacementPolicy()
  throws IOException, ReconfigurationException {
  AtomicInteger callCounter = new AtomicInteger(0);
  MockPlacementPolicy.setCallCounter(callCounter);

  DFSTestUtil util = new DFSTestUtil("", 2, 1, 512);

  // write some files with the default block placement policy
  util.createFiles(fs, "/reconfdat1", (short) 3);
  util.waitReplication(fs, "/reconfdat1", (short) 3);

  assertTrue("calls already made to MockPlacementPolicy",
             callCounter.get() == 0);

  // switch over to the mock placement policy
  cluster.getNameNode().reconfigureProperty("dfs.block.replicator.classname",
                                            "org.apache.hadoop.hdfs.server." +
                                            "namenode." +
                                            "TestNameNodeReconfigure$" +
                                            "MockPlacementPolicy");

  // write some files with the mock placement policy
  util.createFiles(fs, "/reconfdat2", (short) 3);
  util.waitReplication(fs, "/reconfdat2", (short) 3);

  int callsMade1 = callCounter.get();
  
  // check that calls were made to mock placement policy
  assertTrue("no calls made to MockPlacementPolicy",
             callsMade1 > 0);
  LOG.info("" + callsMade1 + " calls made to MockPlacementPolicy");

  // now try to change it to a non-existent class
  try {
    cluster.getNameNode().
      reconfigureProperty("dfs.block.replicator.classname",
                          "does.not.exist");
    fail("ReconfigurationException expected");
  } catch (RuntimeException expected) {
    assertTrue("exception should have cause", expected.getCause() != null);
    assertTrue("exception's cause should have cause",
               expected.getCause().getCause() != null);
    assertTrue("ClassNotFoundException expected but got " +
               expected.getCause().getCause().getClass().getCanonicalName(),
               expected.getCause().getCause() instanceof
               ClassNotFoundException);
  }

  // write some files, they should still go to the mock placement policy
  util.createFiles(fs, "/reconfdat3", (short) 3);
  util.waitReplication(fs, "/reconfdat3", (short) 3);

  int callsMade2 = callCounter.get();

  // check that more calls were made to mock placement policy
  assertTrue("no calls made to MockPlacementPolicy",
             callsMade2 > callsMade1);
  LOG.info("" + (callsMade2 - callsMade1) +
    " calls made to MockPlacementPolicy");

  // now revert back to the default policy
  cluster.getNameNode().reconfigureProperty("dfs.block.replicator.classname",
                                            null);

  // write some files with the default block placement policy
  util.createFiles(fs, "/reconfdat4", (short) 3);
  util.waitReplication(fs, "/reconfdat4", (short) 3);

  // make sure that no more calls were made to mock placement policy
  assertTrue("more calls made to MockPlacementPolicy",
             callCounter.get() == callsMade2);

  util.cleanup(fs, "/reconfdat1");
  util.cleanup(fs, "/reconfdat2");
  util.cleanup(fs, "/reconfdat3");
  util.cleanup(fs, "/reconfdat4");
}
 
Example 15
Source File: TestListCorruptFileBlocks.java    From hadoop with Apache License 2.0
/**
 * Test if NN.listCorruptFiles() returns the right number of results.
 * The corrupt blocks are detected by the BlockPoolSliceScanner.
 * Also, test that DFS.listCorruptFileBlocks can make multiple successive
 * calls.
 */
@Test (timeout=300000)
public void testMaxCorruptFiles() throws Exception {
  MiniDFSCluster cluster = null;
  try {
    Configuration conf = new HdfsConfiguration();
    conf.setInt(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 3 * 1000); // datanode sends block reports
    cluster = new MiniDFSCluster.Builder(conf).build();
    FileSystem fs = cluster.getFileSystem();
    final int maxCorruptFileBlocks = 
      FSNamesystem.DEFAULT_MAX_CORRUPT_FILEBLOCKS_RETURNED;

    // create 3 * maxCorruptFileBlocks files with one block each
    DFSTestUtil util = new DFSTestUtil.Builder().setName("testMaxCorruptFiles").
        setNumFiles(maxCorruptFileBlocks * 3).setMaxLevels(1).setMaxSize(512).
        build();
    util.createFiles(fs, "/srcdat2", (short) 1);
    util.waitReplication(fs, "/srcdat2", (short) 1);

    // verify that there are no bad blocks.
    final NameNode namenode = cluster.getNameNode();
    Collection<FSNamesystem.CorruptFileBlockInfo> badFiles = namenode.
      getNamesystem().listCorruptFileBlocks("/srcdat2", null);
    assertTrue("Namenode has " + badFiles.size() + " corrupt files. Expecting none.",
        badFiles.size() == 0);

    // Now deliberately remove blocks from all files
    final String bpid = cluster.getNamesystem().getBlockPoolId();
    for (int i=0; i<4; i++) {
      for (int j=0; j<=1; j++) {
        File storageDir = cluster.getInstanceStorageDir(i, j);
        File data_dir = MiniDFSCluster.getFinalizedDir(storageDir, bpid);
        LOG.info("Removing files from " + data_dir);
        List<File> metadataFiles = MiniDFSCluster.getAllBlockMetadataFiles(
            data_dir);
        if (metadataFiles == null)
          continue;
        for (File metadataFile : metadataFiles) {
          File blockFile = Block.metaToBlockFile(metadataFile);
          assertTrue("Cannot remove file.", blockFile.delete());
          assertTrue("Cannot remove file.", metadataFile.delete());
        }
      }
    }

    // Occasionally the BlockPoolSliceScanner can run before we have removed
    // the blocks. Restart the Datanode to trigger the scanner into running
    // once more.
    LOG.info("Restarting Datanode to trigger BlockPoolSliceScanner");
    cluster.restartDataNodes();
    cluster.waitActive();

    badFiles = 
      namenode.getNamesystem().listCorruptFileBlocks("/srcdat2", null);
      
    while (badFiles.size() < maxCorruptFileBlocks) {
      LOG.info("# of corrupt files is: " + badFiles.size());
      Thread.sleep(10000);
      badFiles = namenode.getNamesystem().
        listCorruptFileBlocks("/srcdat2", null);
    }
    badFiles = namenode.getNamesystem().
      listCorruptFileBlocks("/srcdat2", null); 
    LOG.info("Namenode has bad files. " + badFiles.size());
    assertTrue("Namenode has " + badFiles.size() + " bad files. Expecting " + 
        maxCorruptFileBlocks + ".",
        badFiles.size() == maxCorruptFileBlocks);

    CorruptFileBlockIterator iter = (CorruptFileBlockIterator)
      fs.listCorruptFileBlocks(new Path("/srcdat2"));
    int corruptPaths = countPaths(iter);
    assertTrue("Expected more than " + maxCorruptFileBlocks +
               " corrupt file blocks but got " + corruptPaths,
               corruptPaths > maxCorruptFileBlocks);
    assertTrue("Iterator should have made more than 1 call but made " +
               iter.getCallsMade(),
               iter.getCallsMade() > 1);

    util.cleanup(fs, "/srcdat2");
  } finally {
    if (cluster != null) { cluster.shutdown(); }
  }
}
 
Example 16
Source File: TestDiskError.java    From hadoop-gpu with Apache License 2.0
public void testReplicationError() throws Exception {
  // bring up a cluster of 1
  Configuration conf = new Configuration();
  MiniDFSCluster cluster = new MiniDFSCluster(conf, 1, true, null);
  cluster.waitActive();
  FileSystem fs = cluster.getFileSystem();
  
  try {
    // create a file with a replication factor of 1
    final Path fileName = new Path("/test.txt");
    final int fileLen = 1;
    DFSTestUtil.createFile(fs, fileName, 1, (short)1, 1L);
    DFSTestUtil.waitReplication(fs, fileName, (short)1);

    // get the block belonging to the created file
    LocatedBlocks blocks = cluster.getNameNode().namesystem.getBlockLocations(
        fileName.toString(), 0, (long)fileLen);
    assertEquals(blocks.locatedBlockCount(), 1);
    LocatedBlock block = blocks.get(0);
    
    // bring up a second datanode
    cluster.startDataNodes(conf, 1, true, null, null);
    cluster.waitActive();
    final int sndNode = 1;
    DataNode datanode = cluster.getDataNodes().get(sndNode);
    
    // replicate the block to the second datanode
    InetSocketAddress target = datanode.getSelfAddr();
    Socket s = new Socket(target.getAddress(), target.getPort());
    // write the header
    DataOutputStream out = new DataOutputStream(
        s.getOutputStream());

    out.writeShort( DataTransferProtocol.DATA_TRANSFER_VERSION );
    out.write( DataTransferProtocol.OP_WRITE_BLOCK );
    out.writeLong( block.getBlock().getBlockId());
    out.writeLong( block.getBlock().getGenerationStamp() );
    out.writeInt(1);
    out.writeBoolean( false );       // recovery flag
    Text.writeString( out, "" );
    out.writeBoolean(false); // Not sending src node information
    out.writeInt(0);
    
    // write the checksum header
    out.writeByte( 1 );
    out.writeInt( 512 );

    out.flush();

    // close the connection before sending the content of the block
    out.close();
    
    // the temporary block & meta files should be deleted
    String dataDir = cluster.getDataDirectory();
    File dir1 = new File(new File(dataDir, "data"+(2*sndNode+1)), "tmp");
    File dir2 = new File(new File(dataDir, "data"+(2*sndNode+2)), "tmp");
    while (dir1.listFiles().length != 0 || dir2.listFiles().length != 0) {
      Thread.sleep(100);
    }
    
    // then increase the file's replication factor
    fs.setReplication(fileName, (short)2);
    // replication should succeed
    DFSTestUtil.waitReplication(fs, fileName, (short)1);
    
    // clean up the file
    fs.delete(fileName, false);
  } finally {
    cluster.shutdown();
  }
}
 
Example 17
Source File: TestFileTruncate.java    From hadoop with Apache License 2.0
/**
 * The last block is truncated in the middle (copy-on-truncate).
 * dn1 is shut down before the truncate and restarted after the truncate succeeds.
 */
@Test(timeout=60000)
public void testCopyOnTruncateWithDataNodesRestart() throws Exception {
  int startingFileSize = 3 * BLOCK_SIZE;
  byte[] contents = AppendTestUtil.initBuffer(startingFileSize);
  final Path parent = new Path("/test");
  final Path p = new Path(parent, "testCopyOnTruncateWithDataNodesRestart");

  writeContents(contents, startingFileSize, p);
  LocatedBlock oldBlock = getLocatedBlocks(p).getLastLocatedBlock();
  fs.allowSnapshot(parent);
  fs.createSnapshot(parent, "ss0");

  int dn = 1;
  int toTruncateLength = 1;
  int newLength = startingFileSize - toTruncateLength;
  cluster.getDataNodes().get(dn).shutdown();
  try {
    boolean isReady = fs.truncate(p, newLength);
    assertFalse(isReady);
  } finally {
    cluster.restartDataNode(dn, true, true);
    cluster.waitActive();
  }
  checkBlockRecovery(p);

  LocatedBlock newBlock = getLocatedBlocks(p).getLastLocatedBlock();
  /*
   * For copy-on-truncate, a new block is created with a new block id and a new GS.
   * The new block starts with 2 replicas and will then be replicated to dn1.
   */
  assertNotEquals(newBlock.getBlock().getBlockId(), 
      oldBlock.getBlock().getBlockId());
  assertEquals(newBlock.getBlock().getGenerationStamp(),
      oldBlock.getBlock().getGenerationStamp() + 1);

  // Wait for the replica count to reach 3
  DFSTestUtil.waitReplication(fs, p, REPLICATION);
  // New block is replicated to dn1
  assertEquals(cluster.getBlockFile(dn, newBlock.getBlock()).length(), 
      newBlock.getBlockSize());
  // Old replica exists too since there is snapshot
  assertEquals(cluster.getBlockFile(dn, oldBlock.getBlock()).length(), 
      oldBlock.getBlockSize());
  assertTrue(cluster.getBlockMetadataFile(dn, 
      oldBlock.getBlock()).getName().endsWith(
          oldBlock.getBlock().getGenerationStamp() + ".meta"));

  // Validate the file
  FileStatus fileStatus = fs.getFileStatus(p);
  assertThat(fileStatus.getLen(), is((long) newLength));
  checkFullFile(p, newLength, contents);

  fs.deleteSnapshot(parent, "ss0");
  fs.delete(parent, true);
}
 
Example 18
Source File: TestOverReplicatedBlocks.java    From RDFS with Apache License 2.0
/** Test that processOverReplicatedBlock can handle corrupt replicas correctly.
 * It makes sure that corrupt replicas are not treated as valid ones,
 * which prevents the NN from deleting valid replicas while keeping
 * corrupt ones.
 */
public void testProcesOverReplicateBlock() throws IOException {
  Configuration conf = new Configuration();
  conf.setLong("dfs.blockreport.intervalMsec", 1000L);
  conf.set("dfs.replication.pending.timeout.sec", Integer.toString(2));
  MiniDFSCluster cluster = new MiniDFSCluster(conf, 3, true, null);
  FileSystem fs = cluster.getFileSystem();

  try {
    int namespaceId = cluster.getNameNode().getNamespaceID();
    final Path fileName = new Path("/foo1");
    DFSTestUtil.createFile(fs, fileName, 2, (short)3, 0L);
    DFSTestUtil.waitReplication(fs, fileName, (short)3);
    
    // corrupt the block on datanode 0
    Block block = DFSTestUtil.getFirstBlock(fs, fileName);
    TestDatanodeBlockScanner.corruptReplica(block.getBlockName(), 0, cluster);
    DataNodeProperties dnProps = cluster.stopDataNode(0);
    // remove block scanner log to trigger block scanning
    File scanLog = new File(cluster.getBlockDirectory("data1").getParent(), "dncp_block_verification.log.curr");
    //wait for one minute for deletion to succeed;
    scanLog.delete();
    
    // restart the datanode so the corrupt replica will be detected
    cluster.restartDataNode(dnProps);
    DFSTestUtil.waitReplication(fs, fileName, (short)2);
    
    final DatanodeID corruptDataNode = 
      cluster.getDataNodes().get(2).getDNRegistrationForNS(namespaceId);
    final FSNamesystem namesystem = cluster.getNameNode().getNamesystem();
    synchronized (namesystem.heartbeats) {
      // set live datanode's remaining space to be 0 
      // so they will be chosen to be deleted when over-replication occurs
      for (DatanodeDescriptor datanode : namesystem.heartbeats) {
        if (!corruptDataNode.equals(datanode)) {
          datanode.updateHeartbeat(100L, 100L, 0L, 100L, 0);
        }
      }
    }
      
    // decrease the replication factor to 1; 
    namesystem.setReplication(fileName.toString(), (short)1);
    waitReplication(namesystem, block, (short)1);
    
    // corrupt one won't be chosen to be excess one
    // without 4910 the number of live replicas would be 0: block gets lost
    assertEquals(1, namesystem.countNodes(block).liveReplicas());

    // Test the case when multiple calls to setReplication still succeeds.
    System.out.println("Starting next test with file foo2.");
    final Path fileName2 = new Path("/foo1");
    DFSTestUtil.createFile(fs, fileName2, 2, (short)3, 0L);
    DFSTestUtil.waitReplication(fs, fileName2, (short)3);
    LocatedBlocks lbs = namesystem.getBlockLocations(
               fileName2.toString(), 0, 10);
    Block firstBlock = lbs.get(0).getBlock();
    namesystem.setReplication(fileName2.toString(), (short)2);
    namesystem.setReplication(fileName2.toString(), (short)1);
    
    // wait up to one minute for excess replicas to get deleted. It is not
    // immediate because excess replicas are handled asynchronously.
    waitReplication(namesystem, firstBlock, (short)1);
    assertEquals(1, namesystem.countNodes(firstBlock).liveReplicas());
  } finally {
    cluster.shutdown();
  }
}
 
Example 19
Source File: TestOverReplicatedBlocks.java    From hadoop with Apache License 2.0
/**
 * The test verifies that the replica chosen for deletion is on the node
 * with the oldest heartbeat, when that heartbeat is older than the
 * tolerable heartbeat interval.
 * It creates a file with several blocks and replication 4.
 * The last DN is configured to send heartbeats rarely.
 * 
 * Test waits until the tolerable heartbeat interval expires, and reduces
 * replication of the file. All replica deletions should be scheduled for the
 * last node. No replicas will actually be deleted, since last DN doesn't
 * send heartbeats. 
 */
@Test
public void testChooseReplicaToDelete() throws Exception {
  MiniDFSCluster cluster = null;
  FileSystem fs = null;
  try {
    Configuration conf = new HdfsConfiguration();
    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, SMALL_BLOCK_SIZE);
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
    fs = cluster.getFileSystem();
    final FSNamesystem namesystem = cluster.getNamesystem();

    conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 300);
    cluster.startDataNodes(conf, 1, true, null, null, null);
    DataNode lastDN = cluster.getDataNodes().get(3);
    DatanodeRegistration dnReg = DataNodeTestUtils.getDNRegistrationForBP(
        lastDN, namesystem.getBlockPoolId());
    String lastDNid = dnReg.getDatanodeUuid();

    final Path fileName = new Path("/foo2");
    DFSTestUtil.createFile(fs, fileName, SMALL_FILE_LENGTH, (short)4, 0L);
    DFSTestUtil.waitReplication(fs, fileName, (short)4);

    // Wait for tolerable number of heartbeats plus one
    DatanodeDescriptor nodeInfo = null;
    long lastHeartbeat = 0;
    long waitTime = DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_DEFAULT * 1000 *
      (DFSConfigKeys.DFS_NAMENODE_TOLERATE_HEARTBEAT_MULTIPLIER_DEFAULT + 1);
    do {
      nodeInfo = namesystem.getBlockManager().getDatanodeManager()
          .getDatanode(dnReg);
      lastHeartbeat = nodeInfo.getLastUpdateMonotonic();
    } while (monotonicNow() - lastHeartbeat < waitTime);
    fs.setReplication(fileName, (short)3);

    BlockLocation locs[] = fs.getFileBlockLocations(
        fs.getFileStatus(fileName), 0, Long.MAX_VALUE);

    // All replicas for deletion should be scheduled on lastDN.
    // And should not actually be deleted, because lastDN does not heartbeat.
    namesystem.readLock();
    Collection<Block> dnBlocks = 
      namesystem.getBlockManager().excessReplicateMap.get(lastDNid);
    assertEquals("Replicas on node " + lastDNid + " should have been deleted",
        SMALL_FILE_LENGTH / SMALL_BLOCK_SIZE, dnBlocks.size());
    namesystem.readUnlock();
    for(BlockLocation location : locs)
      assertEquals("Block should still have 4 replicas",
          4, location.getNames().length);
  } finally {
    if(fs != null) fs.close();
    if(cluster != null) cluster.shutdown();
  }
}
 
Example 20
Source File: TestFileTruncate.java    From big-c with Apache License 2.0
/**
 * The last block is truncated in the middle (non copy-on-truncate).
 * dn0 and dn1 are restarted immediately after the truncate.
 */
@Test(timeout=60000)
public void testTruncateWithDataNodesRestartImmediately() throws Exception {
  int startingFileSize = 3 * BLOCK_SIZE;
  byte[] contents = AppendTestUtil.initBuffer(startingFileSize);
  final Path parent = new Path("/test");
  final Path p = new Path(parent, "testTruncateWithDataNodesRestartImmediately");

  writeContents(contents, startingFileSize, p);
  LocatedBlock oldBlock = getLocatedBlocks(p).getLastLocatedBlock();

  int dn0 = 0;
  int dn1 = 1;
  int toTruncateLength = 1;
  int newLength = startingFileSize - toTruncateLength;
  boolean isReady = fs.truncate(p, newLength);
  assertFalse(isReady);

  cluster.restartDataNode(dn0, true, true);
  cluster.restartDataNode(dn1, true, true);
  cluster.waitActive();
  checkBlockRecovery(p);

  LocatedBlock newBlock = getLocatedBlocks(p).getLastLocatedBlock();
  /*
   * For non copy-on-truncate, the truncated block id is the same, but the 
   * GS should increase.
   */
  assertEquals(newBlock.getBlock().getBlockId(), 
      oldBlock.getBlock().getBlockId());
  assertEquals(newBlock.getBlock().getGenerationStamp(),
      oldBlock.getBlock().getGenerationStamp() + 1);

  // Wait for the replica count to reach 3
  DFSTestUtil.waitReplication(fs, p, REPLICATION);
  // Old replica is disregarded and replaced with the truncated one on dn0
  assertEquals(cluster.getBlockFile(dn0, newBlock.getBlock()).length(), 
      newBlock.getBlockSize());
  assertTrue(cluster.getBlockMetadataFile(dn0, 
      newBlock.getBlock()).getName().endsWith(
          newBlock.getBlock().getGenerationStamp() + ".meta"));

  // Old replica is disregarded and replaced with the truncated one on dn1
  assertEquals(cluster.getBlockFile(dn1, newBlock.getBlock()).length(), 
      newBlock.getBlockSize());
  assertTrue(cluster.getBlockMetadataFile(dn1, 
      newBlock.getBlock()).getName().endsWith(
          newBlock.getBlock().getGenerationStamp() + ".meta"));

  // Validate the file
  FileStatus fileStatus = fs.getFileStatus(p);
  assertThat(fileStatus.getLen(), is((long) newLength));
  checkFullFile(p, newLength, contents);

  fs.delete(parent, true);
}