org.apache.hadoop.hdfs.DFSTestUtil#waitReplication

Source File: TestUnderReplicatedBlocks.java From RDFS with Apache License 2.0

5 votes

public void testSetrepIncWithUnderReplicatedBlocks() throws Exception {
  Configuration conf = new Configuration();
  final short REPLICATION_FACTOR = 2;
  final String FILE_NAME = "/testFile";
  final Path FILE_PATH = new Path(FILE_NAME);
  MiniDFSCluster cluster = new MiniDFSCluster(conf, REPLICATION_FACTOR+1, true, null);
  try {
    // create a file with one block with a replication factor of 2
    final FileSystem fs = cluster.getFileSystem();
    DFSTestUtil.createFile(fs, FILE_PATH, 1L, REPLICATION_FACTOR, 1L);
    DFSTestUtil.waitReplication(fs, FILE_PATH, REPLICATION_FACTOR);
    
    // remove one replica from the blocksMap so block becomes under-replicated
    // but the block does not get put into the under-replicated blocks queue
    FSNamesystem namesystem = cluster.getNameNode().namesystem;
    Block b = DFSTestUtil.getFirstBlock(fs, FILE_PATH);
    DatanodeDescriptor dn = namesystem.blocksMap.nodeIterator(b).next();
    namesystem.addToInvalidates(b, dn, true);
    namesystem.blocksMap.removeNode(b, dn);
    
    // increment this file's replication factor
    FsShell shell = new FsShell(conf);
    assertEquals(0, shell.run(new String[]{
        "-setrep", "-w", Integer.toString(1+REPLICATION_FACTOR), FILE_NAME}));
  } finally {
    cluster.shutdown();
  }
  
}

Source File: TestBalancer.java From hadoop-gpu with Apache License 2.0

5 votes

private void createFile(long fileLen, short replicationFactor)
throws IOException {
  FileSystem fs = cluster.getFileSystem();
  DFSTestUtil.createFile(fs, filePath, fileLen, 
      replicationFactor, r.nextLong());
  DFSTestUtil.waitReplication(fs, filePath, replicationFactor);
}

Source File: TestDataNodeVolumeFailureToleration.java From RDFS with Apache License 2.0

5 votes

/**
 * Test the dfs.datanode.failed.volumes.tolerated configuration
 * option, ie the DN shuts itself down when the number of failures
 * experienced drops below the tolerated amount.
 */
@Test
public void testConfigureMinValidVolumes() throws Exception {
  assumeTrue(!System.getProperty("os.name").startsWith("Windows"));

  // Bring up two additional datanodes that need both of their volumes
  // functioning in order to stay up
  conf.setInt("dfs.datanode.failed.volumes.tolerated", 0);
  cluster.startDataNodes(conf, 2, true, null, null);
  cluster.waitActive();

  // Fail a volume on the 2nd DN
  File dn2Vol1 = new File(new File(dataDir, "data"+(2*1+1)), "current");
  try {
    assertTrue("Couldn't chmod local vol", dn2Vol1.setExecutable(false));

    // Should only get two replicas (the first DN and the 3rd)
    Path file1 = new Path("/test1");
    DFSTestUtil.createFile(fs, file1, 1024, (short)3, 1L);
    DFSTestUtil.waitReplication(fs, file1, (short)2);
  
    assertFalse("2nd DN should be dead", cluster.getDataNodes().get(1).isDatanodeUp());
  
    // If we restore the volume we should still only be able to get
    // two replicas since the DN is still considered dead.
    assertTrue("Couldn't chmod local vol", dn2Vol1.setExecutable(true));
    Path file2 = new Path("/test2");
    DFSTestUtil.createFile(fs, file2, 1024, (short)3, 1L);
    DFSTestUtil.waitReplication(fs, file2, (short)2);

    assertFalse("2nd DN should be dead", cluster.getDataNodes().get(1).isDatanodeUp());
  }
  finally {
    dn2Vol1.setExecutable(true);
  }
}

Source File: TestDataNodeVolumeFailure.java From big-c with Apache License 2.0

5 votes

/**
 * Test that there are under replication blocks after vol failures
 */
@Test
public void testUnderReplicationAfterVolFailure() throws Exception {
  // This test relies on denying access to data volumes to simulate data volume
  // failure.  This doesn't work on Windows, because an owner of an object
  // always has the ability to read and change permissions on the object.
  assumeTrue(!Path.WINDOWS);

  // Bring up one more datanode
  cluster.startDataNodes(conf, 1, true, null, null);
  cluster.waitActive();

  final BlockManager bm = cluster.getNamesystem().getBlockManager();

  Path file1 = new Path("/test1");
  DFSTestUtil.createFile(fs, file1, 1024, (short)3, 1L);
  DFSTestUtil.waitReplication(fs, file1, (short)3);

  // Fail the first volume on both datanodes
  File dn1Vol1 = new File(dataDir, "data"+(2*0+1));
  File dn2Vol1 = new File(dataDir, "data"+(2*1+1));
  DataNodeTestUtils.injectDataDirFailure(dn1Vol1, dn2Vol1);

  Path file2 = new Path("/test2");
  DFSTestUtil.createFile(fs, file2, 1024, (short)3, 1L);
  DFSTestUtil.waitReplication(fs, file2, (short)3);

  // underReplicatedBlocks are due to failed volumes
  int underReplicatedBlocks =
      BlockManagerTestUtil.checkHeartbeatAndGetUnderReplicatedBlocksCount(
          cluster.getNamesystem(), bm);
  assertTrue("There is no under replicated block after volume failure",
      underReplicatedBlocks > 0);
}

Source File: TestDataNodeVolumeFailureToleration.java From big-c with Apache License 2.0

5 votes

/**
 * Test the DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY configuration
 * option, ie the DN shuts itself down when the number of failures
 * experienced drops below the tolerated amount.
 */
@Test
public void testConfigureMinValidVolumes() throws Exception {
  assumeTrue(!System.getProperty("os.name").startsWith("Windows"));

  // Bring up two additional datanodes that need both of their volumes
  // functioning in order to stay up.
  conf.setInt(DFSConfigKeys.DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY, 0);
  cluster.startDataNodes(conf, 2, true, null, null);
  cluster.waitActive();
  final DatanodeManager dm = cluster.getNamesystem().getBlockManager(
      ).getDatanodeManager();
  long origCapacity = DFSTestUtil.getLiveDatanodeCapacity(dm);
  long dnCapacity = DFSTestUtil.getDatanodeCapacity(dm, 0);

  // Fail a volume on the 2nd DN
  File dn2Vol1 = new File(dataDir, "data"+(2*1+1));
  DataNodeTestUtils.injectDataDirFailure(dn2Vol1);

  // Should only get two replicas (the first DN and the 3rd)
  Path file1 = new Path("/test1");
  DFSTestUtil.createFile(fs, file1, 1024, (short)3, 1L);
  DFSTestUtil.waitReplication(fs, file1, (short)2);

  // Check that this single failure caused a DN to die.
  DFSTestUtil.waitForDatanodeStatus(dm, 2, 1, 0, 
      origCapacity - (1*dnCapacity), WAIT_FOR_HEARTBEATS);

  // If we restore the volume we should still only be able to get
  // two replicas since the DN is still considered dead.
  DataNodeTestUtils.restoreDataDirFromFailure(dn2Vol1);
  Path file2 = new Path("/test2");
  DFSTestUtil.createFile(fs, file2, 1024, (short)3, 1L);
  DFSTestUtil.waitReplication(fs, file2, (short)2);
}

Source File: TestDataNodeVolumeFailureReporting.java From big-c with Apache License 2.0

5 votes

/**
 * Test that the NN re-learns of volume failures after restart.
 */
@Test
public void testVolFailureStatsPreservedOnNNRestart() throws Exception {
  // Bring up two more datanodes that can tolerate 1 failure
  cluster.startDataNodes(conf, 2, true, null, null);
  cluster.waitActive();

  final DatanodeManager dm = cluster.getNamesystem().getBlockManager(
      ).getDatanodeManager();
  long origCapacity = DFSTestUtil.getLiveDatanodeCapacity(dm);
  long dnCapacity = DFSTestUtil.getDatanodeCapacity(dm, 0);

  // Fail the first volume on both datanodes (we have to keep the 
  // third healthy so one node in the pipeline will not fail). 
  File dn1Vol1 = new File(dataDir, "data"+(2*0+1));
  File dn2Vol1 = new File(dataDir, "data"+(2*1+1));
  DataNodeTestUtils.injectDataDirFailure(dn1Vol1, dn2Vol1);

  Path file1 = new Path("/test1");
  DFSTestUtil.createFile(fs, file1, 1024, (short)2, 1L);
  DFSTestUtil.waitReplication(fs, file1, (short)2);
  ArrayList<DataNode> dns = cluster.getDataNodes();

  // The NN reports two volumes failures
  DFSTestUtil.waitForDatanodeStatus(dm, 3, 0, 2, 
      origCapacity - (1*dnCapacity), WAIT_FOR_HEARTBEATS);
  checkAggregateFailuresAtNameNode(true, 2);
  checkFailuresAtNameNode(dm, dns.get(0), true, dn1Vol1.getAbsolutePath());
  checkFailuresAtNameNode(dm, dns.get(1), true, dn2Vol1.getAbsolutePath());

  // After restarting the NN it still see the two failures
  cluster.restartNameNode(0);
  cluster.waitActive();
  DFSTestUtil.waitForDatanodeStatus(dm, 3, 0, 2,
      origCapacity - (1*dnCapacity), WAIT_FOR_HEARTBEATS);
  checkAggregateFailuresAtNameNode(true, 2);
  checkFailuresAtNameNode(dm, dns.get(0), true, dn1Vol1.getAbsolutePath());
  checkFailuresAtNameNode(dm, dns.get(1), true, dn2Vol1.getAbsolutePath());
}

Source File: TestFsckWithMultipleNameNodes.java From big-c with Apache License 2.0

5 votes

/** create a file with a length of <code>fileLen</code> */
private void createFile(int index, long len
    ) throws IOException, InterruptedException, TimeoutException {
  final FileSystem fs = cluster.getFileSystem(index);
  DFSTestUtil.createFile(fs, FILE_PATH, len, replication, RANDOM.nextLong());
  DFSTestUtil.waitReplication(fs, FILE_PATH, replication);
}

Source File: TestUnderReplicatedBlocks.java From hadoop with Apache License 2.0

5 votes

@Test(timeout=60000) // 1 min timeout
public void testSetrepIncWithUnderReplicatedBlocks() throws Exception {
  Configuration conf = new HdfsConfiguration();
  final short REPLICATION_FACTOR = 2;
  final String FILE_NAME = "/testFile";
  final Path FILE_PATH = new Path(FILE_NAME);
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(REPLICATION_FACTOR + 1).build();
  try {
    // create a file with one block with a replication factor of 2
    final FileSystem fs = cluster.getFileSystem();
    DFSTestUtil.createFile(fs, FILE_PATH, 1L, REPLICATION_FACTOR, 1L);
    DFSTestUtil.waitReplication(fs, FILE_PATH, REPLICATION_FACTOR);
    
    // remove one replica from the blocksMap so block becomes under-replicated
    // but the block does not get put into the under-replicated blocks queue
    final BlockManager bm = cluster.getNamesystem().getBlockManager();
    ExtendedBlock b = DFSTestUtil.getFirstBlock(fs, FILE_PATH);
    DatanodeDescriptor dn = bm.blocksMap.getStorages(b.getLocalBlock())
        .iterator().next().getDatanodeDescriptor();
    bm.addToInvalidates(b.getLocalBlock(), dn);
    Thread.sleep(5000);
    bm.blocksMap.removeNode(b.getLocalBlock(), dn);
    
    // increment this file's replication factor
    FsShell shell = new FsShell(conf);
    assertEquals(0, shell.run(new String[]{
        "-setrep", "-w", Integer.toString(1+REPLICATION_FACTOR), FILE_NAME}));
  } finally {
    cluster.shutdown();
  }
  
}

Source File: UtilsForTests.java From RDFS with Apache License 2.0

5 votes

static void writeFile(NameNode namenode, Configuration conf, Path name, 
    short replication) throws IOException {
  FileSystem fileSys = FileSystem.get(conf);
  SequenceFile.Writer writer = 
    SequenceFile.createWriter(fileSys, conf, name, 
                              BytesWritable.class, BytesWritable.class,
                              CompressionType.NONE);
  writer.append(new BytesWritable(), new BytesWritable());
  writer.close();
  fileSys.setReplication(name, replication);
  DFSTestUtil.waitReplication(fileSys, name, replication);
}

Source File: TestDiskError.java From big-c with Apache License 2.0

5 votes

/**
 * Test to check that a DN goes down when all its volumes have failed.
 */
@Test
public void testShutdown() throws Exception {
  if (System.getProperty("os.name").startsWith("Windows")) {
    /**
     * This test depends on OS not allowing file creations on a directory
     * that does not have write permissions for the user. Apparently it is 
     * not the case on Windows (at least under Cygwin), and possibly AIX.
     * This is disabled on Windows.
     */
    return;
  }
  // Bring up two more datanodes
  cluster.startDataNodes(conf, 2, true, null, null);
  cluster.waitActive();
  final int dnIndex = 0;
  String bpid = cluster.getNamesystem().getBlockPoolId();
  File storageDir = cluster.getInstanceStorageDir(dnIndex, 0);
  File dir1 = MiniDFSCluster.getRbwDir(storageDir, bpid);
  storageDir = cluster.getInstanceStorageDir(dnIndex, 1);
  File dir2 = MiniDFSCluster.getRbwDir(storageDir, bpid);
  try {
    // make the data directory of the first datanode to be readonly
    assertTrue("Couldn't chmod local vol", dir1.setReadOnly());
    assertTrue("Couldn't chmod local vol", dir2.setReadOnly());

    // create files and make sure that first datanode will be down
    DataNode dn = cluster.getDataNodes().get(dnIndex);
    for (int i=0; dn.isDatanodeUp(); i++) {
      Path fileName = new Path("/test.txt"+i);
      DFSTestUtil.createFile(fs, fileName, 1024, (short)2, 1L);
      DFSTestUtil.waitReplication(fs, fileName, (short)2);
      fs.delete(fileName, true);
    }
  } finally {
    // restore its old permission
    FileUtil.setWritable(dir1, true);
    FileUtil.setWritable(dir2, true);
  }
}

Source File: TestFSEditLogLoader.java From big-c with Apache License 2.0

4 votes

/**
 * Test that, if the NN restarts with a new minimum replication,
 * any files created with the old replication count will get
 * automatically bumped up to the new minimum upon restart.
 */
@Test
public void testReplicationAdjusted() throws Exception {
  // start a cluster 
  Configuration conf = new HdfsConfiguration();
  // Replicate and heartbeat fast to shave a few seconds off test
  conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_INTERVAL_KEY, 1);
  conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1);

  MiniDFSCluster cluster = null;
  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2)
        .build();
    cluster.waitActive();
    FileSystem fs = cluster.getFileSystem();

    // Create a file with replication count 1
    Path p = new Path("/testfile");
    DFSTestUtil.createFile(fs, p, 10, /*repl*/ (short)1, 1);
    DFSTestUtil.waitReplication(fs, p, (short)1);

    // Shut down and restart cluster with new minimum replication of 2
    cluster.shutdown();
    cluster = null;
    
    conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MIN_KEY, 2);

    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2)
      .format(false).build();
    cluster.waitActive();
    fs = cluster.getFileSystem();
    
    // The file should get adjusted to replication 2 when
    // the edit log is replayed.
    DFSTestUtil.waitReplication(fs, p, (short)2);
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}

Source File: TestOverReplicatedBlocks.java From hadoop with Apache License 2.0

4 votes

/** Test processOverReplicatedBlock can handle corrupt replicas fine.
 * It make sure that it won't treat corrupt replicas as valid ones 
 * thus prevents NN deleting valid replicas but keeping
 * corrupt ones.
 */
@Test
public void testProcesOverReplicateBlock() throws Exception {
  Configuration conf = new HdfsConfiguration();
  conf.setLong(DFSConfigKeys.DFS_DATANODE_SCAN_PERIOD_HOURS_KEY, 100L);
  conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000L);
  conf.set(
      DFSConfigKeys.DFS_NAMENODE_REPLICATION_PENDING_TIMEOUT_SEC_KEY,
      Integer.toString(2));
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
  FileSystem fs = cluster.getFileSystem();

  try {
    final Path fileName = new Path("/foo1");
    DFSTestUtil.createFile(fs, fileName, 2, (short)3, 0L);
    DFSTestUtil.waitReplication(fs, fileName, (short)3);
    
    // corrupt the block on datanode 0
    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, fileName);
    assertTrue(cluster.corruptReplica(0, block));
    DataNodeProperties dnProps = cluster.stopDataNode(0);
    // remove block scanner log to trigger block scanning
    File scanCursor = new File(new File(MiniDFSCluster.getFinalizedDir(
        cluster.getInstanceStorageDir(0, 0),
        cluster.getNamesystem().getBlockPoolId()).getParent()).getParent(),
        "scanner.cursor");
    //wait for one minute for deletion to succeed;
    for(int i = 0; !scanCursor.delete(); i++) {
      assertTrue("Could not delete " + scanCursor.getAbsolutePath() +
          " in one minute", i < 60);
      try {
        Thread.sleep(1000);
      } catch (InterruptedException ignored) {}
    }
    
    // restart the datanode so the corrupt replica will be detected
    cluster.restartDataNode(dnProps);
    DFSTestUtil.waitReplication(fs, fileName, (short)2);
    
    String blockPoolId = cluster.getNamesystem().getBlockPoolId();
    final DatanodeID corruptDataNode = 
      DataNodeTestUtils.getDNRegistrationForBP(
          cluster.getDataNodes().get(2), blockPoolId);
       
    final FSNamesystem namesystem = cluster.getNamesystem();
    final BlockManager bm = namesystem.getBlockManager();
    final HeartbeatManager hm = bm.getDatanodeManager().getHeartbeatManager();
    try {
      namesystem.writeLock();
      synchronized(hm) {
        // set live datanode's remaining space to be 0 
        // so they will be chosen to be deleted when over-replication occurs
        String corruptMachineName = corruptDataNode.getXferAddr();
        for (DatanodeDescriptor datanode : hm.getDatanodes()) {
          if (!corruptMachineName.equals(datanode.getXferAddr())) {
            datanode.getStorageInfos()[0].setUtilizationForTesting(100L, 100L, 0, 100L);
            datanode.updateHeartbeat(
                BlockManagerTestUtil.getStorageReportsForDatanode(datanode),
                0L, 0L, 0, 0, null);
          }
        }

        // decrease the replication factor to 1; 
        NameNodeAdapter.setReplication(namesystem, fileName.toString(), (short)1);

        // corrupt one won't be chosen to be excess one
        // without 4910 the number of live replicas would be 0: block gets lost
        assertEquals(1, bm.countNodes(block.getLocalBlock()).liveReplicas());
      }
    } finally {
      namesystem.writeUnlock();
    }
    
  } finally {
    cluster.shutdown();
  }
}

Source File: TestFsck.java From hadoop with Apache License 2.0

4 votes

/** check if option -list-corruptfiles of fsck command works properly */
@Test
public void testFsckListCorruptFilesBlocks() throws Exception {
  Configuration conf = new Configuration();
  conf.setLong(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 1000);
  conf.setInt(DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_INTERVAL_KEY, 1);
  FileSystem fs = null;

  MiniDFSCluster cluster = null;
  try {
    cluster = new MiniDFSCluster.Builder(conf).build();
    cluster.waitActive();
    fs = cluster.getFileSystem();
    DFSTestUtil util = new DFSTestUtil.Builder().
        setName("testGetCorruptFiles").setNumFiles(3).setMaxLevels(1).
        setMaxSize(1024).build();
    util.createFiles(fs, "/corruptData", (short) 1);
    util.waitReplication(fs, "/corruptData", (short) 1);

    // String outStr = runFsck(conf, 0, true, "/corruptData", "-list-corruptfileblocks");
    String outStr = runFsck(conf, 0, false, "/corruptData", "-list-corruptfileblocks");
    System.out.println("1. good fsck out: " + outStr);
    assertTrue(outStr.contains("has 0 CORRUPT files"));
    // delete the blocks
    final String bpid = cluster.getNamesystem().getBlockPoolId();
    for (int i=0; i<4; i++) {
      for (int j=0; j<=1; j++) {
        File storageDir = cluster.getInstanceStorageDir(i, j);
        File data_dir = MiniDFSCluster.getFinalizedDir(storageDir, bpid);
        List<File> metadataFiles = MiniDFSCluster.getAllBlockMetadataFiles(
            data_dir);
        if (metadataFiles == null)
          continue;
        for (File metadataFile : metadataFiles) {
          File blockFile = Block.metaToBlockFile(metadataFile);
          assertTrue("Cannot remove file.", blockFile.delete());
          assertTrue("Cannot remove file.", metadataFile.delete());
        }
      }
    }

    // wait for the namenode to see the corruption
    final NamenodeProtocols namenode = cluster.getNameNodeRpc();
    CorruptFileBlocks corruptFileBlocks = namenode
        .listCorruptFileBlocks("/corruptData", null);
    int numCorrupt = corruptFileBlocks.getFiles().length;
    while (numCorrupt == 0) {
      Thread.sleep(1000);
      corruptFileBlocks = namenode
          .listCorruptFileBlocks("/corruptData", null);
      numCorrupt = corruptFileBlocks.getFiles().length;
    }
    outStr = runFsck(conf, -1, true, "/corruptData", "-list-corruptfileblocks");
    System.out.println("2. bad fsck out: " + outStr);
    assertTrue(outStr.contains("has 3 CORRUPT files"));

    // Do a listing on a dir which doesn't have any corrupt blocks and validate
    util.createFiles(fs, "/goodData");
    outStr = runFsck(conf, 0, true, "/goodData", "-list-corruptfileblocks");
    System.out.println("3. good fsck out: " + outStr);
    assertTrue(outStr.contains("has 0 CORRUPT files"));
    util.cleanup(fs,"/corruptData");
    util.cleanup(fs, "/goodData");
  } finally {
    if (cluster != null) {cluster.shutdown();}
  }
}

Source File: TestNameNodeReconfigure.java From RDFS with Apache License 2.0

4 votes

/**
 * Test that we can change the block placement policy through the
 * reconfigurable API.
 */
@Test
public void testChangeBlockPlacementPolicy()
  throws IOException, ReconfigurationException {
  AtomicInteger callCounter = new AtomicInteger(0);
  MockPlacementPolicy.setCallCounter(callCounter);

  DFSTestUtil util = new DFSTestUtil("", 2, 1, 512);

  // write some files with the default block placement policy
  util.createFiles(fs, "/reconfdat1", (short) 3);
  util.waitReplication(fs, "/reconfdat1", (short) 3);

  assertTrue("calls already made to MockPlacementPolicy",
             callCounter.get() == 0);

  // switch over to the mock placement policy
  cluster.getNameNode().reconfigureProperty("dfs.block.replicator.classname",
                                            "org.apache.hadoop.hdfs.server." +
                                            "namenode." +
                                            "TestNameNodeReconfigure$" +
                                            "MockPlacementPolicy");

  // write some files with the mock placement policy
  util.createFiles(fs, "/reconfdat2", (short) 3);
  util.waitReplication(fs, "/reconfdat2", (short) 3);

  int callsMade1 = callCounter.get();
  
  // check that calls were made to mock placement policy
  assertTrue("no calls made to MockPlacementPolicy",
             callsMade1 > 0);
  LOG.info("" + callsMade1 + " calls made to MockPlacementPolicy");

  // now try to change it to a non-existent class
  try {
    cluster.getNameNode().
      reconfigureProperty("dfs.block.replicator.classname",
                          "does.not.exist");
    fail("ReconfigurationException expected");
  } catch (RuntimeException expected) {
    assertTrue("exception should have cause", expected.getCause() != null);
    assertTrue("exception's cause should have cause",
               expected.getCause().getCause() != null);
    assertTrue("ClassNotFoundException expected but got " +
               expected.getCause().getCause().getClass().getCanonicalName(),
               expected.getCause().getCause() instanceof
               ClassNotFoundException);
  }

  // write some files, they should still go to the mock placemeny policy
  util.createFiles(fs, "/reconfdat3", (short) 3);
  util.waitReplication(fs, "/reconfdat3", (short) 3);

  int callsMade2 = callCounter.get();

  // check that more calls were made to mock placement policy
  assertTrue("no calls made to MockPlacementPolicy",
             callsMade2 > callsMade1);
  LOG.info("" + (callsMade2 - callsMade1) +
    " calls made to MockPlacementPolicy");

  // now revert back to the default policy
  cluster.getNameNode().reconfigureProperty("dfs.block.replicator.classname",
                                            null);

  // write some files with the default block placement policy
  util.createFiles(fs, "/reconfdat4", (short) 3);
  util.waitReplication(fs, "/reconfdat4", (short) 3);

  // make sure that no more calls were made to mock placement policy
  assertTrue("more calls made to MockPlacementPolicy",
             callCounter.get() == callsMade2);

  util.cleanup(fs, "/reconfdat1");
  util.cleanup(fs, "/reconfdat2");
  util.cleanup(fs, "/reconfdat3");
  util.cleanup(fs, "/reconfdat4");
}

Source File: TestListCorruptFileBlocks.java From hadoop with Apache License 2.0

4 votes

/**
 * Test if NN.listCorruptFiles() returns the right number of results.
 * The corrupt blocks are detected by the BlockPoolSliceScanner.
 * Also, test that DFS.listCorruptFileBlocks can make multiple successive
 * calls.
 */
@Test (timeout=300000)
public void testMaxCorruptFiles() throws Exception {
  MiniDFSCluster cluster = null;
  try {
    Configuration conf = new HdfsConfiguration();
    conf.setInt(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 3 * 1000); // datanode sends block reports
    cluster = new MiniDFSCluster.Builder(conf).build();
    FileSystem fs = cluster.getFileSystem();
    final int maxCorruptFileBlocks = 
      FSNamesystem.DEFAULT_MAX_CORRUPT_FILEBLOCKS_RETURNED;

    // create 110 files with one block each
    DFSTestUtil util = new DFSTestUtil.Builder().setName("testMaxCorruptFiles").
        setNumFiles(maxCorruptFileBlocks * 3).setMaxLevels(1).setMaxSize(512).
        build();
    util.createFiles(fs, "/srcdat2", (short) 1);
    util.waitReplication(fs, "/srcdat2", (short) 1);

    // verify that there are no bad blocks.
    final NameNode namenode = cluster.getNameNode();
    Collection<FSNamesystem.CorruptFileBlockInfo> badFiles = namenode.
      getNamesystem().listCorruptFileBlocks("/srcdat2", null);
    assertTrue("Namenode has " + badFiles.size() + " corrupt files. Expecting none.",
        badFiles.size() == 0);

    // Now deliberately blocks from all files
    final String bpid = cluster.getNamesystem().getBlockPoolId();
    for (int i=0; i<4; i++) {
      for (int j=0; j<=1; j++) {
        File storageDir = cluster.getInstanceStorageDir(i, j);
        File data_dir = MiniDFSCluster.getFinalizedDir(storageDir, bpid);
        LOG.info("Removing files from " + data_dir);
        List<File> metadataFiles = MiniDFSCluster.getAllBlockMetadataFiles(
            data_dir);
        if (metadataFiles == null)
          continue;
        for (File metadataFile : metadataFiles) {
          File blockFile = Block.metaToBlockFile(metadataFile);
          assertTrue("Cannot remove file.", blockFile.delete());
          assertTrue("Cannot remove file.", metadataFile.delete());
        }
      }
    }

    // Occasionally the BlockPoolSliceScanner can run before we have removed
    // the blocks. Restart the Datanode to trigger the scanner into running
    // once more.
    LOG.info("Restarting Datanode to trigger BlockPoolSliceScanner");
    cluster.restartDataNodes();
    cluster.waitActive();

    badFiles = 
      namenode.getNamesystem().listCorruptFileBlocks("/srcdat2", null);
      
     while (badFiles.size() < maxCorruptFileBlocks) {
      LOG.info("# of corrupt files is: " + badFiles.size());
      Thread.sleep(10000);
      badFiles = namenode.getNamesystem().
        listCorruptFileBlocks("/srcdat2", null);
    }
    badFiles = namenode.getNamesystem().
      listCorruptFileBlocks("/srcdat2", null); 
    LOG.info("Namenode has bad files. " + badFiles.size());
    assertTrue("Namenode has " + badFiles.size() + " bad files. Expecting " + 
        maxCorruptFileBlocks + ".",
        badFiles.size() == maxCorruptFileBlocks);

    CorruptFileBlockIterator iter = (CorruptFileBlockIterator)
      fs.listCorruptFileBlocks(new Path("/srcdat2"));
    int corruptPaths = countPaths(iter);
    assertTrue("Expected more than " + maxCorruptFileBlocks +
               " corrupt file blocks but got " + corruptPaths,
               corruptPaths > maxCorruptFileBlocks);
    assertTrue("Iterator should have made more than 1 call but made " +
               iter.getCallsMade(),
               iter.getCallsMade() > 1);

    util.cleanup(fs, "/srcdat2");
  } finally {
    if (cluster != null) { cluster.shutdown(); }
  }
}

Source File: TestDiskError.java From hadoop-gpu with Apache License 2.0

4 votes

public void testReplicationError() throws Exception {
  // bring up a cluster of 1
  Configuration conf = new Configuration();
  MiniDFSCluster cluster = new MiniDFSCluster(conf, 1, true, null);
  cluster.waitActive();
  FileSystem fs = cluster.getFileSystem();
  
  try {
    // create a file of replication factor of 1
    final Path fileName = new Path("/test.txt");
    final int fileLen = 1;
    DFSTestUtil.createFile(fs, fileName, 1, (short)1, 1L);
    DFSTestUtil.waitReplication(fs, fileName, (short)1);

    // get the block belonged to the created file
    LocatedBlocks blocks = cluster.getNameNode().namesystem.getBlockLocations(
        fileName.toString(), 0, (long)fileLen);
    assertEquals(blocks.locatedBlockCount(), 1);
    LocatedBlock block = blocks.get(0);
    
    // bring up a second datanode
    cluster.startDataNodes(conf, 1, true, null, null);
    cluster.waitActive();
    final int sndNode = 1;
    DataNode datanode = cluster.getDataNodes().get(sndNode);
    
    // replicate the block to the second datanode
    InetSocketAddress target = datanode.getSelfAddr();
    Socket s = new Socket(target.getAddress(), target.getPort());
      //write the header.
    DataOutputStream out = new DataOutputStream(
        s.getOutputStream());

    out.writeShort( DataTransferProtocol.DATA_TRANSFER_VERSION );
    out.write( DataTransferProtocol.OP_WRITE_BLOCK );
    out.writeLong( block.getBlock().getBlockId());
    out.writeLong( block.getBlock().getGenerationStamp() );
    out.writeInt(1);
    out.writeBoolean( false );       // recovery flag
    Text.writeString( out, "" );
    out.writeBoolean(false); // Not sending src node information
    out.writeInt(0);
    
    // write check header
    out.writeByte( 1 );
    out.writeInt( 512 );

    out.flush();

    // close the connection before sending the content of the block
    out.close();
    
    // the temporary block & meta files should be deleted
    String dataDir = cluster.getDataDirectory();
    File dir1 = new File(new File(dataDir, "data"+(2*sndNode+1)), "tmp");
    File dir2 = new File(new File(dataDir, "data"+(2*sndNode+2)), "tmp");
    while (dir1.listFiles().length != 0 || dir2.listFiles().length != 0) {
      Thread.sleep(100);
    }
    
    // then increase the file's replication factor
    fs.setReplication(fileName, (short)2);
    // replication should succeed
    DFSTestUtil.waitReplication(fs, fileName, (short)1);
    
    // clean up the file
    fs.delete(fileName, false);
  } finally {
    cluster.shutdown();
  }
}

Source File: TestFileTruncate.java From hadoop with Apache License 2.0

4 votes

/**
 * The last block is truncated at mid. (copy-on-truncate)
 * dn1 is shutdown before truncate and restart after truncate successful.
 */
@Test(timeout=60000)
public void testCopyOnTruncateWithDataNodesRestart() throws Exception {
  int startingFileSize = 3 * BLOCK_SIZE;
  byte[] contents = AppendTestUtil.initBuffer(startingFileSize);
  final Path parent = new Path("/test");
  final Path p = new Path(parent, "testCopyOnTruncateWithDataNodesRestart");

  writeContents(contents, startingFileSize, p);
  LocatedBlock oldBlock = getLocatedBlocks(p).getLastLocatedBlock();
  fs.allowSnapshot(parent);
  fs.createSnapshot(parent, "ss0");

  int dn = 1;
  int toTruncateLength = 1;
  int newLength = startingFileSize - toTruncateLength;
  cluster.getDataNodes().get(dn).shutdown();
  try {
    boolean isReady = fs.truncate(p, newLength);
    assertFalse(isReady);
  } finally {
    cluster.restartDataNode(dn, true, true);
    cluster.waitActive();
  }
  checkBlockRecovery(p);

  LocatedBlock newBlock = getLocatedBlocks(p).getLastLocatedBlock();
  /*
   * For copy-on-truncate, new block is made with new block id and new GS.
   * The replicas of the new block is 2, then it will be replicated to dn1.
   */
  assertNotEquals(newBlock.getBlock().getBlockId(), 
      oldBlock.getBlock().getBlockId());
  assertEquals(newBlock.getBlock().getGenerationStamp(),
      oldBlock.getBlock().getGenerationStamp() + 1);

  // Wait replicas come to 3
  DFSTestUtil.waitReplication(fs, p, REPLICATION);
  // New block is replicated to dn1
  assertEquals(cluster.getBlockFile(dn, newBlock.getBlock()).length(), 
      newBlock.getBlockSize());
  // Old replica exists too since there is snapshot
  assertEquals(cluster.getBlockFile(dn, oldBlock.getBlock()).length(), 
      oldBlock.getBlockSize());
  assertTrue(cluster.getBlockMetadataFile(dn, 
      oldBlock.getBlock()).getName().endsWith(
          oldBlock.getBlock().getGenerationStamp() + ".meta"));

  // Validate the file
  FileStatus fileStatus = fs.getFileStatus(p);
  assertThat(fileStatus.getLen(), is((long) newLength));
  checkFullFile(p, newLength, contents);

  fs.deleteSnapshot(parent, "ss0");
  fs.delete(parent, true);
}

Source File: TestOverReplicatedBlocks.java From RDFS with Apache License 2.0

4 votes

/** Test processOverReplicatedBlock can handle corrupt replicas fine.
 * It make sure that it won't treat corrupt replicas as valid ones 
 * thus prevents NN deleting valid replicas but keeping
 * corrupt ones.
 */
public void testProcesOverReplicateBlock() throws IOException {
  Configuration conf = new Configuration();
  conf.setLong("dfs.blockreport.intervalMsec", 1000L);
  conf.set("dfs.replication.pending.timeout.sec", Integer.toString(2));
  MiniDFSCluster cluster = new MiniDFSCluster(conf, 3, true, null);
  FileSystem fs = cluster.getFileSystem();

  try {
    int namespaceId = cluster.getNameNode().getNamespaceID();
    final Path fileName = new Path("/foo1");
    DFSTestUtil.createFile(fs, fileName, 2, (short)3, 0L);
    DFSTestUtil.waitReplication(fs, fileName, (short)3);
    
    // corrupt the block on datanode 0
    Block block = DFSTestUtil.getFirstBlock(fs, fileName);
    TestDatanodeBlockScanner.corruptReplica(block.getBlockName(), 0, cluster);
    DataNodeProperties dnProps = cluster.stopDataNode(0);
    // remove block scanner log to trigger block scanning
    File scanLog = new File(cluster.getBlockDirectory("data1").getParent(), "dncp_block_verification.log.curr");
    //wait for one minute for deletion to succeed;
    scanLog.delete();
    
    // restart the datanode so the corrupt replica will be detected
    cluster.restartDataNode(dnProps);
    DFSTestUtil.waitReplication(fs, fileName, (short)2);
    
    final DatanodeID corruptDataNode = 
      cluster.getDataNodes().get(2).getDNRegistrationForNS(namespaceId);
    final FSNamesystem namesystem = cluster.getNameNode().getNamesystem();
    synchronized (namesystem.heartbeats) {
      // set live datanode's remaining space to be 0 
      // so they will be chosen to be deleted when over-replication occurs
      for (DatanodeDescriptor datanode : namesystem.heartbeats) {
        if (!corruptDataNode.equals(datanode)) {
          datanode.updateHeartbeat(100L, 100L, 0L, 100L, 0);
        }
      }
    }
      
    // decrease the replication factor to 1; 
    namesystem.setReplication(fileName.toString(), (short)1);
    waitReplication(namesystem, block, (short)1);
    
    // corrupt one won't be chosen to be excess one
    // without 4910 the number of live replicas would be 0: block gets lost
    assertEquals(1, namesystem.countNodes(block).liveReplicas());

    // Test the case when multiple calls to setReplication still succeeds.
    System.out.println("Starting next test with file foo2.");
    final Path fileName2 = new Path("/foo1");
    DFSTestUtil.createFile(fs, fileName2, 2, (short)3, 0L);
    DFSTestUtil.waitReplication(fs, fileName2, (short)3);
    LocatedBlocks lbs = namesystem.getBlockLocations(
               fileName2.toString(), 0, 10);
    Block firstBlock = lbs.get(0).getBlock();
    namesystem.setReplication(fileName2.toString(), (short)2);
    namesystem.setReplication(fileName2.toString(), (short)1);
    
    // wait upto one minute for excess replicas to get deleted. It is not
    // immediate because excess replicas are being handled asyncronously.
    waitReplication(namesystem, firstBlock, (short)1);
    assertEquals(1, namesystem.countNodes(firstBlock).liveReplicas());
  } finally {
    cluster.shutdown();
  }
}

Source File: TestOverReplicatedBlocks.java From hadoop with Apache License 2.0

4 votes

/**
 * The test verifies that replica for deletion is chosen on a node,
 * with the oldest heartbeat, when this heartbeat is larger than the
 * tolerable heartbeat interval.
 * It creates a file with several blocks and replication 4.
 * The last DN is configured to send heartbeats rarely.
 * 
 * Test waits until the tolerable heartbeat interval expires, and reduces
 * replication of the file. All replica deletions should be scheduled for the
 * last node. No replicas will actually be deleted, since last DN doesn't
 * send heartbeats. 
 */
@Test
public void testChooseReplicaToDelete() throws Exception {
  MiniDFSCluster cluster = null;
  FileSystem fs = null;
  try {
    Configuration conf = new HdfsConfiguration();
    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, SMALL_BLOCK_SIZE);
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
    fs = cluster.getFileSystem();
    final FSNamesystem namesystem = cluster.getNamesystem();

    conf.setLong(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 300);
    cluster.startDataNodes(conf, 1, true, null, null, null);
    DataNode lastDN = cluster.getDataNodes().get(3);
    DatanodeRegistration dnReg = DataNodeTestUtils.getDNRegistrationForBP(
        lastDN, namesystem.getBlockPoolId());
    String lastDNid = dnReg.getDatanodeUuid();

    final Path fileName = new Path("/foo2");
    DFSTestUtil.createFile(fs, fileName, SMALL_FILE_LENGTH, (short)4, 0L);
    DFSTestUtil.waitReplication(fs, fileName, (short)4);

    // Wait for tolerable number of heartbeats plus one
    DatanodeDescriptor nodeInfo = null;
    long lastHeartbeat = 0;
    long waitTime = DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_DEFAULT * 1000 *
      (DFSConfigKeys.DFS_NAMENODE_TOLERATE_HEARTBEAT_MULTIPLIER_DEFAULT + 1);
    do {
      nodeInfo = namesystem.getBlockManager().getDatanodeManager()
          .getDatanode(dnReg);
      lastHeartbeat = nodeInfo.getLastUpdateMonotonic();
    } while (monotonicNow() - lastHeartbeat < waitTime);
    fs.setReplication(fileName, (short)3);

    BlockLocation locs[] = fs.getFileBlockLocations(
        fs.getFileStatus(fileName), 0, Long.MAX_VALUE);

    // All replicas for deletion should be scheduled on lastDN.
    // And should not actually be deleted, because lastDN does not heartbeat.
    namesystem.readLock();
    Collection<Block> dnBlocks = 
      namesystem.getBlockManager().excessReplicateMap.get(lastDNid);
    assertEquals("Replicas on node " + lastDNid + " should have been deleted",
        SMALL_FILE_LENGTH / SMALL_BLOCK_SIZE, dnBlocks.size());
    namesystem.readUnlock();
    for(BlockLocation location : locs)
      assertEquals("Block should still have 4 replicas",
          4, location.getNames().length);
  } finally {
    if(fs != null) fs.close();
    if(cluster != null) cluster.shutdown();
  }
}

Source File: TestFileTruncate.java From big-c with Apache License 2.0

4 votes

/**
 * The last block is truncated at mid. (non copy-on-truncate)
 * dn0, dn1 are restarted immediately after truncate.
 */
@Test(timeout=60000)
public void testTruncateWithDataNodesRestartImmediately() throws Exception {
  int startingFileSize = 3 * BLOCK_SIZE;
  byte[] contents = AppendTestUtil.initBuffer(startingFileSize);
  final Path parent = new Path("/test");
  final Path p = new Path(parent, "testTruncateWithDataNodesRestartImmediately");

  writeContents(contents, startingFileSize, p);
  LocatedBlock oldBlock = getLocatedBlocks(p).getLastLocatedBlock();

  int dn0 = 0;
  int dn1 = 1;
  int toTruncateLength = 1;
  int newLength = startingFileSize - toTruncateLength;
  boolean isReady = fs.truncate(p, newLength);
  assertFalse(isReady);

  cluster.restartDataNode(dn0, true, true);
  cluster.restartDataNode(dn1, true, true);
  cluster.waitActive();
  checkBlockRecovery(p);

  LocatedBlock newBlock = getLocatedBlocks(p).getLastLocatedBlock();
  /*
   * For non copy-on-truncate, the truncated block id is the same, but the 
   * GS should increase.
   */
  assertEquals(newBlock.getBlock().getBlockId(), 
      oldBlock.getBlock().getBlockId());
  assertEquals(newBlock.getBlock().getGenerationStamp(),
      oldBlock.getBlock().getGenerationStamp() + 1);

  // Wait replicas come to 3
  DFSTestUtil.waitReplication(fs, p, REPLICATION);
  // Old replica is disregarded and replaced with the truncated one on dn0
  assertEquals(cluster.getBlockFile(dn0, newBlock.getBlock()).length(), 
      newBlock.getBlockSize());
  assertTrue(cluster.getBlockMetadataFile(dn0, 
      newBlock.getBlock()).getName().endsWith(
          newBlock.getBlock().getGenerationStamp() + ".meta"));

  // Old replica is disregarded and replaced with the truncated one on dn1
  assertEquals(cluster.getBlockFile(dn1, newBlock.getBlock()).length(), 
      newBlock.getBlockSize());
  assertTrue(cluster.getBlockMetadataFile(dn1, 
      newBlock.getBlock()).getName().endsWith(
          newBlock.getBlock().getGenerationStamp() + ".meta"));

  // Validate the file
  FileStatus fileStatus = fs.getFileStatus(p);
  assertThat(fileStatus.getLen(), is((long) newLength));
  checkFullFile(p, newLength, contents);

  fs.delete(parent, true);
}

Java Code Examples for org.apache.hadoop.hdfs.DFSTestUtil#waitReplication()