Java Code Examples for org.apache.hadoop.hdfs.HAUtil#setAllowStandbyReads()

The following examples show how to use org.apache.hadoop.hdfs.HAUtil#setAllowStandbyReads(). All of them are taken from open-source Hadoop test code; the source file and originating project are noted above each example.
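
Before the examples, a quick orientation: setAllowStandbyReads() does nothing more than record a boolean flag on the given Configuration; the NameNode consults that flag (via the companion getter HAUtil#shouldAllowStandbyReads()) when deciding whether its standby state may serve read operations instead of rejecting them with a StandbyException. The minimal sketch below shows the flag round-tripping through a conf. It assumes a Hadoop 2.x hadoop-common/hadoop-hdfs classpath, and the key name mentioned in the comment is our reading of the HAUtil source rather than a documented public constant.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.HAUtil;

public class AllowStandbyReadsSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();

    // Enable (possibly stale) reads on a standby NameNode. This must happen
    // before the conf is handed to a NameNode or MiniDFSCluster; internally
    // it just sets a boolean key on the conf (believed to be
    // "dfs.ha.allow.stale.reads").
    HAUtil.setAllowStandbyReads(conf, true);

    // The companion getter the NameNode uses; prints "true" here.
    System.out.println(HAUtil.shouldAllowStandbyReads(conf));
  }
}

Every test below follows the same pattern: set the flag on a fresh conf, stand up a MiniDFSCluster with an HA topology, then exercise reads against the standby NameNode.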
Example 1
Source File: TestInitializeSharedEdits.java    From hadoop with Apache License 2.0
@Before
public void setupCluster() throws IOException {
  conf = new Configuration();
  conf.setInt(DFSConfigKeys.DFS_HA_LOGROLL_PERIOD_KEY, 1);
  conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
  HAUtil.setAllowStandbyReads(conf, true);
  
  MiniDFSNNTopology topology = MiniDFSNNTopology.simpleHATopology();
  
  cluster = new MiniDFSCluster.Builder(conf)
    .nnTopology(topology)
    .numDataNodes(0)
    .build();
  cluster.waitActive();

  shutdownClusterAndRemoveSharedEditsDir();
}
 
Example 2
Source File: TestQuotasWithHA.java    From big-c with Apache License 2.0
@Before
public void setupCluster() throws Exception {
  Configuration conf = new Configuration();
  conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1);
  conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
  conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
  HAUtil.setAllowStandbyReads(conf, true);
  
  cluster = new MiniDFSCluster.Builder(conf)
    .nnTopology(MiniDFSNNTopology.simpleHATopology())
    .numDataNodes(1)
    .waitSafeMode(false)
    .build();
  cluster.waitActive();
  
  nn0 = cluster.getNameNode(0);
  nn1 = cluster.getNameNode(1);
  fs = HATestUtil.configureFailoverFs(cluster, conf);
  
  cluster.transitionToActive(0);
}
 
Example 3
Source File: TestXAttrsWithHA.java    From big-c with Apache License 2.0
@Before
public void setupCluster() throws Exception {
  Configuration conf = new Configuration();
  conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
  HAUtil.setAllowStandbyReads(conf, true);
  
  cluster = new MiniDFSCluster.Builder(conf)
    .nnTopology(MiniDFSNNTopology.simpleHATopology())
    .numDataNodes(1)
    .waitSafeMode(false)
    .build();
  cluster.waitActive();
  
  nn0 = cluster.getNameNode(0);
  nn1 = cluster.getNameNode(1);
  fs = HATestUtil.configureFailoverFs(cluster, conf);
  
  cluster.transitionToActive(0);
}
 
Example 4
Source File: TestFailureToReadEdits.java    From hadoop with Apache License 2.0
@Before
public void setUpCluster() throws Exception {
  conf = new Configuration();
  conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_CHECK_PERIOD_KEY, 1);
  conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY, 1);
  conf.setInt(DFSConfigKeys.DFS_NAMENODE_NUM_CHECKPOINTS_RETAINED_KEY, 10);
  conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
  HAUtil.setAllowStandbyReads(conf, true);
  
  if (clusterType == TestType.SHARED_DIR_HA) {
    MiniDFSNNTopology topology = MiniQJMHACluster.createDefaultTopology(10000);
    cluster = new MiniDFSCluster.Builder(conf)
      .nnTopology(topology)
      .numDataNodes(0)
      .checkExitOnShutdown(false)
      .build();
  } else {
    Builder builder = new MiniQJMHACluster.Builder(conf);
    builder.getDfsBuilder().numDataNodes(0).checkExitOnShutdown(false);
    miniQjmHaCluster = builder.build();
    cluster = miniQjmHaCluster.getDfsCluster();
  }
  cluster.waitActive();
  
  nn0 = cluster.getNameNode(0);
  nn1 = cluster.getNameNode(1);
  
  cluster.transitionToActive(0);
  fs = HATestUtil.configureFailoverFs(cluster, conf);
}
 
Example 5
Source File: TestBookKeeperAsHASharedDir.java    From hadoop with Apache License 2.0
/**
 * Use the NameNode INITIALIZESHAREDEDITS startup option to initialize the
 * shared edits, i.e. copy the existing edit log segments into the new BKJM
 * shared edits dir.
 *
 * @throws Exception
 */
@Test
public void testInitializeBKSharedEdits() throws Exception {
  MiniDFSCluster cluster = null;
  try {
    Configuration conf = new Configuration();
    HAUtil.setAllowStandbyReads(conf, true);
    conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);

    MiniDFSNNTopology topology = MiniDFSNNTopology.simpleHATopology();
    cluster = new MiniDFSCluster.Builder(conf).nnTopology(topology)
        .numDataNodes(0).build();
    cluster.waitActive();
    // Shut down and clear the current file-based shared dir.
    cluster.shutdownNameNodes();
    File shareddir = new File(cluster.getSharedEditsDir(0, 1));
    assertTrue("Initial Shared edits dir not fully deleted",
        FileUtil.fullyDelete(shareddir));

    // Check that the NameNodes cannot start without the shared dir.
    assertCanNotStartNamenode(cluster, 0);
    assertCanNotStartNamenode(cluster, 1);

    // Configure bkjm as new shared edits dir in both namenodes
    Configuration nn1Conf = cluster.getConfiguration(0);
    Configuration nn2Conf = cluster.getConfiguration(1);
    nn1Conf.set(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY, BKJMUtil
        .createJournalURI("/initializeSharedEdits").toString());
    nn2Conf.set(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY, BKJMUtil
        .createJournalURI("/initializeSharedEdits").toString());
    BKJMUtil.addJournalManagerDefinition(nn1Conf);
    BKJMUtil.addJournalManagerDefinition(nn2Conf);

    // Initialize the BKJM shared edits.
    assertFalse(NameNode.initializeSharedEdits(nn1Conf));

    // NameNode should be able to start and should be in sync with BKJM as
    // shared dir
    assertCanStartHANameNodes(cluster, conf, "/testBKJMInitialize");
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
 
Example 6
Source File: TestStandbyBlockManagement.java    From big-c with Apache License 2.0
@Test(timeout=60000)
public void testInvalidateBlock() throws Exception {
  Configuration conf = new Configuration();
  HAUtil.setAllowStandbyReads(conf, true);
  conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .nnTopology(MiniDFSNNTopology.simpleHATopology())
      .numDataNodes(3)
      .build();
  try {
    cluster.waitActive();
    cluster.transitionToActive(0);

    NameNode nn1 = cluster.getNameNode(0);
    NameNode nn2 = cluster.getNameNode(1);

    FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);

    Thread.sleep(1000);
    LOG.info("==================================");
    DFSTestUtil.writeFile(fs, TEST_FILE_PATH, TEST_FILE_DATA);
    // Have to force an edit log roll so that the standby catches up
    nn1.getRpcServer().rollEditLog();
    LOG.info("==================================");

    // delete the file
    fs.delete(TEST_FILE_PATH, false);
    BlockManagerTestUtil.computeAllPendingWork(
        nn1.getNamesystem().getBlockManager());

    nn1.getRpcServer().rollEditLog();

    // standby nn doesn't need to invalidate blocks.
    assertEquals(0,
        nn2.getNamesystem().getBlockManager().getPendingDeletionBlocksCount());

    cluster.triggerHeartbeats();
    cluster.triggerBlockReports();

    // standby nn doesn't need to invalidate blocks.
    assertEquals(0,
        nn2.getNamesystem().getBlockManager().getPendingDeletionBlocksCount());

  } finally {
    cluster.shutdown();
  }
}
 
Example 7
Source File: TestStandbyIsHot.java    From big-c with Apache License 2.0
/**
 * Regression test for HDFS-2795:
 *  - Start an HA cluster with a DN.
 *  - Write several blocks to the FS with replication 1.
 *  - Shut down the DN.
 *  - Wait for the NNs to declare the DN dead. All blocks will be under-replicated.
 *  - Restart the DN.
 * In the bug, the standby node would only very slowly notice the blocks returning
 * to the cluster.
 */
@Test(timeout=60000)
public void testDatanodeRestarts() throws Exception {
  Configuration conf = new Configuration();
  conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 1024);
  // We read from the standby to watch block locations
  HAUtil.setAllowStandbyReads(conf, true);
  conf.setLong(DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY, 0);
  conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .nnTopology(MiniDFSNNTopology.simpleHATopology())
    .numDataNodes(1)
    .build();
  try {
    NameNode nn0 = cluster.getNameNode(0);
    NameNode nn1 = cluster.getNameNode(1);

    cluster.transitionToActive(0);
    
    // Create 5 blocks.
    DFSTestUtil.createFile(cluster.getFileSystem(0), 
        TEST_FILE_PATH, 5*1024, (short)1, 1L);
    
    HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
    
    // Stop the DN.
    DataNode dn = cluster.getDataNodes().get(0);
    String dnName = dn.getDatanodeId().getXferAddr(); 
    DataNodeProperties dnProps = cluster.stopDataNode(0);
    
    // Make sure both NNs register it as dead.
    BlockManagerTestUtil.noticeDeadDatanode(nn0, dnName);
    BlockManagerTestUtil.noticeDeadDatanode(nn1, dnName);
    
    BlockManagerTestUtil.updateState(nn0.getNamesystem().getBlockManager());
    BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager());
    assertEquals(5, nn0.getNamesystem().getUnderReplicatedBlocks());
    
    // The SBN will not have any blocks in its neededReplication queue
    // since the SBN doesn't process replication.
    assertEquals(0, nn1.getNamesystem().getUnderReplicatedBlocks());
    
    LocatedBlocks locs = nn1.getRpcServer().getBlockLocations(
        TEST_FILE, 0, 1);
    assertEquals("Standby should have registered that the block has no replicas",
        0, locs.get(0).getLocations().length);
    
    cluster.restartDataNode(dnProps);
    // Wait for both NNs to re-register the DN.
    cluster.waitActive(0);
    cluster.waitActive(1);
    
    BlockManagerTestUtil.updateState(nn0.getNamesystem().getBlockManager());
    BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager());
    assertEquals(0, nn0.getNamesystem().getUnderReplicatedBlocks());
    assertEquals(0, nn1.getNamesystem().getUnderReplicatedBlocks());
    
    locs = nn1.getRpcServer().getBlockLocations(
        TEST_FILE, 0, 1);
    assertEquals("Standby should have registered that the block has replicas again",
        1, locs.get(0).getLocations().length);
  } finally {
    cluster.shutdown();
  }
}
 
Example 8
Source File: TestStandbyIsHot.java    From big-c with Apache License 2.0
@Test(timeout=60000)
public void testStandbyIsHot() throws Exception {
  Configuration conf = new Configuration();
  // We read from the standby to watch block locations
  HAUtil.setAllowStandbyReads(conf, true);
  conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .nnTopology(MiniDFSNNTopology.simpleHATopology())
    .numDataNodes(3)
    .build();
  try {
    cluster.waitActive();
    cluster.transitionToActive(0);
    
    NameNode nn1 = cluster.getNameNode(0);
    NameNode nn2 = cluster.getNameNode(1);
    
    FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
    
    Thread.sleep(1000);
    System.err.println("==================================");
    DFSTestUtil.writeFile(fs, TEST_FILE_PATH, TEST_FILE_DATA);
    // Have to force an edit log roll so that the standby catches up
    nn1.getRpcServer().rollEditLog();
    System.err.println("==================================");

    // Block locations should show up on standby.
    LOG.info("Waiting for block locations to appear on standby node");
    waitForBlockLocations(cluster, nn2, TEST_FILE, 3);

    // Trigger immediate heartbeats and block reports so
    // that the active "trusts" all of the DNs
    cluster.triggerHeartbeats();
    cluster.triggerBlockReports();

    // Change replication
    LOG.info("Changing replication to 1");
    fs.setReplication(TEST_FILE_PATH, (short)1);
    BlockManagerTestUtil.computeAllPendingWork(
        nn1.getNamesystem().getBlockManager());
    waitForBlockLocations(cluster, nn1, TEST_FILE, 1);

    nn1.getRpcServer().rollEditLog();
    
    LOG.info("Waiting for lowered replication to show up on standby");
    waitForBlockLocations(cluster, nn2, TEST_FILE, 1);
    
    // Change back to 3
    LOG.info("Changing replication to 3");
    fs.setReplication(TEST_FILE_PATH, (short)3);
    BlockManagerTestUtil.computeAllPendingWork(
        nn1.getNamesystem().getBlockManager());
    nn1.getRpcServer().rollEditLog();
    
    LOG.info("Waiting for higher replication to show up on standby");
    waitForBlockLocations(cluster, nn2, TEST_FILE, 3);
    
  } finally {
    cluster.shutdown();
  }
}
 
Example 9
Source File: TestEditLogsDuringFailover.java    From big-c with Apache License 2.0
@Test
public void testStartup() throws Exception {
  Configuration conf = new Configuration();
  HAUtil.setAllowStandbyReads(conf, true);
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .nnTopology(MiniDFSNNTopology.simpleHATopology())
    .numDataNodes(0)
    .build();
  try {
    // During HA startup, both nodes should be in
    // standby and we shouldn't have any edits files
    // in any edits directory!
    List<URI> allDirs = Lists.newArrayList();
    allDirs.addAll(cluster.getNameDirs(0));
    allDirs.addAll(cluster.getNameDirs(1));
    allDirs.add(cluster.getSharedEditsDir(0, 1));
    assertNoEditFiles(allDirs);
    
    // Set the first NN to active, make sure it creates edits
    // in its own dirs and the shared dir. The standby
    // should still have no edits!
    cluster.transitionToActive(0);
    
    assertEditFiles(cluster.getNameDirs(0),
        NNStorage.getInProgressEditsFileName(1));
    assertEditFiles(
        Collections.singletonList(cluster.getSharedEditsDir(0, 1)),
        NNStorage.getInProgressEditsFileName(1));
    assertNoEditFiles(cluster.getNameDirs(1));
    
    cluster.getNameNode(0).getRpcServer().mkdirs("/test",
        FsPermission.createImmutable((short)0755), true);

    // Restarting the standby should not finalize any edits files
    // in the shared directory when it starts up!
    cluster.restartNameNode(1);
    
    assertEditFiles(cluster.getNameDirs(0),
        NNStorage.getInProgressEditsFileName(1));
    assertEditFiles(
        Collections.singletonList(cluster.getSharedEditsDir(0, 1)),
        NNStorage.getInProgressEditsFileName(1));
    assertNoEditFiles(cluster.getNameDirs(1));
    
    // Additionally it should not have applied any in-progress logs
    // at start-up -- otherwise, it would have read half-way into
    // the current log segment, and on the next roll, it would have to
    // either replay starting in the middle of the segment (not allowed)
    // or double-replay the edits (incorrect).
    assertNull(NameNodeAdapter.getFileInfo(cluster.getNameNode(1), "/test", true));
    
    cluster.getNameNode(0).getRpcServer().mkdirs("/test2",
        FsPermission.createImmutable((short)0755), true);

    // If we restart NN0, it'll come back as standby, and we can
    // transition NN1 to active and make sure it reads edits correctly at this point.
    cluster.restartNameNode(0);
    cluster.transitionToActive(1);

    // NN1 should have both the edits that came before its restart, and the edits that
    // came after its restart.
    assertNotNull(NameNodeAdapter.getFileInfo(cluster.getNameNode(1), "/test", true));
    assertNotNull(NameNodeAdapter.getFileInfo(cluster.getNameNode(1), "/test2", true));
  } finally {
    cluster.shutdown();
  }
}
 
Example 10
Source File: TestEditLogTailer.java    From big-c with Apache License 2.0
@Test
public void testTailer() throws IOException, InterruptedException,
    ServiceFailedException {
  Configuration conf = new HdfsConfiguration();
  conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);

  HAUtil.setAllowStandbyReads(conf, true);
  
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .nnTopology(MiniDFSNNTopology.simpleHATopology())
    .numDataNodes(0)
    .build();
  cluster.waitActive();
  
  cluster.transitionToActive(0);
  
  NameNode nn1 = cluster.getNameNode(0);
  NameNode nn2 = cluster.getNameNode(1);
  try {
    for (int i = 0; i < DIRS_TO_MAKE / 2; i++) {
      NameNodeAdapter.mkdirs(nn1, getDirPath(i),
          new PermissionStatus("test","test", new FsPermission((short)00755)),
          true);
    }
    
    HATestUtil.waitForStandbyToCatchUp(nn1, nn2);
    
    for (int i = 0; i < DIRS_TO_MAKE / 2; i++) {
      assertTrue(NameNodeAdapter.getFileInfo(nn2,
          getDirPath(i), false).isDir());
    }
    
    for (int i = DIRS_TO_MAKE / 2; i < DIRS_TO_MAKE; i++) {
      NameNodeAdapter.mkdirs(nn1, getDirPath(i),
          new PermissionStatus("test","test", new FsPermission((short)00755)),
          true);
    }
    
    HATestUtil.waitForStandbyToCatchUp(nn1, nn2);
    
    for (int i = DIRS_TO_MAKE / 2; i < DIRS_TO_MAKE; i++) {
      assertTrue(NameNodeAdapter.getFileInfo(nn2,
          getDirPath(i), false).isDir());
    }
  } finally {
    cluster.shutdown();
  }
}
 
Example 11
Source File: TestBackupNode.java    From big-c with Apache License 2.0
/**
 * Ensure that the backupnode will tail edits from the NN
 * and keep in sync, even while the NN rolls, checkpoints
 * occur, etc.
 */
@Test
public void testBackupNodeTailsEdits() throws Exception {
  Configuration conf = new HdfsConfiguration();
  HAUtil.setAllowStandbyReads(conf, true);
  MiniDFSCluster cluster = null;
  FileSystem fileSys = null;
  BackupNode backup = null;

  try {
    cluster = new MiniDFSCluster.Builder(conf)
                                .numDataNodes(0).build();
    fileSys = cluster.getFileSystem();
    backup = startBackupNode(conf, StartupOption.BACKUP, 1);
    
    BackupImage bnImage = (BackupImage) backup.getFSImage();
    testBNInSync(cluster, backup, 1);
    
    // Force a roll -- BN should roll with NN.
    NameNode nn = cluster.getNameNode();
    NamenodeProtocols nnRpc = nn.getRpcServer();
    nnRpc.rollEditLog();
    assertEquals(bnImage.getEditLog().getCurSegmentTxId(),
        nn.getFSImage().getEditLog().getCurSegmentTxId());
    
    // BN should stay in sync after roll
    testBNInSync(cluster, backup, 2);
    
    long nnImageBefore =
      nn.getFSImage().getStorage().getMostRecentCheckpointTxId();
    // BN checkpoint
    backup.doCheckpoint();
    
    // NN should have received a new image
    long nnImageAfter =
      nn.getFSImage().getStorage().getMostRecentCheckpointTxId();
    
    assertTrue("nn should have received new checkpoint. before: " +
        nnImageBefore + " after: " + nnImageAfter,
        nnImageAfter > nnImageBefore);

    // BN should stay in sync after checkpoint
    testBNInSync(cluster, backup, 3);

    // Stop BN
    StorageDirectory sd = bnImage.getStorage().getStorageDir(0);
    backup.stop();
    backup = null;
    
    // When shutting down the BN, it shouldn't finalize logs that are
    // still open on the NN
    EditLogFile editsLog = FSImageTestUtil.findLatestEditsLog(sd);
    assertEquals(editsLog.getFirstTxId(),
        nn.getFSImage().getEditLog().getCurSegmentTxId());
    assertTrue("Should not have finalized " + editsLog,
        editsLog.isInProgress());
    
    // do some edits
    assertTrue(fileSys.mkdirs(new Path("/edit-while-bn-down")));
    
    // start a new backup node
    backup = startBackupNode(conf, StartupOption.BACKUP, 1);

    testBNInSync(cluster, backup, 4);
    assertNotNull(backup.getNamesystem().getFileInfo("/edit-while-bn-down", false));
  } finally {
    LOG.info("Shutting down...");
    if (backup != null) backup.stop();
    if (fileSys != null) fileSys.close();
    if (cluster != null) cluster.shutdown();
  }
  
  assertStorageDirsMatch(cluster.getNameNode(), backup);
}
 