Java Code Examples for org.apache.hadoop.hdfs.MiniDFSCluster#transitionToStandby()

The following examples show how to use org.apache.hadoop.hdfs.MiniDFSCluster#transitionToStandby(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. Related API usage is listed in the sidebar.
Example 1
Source File: TestBookKeeperAsHASharedDir.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Exercises repeated active/standby switches between two NameNodes whose
 * shared edits directory is configured as a BookKeeper journal URI. Each
 * NameNode rolls its edit log while it is active; the final transition of
 * NameNode 0 back to active is expected to complete without throwing.
 */
@Test
public void testNameNodeMultipleSwitchesUsingBKJM() throws Exception {
  MiniDFSCluster cluster = null;
  try {
    Configuration haConf = new Configuration();
    haConf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
    String sharedEditsUri =
        BKJMUtil.createJournalURI("/correctEditLogSelection").toString();
    haConf.set(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY,
        sharedEditsUri);
    BKJMUtil.addJournalManagerDefinition(haConf);

    cluster = new MiniDFSCluster.Builder(haConf)
        .nnTopology(MiniDFSNNTopology.simpleHATopology())
        .numDataNodes(0)
        .manageNameDfsSharedDirs(false)
        .build();
    NameNode firstNn = cluster.getNameNode(0);
    NameNode secondNn = cluster.getNameNode(1);
    cluster.waitActive();

    // NameNode 0 becomes active and rolls its edit log once.
    cluster.transitionToActive(0);
    firstNn.getRpcServer().rollEditLog();

    // Graceful hand-over: demote NameNode 0, promote NameNode 1, then roll
    // the edit log twice on the new active.
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);
    secondNn.getRpcServer().rollEditLog();
    secondNn.getRpcServer().rollEditLog();

    // Fail back once more; NameNode 0 must be able to become active again.
    cluster.transitionToStandby(1);
    cluster.transitionToActive(0);
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
 
Example 2
Source File: TestBookKeeperAsHASharedDir.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Exercises repeated active/standby switches between two NameNodes whose
 * shared edits directory is configured as a BookKeeper journal URI. Each
 * NameNode rolls its edit log while it is active; the final transition of
 * NameNode 0 back to active is expected to complete without throwing.
 */
@Test
public void testNameNodeMultipleSwitchesUsingBKJM() throws Exception {
  MiniDFSCluster cluster = null;
  try {
    Configuration haConf = new Configuration();
    haConf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
    String sharedEditsUri =
        BKJMUtil.createJournalURI("/correctEditLogSelection").toString();
    haConf.set(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY,
        sharedEditsUri);
    BKJMUtil.addJournalManagerDefinition(haConf);

    cluster = new MiniDFSCluster.Builder(haConf)
        .nnTopology(MiniDFSNNTopology.simpleHATopology())
        .numDataNodes(0)
        .manageNameDfsSharedDirs(false)
        .build();
    NameNode firstNn = cluster.getNameNode(0);
    NameNode secondNn = cluster.getNameNode(1);
    cluster.waitActive();

    // NameNode 0 becomes active and rolls its edit log once.
    cluster.transitionToActive(0);
    firstNn.getRpcServer().rollEditLog();

    // Graceful hand-over: demote NameNode 0, promote NameNode 1, then roll
    // the edit log twice on the new active.
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);
    secondNn.getRpcServer().rollEditLog();
    secondNn.getRpcServer().rollEditLog();

    // Fail back once more; NameNode 0 must be able to become active again.
    cluster.transitionToStandby(1);
    cluster.transitionToActive(0);
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
 
Example 3
Source File: TestPendingCorruptDnMessages.java    From hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Drives an HA cluster through the following sequence and then checks that a
 * failover from NameNode 0 to NameNode 1 still completes:
 * <ol>
 *   <li>write a small file while NameNode 0 is active;</li>
 *   <li>move the generation stamp of the file's first block back to 900 on
 *       the DataNode, then stop the DataNode;</li>
 *   <li>restart NameNode 1 and the DataNode, and wait until NameNode 1 has
 *       exactly one pending DataNode message;</li>
 *   <li>wipe and restart the DataNode, wait until the UID registered on
 *       NameNode 1 changes, and assert the pending message count is 0.</li>
 * </ol>
 *
 * @throws IOException if a cluster or file system operation fails
 * @throws URISyntaxException if the failover file system cannot be configured
 * @throws InterruptedException declared by the original signature
 */
@Test
public void testChangedStorageId() throws IOException, URISyntaxException,
    InterruptedException {
  HdfsConfiguration conf = new HdfsConfiguration();
  conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .numDataNodes(1)
      .nnTopology(MiniDFSNNTopology.simpleHATopology())
      .build();

  try {
    cluster.transitionToActive(0);

    FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
    // try-with-resources guarantees the stream is closed even when write()
    // throws; the explicit charset keeps the payload independent of the
    // platform default encoding.
    try (OutputStream out = fs.create(filePath)) {
      out.write("foo bar baz".getBytes(
          java.nio.charset.StandardCharsets.UTF_8));
    }

    HATestUtil.waitForStandbyToCatchUp(cluster.getNameNode(0),
        cluster.getNameNode(1));

    // Change the gen stamp of the block on datanode to go back in time (gen
    // stamps start at 1000)
    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, filePath);
    assertTrue(cluster.changeGenStampOfBlock(0, block, 900));

    // Stop the DN so the replica with the changed gen stamp will be reported
    // when this DN starts up.
    DataNodeProperties dnProps = cluster.stopDataNode(0);

    // Restart the namenode so that when the DN comes up it will see an initial
    // block report.
    cluster.restartNameNode(1, false);
    assertTrue(cluster.restartDataNode(dnProps, true));

    // Wait until the standby NN queues up the corrupt block in the pending DN
    // message queue.
    // NOTE(review): this poll has no upper bound and the test declares no
    // timeout, so a regression here hangs instead of failing.
    while (cluster.getNamesystem(1).getBlockManager()
        .getPendingDataNodeMessageCount() < 1) {
      ThreadUtil.sleepAtLeastIgnoreInterrupts(1000);
    }

    assertEquals(1, cluster.getNamesystem(1).getBlockManager()
        .getPendingDataNodeMessageCount());
    String oldStorageId = getRegisteredDatanodeUid(cluster, 1);

    // Reformat/restart the DN.
    assertTrue(wipeAndRestartDn(cluster, 0));

    // Give the DN time to start up and register, which will cause the
    // DatanodeManager to dissociate the old storage ID from the DN xfer addr.
    String newStorageId = "";
    do {
      ThreadUtil.sleepAtLeastIgnoreInterrupts(1000);
      newStorageId = getRegisteredDatanodeUid(cluster, 1);
      System.out.println("====> oldStorageId: " + oldStorageId +
          " newStorageId: " + newStorageId);
    } while (newStorageId.equals(oldStorageId));

    assertEquals(0, cluster.getNamesystem(1).getBlockManager()
        .getPendingDataNodeMessageCount());

    // Now try to fail over.
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);
  } finally {
    cluster.shutdown();
  }
}
 
Example 4
Source File: TestDFSUpgradeWithHA.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies that an HA NameNode pair using NFS-based shared edits can start
 * up, be upgraded, and keep serving file system operations afterwards,
 * including after a failover to the re-bootstrapped second NameNode.
 */
@Test
public void testNfsUpgrade() throws IOException, URISyntaxException {
  MiniDFSCluster cluster = null;
  FileSystem fs = null;
  try {
    cluster = new MiniDFSCluster.Builder(conf)
        .nnTopology(MiniDFSNNTopology.simpleHATopology())
        .numDataNodes(0)
        .build();

    final File sharedEditsDir = new File(cluster.getSharedEditsDir(0, 1));

    // Baseline: nothing has been upgraded yet, so no "previous" directories.
    checkClusterPreviousDirExistence(cluster, false);
    assertCTimesEqual(cluster);
    checkPreviousDirExistence(sharedEditsDir, false);

    // Activate NN0 and perform a first file system operation.
    cluster.transitionToActive(0);
    fs = HATestUtil.configureFailoverFs(cluster, conf);
    assertTrue(fs.mkdirs(new Path("/foo1")));

    // Upgrade step: stop NN1, then bring NN0 back with the upgrade option.
    cluster.shutdownNameNode(1);
    cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
    cluster.restartNameNode(0, false);

    checkNnPreviousDirExistence(cluster, 0, true);
    checkNnPreviousDirExistence(cluster, 1, false);
    checkPreviousDirExistence(sharedEditsDir, true);

    // With -upgrade NN0 is expected to come up active already, so it is
    // used directly without an explicit transition.
    assertTrue(fs.mkdirs(new Path("/foo2")));

    // A regular (non-upgrade) restart of NN0 must work as well.
    cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.REGULAR);
    cluster.restartNameNode(0, false);

    // File system operations must still succeed after the upgrade.
    cluster.transitionToActive(0);
    assertTrue(fs.mkdirs(new Path("/foo3")));

    // Re-bootstrap the standby from the upgraded state.
    final String[] bootstrapArgs = {"-force"};
    final int bootstrapRc =
        BootstrapStandby.run(bootstrapArgs, cluster.getConfiguration(1));
    assertEquals(0, bootstrapRc);

    // Bring NN1 back, fail over to it, and operate against it.
    cluster.restartNameNode(1);
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);
    assertTrue(fs.mkdirs(new Path("/foo4")));

    assertCTimesEqual(cluster);
  } finally {
    if (fs != null) {
      fs.close();
    }
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
 
Example 5
Source File: TestDFSUpgradeWithHA.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Ensure that an admin cannot finalize an HA upgrade without at least one NN
 * being active.
 *
 * <p>The method first walks through a complete upgrade (upgrade NN0,
 * re-bootstrap and restart NN1, fail over to NN1), then puts both NameNodes
 * in standby and expects the finalize command to fail with an
 * {@link IOException} whose message contains
 * "Cannot finalize with no NameNode active".
 *
 * @throws IOException if a cluster or file system operation fails
 *     unexpectedly
 * @throws URISyntaxException declared for the shared-edits / failover
 *     file system setup
 */
@Test
public void testCannotFinalizeIfNoActive() throws IOException,
    URISyntaxException {
  MiniDFSCluster cluster = null;
  FileSystem fs = null;
  try {
    cluster = new MiniDFSCluster.Builder(conf)
        .nnTopology(MiniDFSNNTopology.simpleHATopology())
        .numDataNodes(0)
        .build();

    // Shared edits directory used by NN0 and NN1.
    File sharedDir = new File(cluster.getSharedEditsDir(0, 1));
    
    // No upgrade is in progress at the moment.
    checkClusterPreviousDirExistence(cluster, false);
    assertCTimesEqual(cluster);
    checkPreviousDirExistence(sharedDir, false);
    
    // Transition NN0 to active and do some FS ops.
    cluster.transitionToActive(0);
    fs = HATestUtil.configureFailoverFs(cluster, conf);
    assertTrue(fs.mkdirs(new Path("/foo1")));
    
    // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
    // flag.
    cluster.shutdownNameNode(1);
    cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
    cluster.restartNameNode(0, false);
    
    // Only NN0 and the shared dir were upgraded; NN1 was down at the time.
    checkNnPreviousDirExistence(cluster, 0, true);
    checkNnPreviousDirExistence(cluster, 1, false);
    checkPreviousDirExistence(sharedDir, true);
    
    // NN0 should come up in the active state when given the -upgrade option,
    // so no need to transition it to active.
    assertTrue(fs.mkdirs(new Path("/foo2")));
    
    // Restart NN0 without the -upgrade flag, to make sure that works.
    cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.REGULAR);
    cluster.restartNameNode(0, false);
    
    // Make sure we can still do FS ops after upgrading.
    cluster.transitionToActive(0);
    assertTrue(fs.mkdirs(new Path("/foo3")));
    
    // Now bootstrap the standby with the upgraded info.
    int rc = BootstrapStandby.run(
        new String[]{"-force"},
        cluster.getConfiguration(1));
    assertEquals(0, rc);
    
    // Now restart NN1 and make sure that we can do ops against that as well.
    cluster.restartNameNode(1);
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);
    assertTrue(fs.mkdirs(new Path("/foo4")));
    
    assertCTimesEqual(cluster);
    
    // Now there's no active NN.
    cluster.transitionToStandby(1);

    // Finalizing must be rejected; reaching fail() means it was accepted.
    try {
      runFinalizeCommand(cluster);
      fail("Should not have been able to finalize upgrade with no NN active");
    } catch (IOException ioe) {
      GenericTestUtils.assertExceptionContains(
          "Cannot finalize with no NameNode active", ioe);
    }
  } finally {
    // Release the client before tearing the cluster down.
    if (fs != null) {
      fs.close();
    }
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
 
Example 6
Source File: TestHAFsck.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Runs fsck against an HA nameservice twice: once while NameNode 0 is
 * active and once more after failing over to NameNode 1.
 */
@Test
public void testHaFsck() throws Exception {
  Configuration conf = new Configuration();
  final String nameserviceId = "ha-nn-uri-0";

  // Both NameNodes get fixed HTTP ports.
  MiniDFSNNTopology.NSConf nameservice =
      new MiniDFSNNTopology.NSConf(nameserviceId)
          .addNN(new MiniDFSNNTopology.NNConf("nn1").setHttpPort(10051))
          .addNN(new MiniDFSNNTopology.NNConf("nn2").setHttpPort(10052));
  MiniDFSNNTopology topology =
      new MiniDFSNNTopology().addNameservice(nameservice);

  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .nnTopology(topology)
      .numDataNodes(0)
      .build();
  FileSystem fs = null;
  try {
    cluster.waitActive();
    cluster.transitionToActive(0);

    // Populate conf with the HA settings for the nameservice.
    HATestUtil.setFailoverConfigurations(cluster, conf, nameserviceId, 0);

    fs = HATestUtil.configureFailoverFs(cluster, conf);
    fs.mkdirs(new Path("/test1"));
    fs.mkdirs(new Path("/test2"));

    // fsck with NameNode 0 active.
    runFsck(conf);

    // Fail over, then fsck again with NameNode 1 active.
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);
    runFsck(conf);
  } finally {
    if (fs != null) {
      fs.close();
    }
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
 
Example 7
Source File: TestHAMetrics.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Checks the HA-related metrics reported by the two NameNodes of a simple HA
 * cluster: the HA state string, the milliseconds since edits were last
 * loaded, the pending DataNode message count, and the
 * {@code LastHATransitionTime} attribute of the NameNodeStatus MXBean. The
 * values are sampled before any transition, after NN0 becomes active, and
 * after failing over to NN1.
 *
 * @throws Exception if cluster setup, an MBean lookup or an assertion fails
 */
@Test(timeout = 300000)
public void testHAMetrics() throws Exception {
  Configuration conf = new Configuration();
  conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
  conf.setInt(DFSConfigKeys.DFS_HA_LOGROLL_PERIOD_KEY, Integer.MAX_VALUE);

  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(1)
      .build();
  FileSystem fs = null;
  try {
    cluster.waitActive();
    
    FSNamesystem nn0 = cluster.getNamesystem(0);
    FSNamesystem nn1 = cluster.getNamesystem(1);
    
    // Both NNs start out in standby. The expected value goes first, as in
    // the rest of this method, so a failure reports
    // "expected:<standby> but was:<...>" instead of the reverse.
    assertEquals("standby", nn0.getHAState());
    assertTrue(0 < nn0.getMillisSinceLastLoadedEdits());
    assertEquals("standby", nn1.getHAState());
    assertTrue(0 < nn1.getMillisSinceLastLoadedEdits());

    cluster.transitionToActive(0);
    final MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
    final ObjectName mxbeanName =
        new ObjectName("Hadoop:service=NameNode,name=NameNodeStatus");
    final Long ltt1 =
        (Long) mbs.getAttribute(mxbeanName, "LastHATransitionTime");
    assertTrue("lastHATransitionTime should be > 0", ltt1 > 0);
    
    assertEquals("active", nn0.getHAState());
    assertEquals(0, nn0.getMillisSinceLastLoadedEdits());
    assertEquals("standby", nn1.getHAState());
    assertTrue(0 < nn1.getMillisSinceLastLoadedEdits());
    
    cluster.transitionToStandby(0);
    // The transition time must advance with every state change.
    final Long ltt2 =
        (Long) mbs.getAttribute(mxbeanName, "LastHATransitionTime");
    assertTrue("lastHATransitionTime should be > " + ltt1, ltt2 > ltt1);
    cluster.transitionToActive(1);
    
    assertEquals("standby", nn0.getHAState());
    assertTrue(0 < nn0.getMillisSinceLastLoadedEdits());
    assertEquals("active", nn1.getHAState());
    assertEquals(0, nn1.getMillisSinceLastLoadedEdits());
    
    Thread.sleep(2000); // make sure standby gets a little out-of-date
    assertTrue(2000 <= nn0.getMillisSinceLastLoadedEdits());
    
    assertEquals(0, nn0.getPendingDataNodeMessageCount());
    assertEquals(0, nn1.getPendingDataNodeMessageCount());
    
    fs = HATestUtil.configureFailoverFs(cluster, conf);
    DFSTestUtil.createFile(fs, new Path("/foo"),
        10, (short)1, 1L);
    
    // After the write the standby (nn0) is expected to hold pending
    // DataNode messages while the active (nn1) holds none.
    assertTrue(0 < nn0.getPendingDataNodeMessageCount());
    assertEquals(0, nn1.getPendingDataNodeMessageCount());
    long millisSinceLastLoadedEdits = nn0.getMillisSinceLastLoadedEdits();
    
    HATestUtil.waitForStandbyToCatchUp(cluster.getNameNode(1),
        cluster.getNameNode(0));
    
    assertEquals(0, nn0.getPendingDataNodeMessageCount());
    assertEquals(0, nn1.getPendingDataNodeMessageCount());
    long newMillisSinceLastLoadedEdits = nn0.getMillisSinceLastLoadedEdits();
    // Since we just waited for the standby to catch up, the time since we
    // last loaded edits should be very low.
    assertTrue("expected " + millisSinceLastLoadedEdits + " > " +
        newMillisSinceLastLoadedEdits,
        millisSinceLastLoadedEdits > newMillisSinceLastLoadedEdits);
  } finally {
    IOUtils.cleanup(LOG, fs);
    cluster.shutdown();
  }
}
 
Example 8
Source File: TestPipelinesFailover.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Test the scenario where the NN fails over after issuing a block
 * synchronization request, but before it is committed. The
 * DN running the recovery should then fail to commit the synchronization
 * and a later retry will succeed.
 *
 * <p>The commitBlockSynchronization call from the primary DataNode to NN0 is
 * intercepted with a Mockito spy and held back by a {@code DelayAnswer}
 * while the failover to NN1 takes place; once released it is expected to
 * fail with "Operation category WRITE is not supported".
 *
 * @throws Exception if cluster setup, the failover or an assertion fails
 */
@Test(timeout=30000)
public void testFailoverRightBeforeCommitSynchronization() throws Exception {
  final Configuration conf = new Configuration();
  // Disable permissions so that another user can recover the lease.
  conf.setBoolean(DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY, false);
  conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
  
  FSDataOutputStream stm = null;
  final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .nnTopology(MiniDFSNNTopology.simpleHATopology())
    .numDataNodes(3)
    .build();
  try {
    cluster.waitActive();
    cluster.transitionToActive(0);
    Thread.sleep(500);

    LOG.info("Starting with NN 0 active");
    FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
    stm = fs.create(TEST_PATH);
    
    // write a half block
    AppendTestUtil.write(stm, 0, BLOCK_SIZE / 2);
    stm.hflush();
    
    // Look into the block manager on the active node for the block
    // under construction.
    
    NameNode nn0 = cluster.getNameNode(0);
    ExtendedBlock blk = DFSTestUtil.getFirstBlock(fs, TEST_PATH);
    DatanodeDescriptor expectedPrimary =
        DFSTestUtil.getExpectedPrimaryNode(nn0, blk);
    LOG.info("Expecting block recovery to be triggered on DN " +
        expectedPrimary);
    
    // Find the corresponding DN daemon, and spy on its connection to the
    // active.
    DataNode primaryDN = cluster.getDataNode(expectedPrimary.getIpcPort());
    DatanodeProtocolClientSideTranslatorPB nnSpy =
        DataNodeTestUtils.spyOnBposToNN(primaryDN, nn0);
    
    // Delay the commitBlockSynchronization call.
    // The new generation stamp is a long in the Hadoop API, so it is matched
    // with anyLong(): type-checking Mockito versions would never match a
    // long argument with anyInt(), and the wait below would run into the
    // test timeout.
    DelayAnswer delayer = new DelayAnswer(LOG);
    Mockito.doAnswer(delayer).when(nnSpy).commitBlockSynchronization(
        Mockito.eq(blk),
        Mockito.anyLong(), // new genstamp
        Mockito.anyLong(), // new length
        Mockito.eq(true), // close file
        Mockito.eq(false), // delete block
        (DatanodeID[]) Mockito.anyObject(), // new targets
        (String[]) Mockito.anyObject()); // new target storages

    DistributedFileSystem fsOtherUser = createFsAsOtherUser(cluster, conf);
    assertFalse(fsOtherUser.recoverLease(TEST_PATH));
    
    LOG.info("Waiting for commitBlockSynchronization call from primary");
    delayer.waitForCall();

    LOG.info("Failing over to NN 1");
    
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);
    
    // Let the commitBlockSynchronization call go through, and check that
    // it failed with the correct exception.
    delayer.proceed();
    delayer.waitForResult();
    Throwable t = delayer.getThrown();
    if (t == null) {
      fail("commitBlockSynchronization call did not fail on standby");
    }
    GenericTestUtils.assertExceptionContains(
        "Operation category WRITE is not supported",
        t);
    
    // Now, if we try again to recover the block, it should succeed on the new
    // active.
    loopRecoverLease(fsOtherUser, TEST_PATH);
    
    AppendTestUtil.check(fs, TEST_PATH, BLOCK_SIZE/2);
  } finally {
    IOUtils.closeStream(stm);
    cluster.shutdown();
  }
}
 
Example 9
Source File: TestPipelinesFailover.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Simulates a writer that never closes its file: a block and a half is
 * written and flushed, the cluster fails over, and a different user then
 * recovers the lease. Approximates HBase WAL recovery after a NameNode
 * failover. The file content is checked on the new active and again after
 * failing back to the original NameNode.
 */
@Test(timeout=30000)
public void testLeaseRecoveryAfterFailover() throws Exception {
  final Configuration conf = new Configuration();
  // Permissions are switched off so a second user may recover the lease.
  conf.setBoolean(DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY, false);
  conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);

  final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .nnTopology(MiniDFSNNTopology.simpleHATopology())
      .numDataNodes(3)
      .build();
  FSDataOutputStream out = null;
  try {
    cluster.waitActive();
    cluster.transitionToActive(0);
    Thread.sleep(500);

    LOG.info("Starting with NN 0 active");
    FileSystem failoverFs = HATestUtil.configureFailoverFs(cluster, conf);
    out = failoverFs.create(TEST_PATH);

    // One and a half blocks of data, flushed but deliberately not closed.
    AppendTestUtil.write(out, 0, BLOCK_AND_A_HALF);
    out.hflush();

    LOG.info("Failing over to NN 1");
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);

    assertTrue(failoverFs.exists(TEST_PATH));

    FileSystem otherUserFs = createFsAsOtherUser(cluster, conf);
    loopRecoverLease(otherUserFs, TEST_PATH);

    AppendTestUtil.check(failoverFs, TEST_PATH, BLOCK_AND_A_HALF);

    // Fail back and re-check, so lost block locations on the original
    // node would be noticed.
    cluster.transitionToStandby(1);
    cluster.transitionToActive(0);
    AppendTestUtil.check(failoverFs, TEST_PATH, BLOCK_AND_A_HALF);
  } finally {
    IOUtils.closeStream(out);
    cluster.shutdown();
  }
}
 
Example 10
Source File: TestPipelinesFailover.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Keeps writing to one open file across a failover (performed by the given
 * scenario), a fail-back, and the loss of two DataNodes, then verifies the
 * complete file content.
 *
 * @param scenario performs the first failover, away from NN 0
 * @throws Exception if cluster setup, a write or the final check fails
 */
private void doTestWriteOverFailoverWithDnFail(TestScenario scenario)
    throws Exception {
  Configuration conf = new Configuration();
  conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);

  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .nnTopology(MiniDFSNNTopology.simpleHATopology())
      .numDataNodes(5)
      .build();
  FSDataOutputStream out = null;
  try {
    cluster.waitActive();
    cluster.transitionToActive(0);
    Thread.sleep(500);

    LOG.info("Starting with NN 0 active");
    FileSystem failoverFs = HATestUtil.configureFailoverFs(cluster, conf);
    out = failoverFs.create(TEST_PATH);

    // First chunk: a block and a half, flushed so that everything is
    // written out before the failover.
    AppendTestUtil.write(out, 0, BLOCK_AND_A_HALF);
    out.hflush();

    LOG.info("Failing over to NN 1");
    scenario.run(cluster);

    assertTrue(failoverFs.exists(TEST_PATH));

    cluster.stopDataNode(0);

    // Second chunk, written after the first DataNode was stopped.
    AppendTestUtil.write(out, BLOCK_AND_A_HALF, BLOCK_AND_A_HALF);
    out.hflush();

    LOG.info("Failing back to NN 0");
    cluster.transitionToStandby(1);
    cluster.transitionToActive(0);

    cluster.stopDataNode(1);

    // Third chunk, written after fail-back and a second DataNode stop.
    AppendTestUtil.write(out, BLOCK_AND_A_HALF*2, BLOCK_AND_A_HALF);
    out.hflush();

    // Close here and clear the reference so the finally block does not
    // close the stream a second time.
    out.close();
    out = null;

    AppendTestUtil.check(failoverFs, TEST_PATH, BLOCK_AND_A_HALF * 3);
  } finally {
    IOUtils.closeStream(out);
    cluster.shutdown();
  }
}
 
Example 11
Source File: TestPipelinesFailover.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Performs a graceful failover: NameNode 0 is moved to standby first, then
 * NameNode 1 is made active.
 */
@Override
void run(MiniDFSCluster cluster) throws IOException {
  final int previousActive = 0;
  final int nextActive = 1;
  cluster.transitionToStandby(previousActive);
  cluster.transitionToActive(nextActive);
}
 
Example 12
Source File: TestDFSUpgradeWithHA.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies that an upgrade can be finalized while the NameNode that started
 * it is in standby, i.e. with the other NameNode active, in a cluster backed
 * by JournalNodes.
 */
@Test
public void testFinalizeFromSecondNameNodeWithJournalNodes()
    throws IOException, URISyntaxException {
  MiniQJMHACluster qjCluster = null;
  FileSystem fs = null;
  try {
    Builder qjBuilder = new MiniQJMHACluster.Builder(conf);
    qjBuilder.getDfsBuilder().numDataNodes(0);
    qjCluster = qjBuilder.build();

    MiniDFSCluster cluster = qjCluster.getDfsCluster();

    // Baseline: no upgrade has happened, so no "previous" directories.
    checkJnPreviousDirExistence(qjCluster, false);
    checkClusterPreviousDirExistence(cluster, false);
    assertCTimesEqual(cluster);

    // Activate NN0 and perform a file system operation.
    cluster.transitionToActive(0);
    fs = HATestUtil.configureFailoverFs(cluster, conf);
    assertTrue(fs.mkdirs(new Path("/foo1")));

    // Upgrade step: stop NN1, then bring NN0 back with the upgrade option.
    cluster.shutdownNameNode(1);
    cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
    cluster.restartNameNode(0, false);

    checkNnPreviousDirExistence(cluster, 0, true);
    checkNnPreviousDirExistence(cluster, 1, false);
    checkJnPreviousDirExistence(qjCluster, true);

    // Re-bootstrap the standby from the upgraded state.
    final String[] bootstrapArgs = {"-force"};
    final int bootstrapRc =
        BootstrapStandby.run(bootstrapArgs, cluster.getConfiguration(1));
    assertEquals(0, bootstrapRc);

    cluster.restartNameNode(1);

    // The finalize command runs with NN1 active, which is not the node
    // that initiated the upgrade.
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);

    runFinalizeCommand(cluster);

    checkClusterPreviousDirExistence(cluster, false);
    checkJnPreviousDirExistence(qjCluster, false);
    assertCTimesEqual(cluster);
  } finally {
    if (fs != null) {
      fs.close();
    }
    if (qjCluster != null) {
      qjCluster.shutdown();
    }
  }
}
 
Example 13
Source File: TestDFSUpgradeWithHA.java    From hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies that an upgrade can be finalized while the NameNode that started
 * it is in standby, i.e. with the other NameNode active, in a cluster backed
 * by JournalNodes.
 */
@Test
public void testFinalizeFromSecondNameNodeWithJournalNodes()
    throws IOException, URISyntaxException {
  MiniQJMHACluster qjCluster = null;
  FileSystem fs = null;
  try {
    Builder qjBuilder = new MiniQJMHACluster.Builder(conf);
    qjBuilder.getDfsBuilder().numDataNodes(0);
    qjCluster = qjBuilder.build();

    MiniDFSCluster cluster = qjCluster.getDfsCluster();

    // Baseline: no upgrade has happened, so no "previous" directories.
    checkJnPreviousDirExistence(qjCluster, false);
    checkClusterPreviousDirExistence(cluster, false);
    assertCTimesEqual(cluster);

    // Activate NN0 and perform a file system operation.
    cluster.transitionToActive(0);
    fs = HATestUtil.configureFailoverFs(cluster, conf);
    assertTrue(fs.mkdirs(new Path("/foo1")));

    // Upgrade step: stop NN1, then bring NN0 back with the upgrade option.
    cluster.shutdownNameNode(1);
    cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
    cluster.restartNameNode(0, false);

    checkNnPreviousDirExistence(cluster, 0, true);
    checkNnPreviousDirExistence(cluster, 1, false);
    checkJnPreviousDirExistence(qjCluster, true);

    // Re-bootstrap the standby from the upgraded state.
    final String[] bootstrapArgs = {"-force"};
    final int bootstrapRc =
        BootstrapStandby.run(bootstrapArgs, cluster.getConfiguration(1));
    assertEquals(0, bootstrapRc);

    cluster.restartNameNode(1);

    // The finalize command runs with NN1 active, which is not the node
    // that initiated the upgrade.
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);

    runFinalizeCommand(cluster);

    checkClusterPreviousDirExistence(cluster, false);
    checkJnPreviousDirExistence(qjCluster, false);
    assertCTimesEqual(cluster);
  } finally {
    if (fs != null) {
      fs.close();
    }
    if (qjCluster != null) {
      qjCluster.shutdown();
    }
  }
}
 
Example 14
Source File: TestDFSUpgradeWithHA.java    From hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies that an HA NameNode pair backed by JournalNodes can be upgraded,
 * that the committed transaction id reported for the journal nodes does not
 * go backwards across the upgrade, and that both NameNodes can serve file
 * system operations afterwards.
 */
@Test
public void testUpgradeWithJournalNodes() throws IOException,
    URISyntaxException {
  MiniQJMHACluster qjCluster = null;
  FileSystem fs = null;
  try {
    Builder qjBuilder = new MiniQJMHACluster.Builder(conf);
    qjBuilder.getDfsBuilder().numDataNodes(0);
    qjCluster = qjBuilder.build();

    MiniDFSCluster cluster = qjCluster.getDfsCluster();

    // Baseline: no upgrade has happened, so no "previous" directories.
    checkJnPreviousDirExistence(qjCluster, false);
    checkClusterPreviousDirExistence(cluster, false);
    assertCTimesEqual(cluster);

    // Activate NN0 and perform a file system operation.
    cluster.transitionToActive(0);
    fs = HATestUtil.configureFailoverFs(cluster, conf);
    assertTrue(fs.mkdirs(new Path("/foo1")));

    // Remember the journal nodes' committedTxnId before upgrading.
    final long txnIdBeforeUpgrade = getCommittedTxnIdValue(qjCluster);

    // Upgrade step: stop NN1, then bring NN0 back with the upgrade option.
    cluster.shutdownNameNode(1);
    cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
    cluster.restartNameNode(0, false);

    checkNnPreviousDirExistence(cluster, 0, true);
    checkNnPreviousDirExistence(cluster, 1, false);
    checkJnPreviousDirExistence(qjCluster, true);

    assertTrue(txnIdBeforeUpgrade <= getCommittedTxnIdValue(qjCluster));

    // With -upgrade NN0 is expected to come up active already, so it is
    // used directly without an explicit transition.
    assertTrue(fs.mkdirs(new Path("/foo2")));

    // A regular (non-upgrade) restart of NN0 must work as well.
    cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.REGULAR);
    cluster.restartNameNode(0, false);

    // File system operations must still succeed after the upgrade.
    cluster.transitionToActive(0);
    assertTrue(fs.mkdirs(new Path("/foo3")));

    assertTrue(getCommittedTxnIdValue(qjCluster) > txnIdBeforeUpgrade);

    // Re-bootstrap the standby from the upgraded state.
    final String[] bootstrapArgs = {"-force"};
    final int bootstrapRc =
        BootstrapStandby.run(bootstrapArgs, cluster.getConfiguration(1));
    assertEquals(0, bootstrapRc);

    // Bring NN1 back, fail over to it, and operate against it.
    cluster.restartNameNode(1);
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);
    assertTrue(fs.mkdirs(new Path("/foo4")));

    assertCTimesEqual(cluster);
  } finally {
    if (fs != null) {
      fs.close();
    }
    if (qjCluster != null) {
      qjCluster.shutdown();
    }
  }
}
 
Example 15
Source File: TestDFSUpgradeWithHA.java    From hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Ensure that an admin cannot finalize an HA upgrade without at least one NN
 * being active.
 *
 * <p>The method first walks through a complete upgrade (upgrade NN0,
 * re-bootstrap and restart NN1, fail over to NN1), then puts both NameNodes
 * in standby and expects the finalize command to fail with an
 * {@link IOException} whose message contains
 * "Cannot finalize with no NameNode active".
 *
 * @throws IOException if a cluster or file system operation fails
 *     unexpectedly
 * @throws URISyntaxException declared for the shared-edits / failover
 *     file system setup
 */
@Test
public void testCannotFinalizeIfNoActive() throws IOException,
    URISyntaxException {
  MiniDFSCluster cluster = null;
  FileSystem fs = null;
  try {
    cluster = new MiniDFSCluster.Builder(conf)
        .nnTopology(MiniDFSNNTopology.simpleHATopology())
        .numDataNodes(0)
        .build();

    // Shared edits directory used by NN0 and NN1.
    File sharedDir = new File(cluster.getSharedEditsDir(0, 1));
    
    // No upgrade is in progress at the moment.
    checkClusterPreviousDirExistence(cluster, false);
    assertCTimesEqual(cluster);
    checkPreviousDirExistence(sharedDir, false);
    
    // Transition NN0 to active and do some FS ops.
    cluster.transitionToActive(0);
    fs = HATestUtil.configureFailoverFs(cluster, conf);
    assertTrue(fs.mkdirs(new Path("/foo1")));
    
    // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
    // flag.
    cluster.shutdownNameNode(1);
    cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
    cluster.restartNameNode(0, false);
    
    // Only NN0 and the shared dir were upgraded; NN1 was down at the time.
    checkNnPreviousDirExistence(cluster, 0, true);
    checkNnPreviousDirExistence(cluster, 1, false);
    checkPreviousDirExistence(sharedDir, true);
    
    // NN0 should come up in the active state when given the -upgrade option,
    // so no need to transition it to active.
    assertTrue(fs.mkdirs(new Path("/foo2")));
    
    // Restart NN0 without the -upgrade flag, to make sure that works.
    cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.REGULAR);
    cluster.restartNameNode(0, false);
    
    // Make sure we can still do FS ops after upgrading.
    cluster.transitionToActive(0);
    assertTrue(fs.mkdirs(new Path("/foo3")));
    
    // Now bootstrap the standby with the upgraded info.
    int rc = BootstrapStandby.run(
        new String[]{"-force"},
        cluster.getConfiguration(1));
    assertEquals(0, rc);
    
    // Now restart NN1 and make sure that we can do ops against that as well.
    cluster.restartNameNode(1);
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);
    assertTrue(fs.mkdirs(new Path("/foo4")));
    
    assertCTimesEqual(cluster);
    
    // Now there's no active NN.
    cluster.transitionToStandby(1);

    // Finalizing must be rejected; reaching fail() means it was accepted.
    try {
      runFinalizeCommand(cluster);
      fail("Should not have been able to finalize upgrade with no NN active");
    } catch (IOException ioe) {
      GenericTestUtils.assertExceptionContains(
          "Cannot finalize with no NameNode active", ioe);
    }
  } finally {
    // Release the client before tearing the cluster down.
    if (fs != null) {
      fs.close();
    }
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
 
Example 16
Source File: TestHAFsck.java    From hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Runs fsck against an HA nameservice twice: once while NameNode 0 is
 * active and once more after failing over to NameNode 1.
 */
@Test
public void testHaFsck() throws Exception {
  Configuration conf = new Configuration();
  final String nameserviceId = "ha-nn-uri-0";

  // Both NameNodes get fixed HTTP ports.
  MiniDFSNNTopology.NSConf nameservice =
      new MiniDFSNNTopology.NSConf(nameserviceId)
          .addNN(new MiniDFSNNTopology.NNConf("nn1").setHttpPort(10051))
          .addNN(new MiniDFSNNTopology.NNConf("nn2").setHttpPort(10052));
  MiniDFSNNTopology topology =
      new MiniDFSNNTopology().addNameservice(nameservice);

  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .nnTopology(topology)
      .numDataNodes(0)
      .build();
  FileSystem fs = null;
  try {
    cluster.waitActive();
    cluster.transitionToActive(0);

    // Populate conf with the HA settings for the nameservice.
    HATestUtil.setFailoverConfigurations(cluster, conf, nameserviceId, 0);

    fs = HATestUtil.configureFailoverFs(cluster, conf);
    fs.mkdirs(new Path("/test1"));
    fs.mkdirs(new Path("/test2"));

    // fsck with NameNode 0 active.
    runFsck(conf);

    // Fail over, then fsck again with NameNode 1 active.
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);
    runFsck(conf);
  } finally {
    if (fs != null) {
      fs.close();
    }
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
 
Example 17
Source File: TestDFSUpgradeWithHA.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that an HA NameNode pair whose shared edits live on JournalNodes
 * can be upgraded: NN0 is restarted with the upgrade option, NN1 is then
 * re-bootstrapped from it, and file system operations are exercised against
 * each NameNode along the way.
 */
@Test
public void testUpgradeWithJournalNodes() throws IOException,
    URISyntaxException {
  MiniQJMHACluster journalCluster = null;
  FileSystem failoverFs = null;
  try {
    Builder qjBuilder = new MiniQJMHACluster.Builder(conf);
    qjBuilder.getDfsBuilder().numDataNodes(0);
    journalCluster = qjBuilder.build();

    MiniDFSCluster dfs = journalCluster.getDfsCluster();

    // Before any upgrade, no "previous" directories should exist.
    checkJnPreviousDirExistence(journalCluster, false);
    checkClusterPreviousDirExistence(dfs, false);
    assertCTimesEqual(dfs);

    // Activate NN0 and perform a first file system operation.
    dfs.transitionToActive(0);
    failoverFs = HATestUtil.configureFailoverFs(dfs, conf);
    assertTrue(failoverFs.mkdirs(new Path("/foo1")));

    // Remember the journal nodes' committedTxnId prior to upgrading.
    final long txnIdBeforeUpgrade = getCommittedTxnIdValue(journalCluster);

    // Upgrade step: stop NN1, then bring NN0 back with the upgrade option.
    dfs.shutdownNameNode(1);
    dfs.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
    dfs.restartNameNode(0, false);

    checkNnPreviousDirExistence(dfs, 0, true);
    checkNnPreviousDirExistence(dfs, 1, false);
    checkJnPreviousDirExistence(journalCluster, true);

    assertTrue(txnIdBeforeUpgrade <= getCommittedTxnIdValue(journalCluster));

    // With -upgrade NN0 is expected to start as active, so no explicit
    // transition is performed before this operation.
    assertTrue(failoverFs.mkdirs(new Path("/foo2")));

    // A plain restart of NN0 (no upgrade option) must work as well.
    dfs.getNameNodeInfos()[0].setStartOpt(StartupOption.REGULAR);
    dfs.restartNameNode(0, false);

    // File system operations must still succeed after the upgrade.
    dfs.transitionToActive(0);
    assertTrue(failoverFs.mkdirs(new Path("/foo3")));

    assertTrue(getCommittedTxnIdValue(journalCluster) > txnIdBeforeUpgrade);

    // Bootstrap the standby (NN1) from the upgraded state.
    String[] bootstrapArgs = {"-force"};
    int exitCode = BootstrapStandby.run(bootstrapArgs, dfs.getConfiguration(1));
    assertEquals(0, exitCode);

    // Bring NN1 back, make it active, and run an operation against it.
    dfs.restartNameNode(1);
    dfs.transitionToStandby(0);
    dfs.transitionToActive(1);
    assertTrue(failoverFs.mkdirs(new Path("/foo4")));

    assertCTimesEqual(dfs);
  } finally {
    if (failoverFs != null) {
      failoverFs.close();
    }
    if (journalCluster != null) {
      journalCluster.shutdown();
    }
  }
}
 
Example 18
Source File: TestPendingCorruptDnMessages.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Exercises a failover after a DataNode has been wiped and restarted while
 * the standby NameNode (NN1) had a pending DataNode message queued for it.
 *
 * <p>Steps: write a file through NN0, rewind the generation stamp of its
 * first block on the DataNode, restart NN1 and the DataNode so that NN1
 * queues a pending DataNode message, wipe and restart the DataNode, wait for
 * the DataNode UID registered on NN1 to change, assert that the pending
 * message queue is empty again, and finally fail over from NN0 to NN1.
 *
 * <p>The client file system and the output stream are always closed, and the
 * cluster is shut down even if closing the file system fails.
 */
@Test
public void testChangedStorageId() throws IOException, URISyntaxException,
    InterruptedException {
  HdfsConfiguration conf = new HdfsConfiguration();
  conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .numDataNodes(1)
      .nnTopology(MiniDFSNNTopology.simpleHATopology())
      .build();

  // Declared outside the try block so that it can be released in finally.
  FileSystem fs = null;
  try {
    cluster.transitionToActive(0);

    fs = HATestUtil.configureFailoverFs(cluster, conf);
    OutputStream out = fs.create(filePath);
    try {
      out.write("foo bar baz".getBytes());
    } finally {
      // Close the stream even when the write fails.
      out.close();
    }

    HATestUtil.waitForStandbyToCatchUp(cluster.getNameNode(0),
        cluster.getNameNode(1));

    // Change the gen stamp of the block on datanode to go back in time (gen
    // stamps start at 1000)
    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, filePath);
    assertTrue(cluster.changeGenStampOfBlock(0, block, 900));

    // Stop the DN so the replica with the changed gen stamp will be reported
    // when this DN starts up.
    DataNodeProperties dnProps = cluster.stopDataNode(0);

    // Restart the namenode so that when the DN comes up it will see an initial
    // block report.
    cluster.restartNameNode(1, false);
    assertTrue(cluster.restartDataNode(dnProps, true));

    // Wait until the standby NN queues up the corrupt block in the pending DN
    // message queue.
    while (cluster.getNamesystem(1).getBlockManager()
        .getPendingDataNodeMessageCount() < 1) {
      ThreadUtil.sleepAtLeastIgnoreInterrupts(1000);
    }

    assertEquals(1, cluster.getNamesystem(1).getBlockManager()
        .getPendingDataNodeMessageCount());
    String oldStorageId = getRegisteredDatanodeUid(cluster, 1);

    // Reformat/restart the DN.
    assertTrue(wipeAndRestartDn(cluster, 0));

    // Give the DN time to start up and register, which will cause the
    // DatanodeManager to dissociate the old storage ID from the DN xfer addr.
    String newStorageId = "";
    do {
      ThreadUtil.sleepAtLeastIgnoreInterrupts(1000);
      newStorageId = getRegisteredDatanodeUid(cluster, 1);
      System.out.println("====> oldStorageId: " + oldStorageId +
          " newStorageId: " + newStorageId);
    } while (newStorageId.equals(oldStorageId));

    assertEquals(0, cluster.getNamesystem(1).getBlockManager()
        .getPendingDataNodeMessageCount());

    // Now try to fail over.
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);
  } finally {
    try {
      if (fs != null) {
        fs.close();
      }
    } finally {
      // Always tear the cluster down, even if closing the client failed.
      cluster.shutdown();
    }
  }
}
 
Example 19
Source File: TestPipelinesFailover.java    From hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Lease recovery for a client that "crashes" with a file still open: the
 * file is written through NN0, the cluster fails over to NN1, and a second
 * user recovers the lease. This approximates HBase WAL recovery following a
 * NameNode failover.
 */
@Test(timeout=30000)
public void testLeaseRecoveryAfterFailover() throws Exception {
  final Configuration conf = new Configuration();
  // Permissions are switched off so a different user may recover the lease.
  conf.setBoolean(DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY, false);
  conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);

  MiniDFSCluster.Builder haBuilder = new MiniDFSCluster.Builder(conf)
      .nnTopology(MiniDFSNNTopology.simpleHATopology())
      .numDataNodes(3);
  final MiniDFSCluster cluster = haBuilder.build();
  FSDataOutputStream openStream = null;
  try {
    cluster.waitActive();
    cluster.transitionToActive(0);
    Thread.sleep(500);

    LOG.info("Starting with NN 0 active");
    FileSystem writerFs = HATestUtil.configureFailoverFs(cluster, conf);
    openStream = writerFs.create(TEST_PATH);

    // One and a half blocks are written and flushed; the stream stays open.
    AppendTestUtil.write(openStream, 0, BLOCK_AND_A_HALF);
    openStream.hflush();

    LOG.info("Failing over to NN 1");

    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);

    assertTrue(writerFs.exists(TEST_PATH));

    // A second user recovers the lease on the still-open file.
    FileSystem recoveringFs = createFsAsOtherUser(cluster, conf);
    loopRecoverLease(recoveringFs, TEST_PATH);

    AppendTestUtil.check(writerFs, TEST_PATH, BLOCK_AND_A_HALF);

    // Switch back to NN0 and re-check the file, so that block locations
    // lost on the original node would be noticed.
    cluster.transitionToStandby(1);
    cluster.transitionToActive(0);
    AppendTestUtil.check(writerFs, TEST_PATH, BLOCK_AND_A_HALF);
  } finally {
    IOUtils.closeStream(openStream);
    cluster.shutdown();
  }
}
 
Example 20
Source File: TestPipelinesFailover.java    From hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Writes a single file in three chunks of a block and a half each, with a
 * failover (performed by {@code scenario}) after the first chunk and a
 * fail-back to NN0 after the second. One DataNode is stopped after each of
 * the two role changes. The full contents are verified once the stream has
 * been closed.
 *
 * @param scenario performs the failover to NN 1 on the given cluster
 * @throws Exception if any step of the test fails
 */
private void doTestWriteOverFailoverWithDnFail(TestScenario scenario)
    throws Exception {
  Configuration conf = new Configuration();
  conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);

  MiniDFSCluster.Builder haBuilder = new MiniDFSCluster.Builder(conf)
      .nnTopology(MiniDFSNNTopology.simpleHATopology())
      .numDataNodes(5);
  FSDataOutputStream output = null;
  MiniDFSCluster cluster = haBuilder.build();
  try {
    cluster.waitActive();
    cluster.transitionToActive(0);
    Thread.sleep(500);

    LOG.info("Starting with NN 0 active");
    FileSystem failoverFs = HATestUtil.configureFailoverFs(cluster, conf);
    output = failoverFs.create(TEST_PATH);

    // First chunk: a block and a half, flushed so that everything is
    // written out before the failover happens.
    AppendTestUtil.write(output, 0, BLOCK_AND_A_HALF);
    output.hflush();

    LOG.info("Failing over to NN 1");
    scenario.run(cluster);

    assertTrue(failoverFs.exists(TEST_PATH));

    cluster.stopDataNode(0);

    // Second chunk: another block and a half.
    AppendTestUtil.write(output, BLOCK_AND_A_HALF, BLOCK_AND_A_HALF);
    output.hflush();

    LOG.info("Failing back to NN 0");
    cluster.transitionToStandby(1);
    cluster.transitionToActive(0);

    cluster.stopDataNode(1);

    // Third chunk, written after the fail-back.
    AppendTestUtil.write(output, BLOCK_AND_A_HALF*2, BLOCK_AND_A_HALF);
    output.hflush();

    // Close explicitly and clear the reference so that the finally block
    // does not close the stream a second time.
    output.close();
    output = null;

    AppendTestUtil.check(failoverFs, TEST_PATH, BLOCK_AND_A_HALF * 3);
  } finally {
    IOUtils.closeStream(output);
    cluster.shutdown();
  }
}