Java Code Examples for org.apache.hadoop.hdfs.MiniDFSCluster#restartNameNode()

The following examples show how to use org.apache.hadoop.hdfs.MiniDFSCluster#restartNameNode(). Each example is drawn from an open-source project; the originating source file and license are noted above each listing.
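Before turning to the project examples, here is a minimal sketch of the typical call pattern, written for this page rather than taken from any project below. The path name and assertion are illustrative; the sketch assumes the standard Hadoop test dependencies are on the classpath. The overloads used throughout the examples are restartNameNode() and restartNameNode(boolean waitActive) for a single NameNode, and restartNameNode(int nnIndex, boolean waitActive) for HA or federated topologies.

// Minimal sketch (not from any project below): restart the sole NameNode
// of a MiniDFSCluster and verify that namespace state survived the restart.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.MiniDFSCluster;

public class RestartNameNodeSketch {
  public static void main(String[] args) throws Exception {
    MiniDFSCluster cluster = null;
    try {
      cluster = new MiniDFSCluster.Builder(new Configuration())
          .numDataNodes(1)
          .build();
      cluster.waitActive();

      FileSystem fs = cluster.getFileSystem();
      Path dir = new Path("/restart-test");   // illustrative path
      fs.mkdirs(dir);

      // Restart the (single) NameNode; passing "true" waits for the
      // cluster to become active again before returning.
      cluster.restartNameNode(true);

      // The mkdir should have been persisted via the edit log/image.
      if (!cluster.getFileSystem().exists(dir)) {
        throw new AssertionError("edit was not persisted across restart");
      }
    } finally {
      if (cluster != null) {
        cluster.shutdown();
      }
    }
  }
}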
Example 1
Source File: TestStartup.java    From hadoop with Apache License 2.0
/**
 * Verify the following scenario.
 * 1. NN restarts.
 * 2. Heartbeat RPC will retry and succeed. NN asks DN to reregister.
 * 3. After reregistration completes, DN will send Heartbeat, followed by
 *    Blockreport.
 * 4. NN will set DatanodeStorageInfo#blockContentsStale to false.
 * @throws Exception
 */
@Test(timeout = 60000)
public void testStorageBlockContentsStaleAfterNNRestart() throws Exception {
  MiniDFSCluster dfsCluster = null;
  try {
    Configuration config = new Configuration();
    dfsCluster = new MiniDFSCluster.Builder(config).numDataNodes(1).build();
    dfsCluster.waitActive();
    dfsCluster.restartNameNode(true);
    BlockManagerTestUtil.checkHeartbeat(
        dfsCluster.getNamesystem().getBlockManager());
    MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
    ObjectName mxbeanNameFsns = new ObjectName(
        "Hadoop:service=NameNode,name=FSNamesystemState");
    Integer numStaleStorages = (Integer) (mbs.getAttribute(
        mxbeanNameFsns, "NumStaleStorages"));
    assertEquals(0, numStaleStorages.intValue());
  } finally {
    if (dfsCluster != null) {
      dfsCluster.shutdown();
    }
  }
}
 
Example 2
Source File: TestINodeFile.java    From big-c with Apache License 2.0
private void ensureClusterRestartFails(MiniDFSCluster cluster) {
  try {
    cluster.restartNameNode();
    fail("Cluster should not have successfully started");
  } catch (Exception expected) {
    LOG.info("Expected exception thrown " + expected);
  }
  assertFalse(cluster.isClusterUp());
}
 
Example 3
Source File: TestBookKeeperAsHASharedDir.java    From hadoop with Apache License 2.0
private void assertCanNotStartNamenode(MiniDFSCluster cluster, int nnIndex) {
  try {
    cluster.restartNameNode(nnIndex, false);
    fail("Should not have been able to start NN" + (nnIndex)
        + " without shared dir");
  } catch (IOException ioe) {
    LOG.info("Got expected exception", ioe);
    GenericTestUtils.assertExceptionContains(
        "storage directory does not exist or is not accessible", ioe);
  }
}
 
Example 4
Source File: TestBookKeeperAsHASharedDir.java    From big-c with Apache License 2.0
private void assertCanStartHANameNodes(MiniDFSCluster cluster,
    Configuration conf, String path) throws ServiceFailedException,
    IOException, URISyntaxException, InterruptedException {
  // Now should be able to start both NNs. Pass "false" here so that we don't
  // try to waitActive on all NNs, since the second NN doesn't exist yet.
  cluster.restartNameNode(0, false);
  cluster.restartNameNode(1, true);

  // Make sure HA is working.
  cluster
      .getNameNode(0)
      .getRpcServer()
      .transitionToActive(
          new StateChangeRequestInfo(RequestSource.REQUEST_BY_USER));
  FileSystem fs = null;
  try {
    Path newPath = new Path(path);
    fs = HATestUtil.configureFailoverFs(cluster, conf);
    assertTrue(fs.mkdirs(newPath));
    HATestUtil.waitForStandbyToCatchUp(cluster.getNameNode(0),
        cluster.getNameNode(1));
    assertTrue(NameNodeAdapter.getFileInfo(cluster.getNameNode(1),
        newPath.toString(), false).isDir());
  } finally {
    if (fs != null) {
      fs.close();
    }
  }
}
 
Example 5
Source File: TestNNWithQJM.java    From big-c with Apache License 2.0
@Test (timeout = 30000)
public void testLogAndRestart() throws IOException {
  conf.set(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY,
      MiniDFSCluster.getBaseDirectory() + "/TestNNWithQJM/image");
  conf.set(DFSConfigKeys.DFS_NAMENODE_EDITS_DIR_KEY,
      mjc.getQuorumJournalURI("myjournal").toString());
  
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .numDataNodes(0)
    .manageNameDfsDirs(false)
    .build();
  try {
    cluster.getFileSystem().mkdirs(TEST_PATH);
    
    // Restart the NN and make sure the edit was persisted
    // and loaded again
    cluster.restartNameNode();
    
    assertTrue(cluster.getFileSystem().exists(TEST_PATH));
    cluster.getFileSystem().mkdirs(TEST_PATH_2);
    
    // Restart the NN again and make sure both edits are persisted.
    cluster.restartNameNode();
    assertTrue(cluster.getFileSystem().exists(TEST_PATH));
    assertTrue(cluster.getFileSystem().exists(TEST_PATH_2));
  } finally {
    cluster.shutdown();
  }
}
 
Example 6
Source File: TestDFSUpgradeWithHA.java    From hadoop with Apache License 2.0
/**
 * Make sure that starting a second NN with the -upgrade flag fails if the
 * other NN has already done that.
 */
@Test
public void testCannotUpgradeSecondNameNode() throws IOException,
    URISyntaxException {
  MiniDFSCluster cluster = null;
  FileSystem fs = null;
  try {
    cluster = new MiniDFSCluster.Builder(conf)
        .nnTopology(MiniDFSNNTopology.simpleHATopology())
        .numDataNodes(0)
        .build();

    File sharedDir = new File(cluster.getSharedEditsDir(0, 1));
    
    // No upgrade is in progress at the moment.
    checkClusterPreviousDirExistence(cluster, false);
    assertCTimesEqual(cluster);
    checkPreviousDirExistence(sharedDir, false);
    
    // Transition NN0 to active and do some FS ops.
    cluster.transitionToActive(0);
    fs = HATestUtil.configureFailoverFs(cluster, conf);
    assertTrue(fs.mkdirs(new Path("/foo1")));
    
    // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
    // flag.
    cluster.shutdownNameNode(1);
    cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
    cluster.restartNameNode(0, false);
    
    checkNnPreviousDirExistence(cluster, 0, true);
    checkNnPreviousDirExistence(cluster, 1, false);
    checkPreviousDirExistence(sharedDir, true);
    
    // NN0 should come up in the active state when given the -upgrade option,
    // so no need to transition it to active.
    assertTrue(fs.mkdirs(new Path("/foo2")));
    
    // Restart NN0 without the -upgrade flag, to make sure that works.
    cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.REGULAR);
    cluster.restartNameNode(0, false);
    
    // Make sure we can still do FS ops after upgrading.
    cluster.transitionToActive(0);
    assertTrue(fs.mkdirs(new Path("/foo3")));
    
    // Make sure that starting the second NN with the -upgrade flag fails.
    cluster.getNameNodeInfos()[1].setStartOpt(StartupOption.UPGRADE);
    try {
      cluster.restartNameNode(1, false);
      fail("Should not have been able to start second NN with -upgrade");
    } catch (IOException ioe) {
      GenericTestUtils.assertExceptionContains(
          "It looks like the shared log is already being upgraded", ioe);
    }
  } finally {
    if (fs != null) {
      fs.close();
    }
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
 
Example 7
Source File: TestEditLogsDuringFailover.java    From big-c with Apache License 2.0
@Test
public void testStartup() throws Exception {
  Configuration conf = new Configuration();
  HAUtil.setAllowStandbyReads(conf, true);
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .nnTopology(MiniDFSNNTopology.simpleHATopology())
    .numDataNodes(0)
    .build();
  try {
    // During HA startup, both nodes should be in
    // standby and we shouldn't have any edits files
    // in any edits directory!
    List<URI> allDirs = Lists.newArrayList();
    allDirs.addAll(cluster.getNameDirs(0));
    allDirs.addAll(cluster.getNameDirs(1));
    allDirs.add(cluster.getSharedEditsDir(0, 1));
    assertNoEditFiles(allDirs);
    
    // Set the first NN to active, make sure it creates edits
    // in its own dirs and the shared dir. The standby
    // should still have no edits!
    cluster.transitionToActive(0);
    
    assertEditFiles(cluster.getNameDirs(0),
        NNStorage.getInProgressEditsFileName(1));
    assertEditFiles(
        Collections.singletonList(cluster.getSharedEditsDir(0, 1)),
        NNStorage.getInProgressEditsFileName(1));
    assertNoEditFiles(cluster.getNameDirs(1));
    
    cluster.getNameNode(0).getRpcServer().mkdirs("/test",
        FsPermission.createImmutable((short)0755), true);

    // Restarting the standby should not finalize any edits files
    // in the shared directory when it starts up!
    cluster.restartNameNode(1);
    
    assertEditFiles(cluster.getNameDirs(0),
        NNStorage.getInProgressEditsFileName(1));
    assertEditFiles(
        Collections.singletonList(cluster.getSharedEditsDir(0, 1)),
        NNStorage.getInProgressEditsFileName(1));
    assertNoEditFiles(cluster.getNameDirs(1));
    
    // Additionally it should not have applied any in-progress logs
    // at start-up -- otherwise, it would have read half-way into
    // the current log segment, and on the next roll, it would have to
    // either replay starting in the middle of the segment (not allowed)
    // or double-replay the edits (incorrect).
    assertNull(NameNodeAdapter.getFileInfo(cluster.getNameNode(1), "/test", true));
    
    cluster.getNameNode(0).getRpcServer().mkdirs("/test2",
        FsPermission.createImmutable((short)0755), true);

    // If we restart NN0, it'll come back as standby, and we can
    // transition NN1 to active and make sure it reads edits correctly at this point.
    cluster.restartNameNode(0);
    cluster.transitionToActive(1);

    // NN1 should have both the edits that came before its restart, and the edits that
    // came after its restart.
    assertNotNull(NameNodeAdapter.getFileInfo(cluster.getNameNode(1), "/test", true));
    assertNotNull(NameNodeAdapter.getFileInfo(cluster.getNameNode(1), "/test2", true));
  } finally {
    cluster.shutdown();
  }
}
 
Example 8
Source File: TestDataNodeMultipleRegistrations.java    From hadoop with Apache License 2.0
@Test
public void testDNWithInvalidStorageWithHA() throws Exception {
  MiniDFSNNTopology top = new MiniDFSNNTopology()
    .addNameservice(new MiniDFSNNTopology.NSConf("ns1")
      .addNN(new MiniDFSNNTopology.NNConf("nn0").setClusterId("cluster-1"))
      .addNN(new MiniDFSNNTopology.NNConf("nn1").setClusterId("cluster-1")));

  top.setFederation(true);

  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).nnTopology(top)
      .numDataNodes(0).build();
  try {
    cluster.startDataNodes(conf, 1, true, null, null);
    // let the initialization be complete
    Thread.sleep(10000);
    DataNode dn = cluster.getDataNodes().get(0);
    assertTrue("Datanode should be running", dn.isDatanodeUp());
    assertEquals("BPOfferService should be running", 1,
        dn.getAllBpOs().length);
    DataNodeProperties dnProp = cluster.stopDataNode(0);

    cluster.getNameNode(0).stop();
    cluster.getNameNode(1).stop();
    Configuration nn1 = cluster.getConfiguration(0);
    Configuration nn2 = cluster.getConfiguration(1);
    // setting up invalid cluster
    StartupOption.FORMAT.setClusterId("cluster-2");
    DFSTestUtil.formatNameNode(nn1);
    MiniDFSCluster.copyNameDirs(FSNamesystem.getNamespaceDirs(nn1),
        FSNamesystem.getNamespaceDirs(nn2), nn2);
    cluster.restartNameNode(0, false);
    cluster.restartNameNode(1, false);
    cluster.restartDataNode(dnProp);
    
    // let the initialization be complete
    Thread.sleep(10000);
    dn = cluster.getDataNodes().get(0);
    assertFalse("Datanode should have shutdown as only service failed",
        dn.isDatanodeUp());
  } finally {
    cluster.shutdown();
  }
}
 
Example 9
Source File: TestEditLog.java    From hadoop with Apache License 2.0
/**
 * Edit log op instances are cached internally using thread-local storage.
 * This test checks that the cached instances are reset in between different
 * transactions processed on the same thread, so that we don't accidentally
 * apply incorrect attributes to an inode.
 *
 * @throws IOException if there is an I/O error
 */
@Test
public void testResetThreadLocalCachedOps() throws IOException {
  Configuration conf = new HdfsConfiguration();
  conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_ACLS_ENABLED_KEY, true);
  // Set single handler thread, so all transactions hit same thread-local ops.
  conf.setInt(DFSConfigKeys.DFS_NAMENODE_HANDLER_COUNT_KEY, 1);
  MiniDFSCluster cluster = null;
  FileSystem fileSys = null;
  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
    cluster.waitActive();
    fileSys = cluster.getFileSystem();

    // Create /dir1 with a default ACL.
    Path dir1 = new Path("/dir1");
    fileSys.mkdirs(dir1);
    List<AclEntry> aclSpec = Lists.newArrayList(
        aclEntry(DEFAULT, USER, "foo", READ_EXECUTE));
    fileSys.modifyAclEntries(dir1, aclSpec);

    // /dir1/dir2 is expected to clone the default ACL.
    Path dir2 = new Path("/dir1/dir2");
    fileSys.mkdirs(dir2);

    // /dir1/file1 is expected to clone the default ACL.
    Path file1 = new Path("/dir1/file1");
    fileSys.create(file1).close();

    // /dir3 is not a child of /dir1, so must not clone the default ACL.
    Path dir3 = new Path("/dir3");
    fileSys.mkdirs(dir3);

    // /file2 is not a child of /dir1, so must not clone the default ACL.
    Path file2 = new Path("/file2");
    fileSys.create(file2).close();

    // Restart and assert the above stated expectations.
    IOUtils.cleanup(LOG, fileSys);
    cluster.restartNameNode();
    fileSys = cluster.getFileSystem();
    assertFalse(fileSys.getAclStatus(dir1).getEntries().isEmpty());
    assertFalse(fileSys.getAclStatus(dir2).getEntries().isEmpty());
    assertFalse(fileSys.getAclStatus(file1).getEntries().isEmpty());
    assertTrue(fileSys.getAclStatus(dir3).getEntries().isEmpty());
    assertTrue(fileSys.getAclStatus(file2).getEntries().isEmpty());
  } finally {
    IOUtils.cleanup(LOG, fileSys);
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
 
Example 10
Source File: TestINodeFile.java    From hadoop with Apache License 2.0
private void ensureClusterRestartSucceeds(MiniDFSCluster cluster)
    throws IOException {
  cluster.restartNameNode();
  cluster.waitActive();
  assertTrue(cluster.isClusterUp());
}
 
Example 11
Source File: TestPendingCorruptDnMessages.java    From big-c with Apache License 2.0
@Test
public void testChangedStorageId() throws IOException, URISyntaxException,
    InterruptedException {
  HdfsConfiguration conf = new HdfsConfiguration();
  conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .numDataNodes(1)
      .nnTopology(MiniDFSNNTopology.simpleHATopology())
      .build();
  
  try {
    cluster.transitionToActive(0);
    
    FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
    OutputStream out = fs.create(filePath);
    out.write("foo bar baz".getBytes());
    out.close();
    
    HATestUtil.waitForStandbyToCatchUp(cluster.getNameNode(0),
        cluster.getNameNode(1));
    
    // Change the gen stamp of the block on datanode to go back in time (gen
    // stamps start at 1000)
    ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, filePath);
    assertTrue(cluster.changeGenStampOfBlock(0, block, 900));
    
    // Stop the DN so the replica with the changed gen stamp will be reported
    // when this DN starts up.
    DataNodeProperties dnProps = cluster.stopDataNode(0);
    
    // Restart the standby namenode so that when the DN comes back up, the
    // namenode will receive an initial block report.
    cluster.restartNameNode(1, false);
    assertTrue(cluster.restartDataNode(dnProps, true));
    
    // Wait until the standby NN queues up the corrupt block in the pending DN
    // message queue.
    while (cluster.getNamesystem(1).getBlockManager()
        .getPendingDataNodeMessageCount() < 1) {
      ThreadUtil.sleepAtLeastIgnoreInterrupts(1000);
    }
    
    assertEquals(1, cluster.getNamesystem(1).getBlockManager()
        .getPendingDataNodeMessageCount());
    String oldStorageId = getRegisteredDatanodeUid(cluster, 1);
    
    // Reformat/restart the DN.
    assertTrue(wipeAndRestartDn(cluster, 0));
    
    // Give the DN time to start up and register, which will cause the
    // DatanodeManager to dissociate the old storage ID from the DN xfer addr.
    String newStorageId = "";
    do {
      ThreadUtil.sleepAtLeastIgnoreInterrupts(1000);
      newStorageId = getRegisteredDatanodeUid(cluster, 1);
      System.out.println("====> oldStorageId: " + oldStorageId +
          " newStorageId: " + newStorageId);
    } while (newStorageId.equals(oldStorageId));
    
    assertEquals(0, cluster.getNamesystem(1).getBlockManager()
        .getPendingDataNodeMessageCount());
    
    // Now try to fail over.
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);
  } finally {
    cluster.shutdown();
  }
}
 
Example 12
Source File: TestDFSUpgradeWithHA.java    From hadoop with Apache License 2.0
/**
 * Test rollback with NFS shared dir.
 */
@Test
public void testRollbackWithNfs() throws Exception {
  MiniDFSCluster cluster = null;
  FileSystem fs = null;
  try {
    cluster = new MiniDFSCluster.Builder(conf)
        .nnTopology(MiniDFSNNTopology.simpleHATopology())
        .numDataNodes(0)
        .build();

    File sharedDir = new File(cluster.getSharedEditsDir(0, 1));
    
    // No upgrade is in progress at the moment.
    checkClusterPreviousDirExistence(cluster, false);
    assertCTimesEqual(cluster);
    checkPreviousDirExistence(sharedDir, false);
    
    // Transition NN0 to active and do some FS ops.
    cluster.transitionToActive(0);
    fs = HATestUtil.configureFailoverFs(cluster, conf);
    assertTrue(fs.mkdirs(new Path("/foo1")));
    
    // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
    // flag.
    cluster.shutdownNameNode(1);
    cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
    cluster.restartNameNode(0, false);
    
    checkNnPreviousDirExistence(cluster, 0, true);
    checkNnPreviousDirExistence(cluster, 1, false);
    checkPreviousDirExistence(sharedDir, true);
    
    // NN0 should come up in the active state when given the -upgrade option,
    // so no need to transition it to active.
    assertTrue(fs.mkdirs(new Path("/foo2")));
    
    // Now bootstrap the standby with the upgraded info.
    int rc = BootstrapStandby.run(
        new String[]{"-force"},
        cluster.getConfiguration(1));
    assertEquals(0, rc);
    
    cluster.restartNameNode(1);
    
    checkNnPreviousDirExistence(cluster, 0, true);
    checkNnPreviousDirExistence(cluster, 1, true);
    checkPreviousDirExistence(sharedDir, true);
    assertCTimesEqual(cluster);
    
    // Now shut down the cluster and do the rollback.
    Collection<URI> nn1NameDirs = cluster.getNameDirs(0);
    cluster.shutdown();

    conf.setStrings(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, Joiner.on(",").join(nn1NameDirs));
    NameNode.doRollback(conf, false);

    // The rollback operation should have rolled back the first NN's local
    // dirs, and the shared dir, but not the other NN's dirs. Those have to be
    // done by bootstrapping the standby.
    checkNnPreviousDirExistence(cluster, 0, false);
    checkPreviousDirExistence(sharedDir, false);
  } finally {
    if (fs != null) {
      fs.close();
    }
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
 
Example 13
Source File: TestDFSUpgradeWithHA.java    From hadoop with Apache License 2.0
@Test
public void testFinalizeWithJournalNodes() throws IOException,
    URISyntaxException {
  MiniQJMHACluster qjCluster = null;
  FileSystem fs = null;
  try {
    Builder builder = new MiniQJMHACluster.Builder(conf);
    builder.getDfsBuilder()
        .numDataNodes(0);
    qjCluster = builder.build();

    MiniDFSCluster cluster = qjCluster.getDfsCluster();
    
    // No upgrade is in progress at the moment.
    checkJnPreviousDirExistence(qjCluster, false);
    checkClusterPreviousDirExistence(cluster, false);
    assertCTimesEqual(cluster);
    
    // Transition NN0 to active and do some FS ops.
    cluster.transitionToActive(0);
    fs = HATestUtil.configureFailoverFs(cluster, conf);
    assertTrue(fs.mkdirs(new Path("/foo1")));

    final long cidBeforeUpgrade = getCommittedTxnIdValue(qjCluster);
    
    // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
    // flag.
    cluster.shutdownNameNode(1);
    cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
    cluster.restartNameNode(0, false);
    assertTrue(cidBeforeUpgrade <= getCommittedTxnIdValue(qjCluster));
    
    assertTrue(fs.mkdirs(new Path("/foo2")));

    checkNnPreviousDirExistence(cluster, 0, true);
    checkNnPreviousDirExistence(cluster, 1, false);
    checkJnPreviousDirExistence(qjCluster, true);
    
    // Now bootstrap the standby with the upgraded info.
    int rc = BootstrapStandby.run(
        new String[]{"-force"},
        cluster.getConfiguration(1));
    assertEquals(0, rc);
    
    cluster.restartNameNode(1);

    final long cidDuringUpgrade = getCommittedTxnIdValue(qjCluster);
    assertTrue(cidDuringUpgrade > cidBeforeUpgrade);

    runFinalizeCommand(cluster);

    assertEquals(cidDuringUpgrade, getCommittedTxnIdValue(qjCluster));
    checkClusterPreviousDirExistence(cluster, false);
    checkJnPreviousDirExistence(qjCluster, false);
    assertCTimesEqual(cluster);
  } finally {
    if (fs != null) {
      fs.close();
    }
    if (qjCluster != null) {
      qjCluster.shutdown();
    }
  }
}
 
Example 14
Source File: TestDFSUpgradeWithHA.java    From big-c with Apache License 2.0
@Test
public void testRollbackWithJournalNodes() throws IOException,
    URISyntaxException {
  MiniQJMHACluster qjCluster = null;
  FileSystem fs = null;
  try {
    Builder builder = new MiniQJMHACluster.Builder(conf);
    builder.getDfsBuilder()
        .numDataNodes(0);
    qjCluster = builder.build();

    MiniDFSCluster cluster = qjCluster.getDfsCluster();
    
    // No upgrade is in progress at the moment.
    checkClusterPreviousDirExistence(cluster, false);
    assertCTimesEqual(cluster);
    checkJnPreviousDirExistence(qjCluster, false);
    
    // Transition NN0 to active and do some FS ops.
    cluster.transitionToActive(0);
    fs = HATestUtil.configureFailoverFs(cluster, conf);
    assertTrue(fs.mkdirs(new Path("/foo1")));

    final long cidBeforeUpgrade = getCommittedTxnIdValue(qjCluster);

    // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
    // flag.
    cluster.shutdownNameNode(1);
    cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
    cluster.restartNameNode(0, false);
    
    checkNnPreviousDirExistence(cluster, 0, true);
    checkNnPreviousDirExistence(cluster, 1, false);
    checkJnPreviousDirExistence(qjCluster, true);
    
    // NN0 should come up in the active state when given the -upgrade option,
    // so no need to transition it to active.
    assertTrue(fs.mkdirs(new Path("/foo2")));

    final long cidDuringUpgrade = getCommittedTxnIdValue(qjCluster);
    assertTrue(cidDuringUpgrade > cidBeforeUpgrade);

    // Now bootstrap the standby with the upgraded info.
    int rc = BootstrapStandby.run(
        new String[]{"-force"},
        cluster.getConfiguration(1));
    assertEquals(0, rc);
    
    cluster.restartNameNode(1);
    
    checkNnPreviousDirExistence(cluster, 0, true);
    checkNnPreviousDirExistence(cluster, 1, true);
    checkJnPreviousDirExistence(qjCluster, true);
    assertCTimesEqual(cluster);
    
    // Shut down the NNs, but deliberately leave the JNs up and running.
    Collection<URI> nn1NameDirs = cluster.getNameDirs(0);
    cluster.shutdown();

    conf.setStrings(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, Joiner.on(",").join(nn1NameDirs));
    NameNode.doRollback(conf, false);

    final long cidAfterRollback = getCommittedTxnIdValue(qjCluster);
    assertTrue(cidBeforeUpgrade < cidAfterRollback);
    // make sure the committedTxnId has been reset correctly after rollback
    assertTrue(cidDuringUpgrade > cidAfterRollback);

    // The rollback operation should have rolled back the first NN's local
    // dirs, and the shared dir, but not the other NN's dirs. Those have to be
    // done by bootstrapping the standby.
    checkNnPreviousDirExistence(cluster, 0, false);
    checkJnPreviousDirExistence(qjCluster, false);
  } finally {
    if (fs != null) {
      fs.close();
    }
    if (qjCluster != null) {
      qjCluster.shutdown();
    }
  }
}
 
Example 15
Source File: TestDFSUpgradeWithHA.java    From big-c with Apache License 2.0
/**
 * Make sure that an HA NN can successfully upgrade when configured using
 * JournalNodes.
 */
@Test
public void testUpgradeWithJournalNodes() throws IOException,
    URISyntaxException {
  MiniQJMHACluster qjCluster = null;
  FileSystem fs = null;
  try {
    Builder builder = new MiniQJMHACluster.Builder(conf);
    builder.getDfsBuilder()
        .numDataNodes(0);
    qjCluster = builder.build();

    MiniDFSCluster cluster = qjCluster.getDfsCluster();
    
    // No upgrade is in progress at the moment.
    checkJnPreviousDirExistence(qjCluster, false);
    checkClusterPreviousDirExistence(cluster, false);
    assertCTimesEqual(cluster);
    
    // Transition NN0 to active and do some FS ops.
    cluster.transitionToActive(0);
    fs = HATestUtil.configureFailoverFs(cluster, conf);
    assertTrue(fs.mkdirs(new Path("/foo1")));

    // get the value of the committedTxnId in journal nodes
    final long cidBeforeUpgrade = getCommittedTxnIdValue(qjCluster);

    // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
    // flag.
    cluster.shutdownNameNode(1);
    cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
    cluster.restartNameNode(0, false);
    
    checkNnPreviousDirExistence(cluster, 0, true);
    checkNnPreviousDirExistence(cluster, 1, false);
    checkJnPreviousDirExistence(qjCluster, true);

    assertTrue(cidBeforeUpgrade <= getCommittedTxnIdValue(qjCluster));
    
    // NN0 should come up in the active state when given the -upgrade option,
    // so no need to transition it to active.
    assertTrue(fs.mkdirs(new Path("/foo2")));
    
    // Restart NN0 without the -upgrade flag, to make sure that works.
    cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.REGULAR);
    cluster.restartNameNode(0, false);
    
    // Make sure we can still do FS ops after upgrading.
    cluster.transitionToActive(0);
    assertTrue(fs.mkdirs(new Path("/foo3")));

    assertTrue(getCommittedTxnIdValue(qjCluster) > cidBeforeUpgrade);
    
    // Now bootstrap the standby with the upgraded info.
    int rc = BootstrapStandby.run(
        new String[]{"-force"},
        cluster.getConfiguration(1));
    assertEquals(0, rc);
    
    // Now restart NN1 and make sure that we can do ops against that as well.
    cluster.restartNameNode(1);
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);
    assertTrue(fs.mkdirs(new Path("/foo4")));
    
    assertCTimesEqual(cluster);
  } finally {
    if (fs != null) {
      fs.close();
    }
    if (qjCluster != null) {
      qjCluster.shutdown();
    }
  }
}
 
Example 16
Source File: TestDFSUpgradeWithHA.java    From hadoop with Apache License 2.0
/**
 * Ensure that an admin cannot finalize an HA upgrade without at least one NN
 * being active.
 */
@Test
public void testCannotFinalizeIfNoActive() throws IOException,
    URISyntaxException {
  MiniDFSCluster cluster = null;
  FileSystem fs = null;
  try {
    cluster = new MiniDFSCluster.Builder(conf)
        .nnTopology(MiniDFSNNTopology.simpleHATopology())
        .numDataNodes(0)
        .build();

    File sharedDir = new File(cluster.getSharedEditsDir(0, 1));
    
    // No upgrade is in progress at the moment.
    checkClusterPreviousDirExistence(cluster, false);
    assertCTimesEqual(cluster);
    checkPreviousDirExistence(sharedDir, false);
    
    // Transition NN0 to active and do some FS ops.
    cluster.transitionToActive(0);
    fs = HATestUtil.configureFailoverFs(cluster, conf);
    assertTrue(fs.mkdirs(new Path("/foo1")));
    
    // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
    // flag.
    cluster.shutdownNameNode(1);
    cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
    cluster.restartNameNode(0, false);
    
    checkNnPreviousDirExistence(cluster, 0, true);
    checkNnPreviousDirExistence(cluster, 1, false);
    checkPreviousDirExistence(sharedDir, true);
    
    // NN0 should come up in the active state when given the -upgrade option,
    // so no need to transition it to active.
    assertTrue(fs.mkdirs(new Path("/foo2")));
    
    // Restart NN0 without the -upgrade flag, to make sure that works.
    cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.REGULAR);
    cluster.restartNameNode(0, false);
    
    // Make sure we can still do FS ops after upgrading.
    cluster.transitionToActive(0);
    assertTrue(fs.mkdirs(new Path("/foo3")));
    
    // Now bootstrap the standby with the upgraded info.
    int rc = BootstrapStandby.run(
        new String[]{"-force"},
        cluster.getConfiguration(1));
    assertEquals(0, rc);
    
    // Now restart NN1 and make sure that we can do ops against that as well.
    cluster.restartNameNode(1);
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);
    assertTrue(fs.mkdirs(new Path("/foo4")));
    
    assertCTimesEqual(cluster);
    
    // Now there's no active NN.
    cluster.transitionToStandby(1);

    try {
      runFinalizeCommand(cluster);
      fail("Should not have been able to finalize upgrade with no NN active");
    } catch (IOException ioe) {
      GenericTestUtils.assertExceptionContains(
          "Cannot finalize with no NameNode active", ioe);
    }
  } finally {
    if (fs != null) {
      fs.close();
    }
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
 
Example 17
Source File: TestPipelinesFailover.java    From hadoop with Apache License 2.0
@Override
void run(MiniDFSCluster cluster) throws IOException {
  cluster.restartNameNode(0);
  cluster.transitionToActive(1);
}
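This fragment overrides a hook supplied by its enclosing test; the surrounding class is omitted from the listing. The sketch below shows one way such a hook could be declared and implemented. The FailoverScenario and RestartThenFailover names are assumptions made for illustration, not the actual structure of TestPipelinesFailover.

// Hedged sketch only: the real harness in TestPipelinesFailover differs.
import java.io.IOException;

import org.apache.hadoop.hdfs.MiniDFSCluster;

abstract class FailoverScenario {
  /** Perform some failover action against the running mini cluster. */
  abstract void run(MiniDFSCluster cluster) throws IOException;
}

class RestartThenFailover extends FailoverScenario {
  // Restart NN0 (in an HA pair it comes back up as standby), then make
  // NN1 active, matching the fragment above. The enclosing test would
  // invoke run() mid-write to verify that pipelines survive a failover.
  @Override
  void run(MiniDFSCluster cluster) throws IOException {
    cluster.restartNameNode(0);
    cluster.transitionToActive(1);
  }
}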