Java Code Examples for org.apache.hadoop.hdfs.MiniDFSCluster#transitionToActive()
The following examples show how to use org.apache.hadoop.hdfs.MiniDFSCluster#transitionToActive(). The source project, file name, and license are noted above each example.
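Before the examples, here is a minimal sketch of the pattern most of them follow: build a two-NameNode HA topology, make one NameNode active, then fail over to the other. This is an illustrative composite, not code from any of the projects below; every call shown appears in the examples, but the surrounding test scaffolding is assumed.

Configuration conf = new Configuration();
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .nnTopology(MiniDFSNNTopology.simpleHATopology()) // two NNs in one nameservice
    .numDataNodes(0)
    .build();
try {
  cluster.waitActive();
  cluster.transitionToActive(0);   // NameNode 0 becomes the active NN
  // ... perform filesystem operations against the active NameNode ...
  cluster.transitionToStandby(0);  // demote NN0 ...
  cluster.transitionToActive(1);   // ... then promote NN1: a manual failover
} finally {
  cluster.shutdown();
}

Note that transitionToActive() only changes the HA state of the NameNode at the given index; tests that simulate a crash of the old active call shutdownNameNode() first, as several examples below do.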
Example 1
Source File: TestEditLogTailer.java From big-c with Apache License 2.0

private static void testStandbyTriggersLogRolls(int activeIndex)
    throws Exception {
  Configuration conf = new Configuration();
  // Roll every 1s
  conf.setInt(DFSConfigKeys.DFS_HA_LOGROLL_PERIOD_KEY, 1);
  conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);

  // Have to specify IPC ports so the NNs can talk to each other.
  MiniDFSNNTopology topology = new MiniDFSNNTopology()
      .addNameservice(new MiniDFSNNTopology.NSConf("ns1")
          .addNN(new MiniDFSNNTopology.NNConf("nn1").setIpcPort(10031))
          .addNN(new MiniDFSNNTopology.NNConf("nn2").setIpcPort(10032)));

  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .nnTopology(topology)
      .numDataNodes(0)
      .build();
  try {
    cluster.transitionToActive(activeIndex);
    waitForLogRollInSharedDir(cluster, 3);
  } finally {
    cluster.shutdown();
  }
}
Example 2
Source File: TestHarFileSystemWithHA.java From hadoop with Apache License 2.0

/**
 * Test that the HarFileSystem works with underlying HDFS URIs that have no
 * port specified, as is often the case with an HA setup.
 */
@Test
public void testHarUriWithHaUriWithNoPort() throws Exception {
  Configuration conf = new HdfsConfiguration();
  MiniDFSCluster cluster = null;
  try {
    cluster = new MiniDFSCluster.Builder(conf)
        .numDataNodes(1)
        .nnTopology(MiniDFSNNTopology.simpleHATopology())
        .build();
    cluster.transitionToActive(0);
    HATestUtil.setFailoverConfigurations(cluster, conf);

    createEmptyHarArchive(HATestUtil.configureFailoverFs(cluster, conf),
        TEST_HAR_PATH);

    URI failoverUri = FileSystem.getDefaultUri(conf);
    Path p = new Path("har://hdfs-" + failoverUri.getAuthority() + TEST_HAR_PATH);
    p.getFileSystem(conf);
  } finally {
    cluster.shutdown();
  }
}
Example 3
Source File: TestWebHDFSForHA.java From big-c with Apache License 2.0

@Test
public void testSecureHAToken() throws IOException, InterruptedException {
  Configuration conf = DFSTestUtil.newHAConfiguration(LOGICAL_NAME);
  conf.setBoolean(DFSConfigKeys
      .DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY, true);

  MiniDFSCluster cluster = null;
  WebHdfsFileSystem fs = null;
  try {
    cluster = new MiniDFSCluster.Builder(conf).nnTopology(topo)
        .numDataNodes(0).build();
    HATestUtil.setFailoverConfigurations(cluster, conf, LOGICAL_NAME);
    cluster.waitActive();

    fs = spy((WebHdfsFileSystem) FileSystem.get(WEBHDFS_URI, conf));
    FileSystemTestHelper.addFileSystemForTesting(WEBHDFS_URI, conf, fs);

    cluster.transitionToActive(0);
    Token<?> token = fs.getDelegationToken(null);

    cluster.shutdownNameNode(0);
    cluster.transitionToActive(1);
    token.renew(conf);
    token.cancel(conf);
    verify(fs).renewDelegationToken(token);
    verify(fs).cancelDelegationToken(token);
  } finally {
    IOUtils.cleanup(null, fs);
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
Example 4
Source File: TestWebHDFSForHA.java From hadoop with Apache License 2.0

@Test
public void testFailoverAfterOpen() throws IOException {
  Configuration conf = DFSTestUtil.newHAConfiguration(LOGICAL_NAME);
  conf.set(FS_DEFAULT_NAME_KEY, HdfsConstants.HDFS_URI_SCHEME +
      "://" + LOGICAL_NAME);
  MiniDFSCluster cluster = null;
  FileSystem fs = null;
  final Path p = new Path("/test");
  final byte[] data = "Hello".getBytes();

  try {
    cluster = new MiniDFSCluster.Builder(conf).nnTopology(topo)
        .numDataNodes(1).build();
    HATestUtil.setFailoverConfigurations(cluster, conf, LOGICAL_NAME);
    cluster.waitActive();

    fs = FileSystem.get(WEBHDFS_URI, conf);
    cluster.transitionToActive(1);

    FSDataOutputStream out = fs.create(p);
    cluster.shutdownNameNode(1);
    cluster.transitionToActive(0);

    out.write(data);
    out.close();
    FSDataInputStream in = fs.open(p);
    byte[] buf = new byte[data.length];
    IOUtils.readFully(in, buf, 0, buf.length);
    Assert.assertArrayEquals(data, buf);
  } finally {
    IOUtils.cleanup(null, fs);
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
Example 5
Source File: TestBookKeeperAsHASharedDir.java From big-c with Apache License 2.0

/**
 * Test simple HA failover use case with BK
 */
@Test
public void testFailoverWithBK() throws Exception {
  MiniDFSCluster cluster = null;
  try {
    Configuration conf = new Configuration();
    conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
    conf.set(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY,
        BKJMUtil.createJournalURI("/hotfailover").toString());
    BKJMUtil.addJournalManagerDefinition(conf);

    cluster = new MiniDFSCluster.Builder(conf)
        .nnTopology(MiniDFSNNTopology.simpleHATopology())
        .numDataNodes(0)
        .manageNameDfsSharedDirs(false)
        .build();
    NameNode nn1 = cluster.getNameNode(0);
    NameNode nn2 = cluster.getNameNode(1);

    cluster.waitActive();
    cluster.transitionToActive(0);

    Path p = new Path("/testBKJMfailover");
    FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
    fs.mkdirs(p);

    cluster.shutdownNameNode(0);
    cluster.transitionToActive(1);
    assertTrue(fs.exists(p));
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
Example 6
Source File: HdfsSortedOplogOrganizerJUnitTest.java From gemfirexd-oss with Apache License 2.0

public void testFlushWithNameNodeHA() throws Exception {
  deleteMiniClusterDir();

  int nn1port = AvailablePortHelper.getRandomAvailableTCPPort();
  int nn2port = AvailablePortHelper.getRandomAvailableTCPPort();

  MiniDFSCluster cluster = initMiniHACluster(nn1port, nn2port);
  initClientHAConf(nn1port, nn2port);

  HDFSStoreImpl store1 = (HDFSStoreImpl) hsf.create("Store-1");
  regionfactory.setHDFSStoreName(store1.getName());
  Region<Object, Object> region1 = regionfactory.create("region-1");
  HdfsRegionManager regionManager1 =
      ((LocalRegion) region1).getHdfsRegionManager();
  HoplogOrganizer<SortedHoplogPersistedEvent> organizer =
      new HdfsSortedOplogOrganizer(regionManager1, 0);

  ArrayList<TestEvent> items = new ArrayList<TestEvent>();
  items.add(new TestEvent(("1"), ("1-1")));
  organizer.flush(items.iterator(), items.size());

  TestUtils.addExpectedException("java.io.EOFException");

  NameNode nnode2 = cluster.getNameNode(1);
  assertTrue(nnode2.isStandbyState());
  cluster.shutdownNameNode(0);
  cluster.transitionToActive(1);
  assertFalse(nnode2.isStandbyState());

  items.add(new TestEvent(("4"), ("1-4")));
  organizer.flush(items.iterator(), items.size());

  byte[] keyBytes1 = BlobHelper.serializeToBlob("1");
  byte[] keyBytes4 = BlobHelper.serializeToBlob("4");
  assertEquals("1-1", organizer.read(keyBytes1).getValue());
  assertEquals("1-4", organizer.read(keyBytes4).getValue());

  TestUtils.removeExpectedException("java.io.EOFException");

  region1.destroyRegion();
  store1.destroy();
  cluster.shutdown();
  FileUtils.deleteDirectory(new File("hdfs-test-cluster"));
}
Example 7
Source File: TestPipelinesFailover.java From hadoop with Apache License 2.0

/**
 * Stress test for pipeline/lease recovery. Starts a number of
 * threads, each of which creates a file and has another client
 * break the lease. While these threads run, failover proceeds
 * back and forth between two namenodes.
 */
@Test(timeout=STRESS_RUNTIME*3)
public void testPipelineRecoveryStress() throws Exception {
  HAStressTestHarness harness = new HAStressTestHarness();
  // Disable permissions so that another user can recover the lease.
  harness.conf.setBoolean(
      DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY, false);
  // This test triggers rapid NN failovers. The client retry policy uses an
  // exponential backoff. This can quickly lead to long sleep times and even
  // timeout the whole test. Cap the sleep time at 1s to prevent this.
  harness.conf.setInt(DFSConfigKeys.DFS_CLIENT_FAILOVER_SLEEPTIME_MAX_KEY,
      1000);

  final MiniDFSCluster cluster = harness.startCluster();
  try {
    cluster.waitActive();
    cluster.transitionToActive(0);

    FileSystem fs = harness.getFailoverFs();
    DistributedFileSystem fsAsOtherUser = createFsAsOtherUser(
        cluster, harness.conf);

    TestContext testers = new TestContext();
    for (int i = 0; i < STRESS_NUM_THREADS; i++) {
      Path p = new Path("/test-" + i);
      testers.addThread(new PipelineTestThread(
          testers, fs, fsAsOtherUser, p));
    }

    // Start a separate thread which will make sure that replication
    // happens quickly by triggering deletion reports and replication
    // work calculation frequently.
    harness.addReplicationTriggerThread(500);
    harness.addFailoverThread(5000);
    harness.startThreads();
    testers.startThreads();

    testers.waitFor(STRESS_RUNTIME);
    testers.stop();
    harness.stopThreads();
  } finally {
    System.err.println("===========================\n\n\n\n");
    harness.shutdown();
  }
}
Example 8
Source File: TestBookKeeperAsHASharedDir.java From hadoop with Apache License 2.0

/**
 * Test that two namenodes can't continue as primary
 */
@Test
public void testMultiplePrimariesStarted() throws Exception {
  Path p1 = new Path("/testBKJMMultiplePrimary");
  MiniDFSCluster cluster = null;
  try {
    Configuration conf = new Configuration();
    conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
    conf.set(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY,
        BKJMUtil.createJournalURI("/hotfailoverMultiple").toString());
    BKJMUtil.addJournalManagerDefinition(conf);

    cluster = new MiniDFSCluster.Builder(conf)
        .nnTopology(MiniDFSNNTopology.simpleHATopology())
        .numDataNodes(0)
        .manageNameDfsSharedDirs(false)
        .checkExitOnShutdown(false)
        .build();
    NameNode nn1 = cluster.getNameNode(0);
    NameNode nn2 = cluster.getNameNode(1);
    cluster.waitActive();
    cluster.transitionToActive(0);

    FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
    fs.mkdirs(p1);
    nn1.getRpcServer().rollEditLog();
    cluster.transitionToActive(1);
    fs = cluster.getFileSystem(0); // get the older active server.

    try {
      fs.delete(p1, true);
      fail("Log update on older active should cause it to exit");
    } catch (RemoteException re) {
      assertTrue(re.getClassName().contains("ExitException"));
    }
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
Example 9
Source File: TestDFSUpgradeWithHA.java From big-c with Apache License 2.0

@Test
public void testFinalizeWithJournalNodes() throws IOException,
    URISyntaxException {
  MiniQJMHACluster qjCluster = null;
  FileSystem fs = null;
  try {
    Builder builder = new MiniQJMHACluster.Builder(conf);
    builder.getDfsBuilder()
        .numDataNodes(0);
    qjCluster = builder.build();

    MiniDFSCluster cluster = qjCluster.getDfsCluster();

    // No upgrade is in progress at the moment.
    checkJnPreviousDirExistence(qjCluster, false);
    checkClusterPreviousDirExistence(cluster, false);
    assertCTimesEqual(cluster);

    // Transition NN0 to active and do some FS ops.
    cluster.transitionToActive(0);
    fs = HATestUtil.configureFailoverFs(cluster, conf);
    assertTrue(fs.mkdirs(new Path("/foo1")));

    final long cidBeforeUpgrade = getCommittedTxnIdValue(qjCluster);

    // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
    // flag.
    cluster.shutdownNameNode(1);
    cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
    cluster.restartNameNode(0, false);
    assertTrue(cidBeforeUpgrade <= getCommittedTxnIdValue(qjCluster));
    assertTrue(fs.mkdirs(new Path("/foo2")));

    checkNnPreviousDirExistence(cluster, 0, true);
    checkNnPreviousDirExistence(cluster, 1, false);
    checkJnPreviousDirExistence(qjCluster, true);

    // Now bootstrap the standby with the upgraded info.
    int rc = BootstrapStandby.run(
        new String[]{"-force"},
        cluster.getConfiguration(1));
    assertEquals(0, rc);

    cluster.restartNameNode(1);

    final long cidDuringUpgrade = getCommittedTxnIdValue(qjCluster);
    assertTrue(cidDuringUpgrade > cidBeforeUpgrade);

    runFinalizeCommand(cluster);

    assertEquals(cidDuringUpgrade, getCommittedTxnIdValue(qjCluster));
    checkClusterPreviousDirExistence(cluster, false);
    checkJnPreviousDirExistence(qjCluster, false);
    assertCTimesEqual(cluster);
  } finally {
    if (fs != null) {
      fs.close();
    }
    if (qjCluster != null) {
      qjCluster.shutdown();
    }
  }
}
Example 10
Source File: TestPipelinesFailover.java From big-c with Apache License 2.0

private void doWriteOverFailoverTest(TestScenario scenario,
    MethodToTestIdempotence methodToTest) throws Exception {
  Configuration conf = new Configuration();
  conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
  // Don't check replication periodically.
  conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_INTERVAL_KEY, 1000);

  FSDataOutputStream stm = null;
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .nnTopology(MiniDFSNNTopology.simpleHATopology())
      .numDataNodes(3)
      .build();
  try {
    int sizeWritten = 0;

    cluster.waitActive();
    cluster.transitionToActive(0);
    Thread.sleep(500);

    LOG.info("Starting with NN 0 active");
    FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
    stm = fs.create(TEST_PATH);

    // write a block and a half
    AppendTestUtil.write(stm, 0, BLOCK_AND_A_HALF);
    sizeWritten += BLOCK_AND_A_HALF;

    // Make sure all of the blocks are written out before failover.
    stm.hflush();

    LOG.info("Failing over to NN 1");
    scenario.run(cluster);

    // NOTE: explicitly do *not* make any further metadata calls
    // to the NN here. The next IPC call should be to allocate the next
    // block. Any other call would notice the failover and not test
    // idempotence of the operation (HDFS-3031)

    FSNamesystem ns1 = cluster.getNameNode(1).getNamesystem();
    BlockManagerTestUtil.updateState(ns1.getBlockManager());
    assertEquals(0, ns1.getPendingReplicationBlocks());
    assertEquals(0, ns1.getCorruptReplicaBlocks());
    assertEquals(0, ns1.getMissingBlocksCount());

    // If we're testing allocateBlock()'s idempotence, write another
    // block and a half, so we have to allocate a new block.
    // Otherwise, don't write anything, so our next RPC will be
    // completeFile() if we're testing idempotence of that operation.
    if (methodToTest == MethodToTestIdempotence.ALLOCATE_BLOCK) {
      // write another block and a half
      AppendTestUtil.write(stm, sizeWritten, BLOCK_AND_A_HALF);
      sizeWritten += BLOCK_AND_A_HALF;
    }

    stm.close();
    stm = null;

    AppendTestUtil.check(fs, TEST_PATH, sizeWritten);
  } finally {
    IOUtils.closeStream(stm);
    cluster.shutdown();
  }
}
Example 11
Source File: TestDFSUpgradeWithHA.java From big-c with Apache License 2.0

/**
 * Ensure that an admin cannot finalize an HA upgrade without at least one NN
 * being active.
 */
@Test
public void testCannotFinalizeIfNoActive() throws IOException,
    URISyntaxException {
  MiniDFSCluster cluster = null;
  FileSystem fs = null;
  try {
    cluster = new MiniDFSCluster.Builder(conf)
        .nnTopology(MiniDFSNNTopology.simpleHATopology())
        .numDataNodes(0)
        .build();

    File sharedDir = new File(cluster.getSharedEditsDir(0, 1));

    // No upgrade is in progress at the moment.
    checkClusterPreviousDirExistence(cluster, false);
    assertCTimesEqual(cluster);
    checkPreviousDirExistence(sharedDir, false);

    // Transition NN0 to active and do some FS ops.
    cluster.transitionToActive(0);
    fs = HATestUtil.configureFailoverFs(cluster, conf);
    assertTrue(fs.mkdirs(new Path("/foo1")));

    // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
    // flag.
    cluster.shutdownNameNode(1);
    cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
    cluster.restartNameNode(0, false);

    checkNnPreviousDirExistence(cluster, 0, true);
    checkNnPreviousDirExistence(cluster, 1, false);
    checkPreviousDirExistence(sharedDir, true);

    // NN0 should come up in the active state when given the -upgrade option,
    // so no need to transition it to active.
    assertTrue(fs.mkdirs(new Path("/foo2")));

    // Restart NN0 without the -upgrade flag, to make sure that works.
    cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.REGULAR);
    cluster.restartNameNode(0, false);

    // Make sure we can still do FS ops after upgrading.
    cluster.transitionToActive(0);
    assertTrue(fs.mkdirs(new Path("/foo3")));

    // Now bootstrap the standby with the upgraded info.
    int rc = BootstrapStandby.run(
        new String[]{"-force"},
        cluster.getConfiguration(1));
    assertEquals(0, rc);

    // Now restart NN1 and make sure that we can do ops against that as well.
    cluster.restartNameNode(1);
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);
    assertTrue(fs.mkdirs(new Path("/foo4")));

    assertCTimesEqual(cluster);

    // Now there's no active NN.
    cluster.transitionToStandby(1);

    try {
      runFinalizeCommand(cluster);
      fail("Should not have been able to finalize upgrade with no NN active");
    } catch (IOException ioe) {
      GenericTestUtils.assertExceptionContains(
          "Cannot finalize with no NameNode active", ioe);
    }
  } finally {
    if (fs != null) {
      fs.close();
    }
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
Example 12
Source File: TestHAMetrics.java From hadoop with Apache License 2.0

@Test(timeout = 300000)
public void testHAMetrics() throws Exception {
  Configuration conf = new Configuration();
  conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
  conf.setInt(DFSConfigKeys.DFS_HA_LOGROLL_PERIOD_KEY, Integer.MAX_VALUE);

  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(1)
      .build();
  FileSystem fs = null;
  try {
    cluster.waitActive();

    FSNamesystem nn0 = cluster.getNamesystem(0);
    FSNamesystem nn1 = cluster.getNamesystem(1);

    assertEquals(nn0.getHAState(), "standby");
    assertTrue(0 < nn0.getMillisSinceLastLoadedEdits());
    assertEquals(nn1.getHAState(), "standby");
    assertTrue(0 < nn1.getMillisSinceLastLoadedEdits());

    cluster.transitionToActive(0);
    final MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
    final ObjectName mxbeanName =
        new ObjectName("Hadoop:service=NameNode,name=NameNodeStatus");
    final Long ltt1 =
        (Long) mbs.getAttribute(mxbeanName, "LastHATransitionTime");
    assertTrue("lastHATransitionTime should be > 0", ltt1 > 0);

    assertEquals("active", nn0.getHAState());
    assertEquals(0, nn0.getMillisSinceLastLoadedEdits());
    assertEquals("standby", nn1.getHAState());
    assertTrue(0 < nn1.getMillisSinceLastLoadedEdits());

    cluster.transitionToStandby(0);
    final Long ltt2 =
        (Long) mbs.getAttribute(mxbeanName, "LastHATransitionTime");
    assertTrue("lastHATransitionTime should be > " + ltt1, ltt2 > ltt1);

    cluster.transitionToActive(1);

    assertEquals("standby", nn0.getHAState());
    assertTrue(0 < nn0.getMillisSinceLastLoadedEdits());
    assertEquals("active", nn1.getHAState());
    assertEquals(0, nn1.getMillisSinceLastLoadedEdits());

    Thread.sleep(2000); // make sure standby gets a little out-of-date
    assertTrue(2000 <= nn0.getMillisSinceLastLoadedEdits());

    assertEquals(0, nn0.getPendingDataNodeMessageCount());
    assertEquals(0, nn1.getPendingDataNodeMessageCount());

    fs = HATestUtil.configureFailoverFs(cluster, conf);
    DFSTestUtil.createFile(fs, new Path("/foo"), 10, (short)1, 1L);

    assertTrue(0 < nn0.getPendingDataNodeMessageCount());
    assertEquals(0, nn1.getPendingDataNodeMessageCount());
    long millisSinceLastLoadedEdits = nn0.getMillisSinceLastLoadedEdits();

    HATestUtil.waitForStandbyToCatchUp(cluster.getNameNode(1),
        cluster.getNameNode(0));

    assertEquals(0, nn0.getPendingDataNodeMessageCount());
    assertEquals(0, nn1.getPendingDataNodeMessageCount());
    long newMillisSinceLastLoadedEdits = nn0.getMillisSinceLastLoadedEdits();
    // Since we just waited for the standby to catch up, the time since we
    // last loaded edits should be very low.
    assertTrue("expected " + millisSinceLastLoadedEdits + " > " +
        newMillisSinceLastLoadedEdits,
        millisSinceLastLoadedEdits > newMillisSinceLastLoadedEdits);
  } finally {
    IOUtils.cleanup(LOG, fs);
    cluster.shutdown();
  }
}
Example 13
Source File: TestPipelinesFailover.java From big-c with Apache License 2.0

private void doTestWriteOverFailoverWithDnFail(TestScenario scenario)
    throws Exception {
  Configuration conf = new Configuration();
  conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);

  FSDataOutputStream stm = null;
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .nnTopology(MiniDFSNNTopology.simpleHATopology())
      .numDataNodes(5)
      .build();
  try {
    cluster.waitActive();
    cluster.transitionToActive(0);
    Thread.sleep(500);

    LOG.info("Starting with NN 0 active");
    FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
    stm = fs.create(TEST_PATH);

    // write a block and a half
    AppendTestUtil.write(stm, 0, BLOCK_AND_A_HALF);

    // Make sure all the blocks are written before failover
    stm.hflush();

    LOG.info("Failing over to NN 1");
    scenario.run(cluster);

    assertTrue(fs.exists(TEST_PATH));

    cluster.stopDataNode(0);

    // write another block and a half
    AppendTestUtil.write(stm, BLOCK_AND_A_HALF, BLOCK_AND_A_HALF);
    stm.hflush();

    LOG.info("Failing back to NN 0");
    cluster.transitionToStandby(1);
    cluster.transitionToActive(0);

    cluster.stopDataNode(1);

    AppendTestUtil.write(stm, BLOCK_AND_A_HALF*2, BLOCK_AND_A_HALF);
    stm.hflush();

    stm.close();
    stm = null;

    AppendTestUtil.check(fs, TEST_PATH, BLOCK_AND_A_HALF * 3);
  } finally {
    IOUtils.closeStream(stm);
    cluster.shutdown();
  }
}
Example 14
Source File: TestHAFsck.java From big-c with Apache License 2.0

/**
 * Test that fsck still works with HA enabled.
 */
@Test
public void testHaFsck() throws Exception {
  Configuration conf = new Configuration();

  // need some HTTP ports
  MiniDFSNNTopology topology = new MiniDFSNNTopology()
      .addNameservice(new MiniDFSNNTopology.NSConf("ha-nn-uri-0")
          .addNN(new MiniDFSNNTopology.NNConf("nn1").setHttpPort(10051))
          .addNN(new MiniDFSNNTopology.NNConf("nn2").setHttpPort(10052)));

  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .nnTopology(topology)
      .numDataNodes(0)
      .build();
  FileSystem fs = null;
  try {
    cluster.waitActive();
    cluster.transitionToActive(0);

    // Make sure conf has the relevant HA configs.
    HATestUtil.setFailoverConfigurations(cluster, conf, "ha-nn-uri-0", 0);

    fs = HATestUtil.configureFailoverFs(cluster, conf);
    fs.mkdirs(new Path("/test1"));
    fs.mkdirs(new Path("/test2"));

    runFsck(conf);

    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);

    runFsck(conf);
  } finally {
    if (fs != null) {
      fs.close();
    }
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
Example 15
Source File: TestPipelinesFailover.java From hadoop with Apache License 2.0

/**
 * Tests lease recovery if a client crashes. This approximates the
 * use case of HBase WALs being recovered after a NN failover.
 */
@Test(timeout=30000)
public void testLeaseRecoveryAfterFailover() throws Exception {
  final Configuration conf = new Configuration();
  // Disable permissions so that another user can recover the lease.
  conf.setBoolean(DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY, false);
  conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);

  FSDataOutputStream stm = null;
  final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .nnTopology(MiniDFSNNTopology.simpleHATopology())
      .numDataNodes(3)
      .build();
  try {
    cluster.waitActive();
    cluster.transitionToActive(0);
    Thread.sleep(500);

    LOG.info("Starting with NN 0 active");
    FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
    stm = fs.create(TEST_PATH);

    // write a block and a half
    AppendTestUtil.write(stm, 0, BLOCK_AND_A_HALF);
    stm.hflush();

    LOG.info("Failing over to NN 1");
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);

    assertTrue(fs.exists(TEST_PATH));

    FileSystem fsOtherUser = createFsAsOtherUser(cluster, conf);
    loopRecoverLease(fsOtherUser, TEST_PATH);

    AppendTestUtil.check(fs, TEST_PATH, BLOCK_AND_A_HALF);

    // Fail back to ensure that the block locations weren't lost on the
    // original node.
    cluster.transitionToStandby(1);
    cluster.transitionToActive(0);

    AppendTestUtil.check(fs, TEST_PATH, BLOCK_AND_A_HALF);
  } finally {
    IOUtils.closeStream(stm);
    cluster.shutdown();
  }
}
Example 16
Source File: TestPipelinesFailover.java From hadoop with Apache License 2.0

@Override
void run(MiniDFSCluster cluster) throws IOException {
  cluster.restartNameNode(0);
  cluster.transitionToActive(1);
}
Example 17
Source File: TestDFSUpgradeWithHA.java From hadoop with Apache License 2.0

/**
 * Make sure that an HA NN can successfully upgrade when configured using
 * JournalNodes.
 */
@Test
public void testUpgradeWithJournalNodes() throws IOException,
    URISyntaxException {
  MiniQJMHACluster qjCluster = null;
  FileSystem fs = null;
  try {
    Builder builder = new MiniQJMHACluster.Builder(conf);
    builder.getDfsBuilder()
        .numDataNodes(0);
    qjCluster = builder.build();

    MiniDFSCluster cluster = qjCluster.getDfsCluster();

    // No upgrade is in progress at the moment.
    checkJnPreviousDirExistence(qjCluster, false);
    checkClusterPreviousDirExistence(cluster, false);
    assertCTimesEqual(cluster);

    // Transition NN0 to active and do some FS ops.
    cluster.transitionToActive(0);
    fs = HATestUtil.configureFailoverFs(cluster, conf);
    assertTrue(fs.mkdirs(new Path("/foo1")));

    // get the value of the committedTxnId in journal nodes
    final long cidBeforeUpgrade = getCommittedTxnIdValue(qjCluster);

    // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
    // flag.
    cluster.shutdownNameNode(1);
    cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
    cluster.restartNameNode(0, false);

    checkNnPreviousDirExistence(cluster, 0, true);
    checkNnPreviousDirExistence(cluster, 1, false);
    checkJnPreviousDirExistence(qjCluster, true);

    assertTrue(cidBeforeUpgrade <= getCommittedTxnIdValue(qjCluster));

    // NN0 should come up in the active state when given the -upgrade option,
    // so no need to transition it to active.
    assertTrue(fs.mkdirs(new Path("/foo2")));

    // Restart NN0 without the -upgrade flag, to make sure that works.
    cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.REGULAR);
    cluster.restartNameNode(0, false);

    // Make sure we can still do FS ops after upgrading.
    cluster.transitionToActive(0);
    assertTrue(fs.mkdirs(new Path("/foo3")));

    assertTrue(getCommittedTxnIdValue(qjCluster) > cidBeforeUpgrade);

    // Now bootstrap the standby with the upgraded info.
    int rc = BootstrapStandby.run(
        new String[]{"-force"},
        cluster.getConfiguration(1));
    assertEquals(0, rc);

    // Now restart NN1 and make sure that we can do ops against that as well.
    cluster.restartNameNode(1);
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);
    assertTrue(fs.mkdirs(new Path("/foo4")));

    assertCTimesEqual(cluster);
  } finally {
    if (fs != null) {
      fs.close();
    }
    if (qjCluster != null) {
      qjCluster.shutdown();
    }
  }
}
Example 18
Source File: TestWebHDFSForHA.java From big-c with Apache License 2.0

/**
 * Make sure the WebHdfsFileSystem will retry based on RetriableException when
 * rpcServer is null in NamenodeWebHdfsMethods while NameNode starts up.
 */
@Test (timeout=120000)
public void testRetryWhileNNStartup() throws Exception {
  final Configuration conf = DFSTestUtil.newHAConfiguration(LOGICAL_NAME);
  MiniDFSCluster cluster = null;
  final Map<String, Boolean> resultMap = new HashMap<String, Boolean>();

  try {
    cluster = new MiniDFSCluster.Builder(conf).nnTopology(topo)
        .numDataNodes(0).build();
    HATestUtil.setFailoverConfigurations(cluster, conf, LOGICAL_NAME);
    cluster.waitActive();
    cluster.transitionToActive(0);

    final NameNode namenode = cluster.getNameNode(0);
    final NamenodeProtocols rpcServer = namenode.getRpcServer();
    Whitebox.setInternalState(namenode, "rpcServer", null);

    new Thread() {
      @Override
      public void run() {
        boolean result = false;
        FileSystem fs = null;
        try {
          fs = FileSystem.get(WEBHDFS_URI, conf);
          final Path dir = new Path("/test");
          result = fs.mkdirs(dir);
        } catch (IOException e) {
          result = false;
        } finally {
          IOUtils.cleanup(null, fs);
        }
        synchronized (TestWebHDFSForHA.this) {
          resultMap.put("mkdirs", result);
          TestWebHDFSForHA.this.notifyAll();
        }
      }
    }.start();

    Thread.sleep(1000);
    Whitebox.setInternalState(namenode, "rpcServer", rpcServer);
    synchronized (this) {
      while (!resultMap.containsKey("mkdirs")) {
        this.wait();
      }
      Assert.assertTrue(resultMap.get("mkdirs"));
    }
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}