Java Code Examples for org.apache.hadoop.hdfs.AppendTestUtil#check()

The following examples show how to use org.apache.hadoop.hdfs.AppendTestUtil#check(). Each example is drawn from an open-source project; the source file, originating project, and license are noted above the code.
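All of the examples share the same basic pattern: write a known byte sequence with AppendTestUtil.write(), flush or close the stream, then call AppendTestUtil.check() to read the file back and verify that exactly the expected bytes are present. Below is a minimal, self-contained sketch of that pattern against a single-DataNode MiniDFSCluster; the class name, path, and byte count are illustrative, and the HDFS test jars are assumed to be on the classpath.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.AppendTestUtil;
import org.apache.hadoop.hdfs.MiniDFSCluster;

public class AppendCheckSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
        .numDataNodes(1)
        .build();
    try {
      cluster.waitActive();
      FileSystem fs = cluster.getFileSystem();
      Path path = new Path("/append-check-demo");

      // Write 1024 bytes of AppendTestUtil's deterministic test data,
      // starting at offset 0.
      FSDataOutputStream stm = fs.create(path);
      AppendTestUtil.write(stm, 0, 1024);
      stm.close();

      // Read the file back and verify both its length and its contents
      // match what write() produced; check() fails on any mismatch.
      AppendTestUtil.check(fs, path, 1024);
    } finally {
      cluster.shutdown();
    }
  }
}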
Example 1
Source File: TestPipelinesFailover.java    From hadoop with Apache License 2.0
@Override
public void doAnAction() throws Exception {
  FSDataOutputStream stm = fs.create(path, true);
  try {
    AppendTestUtil.write(stm, 0, 100);
    stm.hflush();
    loopRecoverLease(fsOtherUser, path);
    AppendTestUtil.check(fs, path, 100);
  } finally {
    try {
      stm.close();
    } catch (IOException e) {
      // expected: the lease was recovered out from under us, so close() fails
    }
  }
}
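loopRecoverLease() is a private helper defined elsewhere in TestPipelinesFailover. A rough sketch of the idea, assuming a DistributedFileSystem handle (the class name and retry interval here are hypothetical):

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;

class LeaseRecoveryUtil {
  // recoverLease() returns false while recovery is still in progress
  // (see Example 6 below), so poll until the NameNode reports the
  // lease as released.
  static void loopRecoverLease(FileSystem fs, Path path) throws Exception {
    DistributedFileSystem dfs = (DistributedFileSystem) fs;
    while (!dfs.recoverLease(path)) {
      Thread.sleep(1000);
    }
  }
}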
 
Example 2
Source File: TestDFSConcurrentFileOperations.java    From RDFS with Apache License 2.0
public void testLeaseRecoveryOnTrashedFile() throws Exception {
  Configuration conf = new Configuration();
  
  conf.setLong("dfs.block.size", blockSize);
  
  init(conf);
  
  String src = "/file-1";
  String dst = "/file-2";
  Path srcPath = new Path(src);
  Path dstPath = new Path(dst);
  FSDataOutputStream fos = fs.create(srcPath);

  AppendTestUtil.write(fos, 0, writeSize);
  fos.sync();
  
  // renaming a file out from under a client will cause close to fail
  // and result in the lease remaining while the blocks are finalized on
  // the DNs
  fs.rename(srcPath, dstPath);

  try {
    fos.close();
    fail("expected IOException");
  } catch (IOException e) {
    //expected
  }

  FileSystem fs2 = AppendTestUtil.createHdfsWithDifferentUsername(conf);
  AppendTestUtil.recoverFile(cluster, fs2, dstPath);
  AppendTestUtil.check(fs2, dstPath, writeSize);
}
 
Example 3
Source File: TestPipelinesFailover.java    From hadoop with Apache License 2.0
private void doWriteOverFailoverTest(TestScenario scenario,
    MethodToTestIdempotence methodToTest) throws Exception {
  Configuration conf = new Configuration();
  conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
  // Don't check replication periodically.
  conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_INTERVAL_KEY, 1000);
  
  FSDataOutputStream stm = null;
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .nnTopology(MiniDFSNNTopology.simpleHATopology())
    .numDataNodes(3)
    .build();
  try {
    int sizeWritten = 0;
    
    cluster.waitActive();
    cluster.transitionToActive(0);
    Thread.sleep(500);

    LOG.info("Starting with NN 0 active");
    FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
    stm = fs.create(TEST_PATH);
    
    // write a block and a half
    AppendTestUtil.write(stm, 0, BLOCK_AND_A_HALF);
    sizeWritten += BLOCK_AND_A_HALF;
    
    // Make sure all of the blocks are written out before failover.
    stm.hflush();

    LOG.info("Failing over to NN 1");
    scenario.run(cluster);

    // NOTE: explicitly do *not* make any further metadata calls
    // to the NN here. The next IPC call should be to allocate the next
    // block. Any other call would notice the failover and not test
    // idempotence of the operation (HDFS-3031)
    
    FSNamesystem ns1 = cluster.getNameNode(1).getNamesystem();
    BlockManagerTestUtil.updateState(ns1.getBlockManager());
    assertEquals(0, ns1.getPendingReplicationBlocks());
    assertEquals(0, ns1.getCorruptReplicaBlocks());
    assertEquals(0, ns1.getMissingBlocksCount());

    // If we're testing allocateBlock()'s idempotence, write another
    // block and a half, so we have to allocate a new block.
    // Otherwise, don't write anything, so our next RPC will be
    // completeFile() if we're testing idempotence of that operation.
    if (methodToTest == MethodToTestIdempotence.ALLOCATE_BLOCK) {
      // write another block and a half
      AppendTestUtil.write(stm, sizeWritten, BLOCK_AND_A_HALF);
      sizeWritten += BLOCK_AND_A_HALF;
    }
    
    stm.close();
    stm = null;
    
    AppendTestUtil.check(fs, TEST_PATH, sizeWritten);
  } finally {
    IOUtils.closeStream(stm);
    cluster.shutdown();
  }
}
 
Example 4
Source File: TestPipelinesFailover.java    From hadoop with Apache License 2.0
private void doTestWriteOverFailoverWithDnFail(TestScenario scenario)
    throws Exception {
  Configuration conf = new Configuration();
  conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
  
  FSDataOutputStream stm = null;
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .nnTopology(MiniDFSNNTopology.simpleHATopology())
    .numDataNodes(5)
    .build();
  try {
    cluster.waitActive();
    cluster.transitionToActive(0);
    Thread.sleep(500);

    LOG.info("Starting with NN 0 active");
    FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
    stm = fs.create(TEST_PATH);
    
    // write a block and a half
    AppendTestUtil.write(stm, 0, BLOCK_AND_A_HALF);
    
    // Make sure all the blocks are written before failover
    stm.hflush();

    LOG.info("Failing over to NN 1");
    scenario.run(cluster);

    assertTrue(fs.exists(TEST_PATH));
    
    cluster.stopDataNode(0);

    // write another block and a half
    AppendTestUtil.write(stm, BLOCK_AND_A_HALF, BLOCK_AND_A_HALF);
    stm.hflush();
    
    LOG.info("Failing back to NN 0");
    cluster.transitionToStandby(1);
    cluster.transitionToActive(0);
    
    cluster.stopDataNode(1);
    
    AppendTestUtil.write(stm, BLOCK_AND_A_HALF*2, BLOCK_AND_A_HALF);
    stm.hflush();
    
    
    stm.close();
    stm = null;
    
    AppendTestUtil.check(fs, TEST_PATH, BLOCK_AND_A_HALF * 3);
  } finally {
    IOUtils.closeStream(stm);
    cluster.shutdown();
  }
}
 
Example 5
Source File: TestPipelinesFailover.java    From hadoop with Apache License 2.0
/**
 * Tests lease recovery if a client crashes. This approximates the
 * use case of HBase WALs being recovered after a NN failover.
 */
@Test(timeout=30000)
public void testLeaseRecoveryAfterFailover() throws Exception {
  final Configuration conf = new Configuration();
  // Disable permissions so that another user can recover the lease.
  conf.setBoolean(DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY, false);
  conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
  
  FSDataOutputStream stm = null;
  final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .nnTopology(MiniDFSNNTopology.simpleHATopology())
    .numDataNodes(3)
    .build();
  try {
    cluster.waitActive();
    cluster.transitionToActive(0);
    Thread.sleep(500);

    LOG.info("Starting with NN 0 active");
    FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
    stm = fs.create(TEST_PATH);
    
    // write a block and a half
    AppendTestUtil.write(stm, 0, BLOCK_AND_A_HALF);
    stm.hflush();
    
    LOG.info("Failing over to NN 1");
    
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);
    
    assertTrue(fs.exists(TEST_PATH));

    FileSystem fsOtherUser = createFsAsOtherUser(cluster, conf);
    loopRecoverLease(fsOtherUser, TEST_PATH);
    
    AppendTestUtil.check(fs, TEST_PATH, BLOCK_AND_A_HALF);
    
    // Fail back to ensure that the block locations weren't lost on the
    // original node.
    cluster.transitionToStandby(1);
    cluster.transitionToActive(0);
    AppendTestUtil.check(fs, TEST_PATH, BLOCK_AND_A_HALF);      
  } finally {
    IOUtils.closeStream(stm);
    cluster.shutdown();
  }
}
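createFsAsOtherUser() is another TestPipelinesFailover helper: it obtains a FileSystem handle bound to a different user, so that the lease held by the original writer can be broken. A minimal sketch of one way to do this with the UserGroupInformation API (the class, user, and group names are made up for illustration):

import java.security.PrivilegedExceptionAction;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.security.UserGroupInformation;

class OtherUserFsUtil {
  static FileSystem createFsAsOtherUser(final Configuration conf)
      throws Exception {
    // Run FileSystem.get() as a synthetic test user so the resulting
    // client does not share the original writer's lease.
    UserGroupInformation ugi = UserGroupInformation.createUserForTesting(
        "otheruser", new String[] { "othergroup" });
    return ugi.doAs(new PrivilegedExceptionAction<FileSystem>() {
      @Override
      public FileSystem run() throws Exception {
        return FileSystem.get(conf);
      }
    });
  }
}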
 
Example 6
Source File: TestPipelinesFailover.java    From hadoop with Apache License 2.0
/**
 * Test the scenario where the NN fails over after issuing a block
 * synchronization request, but before it is committed. The
 * DN running the recovery should then fail to commit the synchronization
 * and a later retry will succeed.
 */
@Test(timeout=30000)
public void testFailoverRightBeforeCommitSynchronization() throws Exception {
  final Configuration conf = new Configuration();
  // Disable permissions so that another user can recover the lease.
  conf.setBoolean(DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY, false);
  conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
  
  FSDataOutputStream stm = null;
  final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .nnTopology(MiniDFSNNTopology.simpleHATopology())
    .numDataNodes(3)
    .build();
  try {
    cluster.waitActive();
    cluster.transitionToActive(0);
    Thread.sleep(500);

    LOG.info("Starting with NN 0 active");
    FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
    stm = fs.create(TEST_PATH);
    
    // write a half block
    AppendTestUtil.write(stm, 0, BLOCK_SIZE / 2);
    stm.hflush();
    
    // Look into the block manager on the active node for the block
    // under construction.
    
    NameNode nn0 = cluster.getNameNode(0);
    ExtendedBlock blk = DFSTestUtil.getFirstBlock(fs, TEST_PATH);
    DatanodeDescriptor expectedPrimary =
        DFSTestUtil.getExpectedPrimaryNode(nn0, blk);
    LOG.info("Expecting block recovery to be triggered on DN " +
        expectedPrimary);
    
    // Find the corresponding DN daemon, and spy on its connection to the
    // active.
    DataNode primaryDN = cluster.getDataNode(expectedPrimary.getIpcPort());
    DatanodeProtocolClientSideTranslatorPB nnSpy =
        DataNodeTestUtils.spyOnBposToNN(primaryDN, nn0);
    
    // Delay the commitBlockSynchronization call
    DelayAnswer delayer = new DelayAnswer(LOG);
    Mockito.doAnswer(delayer).when(nnSpy).commitBlockSynchronization(
        Mockito.eq(blk),
        Mockito.anyInt(), // new genstamp
        Mockito.anyLong(), // new length
        Mockito.eq(true), // close file
        Mockito.eq(false), // delete block
        (DatanodeID[]) Mockito.anyObject(), // new targets
        (String[]) Mockito.anyObject()); // new target storages

    DistributedFileSystem fsOtherUser = createFsAsOtherUser(cluster, conf);
    assertFalse(fsOtherUser.recoverLease(TEST_PATH));
    
    LOG.info("Waiting for commitBlockSynchronization call from primary");
    delayer.waitForCall();

    LOG.info("Failing over to NN 1");
    
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);
    
    // Let the commitBlockSynchronization call go through, and check that
    // it failed with the correct exception.
    delayer.proceed();
    delayer.waitForResult();
    Throwable t = delayer.getThrown();
    if (t == null) {
      fail("commitBlockSynchronization call did not fail on standby");
    }
    GenericTestUtils.assertExceptionContains(
        "Operation category WRITE is not supported",
        t);
    
    // Now, if we try again to recover the block, it should succeed on the new
    // active.
    loopRecoverLease(fsOtherUser, TEST_PATH);
    
    AppendTestUtil.check(fs, TEST_PATH, BLOCK_SIZE/2);
  } finally {
    IOUtils.closeStream(stm);
    cluster.shutdown();
  }
}
 
Example 7
Source File: TestDNFencing.java    From hadoop with Apache License 2.0
/**
 * Test that, when a block is re-opened for append, the related
 * datanode messages are correctly queued by the SBN because
 * they have future states and genstamps.
 */
@Test
public void testQueueingWithAppend() throws Exception {
  int numQueued = 0;
  int numDN = cluster.getDataNodes().size();
  
  // case 1: create file and call hflush after write
  FSDataOutputStream out = fs.create(TEST_FILE_PATH);
  try {
    AppendTestUtil.write(out, 0, 10);
    out.hflush();

    // Opening the file causes the DNs to report RBW replicas; on the
    // standby NameNode these reports are queued rather than applied.
    // However, delivery of the RBW messages is delayed by the HDFS-7217
    // fix, so call cluster.triggerBlockReports() to trigger the
    // reporting sooner.
    cluster.triggerBlockReports();
    numQueued += numDN; // RBW messages

    // The cluster.triggerBlockReports() call above does a full 
    // block report that incurs 3 extra RBW messages
    numQueued += numDN; // RBW messages      
  } finally {
    IOUtils.closeStream(out);
    numQueued += numDN; // blockReceived messages
  }

  cluster.triggerBlockReports();
  numQueued += numDN;
  assertEquals(numQueued, cluster.getNameNode(1).getNamesystem().
      getPendingDataNodeMessageCount());

  // case 2: append to file and call hflush after write
  try {
    out = fs.append(TEST_FILE_PATH);
    AppendTestUtil.write(out, 10, 10);
    out.hflush();
    cluster.triggerBlockReports();
    numQueued += numDN * 2; // RBW messages, see comments in case 1
  } finally {
    IOUtils.closeStream(out);
    numQueued += numDN; // blockReceived
  }
  assertEquals(numQueued, cluster.getNameNode(1).getNamesystem().
      getPendingDataNodeMessageCount());

  // case 3: similar to case 2, except no hflush is called.
  try {
    out = fs.append(TEST_FILE_PATH);
    AppendTestUtil.write(out, 20, 10);
  } finally {
    // The write operation in the try block is buffered, thus no RBW message
    // is reported yet until the closeStream call here. When closeStream is
    // called, before HDFS-7217 fix, there would be three RBW messages
    // (blockReceiving), plus three FINALIZED messages (blockReceived)
    // delivered to NN. However, because of HDFS-7217 fix, the reporting of
    // RBW  messages is postponed. In this case, they are even overwritten 
    // by the blockReceived messages of the same block when they are waiting
    // to be delivered. All this happens within the closeStream() call.
    // What's delivered to NN is the three blockReceived messages. See 
    //    BPServiceActor#addPendingReplicationBlockInfo 
    //
    IOUtils.closeStream(out);
    numQueued += numDN; // blockReceived
  }

  cluster.triggerBlockReports();
  numQueued += numDN;

  LOG.info("Expect " + numQueued + " and got: " + cluster.getNameNode(1).getNamesystem().
      getPendingDataNodeMessageCount());      

  assertEquals(numQueued, cluster.getNameNode(1).getNamesystem().
      getPendingDataNodeMessageCount());

  cluster.transitionToStandby(0);
  cluster.transitionToActive(1);
  
  // Verify that no replicas are marked corrupt, and that the
  // file is readable from the failed-over standby.
  BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager());
  BlockManagerTestUtil.updateState(nn2.getNamesystem().getBlockManager());
  assertEquals(0, nn1.getNamesystem().getCorruptReplicaBlocks());
  assertEquals(0, nn2.getNamesystem().getCorruptReplicaBlocks());
  
  AppendTestUtil.check(fs, TEST_FILE_PATH, 30);
}
 