Java Code Examples for org.apache.hadoop.hdfs.AppendTestUtil#write()

The following examples show how to use org.apache.hadoop.hdfs.AppendTestUtil#write(). Each snippet is taken from a real project's test sources; the source file, originating project, and license are listed above each example.
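Before the project examples, here is a minimal, self-contained sketch of how AppendTestUtil#write() is typically paired with AppendTestUtil#check(): write(stream, start, length) writes length bytes of a deterministic test pattern beginning at logical offset start, and check(fs, path, length) re-reads the file and verifies that pattern. The single-DataNode MiniDFSCluster setup and the file name below are illustrative assumptions, not taken from any of the examples that follow.

public void demoWriteAndCheck() throws Exception {
  // Assumed minimal setup: a one-DataNode MiniDFSCluster for local testing.
  Configuration conf = new Configuration();
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
  try {
    cluster.waitActive();
    FileSystem fs = cluster.getFileSystem();
    Path path = new Path("/append-test-util-demo"); // hypothetical file name

    FSDataOutputStream out = fs.create(path);
    try {
      AppendTestUtil.write(out, 0, 1024); // 1024 pattern bytes starting at offset 0
      out.hflush();                       // make the written data visible to readers
    } finally {
      out.close();
    }

    // Re-read the file and verify the 1024-byte pattern written above.
    AppendTestUtil.check(fs, path, 1024);
  } finally {
    cluster.shutdown();
  }
}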
Example 1
Source File: TestPipelinesFailover.java    From hadoop with Apache License 2.0
@Override
public void doAnAction() throws Exception {
  FSDataOutputStream stm = fs.create(path, true);
  try {
    AppendTestUtil.write(stm, 0, 100);
    stm.hflush();
    loopRecoverLease(fsOtherUser, path);
    AppendTestUtil.check(fs, path, 100);
  } finally {
    try {
      stm.close();
    } catch (IOException e) {
      // should expect this since we lost the lease
    }
  }
}
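The loopRecoverLease() helper used above is not included in this excerpt. As a rough sketch of the assumed pattern, it retries DistributedFileSystem#recoverLease() on behalf of the second user until the NameNode reports the lease as released, so that the following AppendTestUtil.check() reads a consistently closed file; the real helper in TestPipelinesFailover may differ in its timeout and error handling.

private static void loopRecoverLease(FileSystem fsOtherUser, Path path)
    throws IOException, InterruptedException {
  // Assumed sketch: poll recoverLease() until it reports success or we time out.
  DistributedFileSystem dfs = (DistributedFileSystem) fsOtherUser;
  boolean recovered = false;
  for (int attempt = 0; attempt < 30 && !recovered; attempt++) {
    recovered = dfs.recoverLease(path); // true once the lease has been released
    if (!recovered) {
      Thread.sleep(1000);               // give the NameNode time to finish recovery
    }
  }
  if (!recovered) {
    throw new IOException("Lease on " + path + " was not recovered in time");
  }
}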
 
Example 2
Source File: TestPipelinesFailover.java    From big-c with Apache License 2.0
@Override
public void doAnAction() throws Exception {
  FSDataOutputStream stm = fs.create(path, true);
  try {
    AppendTestUtil.write(stm, 0, 100);
    stm.hflush();
    loopRecoverLease(fsOtherUser, path);
    AppendTestUtil.check(fs, path, 100);
  } finally {
    try {
      stm.close();
    } catch (IOException e) {
      // should expect this since we lost the lease
    }
  }
}
 
Example 3
Source File: TestDFSConcurrentFileOperations.java    From RDFS with Apache License 2.0
public void testLeaseRecoveryOnTrashedFile() throws Exception {
  Configuration conf = new Configuration();
  
  conf.setLong("dfs.block.size", blockSize);
  
  init(conf);
  
  String src = "/file-1";
  String dst = "/file-2";
  Path srcPath = new Path(src);
  Path dstPath = new Path(dst);
  FSDataOutputStream fos = fs.create(srcPath);

  AppendTestUtil.write(fos, 0, writeSize);
  fos.sync();
  
  // renaming a file out from under a client will cause close to fail
  // and result in the lease remaining while the blocks are finalized on
  // the DNs
  fs.rename(srcPath, dstPath);

  try {
    fos.close();
    fail("expected IOException");
  } catch (IOException e) {
    //expected
  }

  FileSystem fs2 = AppendTestUtil.createHdfsWithDifferentUsername(conf);
  AppendTestUtil.recoverFile(cluster, fs2, dstPath);
  AppendTestUtil.check(fs2, dstPath, writeSize);
}
 
Example 4
Source File: TestDNFencing.java    From hadoop with Apache License 2.0
/**
 * Regression test for HDFS-2742. The issue in this bug was:
 * - DN does a block report while file is open. This BR contains
 *   the block in RBW state.
 * - Standby queues the RBW state in PendingDatanodeMessages
 * - Standby processes edit logs during failover. Before fixing
 *   this bug, it was mistakenly applying the RBW reported state
 *   after the block had been completed, causing the block to get
 *   marked corrupt. Instead, we should now be applying the RBW
 *   message on OP_ADD, and then the FINALIZED message on OP_CLOSE.
 */
@Test
public void testBlockReportsWhileFileBeingWritten() throws Exception {
  FSDataOutputStream out = fs.create(TEST_FILE_PATH);
  try {
    AppendTestUtil.write(out, 0, 10);
    out.hflush();
    
    // Block report will include the RBW replica, but will be
    // queued on the StandbyNode.
    cluster.triggerBlockReports();
    
  } finally {
    IOUtils.closeStream(out);
  }

  cluster.transitionToStandby(0);
  cluster.transitionToActive(1);
  
  // Verify that no replicas are marked corrupt, and that the
  // file is readable from the failed-over standby.
  BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager());
  BlockManagerTestUtil.updateState(nn2.getNamesystem().getBlockManager());
  assertEquals(0, nn1.getNamesystem().getCorruptReplicaBlocks());
  assertEquals(0, nn2.getNamesystem().getCorruptReplicaBlocks());
  
  DFSTestUtil.readFile(fs, TEST_FILE_PATH);
}
 
Example 5
Source File: TestDNFencing.java    From big-c with Apache License 2.0
/**
 * Regression test for HDFS-2742. The issue in this bug was:
 * - DN does a block report while file is open. This BR contains
 *   the block in RBW state.
 * - Standby queues the RBW state in PendingDatanodeMessages
 * - Standby processes edit logs during failover. Before fixing
 *   this bug, it was mistakenly applying the RBW reported state
 *   after the block had been completed, causing the block to get
 *   marked corrupt. Instead, we should now be applying the RBW
 *   message on OP_ADD, and then the FINALIZED message on OP_CLOSE.
 */
@Test
public void testBlockReportsWhileFileBeingWritten() throws Exception {
  FSDataOutputStream out = fs.create(TEST_FILE_PATH);
  try {
    AppendTestUtil.write(out, 0, 10);
    out.hflush();
    
    // Block report will include the RBW replica, but will be
    // queued on the StandbyNode.
    cluster.triggerBlockReports();
    
  } finally {
    IOUtils.closeStream(out);
  }

  cluster.transitionToStandby(0);
  cluster.transitionToActive(1);
  
  // Verify that no replicas are marked corrupt, and that the
  // file is readable from the failed-over standby.
  BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager());
  BlockManagerTestUtil.updateState(nn2.getNamesystem().getBlockManager());
  assertEquals(0, nn1.getNamesystem().getCorruptReplicaBlocks());
  assertEquals(0, nn2.getNamesystem().getCorruptReplicaBlocks());
  
  DFSTestUtil.readFile(fs, TEST_FILE_PATH);
}
 
Example 6
Source File: TestPipelinesFailover.java    From big-c with Apache License 2.0
private void doTestWriteOverFailoverWithDnFail(TestScenario scenario)
    throws Exception {
  Configuration conf = new Configuration();
  conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
  
  FSDataOutputStream stm = null;
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .nnTopology(MiniDFSNNTopology.simpleHATopology())
    .numDataNodes(5)
    .build();
  try {
    cluster.waitActive();
    cluster.transitionToActive(0);
    Thread.sleep(500);

    LOG.info("Starting with NN 0 active");
    FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
    stm = fs.create(TEST_PATH);
    
    // write a block and a half
    AppendTestUtil.write(stm, 0, BLOCK_AND_A_HALF);
    
    // Make sure all the blocks are written before failover
    stm.hflush();

    LOG.info("Failing over to NN 1");
    scenario.run(cluster);

    assertTrue(fs.exists(TEST_PATH));
    
    cluster.stopDataNode(0);

    // write another block and a half
    AppendTestUtil.write(stm, BLOCK_AND_A_HALF, BLOCK_AND_A_HALF);
    stm.hflush();
    
    LOG.info("Failing back to NN 0");
    cluster.transitionToStandby(1);
    cluster.transitionToActive(0);
    
    cluster.stopDataNode(1);
    
    AppendTestUtil.write(stm, BLOCK_AND_A_HALF*2, BLOCK_AND_A_HALF);
    stm.hflush();
    
    
    stm.close();
    stm = null;
    
    AppendTestUtil.check(fs, TEST_PATH, BLOCK_AND_A_HALF * 3);
  } finally {
    IOUtils.closeStream(stm);
    cluster.shutdown();
  }
}
 
Example 7
Source File: TestDFSIsUnderConstruction.java    From RDFS with Apache License 2.0
public void testSecondLastBlockNotReceived() throws Exception {
  String fileName = "/testSecondLastBlockNotReceived";
  Path growingFile = new Path(fileName); 
  FSDataInputStream fis = null;
  FSDataOutputStream fos = fs.create(growingFile, false, 1024, (short)1, 1024);
  try {
    int fileLength = 2096;
    AppendTestUtil.write(fos, 0, fileLength);
    fos.sync();

    fis = fs.open(growingFile);
    for (int i = 0; i < fileLength; i++) {
      fis.read();
    }
    fis.close();

    FSNamesystem fsns = cluster.getNameNode().namesystem;
    INode[] inodes = fsns.dir.getExistingPathINodes(fileName);
    BlockInfo[] bis = ((INodeFile) (inodes[inodes.length - 1])).getBlocks();
    bis[bis.length - 2].setNumBytes(1);

    try {
      fis = fs.open(growingFile);
      TestCase.fail();
    } catch (IOException e) {
    }
    bis[bis.length - 2].setNumBytes(1024);

    bis[bis.length - 1].setNumBytes(1);
    fis = fs.open(growingFile);
    for (int i = 0; i < fileLength; i++) {
      fis.read();
    }
  } finally {
    if (fos != null) {
      fos.close();
    }
    if (fis != null) {
      fis.close();
    }
  }
}
 
Example 8
Source File: BlockReportTestBase.java    From big-c with Apache License 2.0
/**
 * Test for the case where one of the DNs in the pipeline is in the
 * process of doing a block report exactly when the block is closed.
 * In this case, the block report becomes delayed until after the
 * block is marked completed on the NN, and hence it reports an RBW
 * replica for a COMPLETE block. Such a report should not be marked
 * corrupt.
 * This is a regression test for HDFS-2791.
 */
@Test(timeout=300000)
public void testOneReplicaRbwReportArrivesAfterBlockCompleted() throws Exception {
  final CountDownLatch brFinished = new CountDownLatch(1);
  DelayAnswer delayer = new GenericTestUtils.DelayAnswer(LOG) {
    @Override
    protected Object passThrough(InvocationOnMock invocation)
        throws Throwable {
      try {
        return super.passThrough(invocation);
      } finally {
        // inform the test that our block report went through.
        brFinished.countDown();
      }
    }
  };

  final String METHOD_NAME = GenericTestUtils.getMethodName();
  Path filePath = new Path("/" + METHOD_NAME + ".dat");

  // Start a second DN for this test -- we're checking
  // what happens when one of the DNs is slowed for some reason.
  REPL_FACTOR = 2;
  startDNandWait(null, false);

  NameNode nn = cluster.getNameNode();

  FSDataOutputStream out = fs.create(filePath, REPL_FACTOR);
  try {
    AppendTestUtil.write(out, 0, 10);
    out.hflush();

    // Set up a spy so that we can delay the block report coming
    // from this node.
    DataNode dn = cluster.getDataNodes().get(0);
    DatanodeProtocolClientSideTranslatorPB spy =
      DataNodeTestUtils.spyOnBposToNN(dn, nn);

    Mockito.doAnswer(delayer)
      .when(spy).blockReport(
        Mockito.<DatanodeRegistration>anyObject(),
        Mockito.anyString(),
        Mockito.<StorageBlockReport[]>anyObject(),
        Mockito.<BlockReportContext>anyObject());

    // Force a block report to be generated. The block report will have
    // an RBW replica in it. Wait for the RPC to be sent, but block
    // it before it gets to the NN.
    dn.scheduleAllBlockReport(0);
    delayer.waitForCall();

  } finally {
    IOUtils.closeStream(out);
  }

  // Now that the stream is closed, the NN will have the block in COMPLETE
  // state.
  delayer.proceed();
  brFinished.await();

  // Verify that no replicas are marked corrupt, and that the
  // file is still readable.
  BlockManagerTestUtil.updateState(nn.getNamesystem().getBlockManager());
  assertEquals(0, nn.getNamesystem().getCorruptReplicaBlocks());
  DFSTestUtil.readFile(fs, filePath);

  // Ensure that the file is readable even from the DN that we futzed with.
  cluster.stopDataNode(1);
  DFSTestUtil.readFile(fs, filePath);
}
 
Example 9
Source File: TestDNFencing.java    From big-c with Apache License 2.0
/**
 * Another regression test for HDFS-2742. This tests the following sequence:
 * - DN does a block report while file is open. This BR contains
 *   the block in RBW state.
 * - The block report is delayed in reaching the standby.
 * - The file is closed.
 * - The standby processes the OP_ADD and OP_CLOSE operations before
 *   the RBW block report arrives.
 * - The standby should not mark the block as corrupt.
 */
@Test
public void testRBWReportArrivesAfterEdits() throws Exception {
  final CountDownLatch brFinished = new CountDownLatch(1);
  DelayAnswer delayer = new GenericTestUtils.DelayAnswer(LOG) {
    @Override
    protected Object passThrough(InvocationOnMock invocation)
        throws Throwable {
      try {
        return super.passThrough(invocation);
      } finally {
        // inform the test that our block report went through.
        brFinished.countDown();
      }
    }
  };

  FSDataOutputStream out = fs.create(TEST_FILE_PATH);
  try {
    AppendTestUtil.write(out, 0, 10);
    out.hflush();

    DataNode dn = cluster.getDataNodes().get(0);
    DatanodeProtocolClientSideTranslatorPB spy =
      DataNodeTestUtils.spyOnBposToNN(dn, nn2);
    
    Mockito.doAnswer(delayer)
      .when(spy).blockReport(
        Mockito.<DatanodeRegistration>anyObject(),
        Mockito.anyString(),
        Mockito.<StorageBlockReport[]>anyObject(),
        Mockito.<BlockReportContext>anyObject());
    dn.scheduleAllBlockReport(0);
    delayer.waitForCall();
    
  } finally {
    IOUtils.closeStream(out);
  }

  cluster.transitionToStandby(0);
  cluster.transitionToActive(1);
  
  delayer.proceed();
  brFinished.await();
  
  // Verify that no replicas are marked corrupt, and that the
  // file is readable from the failed-over standby.
  BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager());
  BlockManagerTestUtil.updateState(nn2.getNamesystem().getBlockManager());
  assertEquals(0, nn1.getNamesystem().getCorruptReplicaBlocks());
  assertEquals(0, nn2.getNamesystem().getCorruptReplicaBlocks());
  
  DFSTestUtil.readFile(fs, TEST_FILE_PATH);
}
 
Example 10
Source File: TestDNFencing.java    From big-c with Apache License 2.0
/**
 * Test that, when a block is re-opened for append, the related
 * datanode messages are correctly queued by the SBN because
 * they have future states and genstamps.
 */
@Test
public void testQueueingWithAppend() throws Exception {
  int numQueued = 0;
  int numDN = cluster.getDataNodes().size();
  
  // case 1: create file and call hflush after write
  FSDataOutputStream out = fs.create(TEST_FILE_PATH);
  try {
    AppendTestUtil.write(out, 0, 10);
    out.hflush();

    // Opening the file will report RBW replicas, but will be
    // queued on the StandbyNode.
    // However, the delivery of RBW messages is delayed by HDFS-7217 fix.
    // Apply cluster.triggerBlockReports() to trigger the reporting sooner.
    //
    cluster.triggerBlockReports();
    numQueued += numDN; // RBW messages

    // The cluster.triggerBlockReports() call above does a full 
    // block report that incurs 3 extra RBW messages
    numQueued += numDN; // RBW messages      
  } finally {
    IOUtils.closeStream(out);
    numQueued += numDN; // blockReceived messages
  }

  cluster.triggerBlockReports();
  numQueued += numDN;
  assertEquals(numQueued, cluster.getNameNode(1).getNamesystem().
      getPendingDataNodeMessageCount());

  // case 2: append to file and call hflush after write
  try {
    out = fs.append(TEST_FILE_PATH);
    AppendTestUtil.write(out, 10, 10);
    out.hflush();
    cluster.triggerBlockReports();
    numQueued += numDN * 2; // RBW messages, see comments in case 1
  } finally {
    IOUtils.closeStream(out);
    numQueued += numDN; // blockReceived
  }
  assertEquals(numQueued, cluster.getNameNode(1).getNamesystem().
      getPendingDataNodeMessageCount());

  // case 3: similar to case 2, except no hflush is called.
  try {
    out = fs.append(TEST_FILE_PATH);
    AppendTestUtil.write(out, 20, 10);
  } finally {
    // The write operation in the try block is buffered, thus no RBW message
    // is reported yet until the closeStream call here. When closeStream is
    // called, before HDFS-7217 fix, there would be three RBW messages
    // (blockReceiving), plus three FINALIZED messages (blockReceived)
    // delivered to NN. However, because of HDFS-7217 fix, the reporting of
    // RBW  messages is postponed. In this case, they are even overwritten 
    // by the blockReceived messages of the same block when they are waiting
    // to be delivered. All this happens within the closeStream() call.
    // What's delivered to NN is the three blockReceived messages. See 
    //    BPServiceActor#addPendingReplicationBlockInfo 
    //
    IOUtils.closeStream(out);
    numQueued += numDN; // blockReceived
  }

  cluster.triggerBlockReports();
  numQueued += numDN;

  LOG.info("Expect " + numQueued + " and got: " + cluster.getNameNode(1).getNamesystem().
      getPendingDataNodeMessageCount());      

  assertEquals(numQueued, cluster.getNameNode(1).getNamesystem().
      getPendingDataNodeMessageCount());

  cluster.transitionToStandby(0);
  cluster.transitionToActive(1);
  
  // Verify that no replicas are marked corrupt, and that the
  // file is readable from the failed-over standby.
  BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager());
  BlockManagerTestUtil.updateState(nn2.getNamesystem().getBlockManager());
  assertEquals(0, nn1.getNamesystem().getCorruptReplicaBlocks());
  assertEquals(0, nn2.getNamesystem().getCorruptReplicaBlocks());
  
  AppendTestUtil.check(fs, TEST_FILE_PATH, 30);
}
 
Example 11
Source File: TestPipelinesFailover.java    From big-c with Apache License 2.0
/**
 * Test the scenario where the NN fails over after issuing a block
 * synchronization request, but before it is committed. The
 * DN running the recovery should then fail to commit the synchronization
 * and a later retry will succeed.
 */
@Test(timeout=30000)
public void testFailoverRightBeforeCommitSynchronization() throws Exception {
  final Configuration conf = new Configuration();
  // Disable permissions so that another user can recover the lease.
  conf.setBoolean(DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY, false);
  conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
  
  FSDataOutputStream stm = null;
  final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .nnTopology(MiniDFSNNTopology.simpleHATopology())
    .numDataNodes(3)
    .build();
  try {
    cluster.waitActive();
    cluster.transitionToActive(0);
    Thread.sleep(500);

    LOG.info("Starting with NN 0 active");
    FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
    stm = fs.create(TEST_PATH);
    
    // write a half block
    AppendTestUtil.write(stm, 0, BLOCK_SIZE / 2);
    stm.hflush();
    
    // Look into the block manager on the active node for the block
    // under construction.
    
    NameNode nn0 = cluster.getNameNode(0);
    ExtendedBlock blk = DFSTestUtil.getFirstBlock(fs, TEST_PATH);
    DatanodeDescriptor expectedPrimary =
        DFSTestUtil.getExpectedPrimaryNode(nn0, blk);
    LOG.info("Expecting block recovery to be triggered on DN " +
        expectedPrimary);
    
    // Find the corresponding DN daemon, and spy on its connection to the
    // active.
    DataNode primaryDN = cluster.getDataNode(expectedPrimary.getIpcPort());
    DatanodeProtocolClientSideTranslatorPB nnSpy =
        DataNodeTestUtils.spyOnBposToNN(primaryDN, nn0);
    
    // Delay the commitBlockSynchronization call
    DelayAnswer delayer = new DelayAnswer(LOG);
    Mockito.doAnswer(delayer).when(nnSpy).commitBlockSynchronization(
        Mockito.eq(blk),
        Mockito.anyInt(), // new genstamp
        Mockito.anyLong(), // new length
        Mockito.eq(true), // close file
        Mockito.eq(false), // delete block
        (DatanodeID[]) Mockito.anyObject(), // new targets
        (String[]) Mockito.anyObject()); // new target storages

    DistributedFileSystem fsOtherUser = createFsAsOtherUser(cluster, conf);
    assertFalse(fsOtherUser.recoverLease(TEST_PATH));
    
    LOG.info("Waiting for commitBlockSynchronization call from primary");
    delayer.waitForCall();

    LOG.info("Failing over to NN 1");
    
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);
    
    // Let the commitBlockSynchronization call go through, and check that
    // it failed with the correct exception.
    delayer.proceed();
    delayer.waitForResult();
    Throwable t = delayer.getThrown();
    if (t == null) {
      fail("commitBlockSynchronization call did not fail on standby");
    }
    GenericTestUtils.assertExceptionContains(
        "Operation category WRITE is not supported",
        t);
    
    // Now, if we try again to recover the block, it should succeed on the new
    // active.
    loopRecoverLease(fsOtherUser, TEST_PATH);
    
    AppendTestUtil.check(fs, TEST_PATH, BLOCK_SIZE/2);
  } finally {
    IOUtils.closeStream(stm);
    cluster.shutdown();
  }
}
 
Example 12
Source File: TestPipelinesFailover.java    From big-c with Apache License 2.0
/**
 * Tests lease recovery if a client crashes. This approximates the
 * use case of HBase WALs being recovered after a NN failover.
 */
@Test(timeout=30000)
public void testLeaseRecoveryAfterFailover() throws Exception {
  final Configuration conf = new Configuration();
  // Disable permissions so that another user can recover the lease.
  conf.setBoolean(DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY, false);
  conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
  
  FSDataOutputStream stm = null;
  final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .nnTopology(MiniDFSNNTopology.simpleHATopology())
    .numDataNodes(3)
    .build();
  try {
    cluster.waitActive();
    cluster.transitionToActive(0);
    Thread.sleep(500);

    LOG.info("Starting with NN 0 active");
    FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
    stm = fs.create(TEST_PATH);
    
    // write a block and a half
    AppendTestUtil.write(stm, 0, BLOCK_AND_A_HALF);
    stm.hflush();
    
    LOG.info("Failing over to NN 1");
    
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);
    
    assertTrue(fs.exists(TEST_PATH));

    FileSystem fsOtherUser = createFsAsOtherUser(cluster, conf);
    loopRecoverLease(fsOtherUser, TEST_PATH);
    
    AppendTestUtil.check(fs, TEST_PATH, BLOCK_AND_A_HALF);
    
    // Fail back to ensure that the block locations weren't lost on the
    // original node.
    cluster.transitionToStandby(1);
    cluster.transitionToActive(0);
    AppendTestUtil.check(fs, TEST_PATH, BLOCK_AND_A_HALF);      
  } finally {
    IOUtils.closeStream(stm);
    cluster.shutdown();
  }
}
 
Example 13
Source File: TestPipelinesFailover.java    From hadoop with Apache License 2.0
private void doWriteOverFailoverTest(TestScenario scenario,
    MethodToTestIdempotence methodToTest) throws Exception {
  Configuration conf = new Configuration();
  conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
  // Don't check replication periodically.
  conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_INTERVAL_KEY, 1000);
  
  FSDataOutputStream stm = null;
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .nnTopology(MiniDFSNNTopology.simpleHATopology())
    .numDataNodes(3)
    .build();
  try {
    int sizeWritten = 0;
    
    cluster.waitActive();
    cluster.transitionToActive(0);
    Thread.sleep(500);

    LOG.info("Starting with NN 0 active");
    FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
    stm = fs.create(TEST_PATH);
    
    // write a block and a half
    AppendTestUtil.write(stm, 0, BLOCK_AND_A_HALF);
    sizeWritten += BLOCK_AND_A_HALF;
    
    // Make sure all of the blocks are written out before failover.
    stm.hflush();

    LOG.info("Failing over to NN 1");
    scenario.run(cluster);

    // NOTE: explicitly do *not* make any further metadata calls
    // to the NN here. The next IPC call should be to allocate the next
    // block. Any other call would notice the failover and not test
    // idempotence of the operation (HDFS-3031)
    
    FSNamesystem ns1 = cluster.getNameNode(1).getNamesystem();
    BlockManagerTestUtil.updateState(ns1.getBlockManager());
    assertEquals(0, ns1.getPendingReplicationBlocks());
    assertEquals(0, ns1.getCorruptReplicaBlocks());
    assertEquals(0, ns1.getMissingBlocksCount());

    // If we're testing allocateBlock()'s idempotence, write another
    // block and a half, so we have to allocate a new block.
    // Otherwise, don't write anything, so our next RPC will be
    // completeFile() if we're testing idempotence of that operation.
    if (methodToTest == MethodToTestIdempotence.ALLOCATE_BLOCK) {
      // write another block and a half
      AppendTestUtil.write(stm, sizeWritten, BLOCK_AND_A_HALF);
      sizeWritten += BLOCK_AND_A_HALF;
    }
    
    stm.close();
    stm = null;
    
    AppendTestUtil.check(fs, TEST_PATH, sizeWritten);
  } finally {
    IOUtils.closeStream(stm);
    cluster.shutdown();
  }
}
 
Example 14
Source File: TestPipelinesFailover.java    From big-c with Apache License 2.0
private void doWriteOverFailoverTest(TestScenario scenario,
    MethodToTestIdempotence methodToTest) throws Exception {
  Configuration conf = new Configuration();
  conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
  // Don't check replication periodically.
  conf.setInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_INTERVAL_KEY, 1000);
  
  FSDataOutputStream stm = null;
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .nnTopology(MiniDFSNNTopology.simpleHATopology())
    .numDataNodes(3)
    .build();
  try {
    int sizeWritten = 0;
    
    cluster.waitActive();
    cluster.transitionToActive(0);
    Thread.sleep(500);

    LOG.info("Starting with NN 0 active");
    FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
    stm = fs.create(TEST_PATH);
    
    // write a block and a half
    AppendTestUtil.write(stm, 0, BLOCK_AND_A_HALF);
    sizeWritten += BLOCK_AND_A_HALF;
    
    // Make sure all of the blocks are written out before failover.
    stm.hflush();

    LOG.info("Failing over to NN 1");
    scenario.run(cluster);

    // NOTE: explicitly do *not* make any further metadata calls
    // to the NN here. The next IPC call should be to allocate the next
    // block. Any other call would notice the failover and not test
    // idempotence of the operation (HDFS-3031)
    
    FSNamesystem ns1 = cluster.getNameNode(1).getNamesystem();
    BlockManagerTestUtil.updateState(ns1.getBlockManager());
    assertEquals(0, ns1.getPendingReplicationBlocks());
    assertEquals(0, ns1.getCorruptReplicaBlocks());
    assertEquals(0, ns1.getMissingBlocksCount());

    // If we're testing allocateBlock()'s idempotence, write another
    // block and a half, so we have to allocate a new block.
    // Otherwise, don't write anything, so our next RPC will be
    // completeFile() if we're testing idempotence of that operation.
    if (methodToTest == MethodToTestIdempotence.ALLOCATE_BLOCK) {
      // write another block and a half
      AppendTestUtil.write(stm, sizeWritten, BLOCK_AND_A_HALF);
      sizeWritten += BLOCK_AND_A_HALF;
    }
    
    stm.close();
    stm = null;
    
    AppendTestUtil.check(fs, TEST_PATH, sizeWritten);
  } finally {
    IOUtils.closeStream(stm);
    cluster.shutdown();
  }
}
 
Example 15
Source File: BlockReportTestBase.java    From hadoop with Apache License 2.0
/**
 * Test for the case where one of the DNs in the pipeline is in the
 * process of doing a block report exactly when the block is closed.
 * In this case, the block report becomes delayed until after the
 * block is marked completed on the NN, and hence it reports an RBW
 * replica for a COMPLETE block. Such a report should not be marked
 * corrupt.
 * This is a regression test for HDFS-2791.
 */
@Test(timeout=300000)
public void testOneReplicaRbwReportArrivesAfterBlockCompleted() throws Exception {
  final CountDownLatch brFinished = new CountDownLatch(1);
  DelayAnswer delayer = new GenericTestUtils.DelayAnswer(LOG) {
    @Override
    protected Object passThrough(InvocationOnMock invocation)
        throws Throwable {
      try {
        return super.passThrough(invocation);
      } finally {
        // inform the test that our block report went through.
        brFinished.countDown();
      }
    }
  };

  final String METHOD_NAME = GenericTestUtils.getMethodName();
  Path filePath = new Path("/" + METHOD_NAME + ".dat");

  // Start a second DN for this test -- we're checking
  // what happens when one of the DNs is slowed for some reason.
  REPL_FACTOR = 2;
  startDNandWait(null, false);

  NameNode nn = cluster.getNameNode();

  FSDataOutputStream out = fs.create(filePath, REPL_FACTOR);
  try {
    AppendTestUtil.write(out, 0, 10);
    out.hflush();

    // Set up a spy so that we can delay the block report coming
    // from this node.
    DataNode dn = cluster.getDataNodes().get(0);
    DatanodeProtocolClientSideTranslatorPB spy =
      DataNodeTestUtils.spyOnBposToNN(dn, nn);

    Mockito.doAnswer(delayer)
      .when(spy).blockReport(
        Mockito.<DatanodeRegistration>anyObject(),
        Mockito.anyString(),
        Mockito.<StorageBlockReport[]>anyObject(),
        Mockito.<BlockReportContext>anyObject());

    // Force a block report to be generated. The block report will have
    // an RBW replica in it. Wait for the RPC to be sent, but block
    // it before it gets to the NN.
    dn.scheduleAllBlockReport(0);
    delayer.waitForCall();

  } finally {
    IOUtils.closeStream(out);
  }

  // Now that the stream is closed, the NN will have the block in COMPLETE
  // state.
  delayer.proceed();
  brFinished.await();

  // Verify that no replicas are marked corrupt, and that the
  // file is still readable.
  BlockManagerTestUtil.updateState(nn.getNamesystem().getBlockManager());
  assertEquals(0, nn.getNamesystem().getCorruptReplicaBlocks());
  DFSTestUtil.readFile(fs, filePath);

  // Ensure that the file is readable even from the DN that we futzed with.
  cluster.stopDataNode(1);
  DFSTestUtil.readFile(fs, filePath);
}
 
Example 16
Source File: TestDNFencing.java    From hadoop with Apache License 2.0
/**
 * Another regression test for HDFS-2742. This tests the following sequence:
 * - DN does a block report while file is open. This BR contains
 *   the block in RBW state.
 * - The block report is delayed in reaching the standby.
 * - The file is closed.
 * - The standby processes the OP_ADD and OP_CLOSE operations before
 *   the RBW block report arrives.
 * - The standby should not mark the block as corrupt.
 */
@Test
public void testRBWReportArrivesAfterEdits() throws Exception {
  final CountDownLatch brFinished = new CountDownLatch(1);
  DelayAnswer delayer = new GenericTestUtils.DelayAnswer(LOG) {
    @Override
    protected Object passThrough(InvocationOnMock invocation)
        throws Throwable {
      try {
        return super.passThrough(invocation);
      } finally {
        // inform the test that our block report went through.
        brFinished.countDown();
      }
    }
  };

  FSDataOutputStream out = fs.create(TEST_FILE_PATH);
  try {
    AppendTestUtil.write(out, 0, 10);
    out.hflush();

    DataNode dn = cluster.getDataNodes().get(0);
    DatanodeProtocolClientSideTranslatorPB spy =
      DataNodeTestUtils.spyOnBposToNN(dn, nn2);
    
    Mockito.doAnswer(delayer)
      .when(spy).blockReport(
        Mockito.<DatanodeRegistration>anyObject(),
        Mockito.anyString(),
        Mockito.<StorageBlockReport[]>anyObject(),
        Mockito.<BlockReportContext>anyObject());
    dn.scheduleAllBlockReport(0);
    delayer.waitForCall();
    
  } finally {
    IOUtils.closeStream(out);
  }

  cluster.transitionToStandby(0);
  cluster.transitionToActive(1);
  
  delayer.proceed();
  brFinished.await();
  
  // Verify that no replicas are marked corrupt, and that the
  // file is readable from the failed-over standby.
  BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager());
  BlockManagerTestUtil.updateState(nn2.getNamesystem().getBlockManager());
  assertEquals(0, nn1.getNamesystem().getCorruptReplicaBlocks());
  assertEquals(0, nn2.getNamesystem().getCorruptReplicaBlocks());
  
  DFSTestUtil.readFile(fs, TEST_FILE_PATH);
}
 
Example 17
Source File: TestDNFencing.java    From hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Test that, when a block is re-opened for append, the related
 * datanode messages are correctly queued by the SBN because
 * they have future states and genstamps.
 */
@Test
public void testQueueingWithAppend() throws Exception {
  int numQueued = 0;
  int numDN = cluster.getDataNodes().size();
  
  // case 1: create file and call hflush after write
  FSDataOutputStream out = fs.create(TEST_FILE_PATH);
  try {
    AppendTestUtil.write(out, 0, 10);
    out.hflush();

    // Opening the file will report RBW replicas, but will be
    // queued on the StandbyNode.
    // However, the delivery of RBW messages is delayed by HDFS-7217 fix.
    // Apply cluster.triggerBlockReports() to trigger the reporting sooner.
    //
    cluster.triggerBlockReports();
    numQueued += numDN; // RBW messages

    // The cluster.triggerBlockReports() call above does a full 
    // block report that incurs 3 extra RBW messages
    numQueued += numDN; // RBW messages      
  } finally {
    IOUtils.closeStream(out);
    numQueued += numDN; // blockReceived messages
  }

  cluster.triggerBlockReports();
  numQueued += numDN;
  assertEquals(numQueued, cluster.getNameNode(1).getNamesystem().
      getPendingDataNodeMessageCount());

  // case 2: append to file and call hflush after write
  try {
    out = fs.append(TEST_FILE_PATH);
    AppendTestUtil.write(out, 10, 10);
    out.hflush();
    cluster.triggerBlockReports();
    numQueued += numDN * 2; // RBW messages, see comments in case 1
  } finally {
    IOUtils.closeStream(out);
    numQueued += numDN; // blockReceived
  }
  assertEquals(numQueued, cluster.getNameNode(1).getNamesystem().
      getPendingDataNodeMessageCount());

  // case 3: similar to case 2, except no hflush is called.
  try {
    out = fs.append(TEST_FILE_PATH);
    AppendTestUtil.write(out, 20, 10);
  } finally {
    // The write operation in the try block is buffered, thus no RBW message
    // is reported yet until the closeStream call here. When closeStream is
    // called, before HDFS-7217 fix, there would be three RBW messages
    // (blockReceiving), plus three FINALIZED messages (blockReceived)
    // delivered to NN. However, because of HDFS-7217 fix, the reporting of
    // RBW  messages is postponed. In this case, they are even overwritten 
    // by the blockReceived messages of the same block when they are waiting
    // to be delivered. All this happens within the closeStream() call.
    // What's delivered to NN is the three blockReceived messages. See 
    //    BPServiceActor#addPendingReplicationBlockInfo 
    //
    IOUtils.closeStream(out);
    numQueued += numDN; // blockReceived
  }

  cluster.triggerBlockReports();
  numQueued += numDN;

  LOG.info("Expect " + numQueued + " and got: " + cluster.getNameNode(1).getNamesystem().
      getPendingDataNodeMessageCount());      

  assertEquals(numQueued, cluster.getNameNode(1).getNamesystem().
      getPendingDataNodeMessageCount());

  cluster.transitionToStandby(0);
  cluster.transitionToActive(1);
  
  // Verify that no replicas are marked corrupt, and that the
  // file is readable from the failed-over standby.
  BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager());
  BlockManagerTestUtil.updateState(nn2.getNamesystem().getBlockManager());
  assertEquals(0, nn1.getNamesystem().getCorruptReplicaBlocks());
  assertEquals(0, nn2.getNamesystem().getCorruptReplicaBlocks());
  
  AppendTestUtil.check(fs, TEST_FILE_PATH, 30);
}
 
Example 18
Source File: TestPipelinesFailover.java    From hadoop with Apache License 2.0
/**
 * Test the scenario where the NN fails over after issuing a block
 * synchronization request, but before it is committed. The
 * DN running the recovery should then fail to commit the synchronization
 * and a later retry will succeed.
 */
@Test(timeout=30000)
public void testFailoverRightBeforeCommitSynchronization() throws Exception {
  final Configuration conf = new Configuration();
  // Disable permissions so that another user can recover the lease.
  conf.setBoolean(DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY, false);
  conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
  
  FSDataOutputStream stm = null;
  final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .nnTopology(MiniDFSNNTopology.simpleHATopology())
    .numDataNodes(3)
    .build();
  try {
    cluster.waitActive();
    cluster.transitionToActive(0);
    Thread.sleep(500);

    LOG.info("Starting with NN 0 active");
    FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
    stm = fs.create(TEST_PATH);
    
    // write a half block
    AppendTestUtil.write(stm, 0, BLOCK_SIZE / 2);
    stm.hflush();
    
    // Look into the block manager on the active node for the block
    // under construction.
    
    NameNode nn0 = cluster.getNameNode(0);
    ExtendedBlock blk = DFSTestUtil.getFirstBlock(fs, TEST_PATH);
    DatanodeDescriptor expectedPrimary =
        DFSTestUtil.getExpectedPrimaryNode(nn0, blk);
    LOG.info("Expecting block recovery to be triggered on DN " +
        expectedPrimary);
    
    // Find the corresponding DN daemon, and spy on its connection to the
    // active.
    DataNode primaryDN = cluster.getDataNode(expectedPrimary.getIpcPort());
    DatanodeProtocolClientSideTranslatorPB nnSpy =
        DataNodeTestUtils.spyOnBposToNN(primaryDN, nn0);
    
    // Delay the commitBlockSynchronization call
    DelayAnswer delayer = new DelayAnswer(LOG);
    Mockito.doAnswer(delayer).when(nnSpy).commitBlockSynchronization(
        Mockito.eq(blk),
        Mockito.anyInt(), // new genstamp
        Mockito.anyLong(), // new length
        Mockito.eq(true), // close file
        Mockito.eq(false), // delete block
        (DatanodeID[]) Mockito.anyObject(), // new targets
        (String[]) Mockito.anyObject()); // new target storages

    DistributedFileSystem fsOtherUser = createFsAsOtherUser(cluster, conf);
    assertFalse(fsOtherUser.recoverLease(TEST_PATH));
    
    LOG.info("Waiting for commitBlockSynchronization call from primary");
    delayer.waitForCall();

    LOG.info("Failing over to NN 1");
    
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);
    
    // Let the commitBlockSynchronization call go through, and check that
    // it failed with the correct exception.
    delayer.proceed();
    delayer.waitForResult();
    Throwable t = delayer.getThrown();
    if (t == null) {
      fail("commitBlockSynchronization call did not fail on standby");
    }
    GenericTestUtils.assertExceptionContains(
        "Operation category WRITE is not supported",
        t);
    
    // Now, if we try again to recover the block, it should succeed on the new
    // active.
    loopRecoverLease(fsOtherUser, TEST_PATH);
    
    AppendTestUtil.check(fs, TEST_PATH, BLOCK_SIZE/2);
  } finally {
    IOUtils.closeStream(stm);
    cluster.shutdown();
  }
}
 
Example 19
Source File: TestPipelinesFailover.java    From hadoop with Apache License 2.0
/**
 * Tests lease recovery if a client crashes. This approximates the
 * use case of HBase WALs being recovered after a NN failover.
 */
@Test(timeout=30000)
public void testLeaseRecoveryAfterFailover() throws Exception {
  final Configuration conf = new Configuration();
  // Disable permissions so that another user can recover the lease.
  conf.setBoolean(DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY, false);
  conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
  
  FSDataOutputStream stm = null;
  final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .nnTopology(MiniDFSNNTopology.simpleHATopology())
    .numDataNodes(3)
    .build();
  try {
    cluster.waitActive();
    cluster.transitionToActive(0);
    Thread.sleep(500);

    LOG.info("Starting with NN 0 active");
    FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
    stm = fs.create(TEST_PATH);
    
    // write a block and a half
    AppendTestUtil.write(stm, 0, BLOCK_AND_A_HALF);
    stm.hflush();
    
    LOG.info("Failing over to NN 1");
    
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);
    
    assertTrue(fs.exists(TEST_PATH));

    FileSystem fsOtherUser = createFsAsOtherUser(cluster, conf);
    loopRecoverLease(fsOtherUser, TEST_PATH);
    
    AppendTestUtil.check(fs, TEST_PATH, BLOCK_AND_A_HALF);
    
    // Fail back to ensure that the block locations weren't lost on the
    // original node.
    cluster.transitionToStandby(1);
    cluster.transitionToActive(0);
    AppendTestUtil.check(fs, TEST_PATH, BLOCK_AND_A_HALF);      
  } finally {
    IOUtils.closeStream(stm);
    cluster.shutdown();
  }
}
 
Example 20
Source File: TestPipelinesFailover.java    From hadoop with Apache License 2.0
private void doTestWriteOverFailoverWithDnFail(TestScenario scenario)
    throws Exception {
  Configuration conf = new Configuration();
  conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
  
  FSDataOutputStream stm = null;
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .nnTopology(MiniDFSNNTopology.simpleHATopology())
    .numDataNodes(5)
    .build();
  try {
    cluster.waitActive();
    cluster.transitionToActive(0);
    Thread.sleep(500);

    LOG.info("Starting with NN 0 active");
    FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
    stm = fs.create(TEST_PATH);
    
    // write a block and a half
    AppendTestUtil.write(stm, 0, BLOCK_AND_A_HALF);
    
    // Make sure all the blocks are written before failover
    stm.hflush();

    LOG.info("Failing over to NN 1");
    scenario.run(cluster);

    assertTrue(fs.exists(TEST_PATH));
    
    cluster.stopDataNode(0);

    // write another block and a half
    AppendTestUtil.write(stm, BLOCK_AND_A_HALF, BLOCK_AND_A_HALF);
    stm.hflush();
    
    LOG.info("Failing back to NN 0");
    cluster.transitionToStandby(1);
    cluster.transitionToActive(0);
    
    cluster.stopDataNode(1);
    
    AppendTestUtil.write(stm, BLOCK_AND_A_HALF*2, BLOCK_AND_A_HALF);
    stm.hflush();
    
    
    stm.close();
    stm = null;
    
    AppendTestUtil.check(fs, TEST_PATH, BLOCK_AND_A_HALF * 3);
  } finally {
    IOUtils.closeStream(stm);
    cluster.shutdown();
  }
}