Java Code Examples for org.apache.hadoop.fs.FSDataOutputStream#hflush()

The following examples show how to use org.apache.hadoop.fs.FSDataOutputStream#hflush(). Each example is taken from an open source project; the source file and the project it comes from are noted above the code.
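
Before the project examples, here is a minimal, self-contained sketch of the typical hflush() pattern (not taken from any of the projects below; the path and configuration are placeholders): write to an open FSDataOutputStream, then call hflush() to push the client-side buffers to the datanodes so new readers can see the data, without forcing it to disk the way hsync() does.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HflushSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path path = new Path("/tmp/hflush-demo");  // placeholder path

    FSDataOutputStream out = fs.create(path, true);
    try {
      out.writeBytes("partial record\n");
      // Flush buffered bytes out of the client so new readers can see them;
      // unlike hsync(), this does not guarantee the data is on disk.
      out.hflush();
      out.writeBytes("rest of the record\n");
    } finally {
      out.close();  // close() flushes remaining data and completes the file
    }
  }
}
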
Example 1
Source File: NameNodeConnector.java    From hadoop with Apache License 2.0
/**
 * To make sure that no more than one instance is running in an HDFS
 * cluster, a file is created in HDFS, the hostname of the machine on
 * which the instance is running is written to it, and the file is kept
 * open until the instance exits.
 * 
 * This prevents a second instance from running, because it cannot
 * create the file while the first one is running.
 * 
 * This method checks whether an instance is already running and, if not,
 * marks this one as running. Note that this is an atomic operation.
 * 
 * @return null if there is a running instance;
 *         otherwise, the output stream to the newly created file.
 */
private OutputStream checkAndMarkRunning() throws IOException {
  try {
    if (fs.exists(idPath)) {
      // try appending to it so that it will fail fast if another balancer is
      // running.
      IOUtils.closeStream(fs.append(idPath));
      fs.delete(idPath, true);
    }
    final FSDataOutputStream fsout = fs.create(idPath, false);
    // mark balancer idPath to be deleted during filesystem closure
    fs.deleteOnExit(idPath);
    if (write2IdFile) {
      fsout.writeBytes(InetAddress.getLocalHost().getHostName());
      fsout.hflush();
    }
    return fsout;
  } catch(RemoteException e) {
    if(AlreadyBeingCreatedException.class.getName().equals(e.getClassName())){
      return null;
    } else {
      throw e;
    }
  }
}
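
For context, the sketch below shows how a caller might hold on to the stream returned by checkAndMarkRunning() as a single-instance lock. This caller is hypothetical and not part of NameNodeConnector; only the behavior of checkAndMarkRunning() is taken from the example above.

import java.io.IOException;
import java.io.OutputStream;

// Hypothetical caller sketch: the open stream acts as the instance lock,
// and closing it (or process exit) releases the lock.
abstract class SingleInstanceGuard {
  private OutputStream marker;

  // Assumed to behave like the checkAndMarkRunning() shown above.
  protected abstract OutputStream checkAndMarkRunning() throws IOException;

  boolean tryBecomeSoleInstance() throws IOException {
    marker = checkAndMarkRunning();
    // null means another instance already holds the open file.
    return marker != null;
  }

  void release() throws IOException {
    if (marker != null) {
      marker.close();
      marker = null;
    }
  }
}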
 
Example 2
Source File: NameNodeConnector.java    From big-c with Apache License 2.0
/**
 * To make sure that no more than one instance is running in an HDFS
 * cluster, a file is created in HDFS, the hostname of the machine on
 * which the instance is running is written to it, and the file is kept
 * open until the instance exits.
 * 
 * This prevents a second instance from running, because it cannot
 * create the file while the first one is running.
 * 
 * This method checks whether an instance is already running and, if not,
 * marks this one as running. Note that this is an atomic operation.
 * 
 * @return null if there is a running instance;
 *         otherwise, the output stream to the newly created file.
 */
private OutputStream checkAndMarkRunning() throws IOException {
  try {
    if (fs.exists(idPath)) {
      // try appending to it so that it will fail fast if another balancer is
      // running.
      IOUtils.closeStream(fs.append(idPath));
      fs.delete(idPath, true);
    }
    final FSDataOutputStream fsout = fs.create(idPath, false);
    // mark balancer idPath to be deleted during filesystem closure
    fs.deleteOnExit(idPath);
    if (write2IdFile) {
      fsout.writeBytes(InetAddress.getLocalHost().getHostName());
      fsout.hflush();
    }
    return fsout;
  } catch(RemoteException e) {
    if(AlreadyBeingCreatedException.class.getName().equals(e.getClassName())){
      return null;
    } else {
      throw e;
    }
  }
}
 
Example 3
Source File: TestINodeFile.java    From hadoop with Apache License 2.0
@Test(timeout=120000)
public void testWriteToDeletedFile() throws IOException {
  Configuration conf = new Configuration();
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1)
      .build();
  cluster.waitActive();
  FileSystem fs = cluster.getFileSystem();

  Path path = new Path("/test1");
  assertTrue(fs.mkdirs(path));

  int size = conf.getInt(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, 512);
  byte[] data = new byte[size];

  // Create one file
  Path filePath = new Path("/test1/file");
  FSDataOutputStream fos = fs.create(filePath);

  // Delete the file
  fs.delete(filePath, false);

  // Adding a new block should fail since /test1/file has been deleted.
  try {
    fos.write(data, 0, data.length);
    // make sure addBlock() request gets to NN immediately
    fos.hflush();

    fail("Write should fail after delete");
  } catch (Exception e) {
    /* Ignore */
  } finally {
    cluster.shutdown();
  }
}
 
Example 4
Source File: ProtobufLogWriter.java    From hbase with Apache License 2.0
@Override
public void sync(boolean forceSync) throws IOException {
  FSDataOutputStream fsdos = this.output;
  if (fsdos == null) {
    return; // Presume closed
  }
  fsdos.flush();
  if (forceSync) {
    fsdos.hsync();
  } else {
    fsdos.hflush();
  }
}
 
Example 5
Source File: TestHFlush.java    From big-c with Apache License 2.0
/** This creates a slow writer and checks
  * whether pipeline heartbeats work correctly.
  */
@Test
 public void testPipelineHeartbeat() throws Exception {
   final int DATANODE_NUM = 2;
   final int fileLen = 6;
   Configuration conf = new HdfsConfiguration();
   final int timeout = 2000;
   conf.setInt(DFSConfigKeys.DFS_CLIENT_SOCKET_TIMEOUT_KEY, 
       timeout);

   final Path p = new Path("/pipelineHeartbeat/foo");
   System.out.println("p=" + p);
   
   MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(DATANODE_NUM).build();
   try {
     DistributedFileSystem fs = cluster.getFileSystem();

     byte[] fileContents = AppendTestUtil.initBuffer(fileLen);

     // create a new file.
     FSDataOutputStream stm = AppendTestUtil.createFile(fs, p, DATANODE_NUM);

     stm.write(fileContents, 0, 1);
     Thread.sleep(timeout);
     stm.hflush();
     System.out.println("Wrote 1 byte and hflush " + p);

     // write another byte
     Thread.sleep(timeout);
     stm.write(fileContents, 1, 1);
     stm.hflush();

     stm.write(fileContents, 2, 1);
     Thread.sleep(timeout);
     stm.hflush();

     stm.write(fileContents, 3, 1);
     Thread.sleep(timeout);
     stm.write(fileContents, 4, 1);
     stm.hflush();

     stm.write(fileContents, 5, 1);
     Thread.sleep(timeout);
     stm.close();

     // verify that entire file is good
     AppendTestUtil.checkFullFile(fs, p, fileLen,
         fileContents, "Failed to slowly write to a file");
   } finally {
     cluster.shutdown();
   }
 }
 
Example 6
Source File: TestHSync.java    From hadoop with Apache License 2.0
private void testHSyncOperation(boolean testWithAppend) throws IOException {
  Configuration conf = new HdfsConfiguration();
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build();
  final DistributedFileSystem fs = cluster.getFileSystem();

  final Path p = new Path("/testHSync/foo");
  final int len = 1 << 16;
  FSDataOutputStream out = fs.create(p, FsPermission.getDefault(),
      EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE, CreateFlag.SYNC_BLOCK),
      4096, (short) 1, len, null);
  if (testWithAppend) {
    // re-open the file with append call
    out.close();
    out = fs.append(p, EnumSet.of(CreateFlag.APPEND, CreateFlag.SYNC_BLOCK),
        4096, null);
  }
  out.hflush();
  // hflush does not sync
  checkSyncMetric(cluster, 0);
  out.hsync();
  // hsync on empty file does nothing
  checkSyncMetric(cluster, 0);
  out.write(1);
  checkSyncMetric(cluster, 0);
  out.hsync();
  checkSyncMetric(cluster, 1);
  // avoiding repeated hsyncs is a potential future optimization
  out.hsync();
  checkSyncMetric(cluster, 2);
  out.hflush();
  // hflush still does not sync
  checkSyncMetric(cluster, 2);
  out.close();
  // close is sync'ing
  checkSyncMetric(cluster, 3);

  // same with a file created without SYNC_BLOCK
  out = fs.create(p, FsPermission.getDefault(),
      EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE),
      4096, (short) 1, len, null);
  out.hsync();
  checkSyncMetric(cluster, 3);
  out.write(1);
  checkSyncMetric(cluster, 3);
  out.hsync();
  checkSyncMetric(cluster, 4);
  // repeated hsyncs
  out.hsync();
  checkSyncMetric(cluster, 5);
  out.close();
  // close does not sync (not opened with SYNC_BLOCK)
  checkSyncMetric(cluster, 5);
  cluster.shutdown();
}
 
Example 7
Source File: TestCheckpoint.java    From hadoop with Apache License 2.0
/**
 * Regression test for HDFS-3849.  This makes sure that when we re-load the
 * FSImage in the 2NN, we clear the existing leases.
 */
@Test
public void testSecondaryNameNodeWithSavedLeases() throws IOException {
  MiniDFSCluster cluster = null;
  SecondaryNameNode secondary = null;
  FSDataOutputStream fos = null;
  Configuration conf = new HdfsConfiguration();
  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDatanodes)
        .format(true).build();
    FileSystem fs = cluster.getFileSystem();
    fos = fs.create(new Path("tmpfile"));
    fos.write(new byte[] { 0, 1, 2, 3 });
    fos.hflush();
    assertEquals(1, cluster.getNamesystem().getLeaseManager().countLease());

    secondary = startSecondaryNameNode(conf);
    assertEquals(0, secondary.getFSNamesystem().getLeaseManager().countLease());

    // Checkpoint once, so the 2NN loads the lease into its in-memory state.
    secondary.doCheckpoint();
    assertEquals(1, secondary.getFSNamesystem().getLeaseManager().countLease());
    fos.close();
    fos = null;

    // Perform a saveNamespace, so that the NN has a new fsimage, and the 2NN
    // therefore needs to download a new fsimage the next time it performs a
    // checkpoint.
    cluster.getNameNodeRpc().setSafeMode(SafeModeAction.SAFEMODE_ENTER, false);
    cluster.getNameNodeRpc().saveNamespace();
    cluster.getNameNodeRpc().setSafeMode(SafeModeAction.SAFEMODE_LEAVE, false);
    
    // Ensure that the 2NN can still perform a checkpoint.
    secondary.doCheckpoint();
    
    // And the leases have been cleared...
    assertEquals(0, secondary.getFSNamesystem().getLeaseManager().countLease());
  } finally {
    if (fos != null) {
      fos.close();
    }
    cleanup(secondary);
    secondary = null;
    cleanup(cluster);
    cluster = null;
  }
}
 
Example 8
Source File: TestDataNodeMetrics.java    From big-c with Apache License 2.0
@Test(timeout=60000)
public void testTimeoutMetric() throws Exception {
  final Configuration conf = new HdfsConfiguration();
  final Path path = new Path("/test");

  final MiniDFSCluster cluster =
      new MiniDFSCluster.Builder(conf).numDataNodes(2).build();

  final List<FSDataOutputStream> streams = Lists.newArrayList();
  try {
    final FSDataOutputStream out =
        cluster.getFileSystem().create(path, (short) 2);
    final DataNodeFaultInjector injector = Mockito.mock
        (DataNodeFaultInjector.class);
    Mockito.doThrow(new IOException("mock IOException")).
        when(injector).
        writeBlockAfterFlush();
    DataNodeFaultInjector.instance = injector;
    streams.add(out);
    out.writeBytes("old gs data\n");
    out.hflush();

    /* Test the metric. */
    final MetricsRecordBuilder dnMetrics =
        getMetrics(cluster.getDataNodes().get(0).getMetrics().name());
    assertCounter("DatanodeNetworkErrors", 1L, dnMetrics);

    /* Test JMX datanode network counts. */
    final MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
    final ObjectName mxbeanName =
        new ObjectName("Hadoop:service=DataNode,name=DataNodeInfo");
    final Object dnc =
        mbs.getAttribute(mxbeanName, "DatanodeNetworkCounts");
    final String allDnc = dnc.toString();
    assertTrue("expected to see loopback address",
        allDnc.indexOf("127.0.0.1") >= 0);
    assertTrue("expected to see networkErrors",
        allDnc.indexOf("networkErrors") >= 0);
  } finally {
    IOUtils.cleanup(LOG, streams.toArray(new Closeable[0]));
    if (cluster != null) {
      cluster.shutdown();
    }
    DataNodeFaultInjector.instance = new DataNodeFaultInjector();
  }
}
 
Example 9
Source File: TestDataNodeMetrics.java    From hadoop with Apache License 2.0
@Test(timeout=60000)
public void testTimeoutMetric() throws Exception {
  final Configuration conf = new HdfsConfiguration();
  final Path path = new Path("/test");

  final MiniDFSCluster cluster =
      new MiniDFSCluster.Builder(conf).numDataNodes(2).build();

  final List<FSDataOutputStream> streams = Lists.newArrayList();
  try {
    final FSDataOutputStream out =
        cluster.getFileSystem().create(path, (short) 2);
    final DataNodeFaultInjector injector = Mockito.mock
        (DataNodeFaultInjector.class);
    Mockito.doThrow(new IOException("mock IOException")).
        when(injector).
        writeBlockAfterFlush();
    DataNodeFaultInjector.instance = injector;
    streams.add(out);
    out.writeBytes("old gs data\n");
    out.hflush();

    /* Test the metric. */
    final MetricsRecordBuilder dnMetrics =
        getMetrics(cluster.getDataNodes().get(0).getMetrics().name());
    assertCounter("DatanodeNetworkErrors", 1L, dnMetrics);

    /* Test JMX datanode network counts. */
    final MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
    final ObjectName mxbeanName =
        new ObjectName("Hadoop:service=DataNode,name=DataNodeInfo");
    final Object dnc =
        mbs.getAttribute(mxbeanName, "DatanodeNetworkCounts");
    final String allDnc = dnc.toString();
    assertTrue("expected to see loopback address",
        allDnc.indexOf("127.0.0.1") >= 0);
    assertTrue("expected to see networkErrors",
        allDnc.indexOf("networkErrors") >= 0);
  } finally {
    IOUtils.cleanup(LOG, streams.toArray(new Closeable[0]));
    if (cluster != null) {
      cluster.shutdown();
    }
    DataNodeFaultInjector.instance = new DataNodeFaultInjector();
  }
}
 
Example 10
Source File: TestBlockTokenWithDFS.java    From hadoop with Apache License 2.0
/**
 * Tests that the WRITE operation can handle token expiration when
 * re-establishing the pipeline is needed.
 */
@Test
public void testWrite() throws Exception {
  MiniDFSCluster cluster = null;
  int numDataNodes = 2;
  Configuration conf = getConf(numDataNodes);

  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(numDataNodes).build();
    cluster.waitActive();
    assertEquals(numDataNodes, cluster.getDataNodes().size());

    final NameNode nn = cluster.getNameNode();
    final BlockManager bm = nn.getNamesystem().getBlockManager();
    final BlockTokenSecretManager sm = bm.getBlockTokenSecretManager();

    // set a short token lifetime (1 second)
    SecurityTestUtil.setBlockTokenLifetime(sm, 1000L);
    Path fileToWrite = new Path(FILE_TO_WRITE);
    FileSystem fs = cluster.getFileSystem();

    FSDataOutputStream stm = writeFile(fs, fileToWrite, (short) numDataNodes,
        BLOCK_SIZE);
    // write a partial block
    int mid = rawData.length - 1;
    stm.write(rawData, 0, mid);
    stm.hflush();

    /*
     * wait till token used in stm expires
     */
    Token<BlockTokenIdentifier> token = DFSTestUtil.getBlockToken(stm);
    while (!SecurityTestUtil.isBlockTokenExpired(token)) {
      try {
        Thread.sleep(10);
      } catch (InterruptedException ignored) {
      }
    }

    // remove a datanode to force re-establishing pipeline
    cluster.stopDataNode(0);
    // write the rest of the file
    stm.write(rawData, mid, rawData.length - mid);
    stm.close();
    // check if write is successful
    FSDataInputStream in4 = fs.open(fileToWrite);
    assertTrue(checkFile1(in4));
  } finally {
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
 
Example 11
Source File: TestPersistBlocks.java    From hadoop with Apache License 2.0
@Test
public void testRestartWithPartialBlockHflushed() throws IOException {
  final Configuration conf = new HdfsConfiguration();
  // Turn off persistent IPC, so that the DFSClient can survive NN restart
  conf.setInt(
      CommonConfigurationKeysPublic.IPC_CLIENT_CONNECTION_MAXIDLETIME_KEY,
      0);
  MiniDFSCluster cluster = null;

  FSDataOutputStream stream;
  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
    FileSystem fs = cluster.getFileSystem();
    NameNode.getAddress(conf).getPort();
    // Creating a file with 4096 blockSize to write multiple blocks
    stream = fs.create(FILE_PATH, true, BLOCK_SIZE, (short) 1, BLOCK_SIZE);
    stream.write(DATA_BEFORE_RESTART);
    stream.write((byte)1);
    stream.hflush();
    
    // explicitly do NOT close the file before restarting the NN.
    cluster.restartNameNode();
    
    // this will fail if the final block of the file is prematurely COMPLETEd
    stream.write((byte)2);
    stream.hflush();
    stream.close();
    
    assertEquals(DATA_BEFORE_RESTART.length + 2,
        fs.getFileStatus(FILE_PATH).getLen());
    
    FSDataInputStream readStream = fs.open(FILE_PATH);
    try {
      byte[] verifyBuf = new byte[DATA_BEFORE_RESTART.length + 2];
      IOUtils.readFully(readStream, verifyBuf, 0, verifyBuf.length);
      byte[] expectedBuf = new byte[DATA_BEFORE_RESTART.length + 2];
      System.arraycopy(DATA_BEFORE_RESTART, 0, expectedBuf, 0,
          DATA_BEFORE_RESTART.length);
      System.arraycopy(new byte[]{1, 2}, 0, expectedBuf,
          DATA_BEFORE_RESTART.length, 2);
      assertArrayEquals(expectedBuf, verifyBuf);
    } finally {
      IOUtils.closeStream(readStream);
    }
  } finally {
    if (cluster != null) { cluster.shutdown(); }
  }
}
 
Example 12
Source File: TestPersistBlocks.java    From big-c with Apache License 2.0
@Test
public void testRestartWithPartialBlockHflushed() throws IOException {
  final Configuration conf = new HdfsConfiguration();
  // Turn off persistent IPC, so that the DFSClient can survive NN restart
  conf.setInt(
      CommonConfigurationKeysPublic.IPC_CLIENT_CONNECTION_MAXIDLETIME_KEY,
      0);
  MiniDFSCluster cluster = null;

  FSDataOutputStream stream;
  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
    FileSystem fs = cluster.getFileSystem();
    NameNode.getAddress(conf).getPort();
    // Creating a file with 4096 blockSize to write multiple blocks
    stream = fs.create(FILE_PATH, true, BLOCK_SIZE, (short) 1, BLOCK_SIZE);
    stream.write(DATA_BEFORE_RESTART);
    stream.write((byte)1);
    stream.hflush();
    
    // explicitly do NOT close the file before restarting the NN.
    cluster.restartNameNode();
    
    // this will fail if the final block of the file is prematurely COMPLETEd
    stream.write((byte)2);
    stream.hflush();
    stream.close();
    
    assertEquals(DATA_BEFORE_RESTART.length + 2,
        fs.getFileStatus(FILE_PATH).getLen());
    
    FSDataInputStream readStream = fs.open(FILE_PATH);
    try {
      byte[] verifyBuf = new byte[DATA_BEFORE_RESTART.length + 2];
      IOUtils.readFully(readStream, verifyBuf, 0, verifyBuf.length);
      byte[] expectedBuf = new byte[DATA_BEFORE_RESTART.length + 2];
      System.arraycopy(DATA_BEFORE_RESTART, 0, expectedBuf, 0,
          DATA_BEFORE_RESTART.length);
      System.arraycopy(new byte[]{1, 2}, 0, expectedBuf,
          DATA_BEFORE_RESTART.length, 2);
      assertArrayEquals(expectedBuf, verifyBuf);
    } finally {
      IOUtils.closeStream(readStream);
    }
  } finally {
    if (cluster != null) { cluster.shutdown(); }
  }
}
 
Example 13
Source File: TestPipelinesFailover.java    From hadoop with Apache License 2.0
/**
 * Test the scenario where the NN fails over after issuing a block
 * synchronization request, but before it is committed. The
 * DN running the recovery should then fail to commit the synchronization
 * and a later retry will succeed.
 */
@Test(timeout=30000)
public void testFailoverRightBeforeCommitSynchronization() throws Exception {
  final Configuration conf = new Configuration();
  // Disable permissions so that another user can recover the lease.
  conf.setBoolean(DFSConfigKeys.DFS_PERMISSIONS_ENABLED_KEY, false);
  conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
  
  FSDataOutputStream stm = null;
  final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .nnTopology(MiniDFSNNTopology.simpleHATopology())
    .numDataNodes(3)
    .build();
  try {
    cluster.waitActive();
    cluster.transitionToActive(0);
    Thread.sleep(500);

    LOG.info("Starting with NN 0 active");
    FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
    stm = fs.create(TEST_PATH);
    
    // write a half block
    AppendTestUtil.write(stm, 0, BLOCK_SIZE / 2);
    stm.hflush();
    
    // Look into the block manager on the active node for the block
    // under construction.
    
    NameNode nn0 = cluster.getNameNode(0);
    ExtendedBlock blk = DFSTestUtil.getFirstBlock(fs, TEST_PATH);
    DatanodeDescriptor expectedPrimary =
        DFSTestUtil.getExpectedPrimaryNode(nn0, blk);
    LOG.info("Expecting block recovery to be triggered on DN " +
        expectedPrimary);
    
    // Find the corresponding DN daemon, and spy on its connection to the
    // active.
    DataNode primaryDN = cluster.getDataNode(expectedPrimary.getIpcPort());
    DatanodeProtocolClientSideTranslatorPB nnSpy =
        DataNodeTestUtils.spyOnBposToNN(primaryDN, nn0);
    
    // Delay the commitBlockSynchronization call
    DelayAnswer delayer = new DelayAnswer(LOG);
    Mockito.doAnswer(delayer).when(nnSpy).commitBlockSynchronization(
        Mockito.eq(blk),
        Mockito.anyInt(), // new genstamp
        Mockito.anyLong(), // new length
        Mockito.eq(true), // close file
        Mockito.eq(false), // delete block
        (DatanodeID[]) Mockito.anyObject(), // new targets
        (String[]) Mockito.anyObject()); // new target storages

    DistributedFileSystem fsOtherUser = createFsAsOtherUser(cluster, conf);
    assertFalse(fsOtherUser.recoverLease(TEST_PATH));
    
    LOG.info("Waiting for commitBlockSynchronization call from primary");
    delayer.waitForCall();

    LOG.info("Failing over to NN 1");
    
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);
    
    // Let the commitBlockSynchronization call go through, and check that
    // it failed with the correct exception.
    delayer.proceed();
    delayer.waitForResult();
    Throwable t = delayer.getThrown();
    if (t == null) {
      fail("commitBlockSynchronization call did not fail on standby");
    }
    GenericTestUtils.assertExceptionContains(
        "Operation category WRITE is not supported",
        t);
    
    // Now, if we try again to recover the block, it should succeed on the new
    // active.
    loopRecoverLease(fsOtherUser, TEST_PATH);
    
    AppendTestUtil.check(fs, TEST_PATH, BLOCK_SIZE/2);
  } finally {
    IOUtils.closeStream(stm);
    cluster.shutdown();
  }
}
 
Example 14
Source File: TestBalancer.java    From big-c with Apache License 2.0
/**
 * Test running many balancers simultaneously.
 *
 * Case 1: The first balancer is running. Starting a second one should fail
 * immediately with an "Another balancer is running. Exiting.." IOException.
 *
 * Case 2: When the second balancer starts, the 'balancer.id' file exists but
 * the lease does not. The second balancer should then run successfully.
 */
@Test(timeout = 100000)
public void testManyBalancerSimultaneously() throws Exception {
  final Configuration conf = new HdfsConfiguration();
  initConf(conf);
  // add an empty node with half of the capacities (4 * CAPACITY) & the same
  // rack
  long[] capacities = new long[] { 4 * CAPACITY };
  String[] racks = new String[] { RACK0 };
  long newCapacity = 2 * CAPACITY;
  String newRack = RACK0;
  LOG.info("capacities = " + long2String(capacities));
  LOG.info("racks      = " + Arrays.asList(racks));
  LOG.info("newCapacity= " + newCapacity);
  LOG.info("newRack    = " + newRack);
  LOG.info("useTool    = " + false);
  assertEquals(capacities.length, racks.length);
  int numOfDatanodes = capacities.length;
  cluster = new MiniDFSCluster.Builder(conf).numDataNodes(capacities.length)
      .racks(racks).simulatedCapacities(capacities).build();
  try {
    cluster.waitActive();
    client = NameNodeProxies.createProxy(conf,
        cluster.getFileSystem(0).getUri(), ClientProtocol.class).getProxy();

    long totalCapacity = sum(capacities);

    // fill up the cluster to be 30% full
    final long totalUsedSpace = totalCapacity * 3 / 10;
    createFile(cluster, filePath, totalUsedSpace / numOfDatanodes,
        (short) numOfDatanodes, 0);
    // start up an empty node with the same capacity and on the same rack
    cluster.startDataNodes(conf, 1, true, null, new String[] { newRack },
        new long[] { newCapacity });

    // Case1: Simulate first balancer by creating 'balancer.id' file. It
    // will keep this file until the balancing operation is completed.
    FileSystem fs = cluster.getFileSystem(0);
    final FSDataOutputStream out = fs
        .create(Balancer.BALANCER_ID_PATH, false);
    out.writeBytes(InetAddress.getLocalHost().getHostName());
    out.hflush();
    assertTrue("'balancer.id' file doesn't exist!",
        fs.exists(Balancer.BALANCER_ID_PATH));

    // start second balancer
    final String[] args = { "-policy", "datanode" };
    final Tool tool = new Cli();
    tool.setConf(conf);
    int exitCode = tool.run(args); // start balancing
    assertEquals("Exit status code mismatches",
        ExitStatus.IO_EXCEPTION.getExitCode(), exitCode);

    // Case2: Release lease so that another balancer would be able to
    // perform balancing.
    out.close();
    assertTrue("'balancer.id' file doesn't exist!",
        fs.exists(Balancer.BALANCER_ID_PATH));
    exitCode = tool.run(args); // start balancing
    assertEquals("Exit status code mismatches",
        ExitStatus.SUCCESS.getExitCode(), exitCode);
  } finally {
    cluster.shutdown();
  }
}
 
Example 15
Source File: TestDNFencing.java    From big-c with Apache License 2.0
/**
 * Test that, when a block is re-opened for append, the related
 * datanode messages are correctly queued by the SBN because
 * they have future states and genstamps.
 */
@Test
public void testQueueingWithAppend() throws Exception {
  int numQueued = 0;
  int numDN = cluster.getDataNodes().size();
  
  // case 1: create file and call hflush after write
  FSDataOutputStream out = fs.create(TEST_FILE_PATH);
  try {
    AppendTestUtil.write(out, 0, 10);
    out.hflush();

    // Opening the file will report RBW replicas, but will be
    // queued on the StandbyNode.
    // However, the delivery of RBW messages is delayed by HDFS-7217 fix.
    // Apply cluster.triggerBlockReports() to trigger the reporting sooner.
    //
    cluster.triggerBlockReports();
    numQueued += numDN; // RBW messages

    // The cluster.triggerBlockReports() call above does a full 
    // block report that incurs 3 extra RBW messages
    numQueued += numDN; // RBW messages      
  } finally {
    IOUtils.closeStream(out);
    numQueued += numDN; // blockReceived messages
  }

  cluster.triggerBlockReports();
  numQueued += numDN;
  assertEquals(numQueued, cluster.getNameNode(1).getNamesystem().
      getPendingDataNodeMessageCount());

  // case 2: append to file and call hflush after write
  try {
    out = fs.append(TEST_FILE_PATH);
    AppendTestUtil.write(out, 10, 10);
    out.hflush();
    cluster.triggerBlockReports();
    numQueued += numDN * 2; // RBW messages, see comments in case 1
  } finally {
    IOUtils.closeStream(out);
    numQueued += numDN; // blockReceived
  }
  assertEquals(numQueued, cluster.getNameNode(1).getNamesystem().
      getPendingDataNodeMessageCount());

  // case 3: similar to case 2, except no hflush is called.
  try {
    out = fs.append(TEST_FILE_PATH);
    AppendTestUtil.write(out, 20, 10);
  } finally {
    // The write operation in the try block is buffered, thus no RBW message
    // is reported yet until the closeStream call here. When closeStream is
    // called, before HDFS-7217 fix, there would be three RBW messages
    // (blockReceiving), plus three FINALIZED messages (blockReceived)
    // delivered to NN. However, because of HDFS-7217 fix, the reporting of
    // RBW  messages is postponed. In this case, they are even overwritten 
    // by the blockReceived messages of the same block when they are waiting
    // to be delivered. All this happens within the closeStream() call.
    // What's delivered to NN is the three blockReceived messages. See 
    //    BPServiceActor#addPendingReplicationBlockInfo 
    //
    IOUtils.closeStream(out);
    numQueued += numDN; // blockReceived
  }

  cluster.triggerBlockReports();
  numQueued += numDN;

  LOG.info("Expect " + numQueued + " and got: " + cluster.getNameNode(1).getNamesystem().
      getPendingDataNodeMessageCount());      

  assertEquals(numQueued, cluster.getNameNode(1).getNamesystem().
      getPendingDataNodeMessageCount());

  cluster.transitionToStandby(0);
  cluster.transitionToActive(1);
  
  // Verify that no replicas are marked corrupt, and that the
  // file is readable from the failed-over standby.
  BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager());
  BlockManagerTestUtil.updateState(nn2.getNamesystem().getBlockManager());
  assertEquals(0, nn1.getNamesystem().getCorruptReplicaBlocks());
  assertEquals(0, nn2.getNamesystem().getCorruptReplicaBlocks());
  
  AppendTestUtil.check(fs, TEST_FILE_PATH, 30);
}
 
Example 16
Source File: HdfsRecoverLeaseTest.java    From lucene-solr with Apache License 2.0
@Test
public void testBasic() throws IOException {
  long startRecoverLeaseSuccessCount = FSHDFSUtils.RECOVER_LEASE_SUCCESS_COUNT.get();
  
  URI uri = dfsCluster.getURI();
  Path path = new Path(uri);
  Configuration conf = HdfsTestUtil.getClientConfiguration(dfsCluster);
  FileSystem fs1 = FileSystem.get(path.toUri(), conf);
  Path testFile = new Path(uri.toString() + "/testfile");
  FSDataOutputStream out = fs1.create(testFile);
  
  out.write(5);
  out.hflush();
  out.close();

  FSHDFSUtils.recoverFileLease(fs1, testFile, conf, new CallerInfo() {
    
    @Override
    public boolean isCallerClosed() {
      return false;
    }
  });
  assertEquals(0, FSHDFSUtils.RECOVER_LEASE_SUCCESS_COUNT.get() - startRecoverLeaseSuccessCount);
  
  fs1.close();

  
  FileSystem fs2 = FileSystem.get(path.toUri(), conf);
  Path testFile2 = new Path(uri.toString() + "/testfile2");
  FSDataOutputStream out2 = fs2.create(testFile2);
  
  if (random().nextBoolean()) {
    int cnt = random().nextInt(100);
    for (int i = 0; i < cnt; i++) {
      out2.write(random().nextInt(20000));
    }
    out2.hflush();
  }

  
  // closing the fs will close the file it seems
  // fs2.close();
  
  FileSystem fs3 = FileSystem.get(path.toUri(), conf);

  FSHDFSUtils.recoverFileLease(fs3, testFile2, conf, new CallerInfo() {
    
    @Override
    public boolean isCallerClosed() {
      return false;
    }
  });
  assertEquals(1, FSHDFSUtils.RECOVER_LEASE_SUCCESS_COUNT.get() - startRecoverLeaseSuccessCount);
  
  fs3.close();
  fs2.close();
}
 
Example 17
Source File: TestFileConcurrentReader.java    From big-c with Apache License 2.0
private void writeFileAndSync(FSDataOutputStream stm, int size)
  throws IOException {
  byte[] buffer = DFSTestUtil.generateSequentialBytes(0, size);
  stm.write(buffer, 0, size);
  stm.hflush();
}
 
Example 18
Source File: TestHFlush.java    From big-c with Apache License 2.0
@Test
public void testHFlushInterrupted() throws Exception {
  final int DATANODE_NUM = 2;
  final int fileLen = 6;
  byte[] fileContents = AppendTestUtil.initBuffer(fileLen);
  Configuration conf = new HdfsConfiguration();
  final Path p = new Path("/hflush-interrupted");

  System.out.println("p=" + p);

  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(DATANODE_NUM).build();
  try {
    DistributedFileSystem fs = cluster.getFileSystem();

    // create a new file.
    FSDataOutputStream stm = AppendTestUtil.createFile(fs, p, DATANODE_NUM);

    stm.write(fileContents, 0, 2);
    Thread.currentThread().interrupt();
    try {
      stm.hflush();
      // If we made it past the hflush(), then that means that the ack made it back
      // from the pipeline before we got to the wait() call. In that case we should
      // still have interrupted status.
      assertTrue(Thread.interrupted());
    } catch (InterruptedIOException ie) {
      System.out.println("Got expected exception during flush");
    }
    assertFalse(Thread.interrupted());

    // Try again to flush should succeed since we no longer have interrupt status
    stm.hflush();

    // Write some more data and flush
    stm.write(fileContents, 2, 2);
    stm.hflush();

    // Write some data and close while interrupted

    stm.write(fileContents, 4, 2);
    Thread.currentThread().interrupt();
    try {
      stm.close();
      // If we made it past the close(), then that means that the ack made it back
      // from the pipeline before we got to the wait() call. In that case we should
      // still have interrupted status.
      assertTrue(Thread.interrupted());
    } catch (InterruptedIOException ioe) {
      System.out.println("Got expected exception during close");
      // If we got the exception, we shouldn't have interrupted status anymore.
      assertFalse(Thread.interrupted());

      // Now do a successful close.
      stm.close();
    }


    // verify that entire file is good
    AppendTestUtil.checkFullFile(fs, p, 4, fileContents,
        "Failed to deal with thread interruptions", false);
  } finally {
    cluster.shutdown();
  }
}
 
Example 19
Source File: TestCrcCorruption.java    From big-c with Apache License 2.0
/** 
 * Test case for data corruption during data transmission for
 * create/write. To recover from corruption while writing, at
 * least two replicas are needed.
 */
@Test(timeout=50000)
public void testCorruptionDuringWrt() throws Exception {
  Configuration conf = new HdfsConfiguration();
  // Set short retry timeouts so this test runs faster
  conf.setInt(DFSConfigKeys.DFS_CLIENT_RETRY_WINDOW_BASE, 10);
  MiniDFSCluster cluster = null;

  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(10).build();
    cluster.waitActive();
    FileSystem fs = cluster.getFileSystem();
    Path file = new Path("/test_corruption_file");
    FSDataOutputStream out = fs.create(file, true, 8192, (short)3, (long)(128*1024*1024));
    byte[] data = new byte[65536];
    for (int i=0; i < 65536; i++) {
      data[i] = (byte)(i % 256);
    }

    for (int i = 0; i < 5; i++) {
      out.write(data, 0, 65535);
    }
    out.hflush();
    // corrupt the packet once
    Mockito.when(faultInjector.corruptPacket()).thenReturn(true, false);
    Mockito.when(faultInjector.uncorruptPacket()).thenReturn(true, false);

    for (int i = 0; i < 5; i++) {
      out.write(data, 0, 65535);
    }
    out.close();
    // read should succeed
    FSDataInputStream in = fs.open(file);
    for(int c; (c = in.read()) != -1; );
    in.close();

    // test the retry limit
    out = fs.create(file, true, 8192, (short)3, (long)(128*1024*1024));

    // corrupt the packet once and never fix it.
    Mockito.when(faultInjector.corruptPacket()).thenReturn(true, false);
    Mockito.when(faultInjector.uncorruptPacket()).thenReturn(false);

    // the client should give up pipeline reconstruction after retries.
    try {
      for (int i = 0; i < 5; i++) {
        out.write(data, 0, 65535);
      }
      out.close();
      fail("Write did not fail");
    } catch (IOException ioe) {
      // we should get an ioe
      DFSClient.LOG.info("Got expected exception", ioe);
    }
  } finally {
    if (cluster != null) { cluster.shutdown(); }
    Mockito.when(faultInjector.corruptPacket()).thenReturn(false);
    Mockito.when(faultInjector.uncorruptPacket()).thenReturn(false);
  }
}
 
Example 20
Source File: TestGetBlocks.java    From hadoop with Apache License 2.0
/**
 * Test whether the datanodes returned by
 * {@link ClientProtocol#getBlockLocations(String, long, long)} are correct
 * when stale node checking is enabled. Also test the scenario in which 1)
 * stale node checking is enabled, 2) a write is in progress, and 3) a
 * datanode becomes stale, all at the same time.
 * 
 * @throws Exception
 */
@Test
public void testReadSelectNonStaleDatanode() throws Exception {
  HdfsConfiguration conf = new HdfsConfiguration();
  conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_AVOID_STALE_DATANODE_FOR_READ_KEY, true);
  long staleInterval = 30 * 1000 * 60;
  conf.setLong(DFSConfigKeys.DFS_NAMENODE_STALE_DATANODE_INTERVAL_KEY,
      staleInterval);
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .numDataNodes(numDatanodes).racks(racks).build();

  cluster.waitActive();
  InetSocketAddress addr = new InetSocketAddress("localhost",
      cluster.getNameNodePort());
  DFSClient client = new DFSClient(addr, conf);
  List<DatanodeDescriptor> nodeInfoList = cluster.getNameNode()
      .getNamesystem().getBlockManager().getDatanodeManager()
      .getDatanodeListForReport(DatanodeReportType.LIVE);
  assertEquals("Unexpected number of datanodes", numDatanodes,
      nodeInfoList.size());
  FileSystem fileSys = cluster.getFileSystem();
  FSDataOutputStream stm = null;
  try {
    // do the writing but do not close the FSDataOutputStream
    // in order to mimic the ongoing writing
    final Path fileName = new Path("/file1");
    stm = fileSys.create(
        fileName,
        true,
        fileSys.getConf().getInt(
            CommonConfigurationKeys.IO_FILE_BUFFER_SIZE_KEY, 4096),
        (short) 3, blockSize);
    stm.write(new byte[(blockSize * 3) / 2]);
    // We do not close the stream so that
    // the writing seems to be still ongoing
    stm.hflush();

    LocatedBlocks blocks = client.getNamenode().getBlockLocations(
        fileName.toString(), 0, blockSize);
    DatanodeInfo[] nodes = blocks.get(0).getLocations();
    assertEquals(nodes.length, 3);
    DataNode staleNode = null;
    DatanodeDescriptor staleNodeInfo = null;
    // stop the heartbeat of the first node
    staleNode = this.stopDataNodeHeartbeat(cluster, nodes[0].getHostName());
    assertNotNull(staleNode);
    // set the first node as stale
    staleNodeInfo = cluster.getNameNode().getNamesystem().getBlockManager()
        .getDatanodeManager()
        .getDatanode(staleNode.getDatanodeId());
    DFSTestUtil.resetLastUpdatesWithOffset(staleNodeInfo,
        -(staleInterval + 1));

    LocatedBlocks blocksAfterStale = client.getNamenode().getBlockLocations(
        fileName.toString(), 0, blockSize);
    DatanodeInfo[] nodesAfterStale = blocksAfterStale.get(0).getLocations();
    assertEquals(nodesAfterStale.length, 3);
    assertEquals(nodesAfterStale[2].getHostName(), nodes[0].getHostName());

    // restart the staleNode's heartbeat
    DataNodeTestUtils.setHeartbeatsDisabledForTests(staleNode, false);
    // reset the first node as non-stale, so as to avoid two stale nodes
    DFSTestUtil.resetLastUpdatesWithOffset(staleNodeInfo, 0);
    LocatedBlock lastBlock = client.getLocatedBlocks(fileName.toString(), 0,
        Long.MAX_VALUE).getLastLocatedBlock();
    nodes = lastBlock.getLocations();
    assertEquals(nodes.length, 3);
    // stop the heartbeat of the first node for the last block
    staleNode = this.stopDataNodeHeartbeat(cluster, nodes[0].getHostName());
    assertNotNull(staleNode);
    // set the node as stale
    DatanodeDescriptor dnDesc = cluster.getNameNode().getNamesystem()
        .getBlockManager().getDatanodeManager()
        .getDatanode(staleNode.getDatanodeId());
    DFSTestUtil.resetLastUpdatesWithOffset(dnDesc, -(staleInterval + 1));

    LocatedBlock lastBlockAfterStale = client.getLocatedBlocks(
        fileName.toString(), 0, Long.MAX_VALUE).getLastLocatedBlock();
    nodesAfterStale = lastBlockAfterStale.getLocations();
    assertEquals(nodesAfterStale.length, 3);
    assertEquals(nodesAfterStale[2].getHostName(), nodes[0].getHostName());
  } finally {
    if (stm != null) {
      stm.close();
    }
    client.close();
    cluster.shutdown();
  }
}