Java Code Examples for org.apache.hadoop.hdfs.DistributedFileSystem#open()

The following examples show how to use org.apache.hadoop.hdfs.DistributedFileSystem#open(). Each example is drawn from an open-source project; the source file, project, and license are noted above each snippet.
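Before the project examples, here is a minimal, self-contained sketch of the basic pattern: obtain a DistributedFileSystem, call open() to get an FSDataInputStream, read from it, and close it. This sketch is not taken from any of the projects below; the NameNode URI hdfs://localhost:8020 and the path /tmp/example.txt are placeholder assumptions for illustration.

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URI;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;

public class DfsOpenSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Placeholder NameNode address; FileSystem.get returns a DistributedFileSystem
    // for an hdfs:// URI when the HDFS client is on the classpath.
    URI uri = URI.create("hdfs://localhost:8020");
    try (DistributedFileSystem fs = (DistributedFileSystem) FileSystem.get(uri, conf);
         // open() returns an FSDataInputStream, which also supports seek()
         // and positional reads such as readFully(offset, buffer).
         FSDataInputStream in = fs.open(new Path("/tmp/example.txt"));
         BufferedReader reader = new BufferedReader(
             new InputStreamReader(in, StandardCharsets.UTF_8))) {
      System.out.println("first line: " + reader.readLine());
    }
  }
}

The tests below follow the same open/read/close shape, but manage the streams and the MiniDFSCluster explicitly (often via IOUtils.cleanup) rather than with try-with-resources.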
Example 1
Source File: TestUnbuffer.java    From big-c with Apache License 2.0
/**
 * Test opening many files via TCP (not short-circuit).
 *
 * This is practical when using unbuffer, because it reduces the number of
 * sockets and amount of memory that we use.
 */
@Test
public void testOpenManyFilesViaTcp() throws Exception {
  final int NUM_OPENS = 500;
  Configuration conf = new Configuration();
  conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, false);
  MiniDFSCluster cluster = null;
  FSDataInputStream[] streams = new FSDataInputStream[NUM_OPENS];
  try {
    cluster = new MiniDFSCluster.Builder(conf).build();
    DistributedFileSystem dfs = cluster.getFileSystem();
    final Path TEST_PATH = new Path("/testFile");
    DFSTestUtil.createFile(dfs, TEST_PATH, 131072, (short)1, 1);

    for (int i = 0; i < NUM_OPENS; i++) {
      streams[i] = dfs.open(TEST_PATH);
      LOG.info("opening file " + i + "...");
      Assert.assertTrue(-1 != streams[i].read());
      streams[i].unbuffer();
    }
  } finally {
    for (FSDataInputStream stream : streams) {
      IOUtils.cleanup(null, stream);
    }
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
 
Example 2
Source File: FlumeHDFSSinkServiceTest.java    From searchanalytics-bigdata with MIT License
@Test
public void testProcessEvents() throws FileNotFoundException, IOException {
	int searchEventsCount = 101;
	List<Event> searchEvents = generateSearchAnalyticsDataService
			.getSearchEvents(searchEventsCount);

	flumeHDFSSinkService.processEvents(searchEvents);

	// list all files and check data.
	Path dirPath = new Path(hadoopClusterService.getHDFSUri()
			+ "/searchevents");
	// FileStatus[] dirStat = fs.listStatus(dirPath);
	// Path fList[] = FileUtil.stat2Paths(dirStat);

	DistributedFileSystem fs = hadoopClusterService.getFileSystem();
	RemoteIterator<LocatedFileStatus> files = fs.listFiles(dirPath, true);
	while (files.hasNext()) {
		LocatedFileStatus locatedFileStatus = files.next();
		System.out.println("Check:" + locatedFileStatus.getPath());
		if (locatedFileStatus.isFile()) {
			Path path = locatedFileStatus.getPath();
			if (path.getName().startsWith("searchevents")) {
				FSDataInputStream input = fs.open(path);
				BufferedReader reader = new BufferedReader(
						new InputStreamReader(input));
				String body = null;
				while ((body = reader.readLine()) != null) {
					System.out.println("body is:" + body);
				}
				reader.close();
				input.close();
			}
		}
	}
}
 
Example 3
Source File: TestByteBufLineReader.java    From tajo with Apache License 2.0
@Test(timeout = 120000)
public void testReaderWithDFS() throws Exception {
  final Configuration conf = TestFileTablespace.getTestHdfsConfiguration();

  final MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .numDataNodes(2).format(true).build();

  TajoConf tajoConf = new TajoConf(conf);
  tajoConf.setVar(TajoConf.ConfVars.ROOT_DIR, cluster.getFileSystem().getUri() + "/tajo");

  Path tablePath = new Path("/testReaderWithDFS");
  Path filePath = new Path(tablePath, "data.dat");
  try {
    DistributedFileSystem fs = cluster.getFileSystem();
    FSDataOutputStream out = fs.create(filePath, true);
    out.write(LINE.getBytes(Charset.defaultCharset()));
    out.write('\n');
    out.close();

    assertTrue(fs.exists(filePath));
    FSDataInputStream inputStream = fs.open(filePath);
    assertTrue(inputStream.getWrappedStream() instanceof ByteBufferReadable);

    ByteBufLineReader lineReader = new ByteBufLineReader(new FSDataInputChannel(inputStream));
    assertEquals(LINE, lineReader.readLine());
    lineReader.seek(0);
    assertEquals(LINE, lineReader.readLine());
    assertNull(lineReader.readLine());

    lineReader.close();
    fs.close();
  } finally {
    cluster.shutdown();
  }
}
 
Example 4
Source File: AbstractSearchJUnit4SpringContextTests.java    From searchanalytics-bigdata with MIT License
protected int printAndCountHdfsFileDirData(String path, String filePrefix,
		boolean print, boolean count) throws IOException {
	int recordsCount = 0;
	DistributedFileSystem fs = hadoopClusterService.getFileSystem();
	RemoteIterator<LocatedFileStatus> files = fs.listFiles(new Path(path),
			true);
	while (files.hasNext()) {
		LocatedFileStatus locatedFileStatus = files.next();
		System.out.println("Check:" + locatedFileStatus.getPath());
		if (locatedFileStatus.isFile()) {
			Path filePath = locatedFileStatus.getPath();
			if (filePath.getName().startsWith(filePrefix)) {
				FSDataInputStream input = fs.open(filePath);
				BufferedReader reader = new BufferedReader(
						new InputStreamReader(input));
				String body = null;
				while ((body = reader.readLine()) != null) {
					if (print) {
						System.out.println("file is: " + filePath.getName() + "body is:" + body);
					}
					if (count) {
						recordsCount++;
					}
				}
				reader.close();
				input.close();
			}
		}
	}
	return recordsCount;
}
 
Example 5
Source File: CompleteSetupIntegrationTest.java    From searchanalytics-bigdata with MIT License
private void FlumehdfsSinkAndTestData(List<Event> searchEvents)
		throws EventDeliveryException, IOException, FileNotFoundException {

	flumeHDFSSinkService.processEvents(searchEvents);

	// list all files and check data.
	Path dirPath = new Path(hadoopClusterService.getHDFSUri()
			+ "/searchevents");
	// FileStatus[] dirStat = fs.listStatus(dirPath);
	// Path fList[] = FileUtil.stat2Paths(dirStat);

	DistributedFileSystem fs = hadoopClusterService.getFileSystem();
	RemoteIterator<LocatedFileStatus> files = fs.listFiles(dirPath, true);
	while (files.hasNext()) {
		LocatedFileStatus locatedFileStatus = files.next();
		System.out.println("Check:" + locatedFileStatus.getPath());
		if (locatedFileStatus.isFile()) {
			Path path = locatedFileStatus.getPath();
			if (path.getName().startsWith("searchevents")) {
				FSDataInputStream input = fs.open(path);
				BufferedReader reader = new BufferedReader(
						new InputStreamReader(input));
				String body = null;
				while ((body = reader.readLine()) != null) {
					System.out.println("body is:" + body);
				}
				reader.close();
				input.close();
			}
		}
	}
}
 
Example 6
Source File: TestStuckDataNode.java    From RDFS with Apache License 2.0
/** This creates a slow writer and checks to see
 * if pipeline heartbeats work fine
 */
public void testStuckDataNode() throws Exception {
  final int DATANODE_NUM = 3;
  Configuration conf = new Configuration();
  final int timeout = 8000;
  conf.setInt("dfs.socket.timeout", timeout);

  final Path p = new Path("/pipelineHeartbeat/foo");
  System.out.println("p=" + p);

  MiniDFSCluster cluster = new MiniDFSCluster(conf, DATANODE_NUM, true, null);
  DistributedFileSystem fs = (DistributedFileSystem) cluster.getFileSystem();

  DataNodeMetrics metrics = cluster.getDataNodes().get(0).myMetrics;
  MetricsTimeVaryingLong spyBytesWritten = spy(metrics.bytesWritten);
  DelayAnswer delayAnswer = new DelayAnswer();
  doAnswer(delayAnswer).when(spyBytesWritten).inc(anyInt());
  metrics.bytesWritten = spyBytesWritten;

  try {
    // create a new file.
    FSDataOutputStream stm = fs.create(p);
    stm.write(1);
    stm.sync();
    stm.write(2);
    stm.close();

    // verify that entire file is good
    FSDataInputStream in = fs.open(p);
    assertEquals(1, in.read());
    assertEquals(2, in.read());
    in.close();
  } finally {
    fs.close();
    cluster.shutdown();
  }
}
 
Example 7
Source File: TestShortCircuitLocalRead.java    From hadoop with Apache License 2.0
/** Check file content, reading as user {@code readingUser} */
static void checkFileContent(URI uri, Path name, byte[] expected,
    int readOffset, String readingUser, Configuration conf,
    boolean legacyShortCircuitFails)
    throws IOException, InterruptedException {
  // Ensure short circuit is enabled
  DistributedFileSystem fs = getFileSystem(readingUser, uri, conf);
  ClientContext getClientContext = ClientContext.getFromConf(conf);
  if (legacyShortCircuitFails) {
    assertFalse(getClientContext.getDisableLegacyBlockReaderLocal());
  }
  
  FSDataInputStream stm = fs.open(name);
  byte[] actual = new byte[expected.length-readOffset];
  stm.readFully(readOffset, actual);
  checkData(actual, readOffset, expected, "Read 2");
  stm.close();
  // Now read using a different API.
  actual = new byte[expected.length-readOffset];
  stm = fs.open(name);
  IOUtils.skipFully(stm, readOffset);
  //Read a small number of bytes first.
  int nread = stm.read(actual, 0, 3);
  nread += stm.read(actual, nread, 2);
  //Read across chunk boundary
  nread += stm.read(actual, nread, 517);
  checkData(actual, readOffset, expected, nread, "A few bytes");
  //Now read rest of it
  while (nread < actual.length) {
    int nbytes = stm.read(actual, nread, actual.length - nread);
    if (nbytes < 0) {
      throw new EOFException("End of file reached before reading fully.");
    }
    nread += nbytes;
  }
  checkData(actual, readOffset, expected, "Read 3");
  
  if (legacyShortCircuitFails) {
    assertTrue(getClientContext.getDisableLegacyBlockReaderLocal());
  }
  stm.close();
}
 
Example 8
Source File: TestHASafeMode.java    From hadoop with Apache License 2.0
/** Test NN crash and client crash/stuck immediately after block allocation */
@Test(timeout = 100000)
public void testOpenFileWhenNNAndClientCrashAfterAddBlock() throws Exception {
  cluster.getConfiguration(0).set(
      DFSConfigKeys.DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY, "1.0f");
  String testData = "testData";
  // to make sure we write the full block before creating dummy block at NN.
  cluster.getConfiguration(0).setInt("io.bytes.per.checksum",
      testData.length());
  cluster.restartNameNode(0);
  try {
    cluster.waitActive();
    cluster.transitionToActive(0);
    cluster.transitionToStandby(1);
    DistributedFileSystem dfs = cluster.getFileSystem(0);
    String pathString = "/tmp1.txt";
    Path filePath = new Path(pathString);
    FSDataOutputStream create = dfs.create(filePath,
        FsPermission.getDefault(), true, 1024, (short) 3, testData.length(),
        null);
    create.write(testData.getBytes());
    create.hflush();
    long fileId = ((DFSOutputStream)create.
        getWrappedStream()).getFileId();
    FileStatus fileStatus = dfs.getFileStatus(filePath);
    DFSClient client = DFSClientAdapter.getClient(dfs);
    // add one dummy block at NN, but not write to DataNode
    ExtendedBlock previousBlock =
        DFSClientAdapter.getPreviousBlock(client, fileId);
    DFSClientAdapter.getNamenode(client).addBlock(
        pathString,
        client.getClientName(),
        new ExtendedBlock(previousBlock),
        new DatanodeInfo[0],
        DFSClientAdapter.getFileId((DFSOutputStream) create
            .getWrappedStream()), null);
    cluster.restartNameNode(0, true);
    cluster.restartDataNode(0);
    cluster.transitionToActive(0);
    // let the block reports be processed.
    Thread.sleep(2000);
    FSDataInputStream is = dfs.open(filePath);
    is.close();
    dfs.recoverLease(filePath);// initiate recovery
    assertTrue("Recovery also should be success", dfs.recoverLease(filePath));
  } finally {
    cluster.shutdown();
  }
}
 
Example 9
Source File: TestFsDatasetCacheRevocation.java    From hadoop with Apache License 2.0
/**
 * Test that when a client has a replica mmapped, we will not un-mlock that
 * replica for a reasonable amount of time, even if an uncache request
 * occurs.
 */
@Test(timeout=120000)
public void testPinning() throws Exception {
  assumeTrue(NativeCodeLoader.isNativeCodeLoaded() && !Path.WINDOWS);
  Configuration conf = getDefaultConf();
  // Set a really long revocation timeout, so that we won't reach it during
  // this test.
  conf.setLong(DFSConfigKeys.DFS_DATANODE_CACHE_REVOCATION_TIMEOUT_MS,
      1800000L);
  // Poll very often
  conf.setLong(DFSConfigKeys.DFS_DATANODE_CACHE_REVOCATION_POLLING_MS, 2L);
  MiniDFSCluster cluster = null;
  cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
  cluster.waitActive();
  DistributedFileSystem dfs = cluster.getFileSystem();

  // Create and cache a file.
  final String TEST_FILE = "/test_file";
  DFSTestUtil.createFile(dfs, new Path(TEST_FILE),
      BLOCK_SIZE, (short)1, 0xcafe);
  dfs.addCachePool(new CachePoolInfo("pool"));
  long cacheDirectiveId =
    dfs.addCacheDirective(new CacheDirectiveInfo.Builder().
      setPool("pool").setPath(new Path(TEST_FILE)).
        setReplication((short) 1).build());
  FsDatasetSpi<?> fsd = cluster.getDataNodes().get(0).getFSDataset();
  DFSTestUtil.verifyExpectedCacheUsage(BLOCK_SIZE, 1, fsd);

  // Mmap the file.
  FSDataInputStream in = dfs.open(new Path(TEST_FILE));
  ByteBuffer buf =
      in.read(null, BLOCK_SIZE, EnumSet.noneOf(ReadOption.class));

  // Attempt to uncache file.  The file should still be cached.
  dfs.removeCacheDirective(cacheDirectiveId);
  Thread.sleep(500);
  DFSTestUtil.verifyExpectedCacheUsage(BLOCK_SIZE, 1, fsd);

  // Un-mmap the file.  The file should be uncached after this.
  in.releaseBuffer(buf);
  DFSTestUtil.verifyExpectedCacheUsage(0, 0, fsd);

  // Cleanup
  in.close();
  cluster.shutdown();
}
 
Example 10
Source File: TestEnhancedByteBufferAccess.java    From hadoop with Apache License 2.0
@Test
public void test2GBMmapLimit() throws Exception {
  Assume.assumeTrue(BlockReaderTestUtil.shouldTestLargeFiles());
  HdfsConfiguration conf = initZeroCopyTest();
  final long TEST_FILE_LENGTH = 2469605888L;
  conf.set(DFSConfigKeys.DFS_CHECKSUM_TYPE_KEY, "NULL");
  conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, TEST_FILE_LENGTH);
  MiniDFSCluster cluster = null;
  final Path TEST_PATH = new Path("/a");
  final String CONTEXT = "test2GBMmapLimit";
  conf.set(DFSConfigKeys.DFS_CLIENT_CONTEXT, CONTEXT);

  FSDataInputStream fsIn = null, fsIn2 = null;
  ByteBuffer buf1 = null, buf2 = null;
  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
    cluster.waitActive();
    DistributedFileSystem fs = cluster.getFileSystem();
    DFSTestUtil.createFile(fs, TEST_PATH, TEST_FILE_LENGTH, (short)1, 0xB);
    DFSTestUtil.waitReplication(fs, TEST_PATH, (short)1);
    
    fsIn = fs.open(TEST_PATH);
    buf1 = fsIn.read(null, 1, EnumSet.of(ReadOption.SKIP_CHECKSUMS));
    Assert.assertEquals(1, buf1.remaining());
    fsIn.releaseBuffer(buf1);
    buf1 = null;
    fsIn.seek(2147483640L);
    buf1 = fsIn.read(null, 1024, EnumSet.of(ReadOption.SKIP_CHECKSUMS));
    Assert.assertEquals(7, buf1.remaining());
    Assert.assertEquals(Integer.MAX_VALUE, buf1.limit());
    fsIn.releaseBuffer(buf1);
    buf1 = null;
    Assert.assertEquals(2147483647L, fsIn.getPos());
    try {
      buf1 = fsIn.read(null, 1024,
          EnumSet.of(ReadOption.SKIP_CHECKSUMS));
      Assert.fail("expected UnsupportedOperationException");
    } catch (UnsupportedOperationException e) {
      // expected; can't read past 2GB boundary.
    }
    fsIn.close();
    fsIn = null;

    // Now create another file with normal-sized blocks, and verify we
    // can read past 2GB
    final Path TEST_PATH2 = new Path("/b");
    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 268435456L);
    DFSTestUtil.createFile(fs, TEST_PATH2, 1024 * 1024, TEST_FILE_LENGTH,
        268435456L, (short)1, 0xA);
    
    fsIn2 = fs.open(TEST_PATH2);
    fsIn2.seek(2147483640L);
    buf2 = fsIn2.read(null, 1024, EnumSet.of(ReadOption.SKIP_CHECKSUMS));
    Assert.assertEquals(8, buf2.remaining());
    Assert.assertEquals(2147483648L, fsIn2.getPos());
    fsIn2.releaseBuffer(buf2);
    buf2 = null;
    buf2 = fsIn2.read(null, 1024, EnumSet.of(ReadOption.SKIP_CHECKSUMS));
    Assert.assertEquals(1024, buf2.remaining());
    Assert.assertEquals(2147484672L, fsIn2.getPos());
    fsIn2.releaseBuffer(buf2);
    buf2 = null;
  } finally {
    if (buf1 != null) {
      fsIn.releaseBuffer(buf1);
    }
    if (buf2 != null) {
      fsIn2.releaseBuffer(buf2);
    }
    IOUtils.cleanup(null, fsIn, fsIn2);
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
 
Example 11
Source File: TestUnbuffer.java    From big-c with Apache License 2.0
/**
 * Test that calling Unbuffer closes sockets.
 */
@Test
public void testUnbufferClosesSockets() throws Exception {
  Configuration conf = new Configuration();
  // Set a new ClientContext.  This way, we will have our own PeerCache,
  // rather than sharing one with other unit tests.
  conf.set(DFSConfigKeys.DFS_CLIENT_CONTEXT,
      "testUnbufferClosesSocketsContext");

  // Disable short-circuit reads.  With short-circuit, we wouldn't hold open a
  // TCP socket.
  conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, false);

  // Set a really long socket timeout to avoid test timing issues.
  conf.setLong(DFSConfigKeys.DFS_CLIENT_SOCKET_TIMEOUT_KEY,
      100000000L);
  conf.setLong(DFSConfigKeys.DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_KEY,
      100000000L);

  MiniDFSCluster cluster = null;
  FSDataInputStream stream = null;
  try {
    cluster = new MiniDFSCluster.Builder(conf).build();
    DistributedFileSystem dfs = (DistributedFileSystem)
        FileSystem.newInstance(conf);
    final Path TEST_PATH = new Path("/test1");
    DFSTestUtil.createFile(dfs, TEST_PATH, 128, (short)1, 1);
    stream = dfs.open(TEST_PATH);
    // Read a byte.  This will trigger the creation of a block reader.
    stream.seek(2);
    int b = stream.read();
    Assert.assertTrue(-1 != b);

    // The Peer cache should start off empty.
    PeerCache cache = dfs.getClient().getClientContext().getPeerCache();
    Assert.assertEquals(0, cache.size());

    // Unbuffer should clear the block reader and return the socket to the
    // cache.
    stream.unbuffer();
    stream.seek(2);
    Assert.assertEquals(1, cache.size());
    int b2 = stream.read();
    Assert.assertEquals(b, b2);
  } finally {
    if (stream != null) {
      IOUtils.cleanup(null, stream);
    }
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
 
Example 12
Source File: TestUnbuffer.java    From hadoop with Apache License 2.0
/**
 * Test that calling Unbuffer closes sockets.
 */
@Test
public void testUnbufferClosesSockets() throws Exception {
  Configuration conf = new Configuration();
  // Set a new ClientContext.  This way, we will have our own PeerCache,
  // rather than sharing one with other unit tests.
  conf.set(DFSConfigKeys.DFS_CLIENT_CONTEXT,
      "testUnbufferClosesSocketsContext");

  // Disable short-circuit reads.  With short-circuit, we wouldn't hold open a
  // TCP socket.
  conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, false);

  // Set a really long socket timeout to avoid test timing issues.
  conf.setLong(DFSConfigKeys.DFS_CLIENT_SOCKET_TIMEOUT_KEY,
      100000000L);
  conf.setLong(DFSConfigKeys.DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_KEY,
      100000000L);

  MiniDFSCluster cluster = null;
  FSDataInputStream stream = null;
  try {
    cluster = new MiniDFSCluster.Builder(conf).build();
    DistributedFileSystem dfs = (DistributedFileSystem)
        FileSystem.newInstance(conf);
    final Path TEST_PATH = new Path("/test1");
    DFSTestUtil.createFile(dfs, TEST_PATH, 128, (short)1, 1);
    stream = dfs.open(TEST_PATH);
    // Read a byte.  This will trigger the creation of a block reader.
    stream.seek(2);
    int b = stream.read();
    Assert.assertTrue(-1 != b);

    // The Peer cache should start off empty.
    PeerCache cache = dfs.getClient().getClientContext().getPeerCache();
    Assert.assertEquals(0, cache.size());

    // Unbuffer should clear the block reader and return the socket to the
    // cache.
    stream.unbuffer();
    stream.seek(2);
    Assert.assertEquals(1, cache.size());
    int b2 = stream.read();
    Assert.assertEquals(b, b2);
  } finally {
    if (stream != null) {
      IOUtils.cleanup(null, stream);
    }
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
 
Example 13
Source File: TestEnhancedByteBufferAccess.java    From big-c with Apache License 2.0
@Test
public void testClientMmapDisable() throws Exception {
  HdfsConfiguration conf = initZeroCopyTest();
  conf.setBoolean(DFS_CLIENT_MMAP_ENABLED, false);
  MiniDFSCluster cluster = null;
  final Path TEST_PATH = new Path("/a");
  final int TEST_FILE_LENGTH = 16385;
  final int RANDOM_SEED = 23453;
  final String CONTEXT = "testClientMmapDisable";
  FSDataInputStream fsIn = null;
  DistributedFileSystem fs = null;
  conf.set(DFSConfigKeys.DFS_CLIENT_CONTEXT, CONTEXT);

  try {
    // With DFS_CLIENT_MMAP_ENABLED set to false, we should not do memory
    // mapped reads.
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
    cluster.waitActive();
    fs = cluster.getFileSystem();
    DFSTestUtil.createFile(fs, TEST_PATH,
        TEST_FILE_LENGTH, (short)1, RANDOM_SEED);
    DFSTestUtil.waitReplication(fs, TEST_PATH, (short)1);
    fsIn = fs.open(TEST_PATH);
    try {
      fsIn.read(null, 1, EnumSet.of(ReadOption.SKIP_CHECKSUMS));
      Assert.fail("expected zero-copy read to fail when client mmaps " +
          "were disabled.");
    } catch (UnsupportedOperationException e) {
    }
  } finally {
    if (fsIn != null) fsIn.close();
    if (fs != null) fs.close();
    if (cluster != null) cluster.shutdown();
  }

  fsIn = null;
  fs = null;
  cluster = null;
  try {
    // Now try again with DFS_CLIENT_MMAP_CACHE_SIZE == 0.  It should work.
    conf.setBoolean(DFS_CLIENT_MMAP_ENABLED, true);
    conf.setInt(DFS_CLIENT_MMAP_CACHE_SIZE, 0);
    conf.set(DFSConfigKeys.DFS_CLIENT_CONTEXT, CONTEXT + ".1");
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
    cluster.waitActive();
    fs = cluster.getFileSystem();
    DFSTestUtil.createFile(fs, TEST_PATH,
        TEST_FILE_LENGTH, (short)1, RANDOM_SEED);
    DFSTestUtil.waitReplication(fs, TEST_PATH, (short)1);
    fsIn = fs.open(TEST_PATH);
    ByteBuffer buf = fsIn.read(null, 1, EnumSet.of(ReadOption.SKIP_CHECKSUMS));
    fsIn.releaseBuffer(buf);
    // Test EOF behavior
    IOUtils.skipFully(fsIn, TEST_FILE_LENGTH - 1);
    buf = fsIn.read(null, 1, EnumSet.of(ReadOption.SKIP_CHECKSUMS));
    Assert.assertEquals(null, buf);
  } finally {
    if (fsIn != null) fsIn.close();
    if (fs != null) fs.close();
    if (cluster != null) cluster.shutdown();
  }
}
 
Example 14
Source File: TestEnhancedByteBufferAccess.java    From big-c with Apache License 2.0
@Test
public void test2GBMmapLimit() throws Exception {
  Assume.assumeTrue(BlockReaderTestUtil.shouldTestLargeFiles());
  HdfsConfiguration conf = initZeroCopyTest();
  final long TEST_FILE_LENGTH = 2469605888L;
  conf.set(DFSConfigKeys.DFS_CHECKSUM_TYPE_KEY, "NULL");
  conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, TEST_FILE_LENGTH);
  MiniDFSCluster cluster = null;
  final Path TEST_PATH = new Path("/a");
  final String CONTEXT = "test2GBMmapLimit";
  conf.set(DFSConfigKeys.DFS_CLIENT_CONTEXT, CONTEXT);

  FSDataInputStream fsIn = null, fsIn2 = null;
  ByteBuffer buf1 = null, buf2 = null;
  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
    cluster.waitActive();
    DistributedFileSystem fs = cluster.getFileSystem();
    DFSTestUtil.createFile(fs, TEST_PATH, TEST_FILE_LENGTH, (short)1, 0xB);
    DFSTestUtil.waitReplication(fs, TEST_PATH, (short)1);
    
    fsIn = fs.open(TEST_PATH);
    buf1 = fsIn.read(null, 1, EnumSet.of(ReadOption.SKIP_CHECKSUMS));
    Assert.assertEquals(1, buf1.remaining());
    fsIn.releaseBuffer(buf1);
    buf1 = null;
    fsIn.seek(2147483640L);
    buf1 = fsIn.read(null, 1024, EnumSet.of(ReadOption.SKIP_CHECKSUMS));
    Assert.assertEquals(7, buf1.remaining());
    Assert.assertEquals(Integer.MAX_VALUE, buf1.limit());
    fsIn.releaseBuffer(buf1);
    buf1 = null;
    Assert.assertEquals(2147483647L, fsIn.getPos());
    try {
      buf1 = fsIn.read(null, 1024,
          EnumSet.of(ReadOption.SKIP_CHECKSUMS));
      Assert.fail("expected UnsupportedOperationException");
    } catch (UnsupportedOperationException e) {
      // expected; can't read past 2GB boundary.
    }
    fsIn.close();
    fsIn = null;

    // Now create another file with normal-sized blocks, and verify we
    // can read past 2GB
    final Path TEST_PATH2 = new Path("/b");
    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, 268435456L);
    DFSTestUtil.createFile(fs, TEST_PATH2, 1024 * 1024, TEST_FILE_LENGTH,
        268435456L, (short)1, 0xA);
    
    fsIn2 = fs.open(TEST_PATH2);
    fsIn2.seek(2147483640L);
    buf2 = fsIn2.read(null, 1024, EnumSet.of(ReadOption.SKIP_CHECKSUMS));
    Assert.assertEquals(8, buf2.remaining());
    Assert.assertEquals(2147483648L, fsIn2.getPos());
    fsIn2.releaseBuffer(buf2);
    buf2 = null;
    buf2 = fsIn2.read(null, 1024, EnumSet.of(ReadOption.SKIP_CHECKSUMS));
    Assert.assertEquals(1024, buf2.remaining());
    Assert.assertEquals(2147484672L, fsIn2.getPos());
    fsIn2.releaseBuffer(buf2);
    buf2 = null;
  } finally {
    if (buf1 != null) {
      fsIn.releaseBuffer(buf1);
    }
    if (buf2 != null) {
      fsIn2.releaseBuffer(buf2);
    }
    IOUtils.cleanup(null, fsIn, fsIn2);
    if (cluster != null) {
      cluster.shutdown();
    }
  }
}
 
Example 15
Source File: TestShortCircuitLocalRead.java    From big-c with Apache License 2.0
/** Check file content, reading as user {@code readingUser} */
static void checkFileContent(URI uri, Path name, byte[] expected,
    int readOffset, String readingUser, Configuration conf,
    boolean legacyShortCircuitFails)
    throws IOException, InterruptedException {
  // Ensure short circuit is enabled
  DistributedFileSystem fs = getFileSystem(readingUser, uri, conf);
  ClientContext getClientContext = ClientContext.getFromConf(conf);
  if (legacyShortCircuitFails) {
    assertFalse(getClientContext.getDisableLegacyBlockReaderLocal());
  }
  
  FSDataInputStream stm = fs.open(name);
  byte[] actual = new byte[expected.length-readOffset];
  stm.readFully(readOffset, actual);
  checkData(actual, readOffset, expected, "Read 2");
  stm.close();
  // Now read using a different API.
  actual = new byte[expected.length-readOffset];
  stm = fs.open(name);
  IOUtils.skipFully(stm, readOffset);
  //Read a small number of bytes first.
  int nread = stm.read(actual, 0, 3);
  nread += stm.read(actual, nread, 2);
  //Read across chunk boundary
  nread += stm.read(actual, nread, 517);
  checkData(actual, readOffset, expected, nread, "A few bytes");
  //Now read rest of it
  while (nread < actual.length) {
    int nbytes = stm.read(actual, nread, actual.length - nread);
    if (nbytes < 0) {
      throw new EOFException("End of file reached before reading fully.");
    }
    nread += nbytes;
  }
  checkData(actual, readOffset, expected, "Read 3");
  
  if (legacyShortCircuitFails) {
    assertTrue(getClientContext.getDisableLegacyBlockReaderLocal());
  }
  stm.close();
}
 
Example 16
Source File: TestShortCircuitLocalRead.java    From big-c with Apache License 2.0
/** Check the file content, reading as user {@code readingUser} */
static void checkFileContentDirect(URI uri, Path name, byte[] expected,
    int readOffset, String readingUser, Configuration conf,
    boolean legacyShortCircuitFails)
    throws IOException, InterruptedException {
  // Ensure short circuit is enabled
  DistributedFileSystem fs = getFileSystem(readingUser, uri, conf);
  ClientContext clientContext = ClientContext.getFromConf(conf);
  if (legacyShortCircuitFails) {
    assertTrue(clientContext.getDisableLegacyBlockReaderLocal());
  }
  
  HdfsDataInputStream stm = (HdfsDataInputStream)fs.open(name);

  ByteBuffer actual = ByteBuffer.allocateDirect(expected.length - readOffset);

  IOUtils.skipFully(stm, readOffset);

  actual.limit(3);

  //Read a small number of bytes first.
  int nread = stm.read(actual);
  actual.limit(nread + 2);
  nread += stm.read(actual);

  // Read across chunk boundary
  actual.limit(Math.min(actual.capacity(), nread + 517));
  nread += stm.read(actual);
  checkData(arrayFromByteBuffer(actual), readOffset, expected, nread,
      "A few bytes");
  //Now read rest of it
  actual.limit(actual.capacity());
  while (actual.hasRemaining()) {
    int nbytes = stm.read(actual);

    if (nbytes < 0) {
      throw new EOFException("End of file reached before reading fully.");
    }
    nread += nbytes;
  }
  checkData(arrayFromByteBuffer(actual), readOffset, expected, "Read 3");
  if (legacyShortCircuitFails) {
    assertTrue(clientContext.getDisableLegacyBlockReaderLocal());
  }
  stm.close();
}
 
Example 17
Source File: TestHASafeMode.java    From big-c with Apache License 2.0
/** Test NN crash and client crash/stuck immediately after block allocation */
@Test(timeout = 100000)
public void testOpenFileWhenNNAndClientCrashAfterAddBlock() throws Exception {
  cluster.getConfiguration(0).set(
      DFSConfigKeys.DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY, "1.0f");
  String testData = "testData";
  // to make sure we write the full block before creating dummy block at NN.
  cluster.getConfiguration(0).setInt("io.bytes.per.checksum",
      testData.length());
  cluster.restartNameNode(0);
  try {
    cluster.waitActive();
    cluster.transitionToActive(0);
    cluster.transitionToStandby(1);
    DistributedFileSystem dfs = cluster.getFileSystem(0);
    String pathString = "/tmp1.txt";
    Path filePath = new Path(pathString);
    FSDataOutputStream create = dfs.create(filePath,
        FsPermission.getDefault(), true, 1024, (short) 3, testData.length(),
        null);
    create.write(testData.getBytes());
    create.hflush();
    long fileId = ((DFSOutputStream)create.
        getWrappedStream()).getFileId();
    FileStatus fileStatus = dfs.getFileStatus(filePath);
    DFSClient client = DFSClientAdapter.getClient(dfs);
    // add one dummy block at NN, but not write to DataNode
    ExtendedBlock previousBlock =
        DFSClientAdapter.getPreviousBlock(client, fileId);
    DFSClientAdapter.getNamenode(client).addBlock(
        pathString,
        client.getClientName(),
        new ExtendedBlock(previousBlock),
        new DatanodeInfo[0],
        DFSClientAdapter.getFileId((DFSOutputStream) create
            .getWrappedStream()), null);
    cluster.restartNameNode(0, true);
    cluster.restartDataNode(0);
    cluster.transitionToActive(0);
    // let the block reports be processed.
    Thread.sleep(2000);
    FSDataInputStream is = dfs.open(filePath);
    is.close();
    dfs.recoverLease(filePath);// initiate recovery
    assertTrue("Recovery also should be success", dfs.recoverLease(filePath));
  } finally {
    cluster.shutdown();
  }
}
 
Example 18
Source File: TestFsDatasetCacheRevocation.java    From big-c with Apache License 2.0
/**
 * Test that when a client has a replica mmapped, we will not un-mlock that
 * replica for a reasonable amount of time, even if an uncache request
 * occurs.
 */
@Test(timeout=120000)
public void testPinning() throws Exception {
  assumeTrue(NativeCodeLoader.isNativeCodeLoaded() && !Path.WINDOWS);
  Configuration conf = getDefaultConf();
  // Set a really long revocation timeout, so that we won't reach it during
  // this test.
  conf.setLong(DFSConfigKeys.DFS_DATANODE_CACHE_REVOCATION_TIMEOUT_MS,
      1800000L);
  // Poll very often
  conf.setLong(DFSConfigKeys.DFS_DATANODE_CACHE_REVOCATION_POLLING_MS, 2L);
  MiniDFSCluster cluster = null;
  cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
  cluster.waitActive();
  DistributedFileSystem dfs = cluster.getFileSystem();

  // Create and cache a file.
  final String TEST_FILE = "/test_file";
  DFSTestUtil.createFile(dfs, new Path(TEST_FILE),
      BLOCK_SIZE, (short)1, 0xcafe);
  dfs.addCachePool(new CachePoolInfo("pool"));
  long cacheDirectiveId =
    dfs.addCacheDirective(new CacheDirectiveInfo.Builder().
      setPool("pool").setPath(new Path(TEST_FILE)).
        setReplication((short) 1).build());
  FsDatasetSpi<?> fsd = cluster.getDataNodes().get(0).getFSDataset();
  DFSTestUtil.verifyExpectedCacheUsage(BLOCK_SIZE, 1, fsd);

  // Mmap the file.
  FSDataInputStream in = dfs.open(new Path(TEST_FILE));
  ByteBuffer buf =
      in.read(null, BLOCK_SIZE, EnumSet.noneOf(ReadOption.class));

  // Attempt to uncache file.  The file should still be cached.
  dfs.removeCacheDirective(cacheDirectiveId);
  Thread.sleep(500);
  DFSTestUtil.verifyExpectedCacheUsage(BLOCK_SIZE, 1, fsd);

  // Un-mmap the file.  The file should be uncached after this.
  in.releaseBuffer(buf);
  DFSTestUtil.verifyExpectedCacheUsage(0, 0, fsd);

  // Cleanup
  in.close();
  cluster.shutdown();
}
 
Example 19
Source File: TestFsDatasetCacheRevocation.java    From big-c with Apache License 2.0
/**
 * Test that when we have an uncache request, and the client refuses to release
 * the replica for a long time, we will un-mlock it.
 */
@Test(timeout=120000)
public void testRevocation() throws Exception {
  assumeTrue(NativeCodeLoader.isNativeCodeLoaded() && !Path.WINDOWS);
  BlockReaderTestUtil.enableHdfsCachingTracing();
  BlockReaderTestUtil.enableShortCircuitShmTracing();
  Configuration conf = getDefaultConf();
  // Set a really short revocation timeout.
  conf.setLong(DFSConfigKeys.DFS_DATANODE_CACHE_REVOCATION_TIMEOUT_MS, 250L);
  // Poll very often
  conf.setLong(DFSConfigKeys.DFS_DATANODE_CACHE_REVOCATION_POLLING_MS, 2L);
  MiniDFSCluster cluster = null;
  cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
  cluster.waitActive();
  DistributedFileSystem dfs = cluster.getFileSystem();

  // Create and cache a file.
  final String TEST_FILE = "/test_file2";
  DFSTestUtil.createFile(dfs, new Path(TEST_FILE),
      BLOCK_SIZE, (short)1, 0xcafe);
  dfs.addCachePool(new CachePoolInfo("pool"));
  long cacheDirectiveId =
      dfs.addCacheDirective(new CacheDirectiveInfo.Builder().
          setPool("pool").setPath(new Path(TEST_FILE)).
          setReplication((short) 1).build());
  FsDatasetSpi<?> fsd = cluster.getDataNodes().get(0).getFSDataset();
  DFSTestUtil.verifyExpectedCacheUsage(BLOCK_SIZE, 1, fsd);

  // Mmap the file.
  FSDataInputStream in = dfs.open(new Path(TEST_FILE));
  ByteBuffer buf =
      in.read(null, BLOCK_SIZE, EnumSet.noneOf(ReadOption.class));

  // Attempt to uncache file.  The file should get uncached.
  LOG.info("removing cache directive {}", cacheDirectiveId);
  dfs.removeCacheDirective(cacheDirectiveId);
  LOG.info("finished removing cache directive {}", cacheDirectiveId);
  Thread.sleep(1000);
  DFSTestUtil.verifyExpectedCacheUsage(0, 0, fsd);

  // Cleanup
  in.releaseBuffer(buf);
  in.close();
  cluster.shutdown();
}
 
Example 20
Source File: TestEnhancedByteBufferAccess.java    From hadoop with Apache License 2.0
@Test
public void testClientMmapDisable() throws Exception {
  HdfsConfiguration conf = initZeroCopyTest();
  conf.setBoolean(DFS_CLIENT_MMAP_ENABLED, false);
  MiniDFSCluster cluster = null;
  final Path TEST_PATH = new Path("/a");
  final int TEST_FILE_LENGTH = 16385;
  final int RANDOM_SEED = 23453;
  final String CONTEXT = "testClientMmapDisable";
  FSDataInputStream fsIn = null;
  DistributedFileSystem fs = null;
  conf.set(DFSConfigKeys.DFS_CLIENT_CONTEXT, CONTEXT);

  try {
    // With DFS_CLIENT_MMAP_ENABLED set to false, we should not do memory
    // mapped reads.
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
    cluster.waitActive();
    fs = cluster.getFileSystem();
    DFSTestUtil.createFile(fs, TEST_PATH,
        TEST_FILE_LENGTH, (short)1, RANDOM_SEED);
    DFSTestUtil.waitReplication(fs, TEST_PATH, (short)1);
    fsIn = fs.open(TEST_PATH);
    try {
      fsIn.read(null, 1, EnumSet.of(ReadOption.SKIP_CHECKSUMS));
      Assert.fail("expected zero-copy read to fail when client mmaps " +
          "were disabled.");
    } catch (UnsupportedOperationException e) {
    }
  } finally {
    if (fsIn != null) fsIn.close();
    if (fs != null) fs.close();
    if (cluster != null) cluster.shutdown();
  }

  fsIn = null;
  fs = null;
  cluster = null;
  try {
    // Now try again with DFS_CLIENT_MMAP_CACHE_SIZE == 0.  It should work.
    conf.setBoolean(DFS_CLIENT_MMAP_ENABLED, true);
    conf.setInt(DFS_CLIENT_MMAP_CACHE_SIZE, 0);
    conf.set(DFSConfigKeys.DFS_CLIENT_CONTEXT, CONTEXT + ".1");
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build();
    cluster.waitActive();
    fs = cluster.getFileSystem();
    DFSTestUtil.createFile(fs, TEST_PATH,
        TEST_FILE_LENGTH, (short)1, RANDOM_SEED);
    DFSTestUtil.waitReplication(fs, TEST_PATH, (short)1);
    fsIn = fs.open(TEST_PATH);
    ByteBuffer buf = fsIn.read(null, 1, EnumSet.of(ReadOption.SKIP_CHECKSUMS));
    fsIn.releaseBuffer(buf);
    // Test EOF behavior
    IOUtils.skipFully(fsIn, TEST_FILE_LENGTH - 1);
    buf = fsIn.read(null, 1, EnumSet.of(ReadOption.SKIP_CHECKSUMS));
    Assert.assertEquals(null, buf);
  } finally {
    if (fsIn != null) fsIn.close();
    if (fs != null) fs.close();
    if (cluster != null) cluster.shutdown();
  }
}