Java Code Examples for org.apache.hadoop.fs.FSDataOutputStream#write()

The following examples show how to use org.apache.hadoop.fs.FSDataOutputStream#write(). Each example is taken from an open-source project; the source file, project, and license are listed above the code.
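
Before the project examples, here is a minimal, self-contained sketch of the usual pattern: obtain a FileSystem from the configuration, create an FSDataOutputStream, write bytes with one of the write() overloads, and close the stream. The class name, path, and payload text below are placeholders for illustration and are not taken from any of the projects that follow.

import java.io.IOException;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FSDataOutputStreamWriteSketch {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    // Placeholder path; any Hadoop-compatible file system URI works here.
    Path path = new Path("/tmp/fsdataoutputstream-write-sketch.txt");
    FileSystem fs = path.getFileSystem(conf);

    // try-with-resources closes the stream even if a write fails.
    try (FSDataOutputStream out = fs.create(path, true /* overwrite */)) {
      byte[] payload = "hello, hadoop\n".getBytes(StandardCharsets.UTF_8);
      out.write(payload);                     // write(byte[])
      out.write(payload, 0, payload.length);  // write(byte[], int, int)
      out.write('!');                         // write(int) writes the low-order byte
      out.hflush();                           // push buffered data to the file system
    }
  }
}

Using try-with-resources (rather than the explicit try/finally seen in several examples below) guarantees the stream is closed on both the success and failure paths.
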
Example 1
Source File: CopyCommands.java    From big-c with Apache License 2.0
@Override
protected void processArguments(LinkedList<PathData> items)
throws IOException {
  super.processArguments(items);
  if (exitCode != 0) { // check for error collecting paths
    return;
  }
  FSDataOutputStream out = dst.fs.create(dst.path);
  try {
    for (PathData src : srcs) {
      FSDataInputStream in = src.fs.open(src.path);
      try {
        IOUtils.copyBytes(in, out, getConf(), false);
        if (delimiter != null) {
          out.write(delimiter.getBytes("UTF-8"));
        }
      } finally {
        in.close();
      }
    }
  } finally {
    out.close();
  }      
}
 
Example 2
Source File: ContinuousFileProcessingTest.java    From flink with Apache License 2.0
/**
 * Creates a file whose lines follow the pre-determined format
 * {@code fileIdx + ": " + sampleLine + " " + lineNo}.
 */
private static Tuple2<org.apache.hadoop.fs.Path, String> createFileAndFillWithData(
			String base, String fileName, int fileIdx, String sampleLine) throws IOException {

	assert (hdfs != null);

	final String fileRandSuffix = UUID.randomUUID().toString();

	org.apache.hadoop.fs.Path file = new org.apache.hadoop.fs.Path(base + "/" + fileName + fileRandSuffix);
	Assert.assertFalse(hdfs.exists(file));

	org.apache.hadoop.fs.Path tmp = new org.apache.hadoop.fs.Path(base + "/." + fileName + fileRandSuffix);
	FSDataOutputStream stream = hdfs.create(tmp);
	StringBuilder str = new StringBuilder();
	for (int i = 0; i < LINES_PER_FILE; i++) {
		String line = fileIdx + ": " + sampleLine + " " + i + "\n";
		str.append(line);
		stream.write(line.getBytes(ConfigConstants.DEFAULT_CHARSET));
	}
	stream.close();

	hdfs.rename(tmp, file);

	Assert.assertTrue("No result file present", hdfs.exists(file));
	return new Tuple2<>(file, str.toString());
}
 
Example 3
Source File: TestFavoredNodesEndToEnd.java    From big-c with Apache License 2.0
@Test(timeout = 180000)
public void testFavoredNodesEndToEndForAppend() throws Exception {
  // create 10 files with random preferred nodes
  for (int i = 0; i < NUM_FILES; i++) {
    Random rand = new Random(System.currentTimeMillis() + i);
    // pass a newly created rand so as to get a uniform distribution each time
    // without too many collisions (look at the do-while loop in getDatanodes)
    InetSocketAddress datanode[] = getDatanodes(rand);
    Path p = new Path("/filename" + i);
    // create and close the file.
    dfs.create(p, FsPermission.getDefault(), true, 4096, (short) 3, 4096L,
        null, null).close();
    // re-open for append
    FSDataOutputStream out = dfs.append(p, EnumSet.of(CreateFlag.APPEND),
        4096, null, datanode);
    out.write(SOME_BYTES);
    out.close();
    BlockLocation[] locations = getBlockLocations(p);
    // verify the files got created in the right nodes
    for (BlockLocation loc : locations) {
      String[] hosts = loc.getNames();
      String[] hosts1 = getStringForInetSocketAddrs(datanode);
      assertTrue(compareNodes(hosts, hosts1));
    }
  }
}
 
Example 4
Source File: TestBlocksScheduledCounter.java    From RDFS with Apache License 2.0
public void testBlocksScheduledCounter() throws IOException {
  
  MiniDFSCluster cluster = new MiniDFSCluster(new Configuration(), 1, 
                                              true, null);
  cluster.waitActive();
  FileSystem fs = cluster.getFileSystem();
  
  // open a file and write a few bytes:
  FSDataOutputStream out = fs.create(new Path("/testBlockScheduledCounter"));
  for (int i=0; i<1024; i++) {
    out.write(i);
  }
  // flush to make sure a block is allocated.
  ((DFSOutputStream)(out.getWrappedStream())).sync();
  
  ArrayList<DatanodeDescriptor> dnList = new ArrayList<DatanodeDescriptor>();
  cluster.getNameNode().namesystem.DFSNodesStatus(dnList, dnList);
  DatanodeDescriptor dn = dnList.get(0);
  
  assertEquals(1, dn.getBlocksScheduled());
 
  // close the file and the counter should go to zero.
  out.close();   
  assertEquals(0, dn.getBlocksScheduled());
}
 
Example 5
Source File: TestBlocksScheduledCounter.java    From hadoop-gpu with Apache License 2.0
public void testBlocksScheduledCounter() throws IOException {
  
  MiniDFSCluster cluster = new MiniDFSCluster(new Configuration(), 1, 
                                              true, null);
  cluster.waitActive();
  FileSystem fs = cluster.getFileSystem();
  
  // open a file and write a few bytes:
  FSDataOutputStream out = fs.create(new Path("/testBlockScheduledCounter"));
  for (int i=0; i<1024; i++) {
    out.write(i);
  }
  // flush to make sure a block is allocated.
  ((DFSOutputStream)(out.getWrappedStream())).sync();
  
  ArrayList<DatanodeDescriptor> dnList = new ArrayList<DatanodeDescriptor>();
  cluster.getNameNode().namesystem.DFSNodesStatus(dnList, dnList);
  DatanodeDescriptor dn = dnList.get(0);
  
  assertEquals(1, dn.getBlocksScheduled());
 
  // close the file and the counter should go to zero.
  out.close();   
  assertEquals(0, dn.getBlocksScheduled());
}
 
Example 6
Source File: TestLeaseRecovery3.java    From RDFS with Apache License 2.0
private Path createFile(DistributedFileSystem dfs, int size
    ) throws IOException, InterruptedException {
  // create a random file name
  String filestr = "/foo" + AppendTestUtil.nextInt();
  System.out.println("filestr=" + filestr);
  Path filepath = new Path(filestr);
  FSDataOutputStream stm = dfs.create(filepath, true,
      bufferSize, REPLICATION_NUM, BLOCK_SIZE);
  assertTrue(dfs.dfs.exists(filestr));

  // write random number of bytes into it.
  System.out.println("size=" + size);
  stm.write(buffer, 0, size);

  // sync file
  AppendTestUtil.LOG.info("sync");
  stm.sync();

  // write another piece of data to file. This piece of data
  // is not yet synced
  stm.write(buffer, 0, size);
  return filepath;
}
 
Example 7
Source File: OfflineMetaRebuildTestCore.java    From hbase with Apache License 2.0
protected RegionInfo createRegion(Configuration conf, final Table htbl,
    byte[] startKey, byte[] endKey) throws IOException {
  Table meta = TEST_UTIL.getConnection().getTable(TableName.META_TABLE_NAME);
  RegionInfo hri = RegionInfoBuilder.newBuilder(htbl.getName())
      .setStartKey(startKey)
      .setEndKey(endKey)
      .build();

  LOG.info("manually adding regioninfo and hdfs data: " + hri.toString());
  Path rootDir = CommonFSUtils.getRootDir(conf);
  FileSystem fs = rootDir.getFileSystem(conf);
  Path p = new Path(CommonFSUtils.getTableDir(rootDir, htbl.getName()),
      hri.getEncodedName());
  fs.mkdirs(p);
  Path riPath = new Path(p, HRegionFileSystem.REGION_INFO_FILE);
  FSDataOutputStream out = fs.create(riPath);
  out.write(RegionInfo.toDelimitedByteArray(hri));
  out.close();

  // add to meta.
  MetaTableAccessor.addRegionToMeta(TEST_UTIL.getConnection(), hri);
  meta.close();
  return hri;
}
 
Example 8
Source File: TestWasbFsck.java    From hadoop with Apache License 2.0
/**
 * Tests that we delete dangling files properly
 */
@Test
public void testDelete() throws Exception {
  Path danglingFile = new Path("/crashedInTheMiddle");

  // Create a file and leave it dangling and try to delete it.
  FSDataOutputStream stream = fs.create(danglingFile);
  stream.write(new byte[] { 1, 2, 3 });
  stream.flush();

  // Now we should still only see a zero-byte file in this place
  FileStatus fileStatus = fs.getFileStatus(danglingFile);
  assertNotNull(fileStatus);
  assertEquals(0, fileStatus.getLen());
  assertEquals(1, getNumTempBlobs());

  // Run WasbFsck -delete to delete the file.
  runFsck("-delete");

  // Now we should see no trace of the file.
  assertEquals(0, getNumTempBlobs());
  assertFalse(fs.exists(danglingFile));
}
 
Example 9
Source File: BinaryReader.java    From marklogic-contentpump with Apache License 2.0
@Override
public void write(DocumentURI uri, BytesWritable content)
        throws IOException, InterruptedException {
    String pathStr = dir.getName() + uri.getUri();
    Path path = new Path(pathStr);
    FileSystem fs = path.getFileSystem(conf);
    FSDataOutputStream out = fs.create(path, false);
    System.out.println("writing to: " + path);
    out.write(content.getBytes(), 0, content.getLength());
    out.flush();
    out.close();
}
 
Example 10
Source File: TestFSOutputSummer.java    From RDFS with Apache License 2.0
private void writeFile3(Path name) throws Exception {
  FSDataOutputStream stm = fileSys.create(name, true, 
      fileSys.getConf().getInt("io.file.buffer.size", 4096),
      NUM_OF_DATANODES, BLOCK_SIZE);
  stm.write(expected, 0, HALF_CHUNK_SIZE);
  stm.write(expected, HALF_CHUNK_SIZE, BYTES_PER_CHECKSUM+2);
  stm.write(expected, HALF_CHUNK_SIZE+BYTES_PER_CHECKSUM+2, 2);
  stm.write(expected, HALF_CHUNK_SIZE+BYTES_PER_CHECKSUM+4, HALF_CHUNK_SIZE);
  stm.write(expected, BLOCK_SIZE+4, BYTES_PER_CHECKSUM-4);
  stm.write(expected, BLOCK_SIZE+BYTES_PER_CHECKSUM, 
      FILE_SIZE-3*BYTES_PER_CHECKSUM);
  stm.close();
  checkFile(name);
  cleanupFile(name);
}
 
Example 11
Source File: TestSwiftFileSystemPartitionedUploads.java    From hadoop with Apache License 2.0
/**
 * Uploads a very large partitioned file and verifies that
 * it comes back unchanged.
 * @throws Throwable
 */
@Test(timeout = SWIFT_BULK_IO_TEST_TIMEOUT)
public void testManyPartitionedFile() throws Throwable {
  final Path path = new Path("/test/testManyPartitionedFile");

  int len = PART_SIZE_BYTES * 15;
  final byte[] src = SwiftTestUtils.dataset(len, 32, 144);
  FSDataOutputStream out = fs.create(path,
                                     false,
                                     getBufferSize(),
                                     (short) 1,
                                     BLOCK_SIZE);

  out.write(src, 0, src.length);
  int expected =
    getExpectedPartitionsWritten(len, PART_SIZE_BYTES, true);
  out.close();
  assertPartitionsWritten("write completed", out, expected);
  assertEquals("too few bytes written", len,
               SwiftNativeFileSystem.getBytesWritten(out));
  assertEquals("too few bytes uploaded", len,
               SwiftNativeFileSystem.getBytesUploaded(out));
  //now we verify that the data comes back. If it
  //doesn't, it means that the ordering of the partitions
  //isn't right
  byte[] dest = readDataset(fs, path, len);
  //compare data
  SwiftTestUtils.compareByteArrays(src, dest, len);
  //finally, check the data
  FileStatus[] stats = fs.listStatus(path);
  assertEquals("wrong entry count in "
               + SwiftTestUtils.dumpStats(path.toString(), stats),
               expected, stats.length);
}
 
Example 12
Source File: TestInMemoryFileSystem.java    From hudi with Apache License 2.0
@Test
public void testCreateWriteGetFileAsBytes() throws IOException {
  Path outerInMemFSPath = getRandomOuterInMemPath();
  FSDataOutputStream out = outerInMemFSPath.getFileSystem(conf).create(outerInMemFSPath, true);
  // write random bytes
  byte[] randomBytes = new byte[RANDOM.nextInt(1000)];
  RANDOM.nextBytes(randomBytes);
  out.write(randomBytes);
  out.close();
  InMemoryFileSystem inMemoryFileSystem = (InMemoryFileSystem) outerInMemFSPath.getFileSystem(conf);
  byte[] bytesRead = inMemoryFileSystem.getFileAsBytes();
  assertArrayEquals(randomBytes, bytesRead);
  assertEquals(InMemoryFileSystem.SCHEME, inMemoryFileSystem.getScheme());
  assertEquals(URI.create(outerInMemFSPath.toString()), inMemoryFileSystem.getUri());
}
 
Example 13
Source File: TestHSync.java    From big-c with Apache License 2.0
/** Test that syncBlock is correctly performed at replicas */
@Test
public void testHSyncWithReplication() throws Exception {
  Configuration conf = new HdfsConfiguration();
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
  final FileSystem fs = cluster.getFileSystem();

  final Path p = new Path("/testHSyncWithReplication/foo");
  final int len = 1 << 16;
  FSDataOutputStream out = fs.create(p, FsPermission.getDefault(),
      EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE, CreateFlag.SYNC_BLOCK),
      4096, (short) 3, len, null);
  out.write(1);
  out.hflush();
  checkSyncMetric(cluster, 0, 0);
  checkSyncMetric(cluster, 1, 0);
  checkSyncMetric(cluster, 2, 0);
  out.hsync();
  checkSyncMetric(cluster, 0, 1);
  checkSyncMetric(cluster, 1, 1);
  checkSyncMetric(cluster, 2, 1);
  out.hsync();
  checkSyncMetric(cluster, 0, 2);
  checkSyncMetric(cluster, 1, 2);
  checkSyncMetric(cluster, 2, 2);
  cluster.shutdown();
}
 
Example 14
Source File: TestLargeBlock.java    From RDFS with Apache License 2.0
/**
 * Writes pattern to file
 */
static void writeFile(FSDataOutputStream stm, final long fileSize) throws IOException {
  final int writeSize = pattern.length * 8 * 1024 * 1024; // write in chunks of 64 MB
  final int writeCount = (int) ((fileSize / ((long) writeSize)) + ((fileSize % ((long) writeSize) == 0L) ? 0L : 1L));

  if (writeSize > Integer.MAX_VALUE) {
    throw new IOException("A single write is too large " + writeSize);
  } 

  long bytesToWrite = fileSize;
  byte[] b = new byte[writeSize];

  // initialize buffer
  for (int j = 0; j < writeSize; j++) {
    b[j] = pattern[j % pattern.length];
  }

  int i = 0;

  while (bytesToWrite > 0) {
    int thiswrite = (int) Math.min(writeSize, bytesToWrite); // how many bytes we are writing in this iteration

    stm.write(b, 0, thiswrite);
    // System.out.println("Wrote[" + i + "/" + writeCount + "] " + thiswrite + " bytes.");
    bytesToWrite -= thiswrite;
    i++;
  }
}
 
Example 15
Source File: TestModTime.java    From big-c with Apache License 2.0
private void writeFile(FileSystem fileSys, Path name, int repl)
  throws IOException {
  // create and write a file that contains three blocks of data
  FSDataOutputStream stm = fileSys.create(name, true, fileSys.getConf()
      .getInt(CommonConfigurationKeys.IO_FILE_BUFFER_SIZE_KEY, 4096),
      (short) repl, blockSize);
  byte[] buffer = new byte[fileSize];
  Random rand = new Random(seed);
  rand.nextBytes(buffer);
  stm.write(buffer);
  stm.close();
}
 
Example 16
Source File: StringWriter.java    From flink with Apache License 2.0
@Override
public void write(T element) throws IOException {
	FSDataOutputStream outputStream = getStream();
	outputStream.write(element.toString().getBytes(charset));
	outputStream.write(rowDelimiterBytes);
}
 
Example 17
Source File: TestShortCircuitLocalRead.java    From hadoop with Apache License 2.0
@Test(timeout=10000)
public void testSkipWithVerifyChecksum() throws IOException {
  int size = blockSize;
  Configuration conf = new Configuration();
  conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true);
  conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY, false);
  conf.set(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY,
      "/tmp/testSkipWithVerifyChecksum._PORT");
  DomainSocket.disableBindPathValidation();
  if (simulatedStorage) {
    SimulatedFSDataset.setFactory(conf);
  }
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1)
      .format(true).build();
  FileSystem fs = cluster.getFileSystem();
  try {
    // check that / exists
    Path path = new Path("/");
    assertTrue("/ should be a directory", fs.getFileStatus(path)
        .isDirectory() == true);
    
    byte[] fileData = AppendTestUtil.randomBytes(seed, size*3);
    // create a new file in the home directory and write the test data to it
    Path file1 = new Path("filelocal.dat");
    FSDataOutputStream stm = createFile(fs, file1, 1);

    // write to file
    stm.write(fileData);
    stm.close();
    
    // now test the skip function
    FSDataInputStream instm = fs.open(file1);
    byte[] actual = new byte[fileData.length];
    // read something from the block first, otherwise BlockReaderLocal.skip()
    // will not be invoked
    int nread = instm.read(actual, 0, 3);
    long skipped = 2*size+3;
    instm.seek(skipped);
    nread = instm.read(actual, (int)(skipped + nread), 3);
    instm.close();
      
  } finally {
    fs.close();
    cluster.shutdown();
  }
}
 
Example 18
Source File: JobSplitWriter.java    From big-c with Apache License 2.0
private static void writeSplitHeader(FSDataOutputStream out) 
throws IOException {
  out.write(SPLIT_FILE_HEADER);
  out.writeInt(splitVersion);
}
 
Example 19
Source File: TestShortCircuitLocalRead.java    From big-c with Apache License 2.0
/**
 * Test that file data can be read by reading the block file
 * directly from the local store.
 */
public void doTestShortCircuitReadImpl(boolean ignoreChecksum, int size,
    int readOffset, String shortCircuitUser, String readingUser,
    boolean legacyShortCircuitFails) throws IOException, InterruptedException {
  Configuration conf = new Configuration();
  conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true);
  conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY,
      ignoreChecksum);
  // Set a random client context name so that we don't share a cache with
  // other invocations of this function.
  conf.set(DFSConfigKeys.DFS_CLIENT_CONTEXT,
      UUID.randomUUID().toString());
  conf.set(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY,
      new File(sockDir.getDir(),
        "TestShortCircuitLocalRead._PORT.sock").getAbsolutePath());
  if (shortCircuitUser != null) {
    conf.set(DFSConfigKeys.DFS_BLOCK_LOCAL_PATH_ACCESS_USER_KEY,
        shortCircuitUser);
    conf.setBoolean(DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL, true);
  }
  if (simulatedStorage) {
    SimulatedFSDataset.setFactory(conf);
  }
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1)
      .format(true).build();
  FileSystem fs = cluster.getFileSystem();
  try {
    // check that / exists
    Path path = new Path("/");
    assertTrue("/ should be a directory", fs.getFileStatus(path)
        .isDirectory() == true);
    
    byte[] fileData = AppendTestUtil.randomBytes(seed, size);
    Path file1 = fs.makeQualified(new Path("filelocal.dat"));
    FSDataOutputStream stm = createFile(fs, file1, 1);
    stm.write(fileData);
    stm.close();
    
    URI uri = cluster.getURI();
    checkFileContent(uri, file1, fileData, readOffset, readingUser, conf,
        legacyShortCircuitFails);
    checkFileContentDirect(uri, file1, fileData, readOffset, readingUser,
        conf, legacyShortCircuitFails);
  } finally {
    fs.close();
    cluster.shutdown();
  }
}
 
Example 20
Source File: TestCombineFileInputFormat.java    From big-c with Apache License 2.0
/**
 * Test that directories do not get included as part of getSplits()
 */
@Test
public void testGetSplitsWithDirectory() throws Exception {
  MiniDFSCluster dfs = null;
  try {
    Configuration conf = new Configuration();
    dfs = new MiniDFSCluster.Builder(conf).racks(rack1).hosts(hosts1)
        .build();
    dfs.waitActive();

    FileSystem fileSys = dfs.getFileSystem();

    // Set up the following directory structure:
    // /dir1/: directory
    // /dir1/file: regular file
    // /dir1/dir2/: directory
    Path dir1 = new Path("/dir1");
    Path file = new Path("/dir1/file1");
    Path dir2 = new Path("/dir1/dir2");
    if (!fileSys.mkdirs(dir1)) {
      throw new IOException("Mkdirs failed to create " + dir1.toString());
    }
    FSDataOutputStream out = fileSys.create(file);
    out.write(new byte[0]);
    out.close();
    if (!fileSys.mkdirs(dir2)) {
      throw new IOException("Mkdirs failed to create " + dir2.toString());
    }

    // split it using a CombinedFile input format
    DummyInputFormat inFormat = new DummyInputFormat();
    Job job = Job.getInstance(conf);
    FileInputFormat.setInputPaths(job, "/dir1");
    List<InputSplit> splits = inFormat.getSplits(job);

    // directories should be omitted from getSplits() - we should only see file1 and not dir2
    assertEquals(1, splits.size());
    CombineFileSplit fileSplit = (CombineFileSplit) splits.get(0);
    assertEquals(1, fileSplit.getNumPaths());
    assertEquals(file.getName(), fileSplit.getPath(0).getName());
    assertEquals(0, fileSplit.getOffset(0));
    assertEquals(0, fileSplit.getLength(0));
  } finally {
    if (dfs != null) {
      dfs.shutdown();
    }
  }
}