Java Code Examples for org.apache.hadoop.fs.FSDataOutputStream#write()

The following examples show how to use org.apache.hadoop.fs.FSDataOutputStream#write(). Each example is taken from an open-source project; the source file, project, and license are listed above the code.
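
Before the project examples, here is a minimal, self-contained sketch of the usual pattern: obtain a FileSystem from the configuration, create an FSDataOutputStream, write bytes with one of the write() overloads, and close the stream. The class name, path, and payload text below are placeholders for illustration and are not taken from any of the projects that follow.

import java.io.IOException;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FSDataOutputStreamWriteSketch {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    // Placeholder path; any Hadoop-compatible file system URI works here.
    Path path = new Path("/tmp/fsdataoutputstream-write-sketch.txt");
    FileSystem fs = path.getFileSystem(conf);

    // try-with-resources closes the stream even if a write fails.
    try (FSDataOutputStream out = fs.create(path, true /* overwrite */)) {
      byte[] payload = "hello, hadoop\n".getBytes(StandardCharsets.UTF_8);
      out.write(payload);                     // write(byte[])
      out.write(payload, 0, payload.length);  // write(byte[], int, int)
      out.write('!');                         // write(int) writes the low-order byte
      out.hflush();                           // push buffered data to the file system
    }
  }
}

Using try-with-resources (rather than the explicit try/finally seen in several examples below) guarantees the stream is closed on both the success and failure paths.
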
Example 1
Source File: CopyCommands.java    From big-c with Apache License 2.0
@Override
protected void processArguments(LinkedList<PathData> items)
throws IOException {
  super.processArguments(items);
  if (exitCode != 0) { // check for error collecting paths
    return;
  }
  FSDataOutputStream out = dst.fs.create(dst.path);
  try {
    for (PathData src : srcs) {
      FSDataInputStream in = src.fs.open(src.path);
      try {
        IOUtils.copyBytes(in, out, getConf(), false);
        if (delimiter != null) {
          out.write(delimiter.getBytes("UTF-8"));
        }
      } finally {
        in.close();
      }
    }
  } finally {
    out.close();
  }      
}
 
Example 2
Source File: ContinuousFileProcessingTest.java    From flink with Apache License 2.0
/**
 * Creates a file whose lines follow the pre-determined format
 * {@code fileIdx + ": " + sampleLine + " " + lineNo}.
 */
private static Tuple2<org.apache.hadoop.fs.Path, String> createFileAndFillWithData(
			String base, String fileName, int fileIdx, String sampleLine) throws IOException {

	assert (hdfs != null);

	final String fileRandSuffix = UUID.randomUUID().toString();

	org.apache.hadoop.fs.Path file = new org.apache.hadoop.fs.Path(base + "/" + fileName + fileRandSuffix);
	Assert.assertFalse(hdfs.exists(file));

	org.apache.hadoop.fs.Path tmp = new org.apache.hadoop.fs.Path(base + "/." + fileName + fileRandSuffix);
	FSDataOutputStream stream = hdfs.create(tmp);
	StringBuilder str = new StringBuilder();
	for (int i = 0; i < LINES_PER_FILE; i++) {
		String line = fileIdx + ": " + sampleLine + " " + i + "\n";
		str.append(line);
		stream.write(line.getBytes(ConfigConstants.DEFAULT_CHARSET));
	}
	stream.close();

	hdfs.rename(tmp, file);

	Assert.assertTrue("No result file present", hdfs.exists(file));
	return new Tuple2<>(file, str.toString());
}
 
Example 3
Source File: TestFavoredNodesEndToEnd.java    From big-c with Apache License 2.0
@Test(timeout = 180000)
public void testFavoredNodesEndToEndForAppend() throws Exception {
  // create 10 files with random preferred nodes
  for (int i = 0; i < NUM_FILES; i++) {
    Random rand = new Random(System.currentTimeMillis() + i);
    // pass a newly created rand so as to get a uniform distribution each time
    // without too many collisions (look at the do-while loop in getDatanodes)
    InetSocketAddress datanode[] = getDatanodes(rand);
    Path p = new Path("/filename" + i);
    // create and close the file.
    dfs.create(p, FsPermission.getDefault(), true, 4096, (short) 3, 4096L,
        null, null).close();
    // re-open for append
    FSDataOutputStream out = dfs.append(p, EnumSet.of(CreateFlag.APPEND),
        4096, null, datanode);
    out.write(SOME_BYTES);
    out.close();
    BlockLocation[] locations = getBlockLocations(p);
    // verify the files got created in the right nodes
    for (BlockLocation loc : locations) {
      String[] hosts = loc.getNames();
      String[] hosts1 = getStringForInetSocketAddrs(datanode);
      assertTrue(compareNodes(hosts, hosts1));
    }
  }
}
 
Example 4
Source File: TestBlocksScheduledCounter.java    From RDFS with Apache License 2.0
public void testBlocksScheduledCounter() throws IOException {
  
  MiniDFSCluster cluster = new MiniDFSCluster(new Configuration(), 1, 
                                              true, null);
  cluster.waitActive();
  FileSystem fs = cluster.getFileSystem();
  
  // open a file and write a few bytes:
  FSDataOutputStream out = fs.create(new Path("/testBlockScheduledCounter"));
  for (int i=0; i<1024; i++) {
    out.write(i);
  }
  // flush to make sure a block is allocated.
  ((DFSOutputStream)(out.getWrappedStream())).sync();
  
  ArrayList<DatanodeDescriptor> dnList = new ArrayList<DatanodeDescriptor>();
  cluster.getNameNode().namesystem.DFSNodesStatus(dnList, dnList);
  DatanodeDescriptor dn = dnList.get(0);
  
  assertEquals(1, dn.getBlocksScheduled());
 
  // close the file and the counter should go to zero.
  out.close();   
  assertEquals(0, dn.getBlocksScheduled());
}
 
Example 5
Source File: TestBlocksScheduledCounter.java    From hadoop-gpu with Apache License 2.0
public void testBlocksScheduledCounter() throws IOException {
  
  MiniDFSCluster cluster = new MiniDFSCluster(new Configuration(), 1, 
                                              true, null);
  cluster.waitActive();
  FileSystem fs = cluster.getFileSystem();
  
  // open a file and write a few bytes:
  FSDataOutputStream out = fs.create(new Path("/testBlockScheduledCounter"));
  for (int i=0; i<1024; i++) {
    out.write(i);
  }
  // flush to make sure a block is allocated.
  ((DFSOutputStream)(out.getWrappedStream())).sync();
  
  ArrayList<DatanodeDescriptor> dnList = new ArrayList<DatanodeDescriptor>();
  cluster.getNameNode().namesystem.DFSNodesStatus(dnList, dnList);
  DatanodeDescriptor dn = dnList.get(0);
  
  assertEquals(1, dn.getBlocksScheduled());
 
  // close the file and the counter should go to zero.
  out.close();   
  assertEquals(0, dn.getBlocksScheduled());
}
 
Example 6
Source File: TestLeaseRecovery3.java    From RDFS with Apache License 2.0
private Path createFile(DistributedFileSystem dfs, int size
    ) throws IOException, InterruptedException {
  // create a random file name
  String filestr = "/foo" + AppendTestUtil.nextInt();
  System.out.println("filestr=" + filestr);
  Path filepath = new Path(filestr);
  FSDataOutputStream stm = dfs.create(filepath, true,
      bufferSize, REPLICATION_NUM, BLOCK_SIZE);
  assertTrue(dfs.dfs.exists(filestr));

  // write random number of bytes into it.
  System.out.println("size=" + size);
  stm.write(buffer, 0, size);

  // sync file
  AppendTestUtil.LOG.info("sync");
  stm.sync();

  // write another piece of data to file. This piece of data
  // is not yet synced
  stm.write(buffer, 0, size);
  return filepath;
}
 
Example 7
Source File: OfflineMetaRebuildTestCore.java    From hbase with Apache License 2.0
protected RegionInfo createRegion(Configuration conf, final Table htbl,
    byte[] startKey, byte[] endKey) throws IOException {
  Table meta = TEST_UTIL.getConnection().getTable(TableName.META_TABLE_NAME);
  RegionInfo hri = RegionInfoBuilder.newBuilder(htbl.getName())
      .setStartKey(startKey)
      .setEndKey(endKey)
      .build();

  LOG.info("manually adding regioninfo and hdfs data: " + hri.toString());
  Path rootDir = CommonFSUtils.getRootDir(conf);
  FileSystem fs = rootDir.getFileSystem(conf);
  Path p = new Path(CommonFSUtils.getTableDir(rootDir, htbl.getName()),
      hri.getEncodedName());
  fs.mkdirs(p);
  Path riPath = new Path(p, HRegionFileSystem.REGION_INFO_FILE);
  FSDataOutputStream out = fs.create(riPath);
  out.write(RegionInfo.toDelimitedByteArray(hri));
  out.close();

  // add to meta.
  MetaTableAccessor.addRegionToMeta(TEST_UTIL.getConnection(), hri);
  meta.close();
  return hri;
}
 
Example 8
Source File: TestWasbFsck.java    From hadoop with Apache License 2.0
/**
 * Tests that we delete dangling files properly
 */
@Test
public void testDelete() throws Exception {
  Path danglingFile = new Path("/crashedInTheMiddle");

  // Create a file and leave it dangling and try to delete it.
  FSDataOutputStream stream = fs.create(danglingFile);
  stream.write(new byte[] { 1, 2, 3 });
  stream.flush();

  // Now we should still only see a zero-byte file in this place
  FileStatus fileStatus = fs.getFileStatus(danglingFile);
  assertNotNull(fileStatus);
  assertEquals(0, fileStatus.getLen());
  assertEquals(1, getNumTempBlobs());

  // Run WasbFsck -delete to delete the file.
  runFsck("-delete");

  // Now we should see no trace of the file.
  assertEquals(0, getNumTempBlobs());
  assertFalse(fs.exists(danglingFile));
}
 
Example 9
Source File: BinaryReader.java    From marklogic-contentpump with Apache License 2.0
@Override
public void write(DocumentURI uri, BytesWritable content)
        throws IOException, InterruptedException {
    String pathStr = dir.getName() + uri.getUri();
    Path path = new Path(pathStr);
    FileSystem fs = path.getFileSystem(conf);
    FSDataOutputStream out = fs.create(path, false);
    System.out.println("writing to: " + path);
    out.write(content.getBytes(), 0, content.getLength());
    out.flush();
    out.close();
}
 
Example 10
Source File: TestFSOutputSummer.java    From RDFS with Apache License 2.0
private void writeFile3(Path name) throws Exception {
  FSDataOutputStream stm = fileSys.create(name, true, 
      fileSys.getConf().getInt("io.file.buffer.size", 4096),
      NUM_OF_DATANODES, BLOCK_SIZE);
  stm.write(expected, 0, HALF_CHUNK_SIZE);
  stm.write(expected, HALF_CHUNK_SIZE, BYTES_PER_CHECKSUM+2);
  stm.write(expected, HALF_CHUNK_SIZE+BYTES_PER_CHECKSUM+2, 2);
  stm.write(expected, HALF_CHUNK_SIZE+BYTES_PER_CHECKSUM+4, HALF_CHUNK_SIZE);
  stm.write(expected, BLOCK_SIZE+4, BYTES_PER_CHECKSUM-4);
  stm.write(expected, BLOCK_SIZE+BYTES_PER_CHECKSUM, 
      FILE_SIZE-3*BYTES_PER_CHECKSUM);
  stm.close();
  checkFile(name);
  cleanupFile(name);
}
 
Example 11
Source File: TestSwiftFileSystemPartitionedUploads.java    From hadoop with Apache License 2.0
/**
 * Uploads a very large partitioned file and verifies that
 * it comes back unchanged.
 * @throws Throwable
 */
@Test(timeout = SWIFT_BULK_IO_TEST_TIMEOUT)
public void testManyPartitionedFile() throws Throwable {
  final Path path = new Path("/test/testManyPartitionedFile");

  int len = PART_SIZE_BYTES * 15;
  final byte[] src = SwiftTestUtils.dataset(len, 32, 144);
  FSDataOutputStream out = fs.create(path,
                                     false,
                                     getBufferSize(),
                                     (short) 1,
                                     BLOCK_SIZE);

  out.write(src, 0, src.length);
  int expected =
    getExpectedPartitionsWritten(len, PART_SIZE_BYTES, true);
  out.close();
  assertPartitionsWritten("write completed", out, expected);
  assertEquals("too few bytes written", len,
               SwiftNativeFileSystem.getBytesWritten(out));
  assertEquals("too few bytes uploaded", len,
               SwiftNativeFileSystem.getBytesUploaded(out));
  //now we verify that the data comes back. If it
  //doesn't, it means that the ordering of the partitions
  //isn't right
  byte[] dest = readDataset(fs, path, len);
  //compare data
  SwiftTestUtils.compareByteArrays(src, dest, len);
  //finally, check the data
  FileStatus[] stats = fs.listStatus(path);
  assertEquals("wrong entry count in "
               + SwiftTestUtils.dumpStats(path.toString(), stats),
               expected, stats.length);
}
 
Example 12
Source File: TestInMemoryFileSystem.java    From hudi with Apache License 2.0
@Test
public void testCreateWriteGetFileAsBytes() throws IOException {
  Path outerInMemFSPath = getRandomOuterInMemPath();
  FSDataOutputStream out = outerInMemFSPath.getFileSystem(conf).create(outerInMemFSPath, true);
  // write random bytes
  byte[] randomBytes = new byte[RANDOM.nextInt(1000)];
  RANDOM.nextBytes(randomBytes);
  out.write(randomBytes);
  out.close();
  InMemoryFileSystem inMemoryFileSystem = (InMemoryFileSystem) outerInMemFSPath.getFileSystem(conf);
  byte[] bytesRead = inMemoryFileSystem.getFileAsBytes();
  assertArrayEquals(randomBytes, bytesRead);
  assertEquals(InMemoryFileSystem.SCHEME, inMemoryFileSystem.getScheme());
  assertEquals(URI.create(outerInMemFSPath.toString()), inMemoryFileSystem.getUri());
}
 
Example 13
Source File: TestHSync.java    From big-c with Apache License 2.0
/** Test that syncBlock is correctly performed at replicas */
@Test
public void testHSyncWithReplication() throws Exception {
  Configuration conf = new HdfsConfiguration();
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
  final FileSystem fs = cluster.getFileSystem();

  final Path p = new Path("/testHSyncWithReplication/foo");
  final int len = 1 << 16;
  FSDataOutputStream out = fs.create(p, FsPermission.getDefault(),
      EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE, CreateFlag.SYNC_BLOCK),
      4096, (short) 3, len, null);
  out.write(1);
  out.hflush();
  checkSyncMetric(cluster, 0, 0);
  checkSyncMetric(cluster, 1, 0);
  checkSyncMetric(cluster, 2, 0);
  out.hsync();
  checkSyncMetric(cluster, 0, 1);
  checkSyncMetric(cluster, 1, 1);
  checkSyncMetric(cluster, 2, 1);
  out.hsync();
  checkSyncMetric(cluster, 0, 2);
  checkSyncMetric(cluster, 1, 2);
  checkSyncMetric(cluster, 2, 2);
  cluster.shutdown();
}
 
Example 14
Source File: TestLargeBlock.java    From RDFS with Apache License 2.0
/**
 * Writes pattern to file
 */
static void writeFile(FSDataOutputStream stm, final long fileSize) throws IOException {
  final int writeSize = pattern.length * 8 * 1024 * 1024; // write in chunks of 64 MB
  final int writeCount = (int) ((fileSize / ((long) writeSize)) + ((fileSize % ((long) writeSize) == 0L) ? 0L : 1L));

  if (writeSize > Integer.MAX_VALUE) {
    throw new IOException("A single write is too large " + writeSize);
  } 

  long bytesToWrite = fileSize;
  byte[] b = new byte[writeSize];

  // initialize buffer
  for (int j = 0; j < writeSize; j++) {
    b[j] = pattern[j % pattern.length];
  }

  int i = 0;

  while (bytesToWrite > 0) {
    int thiswrite = (int) Math.min(writeSize, bytesToWrite); // how many bytes we are writing in this iteration

    stm.write(b, 0, thiswrite);
    // System.out.println("Wrote[" + i + "/" + writeCount + "] " + thiswrite + " bytes.");
    bytesToWrite -= thiswrite;
    i++;
  }
}
 
Example 15
Source File: TestModTime.java    From big-c with Apache License 2.0
private void writeFile(FileSystem fileSys, Path name, int repl)
  throws IOException {
  // create and write a file that contains three blocks of data
  FSDataOutputStream stm = fileSys.create(name, true, fileSys.getConf()
      .getInt(CommonConfigurationKeys.IO_FILE_BUFFER_SIZE_KEY, 4096),
      (short) repl, blockSize);
  byte[] buffer = new byte[fileSize];
  Random rand = new Random(seed);
  rand.nextBytes(buffer);
  stm.write(buffer);
  stm.close();
}
 
Example 16
Source File: StringWriter.java    From flink with Apache License 2.0
@Override
public void write(T element) throws IOException {
	FSDataOutputStream outputStream = getStream();
	outputStream.write(element.toString().getBytes(charset));
	outputStream.write(rowDelimiterBytes);
}
 
Example 17
Source File: TestShortCircuitLocalRead.java    From hadoop with Apache License 2.0
@Test(timeout=10000)
public void testSkipWithVerifyChecksum() throws IOException {
  int size = blockSize;
  Configuration conf = new Configuration();
  conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true);
  conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY, false);
  conf.set(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY,
      "/tmp/testSkipWithVerifyChecksum._PORT");
  DomainSocket.disableBindPathValidation();
  if (simulatedStorage) {
    SimulatedFSDataset.setFactory(conf);
  }
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1)
      .format(true).build();
  FileSystem fs = cluster.getFileSystem();
  try {
    // check that / exists
    Path path = new Path("/");
    assertTrue("/ should be a directory", fs.getFileStatus(path)
        .isDirectory() == true);
    
    byte[] fileData = AppendTestUtil.randomBytes(seed, size*3);
    // create a new file in the home directory and write the test data to it
    Path file1 = new Path("filelocal.dat");
    FSDataOutputStream stm = createFile(fs, file1, 1);

    // write to file
    stm.write(fileData);
    stm.close();
    
    // now test the skip function
    FSDataInputStream instm = fs.open(file1);
    byte[] actual = new byte[fileData.length];
    // read something from the block first, otherwise BlockReaderLocal.skip()
    // will not be invoked
    int nread = instm.read(actual, 0, 3);
    long skipped = 2*size+3;
    instm.seek(skipped);
    nread = instm.read(actual, (int)(skipped + nread), 3);
    instm.close();
      
  } finally {
    fs.close();
    cluster.shutdown();
  }
}
 
Example 18
Source File: JobSplitWriter.java    From big-c with Apache License 2.0
private static void writeSplitHeader(FSDataOutputStream out) 
throws IOException {
  out.write(SPLIT_FILE_HEADER);
  out.writeInt(splitVersion);
}
 
Example 19
Source File: TestShortCircuitLocalRead.java    From big-c with Apache License 2.0
/**
 * Test that file data can be read by reading the block file
 * directly from the local store.
 */
public void doTestShortCircuitReadImpl(boolean ignoreChecksum, int size,
    int readOffset, String shortCircuitUser, String readingUser,
    boolean legacyShortCircuitFails) throws IOException, InterruptedException {
  Configuration conf = new Configuration();
  conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true);
  conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY,
      ignoreChecksum);
  // Set a random client context name so that we don't share a cache with
  // other invocations of this function.
  conf.set(DFSConfigKeys.DFS_CLIENT_CONTEXT,
      UUID.randomUUID().toString());
  conf.set(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY,
      new File(sockDir.getDir(),
        "TestShortCircuitLocalRead._PORT.sock").getAbsolutePath());
  if (shortCircuitUser != null) {
    conf.set(DFSConfigKeys.DFS_BLOCK_LOCAL_PATH_ACCESS_USER_KEY,
        shortCircuitUser);
    conf.setBoolean(DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADERLOCAL, true);
  }
  if (simulatedStorage) {
    SimulatedFSDataset.setFactory(conf);
  }
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1)
      .format(true).build();
  FileSystem fs = cluster.getFileSystem();
  try {
    // check that / exists
    Path path = new Path("/");
    assertTrue("/ should be a directory", fs.getFileStatus(path)
        .isDirectory() == true);
    
    byte[] fileData = AppendTestUtil.randomBytes(seed, size);
    Path file1 = fs.makeQualified(new Path("filelocal.dat"));
    FSDataOutputStream stm = createFile(fs, file1, 1);
    stm.write(fileData);
    stm.close();
    
    URI uri = cluster.getURI();
    checkFileContent(uri, file1, fileData, readOffset, readingUser, conf,
        legacyShortCircuitFails);
    checkFileContentDirect(uri, file1, fileData, readOffset, readingUser,
        conf, legacyShortCircuitFails);
  } finally {
    fs.close();
    cluster.shutdown();
  }
}
 
Example 20
Source File: TestCombineFileInputFormat.java    From big-c with Apache License 2.0
/**
 * Test that directories do not get included as part of getSplits()
 */
@Test
public void testGetSplitsWithDirectory() throws Exception {
  MiniDFSCluster dfs = null;
  try {
    Configuration conf = new Configuration();
    dfs = new MiniDFSCluster.Builder(conf).racks(rack1).hosts(hosts1)
        .build();
    dfs.waitActive();

    FileSystem fileSys = dfs.getFileSystem();

    // Set up the following directory structure:
    // /dir1/: directory
    // /dir1/file: regular file
    // /dir1/dir2/: directory
    Path dir1 = new Path("/dir1");
    Path file = new Path("/dir1/file1");
    Path dir2 = new Path("/dir1/dir2");
    if (!fileSys.mkdirs(dir1)) {
      throw new IOException("Mkdirs failed to create " + dir1.toString());
    }
    FSDataOutputStream out = fileSys.create(file);
    out.write(new byte[0]);
    out.close();
    if (!fileSys.mkdirs(dir2)) {
      throw new IOException("Mkdirs failed to create " + dir2.toString());
    }

    // split it using a CombinedFile input format
    DummyInputFormat inFormat = new DummyInputFormat();
    Job job = Job.getInstance(conf);
    FileInputFormat.setInputPaths(job, "/dir1");
    List<InputSplit> splits = inFormat.getSplits(job);

    // directories should be omitted from getSplits() - we should only see file1 and not dir2
    assertEquals(1, splits.size());
    CombineFileSplit fileSplit = (CombineFileSplit) splits.get(0);
    assertEquals(1, fileSplit.getNumPaths());
    assertEquals(file.getName(), fileSplit.getPath(0).getName());
    assertEquals(0, fileSplit.getOffset(0));
    assertEquals(0, fileSplit.getLength(0));
  } finally {
    if (dfs != null) {
      dfs.shutdown();
    }
  }
}