Java Code Examples for org.apache.hadoop.io.IOUtils#readFully()

The following examples show how to use org.apache.hadoop.io.IOUtils#readFully(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
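
Before the project-specific examples, here is a minimal, self-contained sketch of the call itself (the input path comes from args[0] and the buffer size is a placeholder). Unlike a plain InputStream.read(), readFully keeps reading until the requested number of bytes has been copied into the buffer, and it throws an IOException if the stream ends before then.

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

import org.apache.hadoop.io.IOUtils;

public class ReadFullyExample {
  public static void main(String[] args) throws IOException {
    byte[] buf = new byte[4096]; // placeholder size; must not exceed the file length
    try (InputStream in = new FileInputStream(args[0])) {
      // Fills buf[0..buf.length) completely, or throws if the stream is shorter.
      IOUtils.readFully(in, buf, 0, buf.length);
    }
    System.out.println("Read " + buf.length + " bytes");
  }
}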
Example 1
Source File: FileBasedKVWriter.java    From incubator-tez with Apache License 2.0    6 votes
public byte[] getData() throws IOException {
  Preconditions.checkState(closed,
      "Only available after the Writer has been closed");
  FSDataInputStream inStream = null;
  byte[] buf = null;
  try {
    inStream = rfs.open(outputPath);
    buf = new byte[(int) getCompressedLength()];
    IOUtils.readFully(inStream, buf, 0, (int) getCompressedLength());
  } finally {
    if (inStream != null) {
      inStream.close();
    }
  }
  return buf;
}
 
Example 2
Source File: TestFuseDFS.java    From hadoop with Apache License 2.0    6 votes
/** Check that the given file exists with the given contents */
private static void checkFile(File f, String expectedContents) 
    throws IOException {
  FileInputStream fi = new FileInputStream(f);
  int len = expectedContents.length();
  byte[] b = new byte[len];
  try {
    IOUtils.readFully(fi, b, 0, len);
  } catch (IOException ie) {
    fail("Reading "+f.getName()+" failed with "+ie.getMessage());
  } finally {
    fi.close(); // NB: leaving f unclosed prevents unmount
  }
  String s = new String(b, 0, len);
  assertEquals("File content differs", expectedContents, s);
}
 
Example 3
Source File: FileContextTestHelper.java    From hadoop with Apache License 2.0    5 votes
public static byte[] readFile(FileContext fc, Path path, int len)
    throws IOException {
  DataInputStream dis = fc.open(path);
  byte[] buffer = new byte[len];
  IOUtils.readFully(dis, buffer, 0, len);
  dis.close();
  return buffer;
}
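
A hedged usage sketch for this helper: the caller must know the exact byte count up front, which in tests typically comes from the file's status. The wrapper below is hypothetical, not part of FileContextTestHelper.

static byte[] readWholeFile(FileContext fc, Path path) throws IOException {
  // Ask the FileContext for the file length, then read exactly that many bytes.
  int len = (int) fc.getFileStatus(path).getLen();
  return FileContextTestHelper.readFile(fc, path, len);
}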
 
Example 4
Source File: TestFSInputChecker.java    From big-c with Apache License 2.0    5 votes
private void checkFileCorruption(LocalFileSystem fileSys, Path file, 
                                 Path fileToCorrupt) throws IOException {
  
  // corrupt the file 
  RandomAccessFile out = 
    new RandomAccessFile(new File(fileToCorrupt.toString()), "rw");
  
  byte[] buf = new byte[(int)fileSys.getFileStatus(file).getLen()];    
  int corruptFileLen = (int)fileSys.getFileStatus(fileToCorrupt).getLen();
  assertTrue(buf.length >= corruptFileLen);
  
  rand.nextBytes(buf);
  out.seek(corruptFileLen/2);
  out.write(buf, 0, corruptFileLen/4);
  out.close();

  boolean gotException = false;
  
  InputStream in = fileSys.open(file);
  try {
    IOUtils.readFully(in, buf, 0, buf.length);
  } catch (ChecksumException e) {
    gotException = true;
  }
  assertTrue(gotException);
  in.close();    
}
 
Example 5
Source File: TestFsck.java    From hadoop with Apache License 2.0    5 votes
public void checkSalvagedRemains() throws IOException {
  int chainIdx = 0;
  HdfsFileStatus status = dfsClient.getFileInfo(name);
  long length = status.getLen();
  int numBlocks = (int)((length + blockSize - 1) / blockSize);
  DFSInputStream in = null;
  byte[] blockBuffer = new byte[blockSize];

  try {
    for (int blockIdx = 0; blockIdx < numBlocks; blockIdx++) {
      if (blocksToCorrupt.contains(blockIdx)) {
        if (in != null) {
          in.close();
          in = null;
        }
        continue;
      }
      if (in == null) {
        in = dfsClient.open("/lost+found" + name + "/" + chainIdx);
        chainIdx++;
      }
      int len = blockBuffer.length;
      if (blockIdx == (numBlocks - 1)) {
        // The last block might not be full-length
        len = (int)(in.getFileLength() % blockSize);
        if (len == 0) len = blockBuffer.length;
      }
      IOUtils.readFully(in, blockBuffer, 0, len);
      int startIdx = blockIdx * blockSize;
      for (int i = 0; i < len; i++) {
        if (initialContents[startIdx + i] != blockBuffer[i]) {
          throw new IOException("salvaged file " + name + " differed " +
          "from what we expected on block " + blockIdx);
        }
      }
    }
  } finally {
    IOUtils.cleanup(null, in);
  }
}
 
Example 6
Source File: TestFsck.java    From big-c with Apache License 2.0    5 votes
private byte[] cacheInitialContents() throws IOException {
  HdfsFileStatus status = dfsClient.getFileInfo(name);
  byte[] content = new byte[(int)status.getLen()];
  DFSInputStream in = null;
  try {
    in = dfsClient.open(name);
    IOUtils.readFully(in, content, 0, content.length);
  } finally {
    in.close();
  }
  return content;
}
 
Example 7
Source File: TestFileConcurrentReader.java    From big-c with Apache License 2.0    5 votes
private void assertBytesAvailable(
  FileSystem fileSystem,
  Path path,
  int numBytes
) throws IOException {
  byte[] buffer = new byte[numBytes];
  FSDataInputStream inputStream = fileSystem.open(path);
  IOUtils.readFully(inputStream, buffer, 0, numBytes);
  inputStream.close();

  assertTrue(
    "unable to validate bytes",
    validateSequentialBytes(buffer, 0, numBytes)
  );
}
 
Example 8
Source File: FileSystemTestWrapper.java    From big-c with Apache License 2.0    5 votes
public byte[] readFile(Path path, int len) throws IOException {
  DataInputStream dis = fs.open(path);
  byte[] buffer = new byte[len];
  IOUtils.readFully(dis, buffer, 0, len);
  dis.close();
  return buffer;
}
 
Example 9
Source File: TestTracingShortCircuitLocalRead.java    From hadoop with Apache License 2.0    5 votes
@Test
public void testShortCircuitTraceHooks() throws IOException {
  assumeTrue(NativeCodeLoader.isNativeCodeLoaded() && !Path.WINDOWS);
  conf = new Configuration();
  conf.set(DFSConfigKeys.DFS_CLIENT_HTRACE_PREFIX +
      SpanReceiverHost.SPAN_RECEIVERS_CONF_SUFFIX,
      TestTracing.SetSpanReceiver.class.getName());
  conf.setLong("dfs.blocksize", 100 * 1024);
  conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_KEY, true);
  conf.setBoolean(DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY, false);
  conf.set(DFSConfigKeys.DFS_DOMAIN_SOCKET_PATH_KEY,
      "testShortCircuitTraceHooks._PORT");
  conf.set(DFSConfigKeys.DFS_CHECKSUM_TYPE_KEY, "CRC32C");
  cluster = new MiniDFSCluster.Builder(conf)
      .numDataNodes(1)
      .build();
  dfs = cluster.getFileSystem();

  try {
    DFSTestUtil.createFile(dfs, TEST_PATH, TEST_LENGTH, (short)1, 5678L);

    TraceScope ts = Trace.startSpan("testShortCircuitTraceHooks", Sampler.ALWAYS);
    FSDataInputStream stream = dfs.open(TEST_PATH);
    byte buf[] = new byte[TEST_LENGTH];
    IOUtils.readFully(stream, buf, 0, TEST_LENGTH);
    stream.close();
    ts.close();

    String[] expectedSpanNames = {
      "OpRequestShortCircuitAccessProto",
      "ShortCircuitShmRequestProto"
    };
    TestTracing.assertSpanNamesFound(expectedSpanNames);
  } finally {
    dfs.close();
    cluster.shutdown();
  }
}
 
Example 10
Source File: AdmmIterationHelper.java    From laser with Apache License 2.0    5 votes
public static String fsDataInputStreamToString(FSDataInputStream in,
		int inputSize) throws IOException {
	byte[] fileContents = new byte[inputSize];
	IOUtils.readFully(in, fileContents, 0, fileContents.length);
	String keyValue = new Text(fileContents).toString();
	return keyValue; // output from the last reduce job will be key | value
}
 
Example 11
Source File: PacketReceiver.java    From hadoop with Apache License 2.0    5 votes
private static void doReadFully(ReadableByteChannel ch, InputStream in,
    ByteBuffer buf) throws IOException {
  if (ch != null) {
    readChannelFully(ch, buf);
  } else {
    Preconditions.checkState(!buf.isDirect(),
        "Must not use direct buffers with InputStream API");
    IOUtils.readFully(in, buf.array(),
        buf.arrayOffset() + buf.position(),
        buf.remaining());
    buf.position(buf.position() + buf.remaining());
  }
}
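
The InputStream branch needs the buffer's backing byte[], which only heap buffers expose; that is what the Preconditions check on buf.isDirect() guards against. A standalone illustration of the distinction (not taken from PacketReceiver):

import java.nio.ByteBuffer;

public class BufferKindCheck {
  public static void main(String[] args) {
    ByteBuffer heap = ByteBuffer.allocate(1024);
    ByteBuffer direct = ByteBuffer.allocateDirect(1024);
    // Heap buffers expose their backing array; direct buffers do not,
    // so only the channel path can fill a direct buffer.
    System.out.println("heap.hasArray()   = " + heap.hasArray());   // true
    System.out.println("direct.hasArray() = " + direct.hasArray()); // false
  }
}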
 
Example 12
Source File: FileContextTestWrapper.java    From big-c with Apache License 2.0    5 votes
public byte[] readFile(Path path, int len) throws IOException {
  DataInputStream dis = fc.open(path);
  byte[] buffer = new byte[len];
  IOUtils.readFully(dis, buffer, 0, len);
  dis.close();
  return buffer;
}
 
Example 13
Source File: BlockXCodingMerger.java    From RDFS with Apache License 2.0    5 votes
/**
 * reads in the partial crc chunk and computes checksum of pre-existing data
 * in partial chunk.
 */
private void computePartialChunkCrc(long blkoff, long ckoff,
		int bytesPerChecksum) throws IOException {

	// find offset of the beginning of partial chunk.
	//
	int sizePartialChunk = (int) (blkoff % bytesPerChecksum);
	int checksumSize = checksum.getChecksumSize();
	blkoff = blkoff - sizePartialChunk;

	// create an input stream from the block file
	// and read in partial crc chunk into temporary buffer
	byte[] buf = new byte[sizePartialChunk];
	byte[] crcbuf = new byte[checksumSize];
	FSDataset.BlockInputStreams instr = null;
	try {
		instr = datanode.data.getTmpInputStreams(namespaceId, block,
				blkoff, ckoff);
		IOUtils.readFully(instr.dataIn, buf, 0, sizePartialChunk);

		// open meta file and read in the crc value computed earlier
		IOUtils.readFully(instr.checksumIn, crcbuf, 0, crcbuf.length);
	} finally {
		IOUtils.closeStream(instr);
	}

	// compute crc of partial chunk from data read in the block file.
	partialCrc = new CRC32();
	partialCrc.update(buf, 0, sizePartialChunk);

	// paranoia! verify that the pre-computed crc matches what we
	// recalculated just now
	if (partialCrc.getValue() != FSInputChecker.checksum2long(crcbuf)) {
		String msg = "Partial CRC " + partialCrc.getValue()
				+ " does not match value computed the "
				+ " last time file was closed "
				+ FSInputChecker.checksum2long(crcbuf);
		throw new IOException(msg);
	}
}
 
Example 14
Source File: ReadRecordFactory.java    From RDFS with Apache License 2.0    5 votes
@Override
public boolean next(GridmixKey key, GridmixRecord val) throws IOException {
  if (!factory.next(key, val)) {
    return false;
  }
  for (int len = (null == key ? 0 : key.getSize()) + val.getSize();
       len > 0; len -= buf.length) {
    IOUtils.readFully(src, buf, 0, Math.min(buf.length, len));
  }
  return true;
}
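
The loop reads (and effectively discards) the key-plus-value payload from src in buf-sized chunks, reusing buf as scratch space instead of allocating a record-sized array. The same pattern in isolation might look like this (drain and scratch are hypothetical names):

// Consume exactly n bytes from a stream, reusing a small scratch buffer.
static void drain(InputStream in, byte[] scratch, long n) throws IOException {
  while (n > 0) {
    int chunk = (int) Math.min(scratch.length, n);
    IOUtils.readFully(in, scratch, 0, chunk);
    n -= chunk;
  }
}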
 
Example 15
Source File: BlockReceiver.java    From RDFS with Apache License 2.0    4 votes
/**
 * reads in the partial crc chunk and computes checksum
 * of pre-existing data in partial chunk.
 */
private void computePartialChunkCrc(long blkoff, long ckoff, 
                                    int bytesPerChecksum) throws IOException {

  // find offset of the beginning of partial chunk.
  //
  int sizePartialChunk = (int) (blkoff % bytesPerChecksum);
  int checksumSize = checksum.getChecksumSize();
  blkoff = blkoff - sizePartialChunk;
  LOG.info("computePartialChunkCrc sizePartialChunk " + 
            sizePartialChunk +
            " block " + block +
            " offset in block " + blkoff +
            " offset in metafile " + ckoff);

  // create an input stream from the block file
  // and read in partial crc chunk into temporary buffer
  //
  byte[] buf = new byte[sizePartialChunk];
  byte[] crcbuf = new byte[checksumSize];
  FSDataset.BlockInputStreams instr = null;
  try { 
    instr = datanode.data.getTmpInputStreams(namespaceId, block, blkoff, ckoff);
    IOUtils.readFully(instr.dataIn, buf, 0, sizePartialChunk);

    // open meta file and read in the crc value computed earlier
    IOUtils.readFully(instr.checksumIn, crcbuf, 0, crcbuf.length);
  } finally {
    IOUtils.closeStream(instr);
  }

  // compute crc of partial chunk from data read in the block file.
  partialCrc = new CRC32();
  partialCrc.update(buf, 0, sizePartialChunk);
  LOG.info("Read in partial CRC chunk from disk for block " + block);

  // paranoia! verify that the pre-computed crc matches what we
  // recalculated just now
  if (partialCrc.getValue() != FSInputChecker.checksum2long(crcbuf)) {
    String msg = "Partial CRC " + partialCrc.getValue() +
                 " does not match value computed the " +
                 " last time file was closed " +
                 FSInputChecker.checksum2long(crcbuf);
    throw new IOException(msg);
  }
  //LOG.debug("Partial CRC matches 0x" + 
  //            Long.toHexString(partialCrc.getValue()));
}
 
Example 16
Source File: BlockReceiver.java    From big-c with Apache License 2.0    4 votes
/**
 * reads in the partial crc chunk and computes checksum
 * of pre-existing data in partial chunk.
 */
private Checksum computePartialChunkCrc(long blkoff, long ckoff)
    throws IOException {

  // find offset of the beginning of partial chunk.
  //
  int sizePartialChunk = (int) (blkoff % bytesPerChecksum);
  blkoff = blkoff - sizePartialChunk;
  if (LOG.isDebugEnabled()) {
    LOG.debug("computePartialChunkCrc for " + block
        + ": sizePartialChunk=" + sizePartialChunk
        + ", block offset=" + blkoff
        + ", metafile offset=" + ckoff);
  }

  // create an input stream from the block file
  // and read in partial crc chunk into temporary buffer
  //
  byte[] buf = new byte[sizePartialChunk];
  byte[] crcbuf = new byte[checksumSize];
  try (ReplicaInputStreams instr =
      datanode.data.getTmpInputStreams(block, blkoff, ckoff)) {
    IOUtils.readFully(instr.getDataIn(), buf, 0, sizePartialChunk);

    // open meta file and read in the crc value computed earlier
    IOUtils.readFully(instr.getChecksumIn(), crcbuf, 0, crcbuf.length);
  }

  // compute crc of partial chunk from data read in the block file.
  final Checksum partialCrc = DataChecksum.newDataChecksum(
      diskChecksum.getChecksumType(), diskChecksum.getBytesPerChecksum());
  partialCrc.update(buf, 0, sizePartialChunk);
  if (LOG.isDebugEnabled()) {
    LOG.debug("Read in partial CRC chunk from disk for " + block);
  }

  // paranoia! verify that the pre-computed crc matches what we
  // recalculated just now
  if (partialCrc.getValue() != checksum2long(crcbuf)) {
    String msg = "Partial CRC " + partialCrc.getValue() +
                 " does not match value computed the " +
                 " last time file was closed " +
                 checksum2long(crcbuf);
    throw new IOException(msg);
  }
  return partialCrc;
}
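
Compared with the older RDFS variant in Example 15, this version opens the block and checksum streams inside a try-with-resources block (ReplicaInputStreams is closed automatically), so no explicit IOUtils.closeStream call is needed, and the freshly computed partialCrc is returned to the caller instead of being stored in a field.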
 
Example 17
Source File: InMemoryMapOutput.java    From hadoop with Apache License 2.0    4 votes
@Override
public void shuffle(MapHost host, InputStream input,
                    long compressedLength, long decompressedLength,
                    ShuffleClientMetrics metrics,
                    Reporter reporter) throws IOException {
  IFileInputStream checksumIn = 
    new IFileInputStream(input, compressedLength, conf);

  input = checksumIn;       

  // Are map-outputs compressed?
  if (codec != null) {
    decompressor.reset();
    input = codec.createInputStream(input, decompressor);
  }

  try {
    IOUtils.readFully(input, memory, 0, memory.length);
    metrics.inputBytes(memory.length);
    reporter.progress();
    LOG.info("Read " + memory.length + " bytes from map-output for " +
              getMapId());

    /**
     * We've gotten the amount of data we were expecting. Verify the
     * decompressor has nothing more to offer. This action also forces the
     * decompressor to read any trailing bytes that weren't critical
     * for decompression, which is necessary to keep the stream
     * in sync.
     */
    if (input.read() >= 0 ) {
      throw new IOException("Unexpected extra bytes from input stream for " +
                             getMapId());
    }

  } catch (IOException ioe) {      
    // Close the streams
    IOUtils.cleanup(LOG, input);

    // Re-throw
    throw ioe;
  } finally {
    CodecPool.returnDecompressor(decompressor);
  }
}
 
Example 18
Source File: IFile.java    From tez with Apache License 2.0    4 votes
public static boolean isCompressedFlagEnabled(InputStream in) throws IOException {
  byte[] header = new byte[HEADER.length];
  IOUtils.readFully(in, header, 0, HEADER.length);
  verifyHeaderMagic(header);
  return (header[3] == 1);
}
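
Note that this check consumes HEADER.length bytes, so afterwards the stream is positioned just past the IFile magic. A hypothetical caller sketch (fs and path are placeholders, not part of IFile):

static boolean isIFileCompressed(FileSystem fs, Path path) throws IOException {
  try (FSDataInputStream in = fs.open(path)) {
    // Reads the header and reports whether the compressed flag is set.
    return IFile.isCompressedFlagEnabled(in);
  }
}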
 
Example 19
Source File: WALCellCodec.java    From hbase with Apache License 2.0    4 votes
@Override
protected Cell parseCell() throws IOException {
  int keylength = StreamUtils.readRawVarint32(in);
  int vlength = StreamUtils.readRawVarint32(in);

  int tagsLength = StreamUtils.readRawVarint32(in);
  int length = 0;
  if(tagsLength == 0) {
    length = KeyValue.KEYVALUE_INFRASTRUCTURE_SIZE + keylength + vlength;
  } else {
    length = KeyValue.KEYVALUE_WITH_TAGS_INFRASTRUCTURE_SIZE + keylength + vlength + tagsLength;
  }

  byte[] backingArray = new byte[length];
  int pos = 0;
  pos = Bytes.putInt(backingArray, pos, keylength);
  pos = Bytes.putInt(backingArray, pos, vlength);

  // the row
  int elemLen = readIntoArray(backingArray, pos + Bytes.SIZEOF_SHORT,
    compression.getDictionary(CompressionContext.DictionaryIndex.ROW));
  checkLength(elemLen, Short.MAX_VALUE);
  pos = Bytes.putShort(backingArray, pos, (short)elemLen);
  pos += elemLen;

  // family
  elemLen = readIntoArray(backingArray, pos + Bytes.SIZEOF_BYTE,
    compression.getDictionary(CompressionContext.DictionaryIndex.FAMILY));
  checkLength(elemLen, Byte.MAX_VALUE);
  pos = Bytes.putByte(backingArray, pos, (byte)elemLen);
  pos += elemLen;

  // qualifier
  elemLen = readIntoArray(backingArray, pos,
    compression.getDictionary(CompressionContext.DictionaryIndex.QUALIFIER));
  pos += elemLen;

  // timestamp, type and value
  int tsTypeValLen = length - pos;
  if (tagsLength > 0) {
    tsTypeValLen = tsTypeValLen - tagsLength - KeyValue.TAGS_LENGTH_SIZE;
  }
  IOUtils.readFully(in, backingArray, pos, tsTypeValLen);
  pos += tsTypeValLen;

  // tags
  if (tagsLength > 0) {
    pos = Bytes.putAsShort(backingArray, pos, tagsLength);
    if (compression.tagCompressionContext != null) {
      compression.tagCompressionContext.uncompressTags(in, backingArray, pos, tagsLength);
    } else {
      IOUtils.readFully(in, backingArray, pos, tagsLength);
    }
  }
  return new KeyValue(backingArray, 0, length);
}
 
Example 20
Source File: TestPersistBlocks.java    From hadoop with Apache License 2.0    4 votes
@Test
public void testRestartWithPartialBlockHflushed() throws IOException {
  final Configuration conf = new HdfsConfiguration();
  // Turn off persistent IPC, so that the DFSClient can survive NN restart
  conf.setInt(
      CommonConfigurationKeysPublic.IPC_CLIENT_CONNECTION_MAXIDLETIME_KEY,
      0);
  MiniDFSCluster cluster = null;

  FSDataOutputStream stream;
  try {
    cluster = new MiniDFSCluster.Builder(conf).numDataNodes(3).build();
    FileSystem fs = cluster.getFileSystem();
    NameNode.getAddress(conf).getPort();
    // Creating a file with 4096 blockSize to write multiple blocks
    stream = fs.create(FILE_PATH, true, BLOCK_SIZE, (short) 1, BLOCK_SIZE);
    stream.write(DATA_BEFORE_RESTART);
    stream.write((byte)1);
    stream.hflush();
    
    // explicitly do NOT close the file before restarting the NN.
    cluster.restartNameNode();
    
    // this will fail if the final block of the file is prematurely COMPLETEd
    stream.write((byte)2);
    stream.hflush();
    stream.close();
    
    assertEquals(DATA_BEFORE_RESTART.length + 2,
        fs.getFileStatus(FILE_PATH).getLen());
    
    FSDataInputStream readStream = fs.open(FILE_PATH);
    try {
      byte[] verifyBuf = new byte[DATA_BEFORE_RESTART.length + 2];
      IOUtils.readFully(readStream, verifyBuf, 0, verifyBuf.length);
      byte[] expectedBuf = new byte[DATA_BEFORE_RESTART.length + 2];
      System.arraycopy(DATA_BEFORE_RESTART, 0, expectedBuf, 0,
          DATA_BEFORE_RESTART.length);
      System.arraycopy(new byte[]{1, 2}, 0, expectedBuf,
          DATA_BEFORE_RESTART.length, 2);
      assertArrayEquals(expectedBuf, verifyBuf);
    } finally {
      IOUtils.closeStream(readStream);
    }
  } finally {
    if (cluster != null) { cluster.shutdown(); }
  }
}