Java Code Examples for org.apache.hadoop.io.compress.CompressionInputStream#read()

The following examples show how to use org.apache.hadoop.io.compress.CompressionInputStream#read(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: AbstractFileOutputOperatorTest.java    From attic-apex-malhar (Apache License 2.0), 5 votes
/**
 * Round-trips a repeated 16-byte string through the Snappy codec: the string is written
 * 1024 * 1024 times through a {@code SnappyFilterStream} into a file, then the first
 * 16 bytes are read back through the codec's input stream and compared with the original.
 *
 * @throws IOException if writing or reading the compressed file fails
 */
@Test
public void testSnappyCompressionSimple() throws IOException
{
  // NOTE(review): the test is skipped when checkNativeSnappy() returns true; presumably
  // that means the native Snappy library is unavailable - confirm against the helper.
  if (checkNativeSnappy()) {
    return;
  }

  File snappyFile = new File(testMeta.getDir(), "snappyTestFile.snappy");

  BufferedOutputStream os = new BufferedOutputStream(new FileOutputStream(snappyFile));
  Configuration conf = new Configuration();
  CompressionCodec codec = (CompressionCodec)ReflectionUtils.newInstance(SnappyCodec.class, conf);
  FilterStreamCodec.SnappyFilterStream filterStream = new FilterStreamCodec.SnappyFilterStream(
      codec.createOutputStream(os));

  int repetitions = 1024 * 1024;

  String testStr = "TestSnap-16bytes";
  // Encode once instead of on every iteration of the write loop.
  byte[] payload = testStr.getBytes();
  try {
    for (int i = 0; i < repetitions; i++) { // write 16 MBs
      filterStream.write(payload);
    }
    filterStream.flush();
  } finally {
    // Close even when a write fails so the file handle is not leaked.
    filterStream.close();
  }

  byte[] recovered = new byte[payload.length];
  int bytesRead = 0;
  CompressionInputStream is = codec.createInputStream(new FileInputStream(snappyFile));
  try {
    // A single read() may return fewer bytes than requested, so keep reading until the
    // buffer is full or the stream stops producing data.
    while (bytesRead < recovered.length) {
      int n = is.read(recovered, bytesRead, recovered.length - bytesRead);
      if (n <= 0) {
        break;
      }
      bytesRead += n;
    }
  } finally {
    is.close();
  }

  assertEquals(recovered.length, bytesRead);
  assertEquals(testStr, new String(recovered));
}
 
Example 2
Source File: BGZFEnhancedGzipCodec.java    From Hadoop-BAM (MIT License), 5 votes
/**
 * Creates a split-aware decompression stream over {@code seekableIn}.
 *
 * <p>If the input passes {@code BlockCompressedInputStream.isValidFile}, the start
 * position is moved to the block start returned by {@code BGZFSplitGuesser} and a
 * {@code BGZFSplitCompressionInputStream} is returned. Otherwise the data is treated as
 * regular gzip: the input is rewound to position 0 and wrapped in a
 * {@code SplitCompressionInputStream} that delegates every call to the plain
 * compression stream.
 *
 * @param seekableIn the compressed input; must also implement {@code Seekable}
 * @param decompressor the decompressor handed to the plain gzip stream
 * @param start the requested split start
 * @param end the requested split end
 * @param readMode not consulted by this method
 * @return a split compression stream over the input
 * @throws IOException if {@code seekableIn} is not {@code Seekable}, or if the
 *     underlying stream operations fail
 */
@Override
public SplitCompressionInputStream createInputStream(InputStream seekableIn, Decompressor decompressor, long start, long end, READ_MODE readMode) throws IOException {
  final boolean canSeek = seekableIn instanceof Seekable;
  if (!canSeek) {
    throw new IOException("seekableIn must be an instance of " + Seekable.class.getName());
  }
  final Seekable seeker = (Seekable) seekableIn;

  final boolean isBgzf = BlockCompressedInputStream.isValidFile(new BufferedInputStream(seekableIn));
  if (isBgzf) {
    // Begin at the block start reported by the guesser rather than at the raw split start.
    final long blockStart = new BGZFSplitGuesser(seekableIn).guessNextBGZFBlockStart(start, end);
    seeker.seek(blockStart);
    return new BGZFSplitCompressionInputStream(seekableIn, blockStart, end);
  }

  // Data is regular gzip, not BGZF: rewind and read it through the plain stream.
  seeker.seek(0);
  final CompressionInputStream gzipStream = createInputStream(seekableIn, decompressor);
  return new SplitCompressionInputStream(gzipStream, start, end) {
    @Override
    public int read() throws IOException {
      return gzipStream.read();
    }

    @Override
    public int read(byte[] b, int off, int len) throws IOException {
      return gzipStream.read(b, off, len);
    }

    @Override
    public void resetState() throws IOException {
      gzipStream.resetState();
    }
  };
}
 
Example 3
Source File: TestSnappyCodec.java    From parquet-mr (Apache License 2.0), 4 votes
/**
 * Compresses a 1 MiB buffer through the codec's output stream in 1 KiB writes, checks
 * that the result is byte-identical to {@code Snappy.compress} on the whole buffer, then
 * decompresses it through the codec's input stream and checks that both the stream
 * result and {@code Snappy.uncompress} reproduce the original input.
 *
 * @throws IOException if a stream operation fails or decompression ends prematurely
 */
@Test
public void TestSnappyStream() throws IOException {
  SnappyCodec codec = new SnappyCodec();
  codec.setConf(new Configuration());

  int blockSize = 1024;
  int inputSize = blockSize * 1024;

  byte[] input = new byte[inputSize];
  for (int i = 0; i < inputSize; ++i) {
    input[i] = (byte)i;
  }

  ByteArrayOutputStream compressedStream = new ByteArrayOutputStream();

  CompressionOutputStream compressor = codec.createOutputStream(compressedStream);
  int bytesCompressed = 0;
  while (bytesCompressed < inputSize) {
    int len = Math.min(inputSize - bytesCompressed, blockSize);
    compressor.write(input, bytesCompressed, len);
    bytesCompressed += len;
  }
  compressor.finish();

  byte[] rawCompressed = Snappy.compress(input);
  byte[] codecCompressed = compressedStream.toByteArray();

  // Validate that the result from the codec is the same as if we compressed the
  // buffer directly.
  assertArrayEquals(rawCompressed, codecCompressed);

  ByteArrayInputStream inputStream = new ByteArrayInputStream(codecCompressed);
  CompressionInputStream decompressor = codec.createInputStream(inputStream);
  byte[] codecDecompressed = new byte[inputSize];
  int bytesDecompressed = 0;
  try {
    while (bytesDecompressed < inputSize) {
      // Never request more than the space left: after a short read, a fixed
      // blockSize request could run past the end of the destination buffer.
      int len = Math.min(inputSize - bytesDecompressed, blockSize);
      int numBytes = decompressor.read(codecDecompressed, bytesDecompressed, len);
      // Both 0 and -1 mean no more data; adding -1 to the running total would
      // corrupt the offset, so treat any non-positive result as premature end.
      if (numBytes <= 0) {
        throw new IOException("Decompression stopped after " + bytesDecompressed
            + " of " + inputSize + " bytes");
      }
      bytesDecompressed += numBytes;
    }
  } finally {
    decompressor.close();
  }

  byte[] rawDecompressed = Snappy.uncompress(rawCompressed);

  assertArrayEquals(input, rawDecompressed);
  assertArrayEquals(input, codecDecompressed);
}