org.apache.hadoop.io.compress.CompressionInputStream Java Examples

The following examples show how to use org.apache.hadoop.io.compress.CompressionInputStream. Each example is drawn from an open source project; the source file, project, and license are noted above the code.
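Before the project-specific snippets, here is a minimal, self-contained sketch of the typical usage pattern: let CompressionCodecFactory choose a codec from the file extension, then wrap the raw stream with createInputStream. The path is a placeholder, and the fallback to the raw stream for unrecognized extensions is an assumption of this sketch, not taken from any example below.

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;

public class CompressionInputStreamSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path path = new Path("/tmp/example.txt.gz"); // hypothetical input file

        FileSystem fs = path.getFileSystem(conf);
        // The factory maps file extensions to codecs (.gz -> GzipCodec, .bz2 -> BZip2Codec, ...)
        // and returns null when no codec matches.
        CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(path);

        InputStream raw = fs.open(path);
        // createInputStream returns a CompressionInputStream that decompresses on the fly.
        InputStream in = (codec != null) ? codec.createInputStream(raw) : raw;
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(in))) {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);
            }
        }
    }
}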
Example #1
Source File: HadoopUtils.java    From incubator-hivemall with Apache License 2.0
public static BufferedReader getBufferedReader(File file, MapredContext context)
        throws IOException {
    URI fileuri = file.toURI();
    Path path = new Path(fileuri);

    Configuration conf = context.getJobConf();
    CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
    CompressionCodec codec = ccf.getCodec(path);

    if (codec == null) {
        return new BufferedReader(new FileReader(file));
    } else {
        Decompressor decompressor = CodecPool.getDecompressor(codec);
        FileInputStream fis = new FileInputStream(file);
        CompressionInputStream cis = codec.createInputStream(fis, decompressor);
        BufferedReader br = new BufferedReaderExt(new InputStreamReader(cis), decompressor);
        return br;
    }
}
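If no codec matches the file extension, the method falls back to a plain FileReader; otherwise it wires a pooled Decompressor into the codec's CompressionInputStream. BufferedReaderExt is a project-specific wrapper; presumably it returns the pooled Decompressor to the CodecPool when the reader is closed.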
 
Example #2
Source File: SequenceFile.java    From hadoop-gpu with Apache License 2.0
/** Read a compressed buffer */
private synchronized void readBuffer(DataInputBuffer buffer, 
                                     CompressionInputStream filter) throws IOException {
  // Read data into a temporary buffer
  DataOutputBuffer dataBuffer = new DataOutputBuffer();

  try {
    int dataBufferLength = WritableUtils.readVInt(in);
    dataBuffer.write(in, dataBufferLength);
  
    // Set up 'buffer' connected to the input-stream
    buffer.reset(dataBuffer.getData(), 0, dataBuffer.getLength());
  } finally {
    dataBuffer.close();
  }

  // Reset the codec
  filter.resetState();
}
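This pattern recurs in several SequenceFile forks below: the compressed block is copied into a temporary buffer, the caller's DataInputBuffer is pointed at that data, and resetState() is called so the CompressionInputStream is ready for the next block instead of carrying state over from the previous one.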
 
Example #3
Source File: TestShuffleUtils.java    From tez with Apache License 2.0
@Test
public void testInternalErrorTranslation() throws Exception {
  String codecErrorMsg = "codec failure";
  CompressionInputStream mockCodecStream = mock(CompressionInputStream.class);
  when(mockCodecStream.read(any(byte[].class), anyInt(), anyInt()))
      .thenThrow(new InternalError(codecErrorMsg));
  Decompressor mockDecoder = mock(Decompressor.class);
  CompressionCodec mockCodec = mock(ConfigurableCodecForTest.class);
  when(mockCodec.createDecompressor()).thenReturn(mockDecoder);
  when(mockCodec.createInputStream(any(InputStream.class), any(Decompressor.class)))
      .thenReturn(mockCodecStream);
  byte[] header = new byte[] { (byte) 'T', (byte) 'I', (byte) 'F', (byte) 1};
  try {
    ShuffleUtils.shuffleToMemory(new byte[1024], new ByteArrayInputStream(header),
        1024, 128, mockCodec, false, 0, mock(Logger.class), null);
    Assert.fail("shuffle was supposed to throw!");
  } catch (IOException e) {
    Assert.assertTrue(e.getCause() instanceof InternalError);
    Assert.assertTrue(e.getMessage().contains(codecErrorMsg));
  }
}
 
Example #4
Source File: SequenceFile.java    From gemfirexd-oss with Apache License 2.0
/** Read a compressed buffer */
private synchronized void readBuffer(DataInputBuffer buffer, 
                                     CompressionInputStream filter) throws IOException {
  // Read data into a temporary buffer
  DataOutputBuffer dataBuffer = new DataOutputBuffer();

  try {
    int dataBufferLength = WritableUtils.readVInt(in);
    dataBuffer.write(in, dataBufferLength);
  
    // Set up 'buffer' connected to the input-stream
    buffer.reset(dataBuffer.getData(), 0, dataBuffer.getLength());
  } finally {
    dataBuffer.close();
  }

  // Reset the codec
  filter.resetState();
}
 
Example #5
Source File: SequenceFile.java    From RDFS with Apache License 2.0
/** Read a compressed buffer */
private synchronized void readBuffer(DataInputBuffer buffer, 
                                     CompressionInputStream filter) throws IOException {
  // Read data into a temporary buffer
  DataOutputBuffer dataBuffer = new DataOutputBuffer();

  try {
    int dataBufferLength = WritableUtils.readVInt(in);
    dataBuffer.write(in, dataBufferLength);
  
    // Set up 'buffer' connected to the input-stream
    buffer.reset(dataBuffer.getData(), 0, dataBuffer.getLength());
  } finally {
    dataBuffer.close();
  }

  // Reset the codec
  filter.resetState();
}
 
Example #6
Source File: SnappyFramedCodecTest.java    From kangaroo with Apache License 2.0
@Test
public void testCompressAndDecompressConsistent() throws Exception {
    final String testString = "Test String";
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    final OutputStreamWriter writer = new OutputStreamWriter(subject.createOutputStream(baos));
    writer.write(testString);
    writer.flush();
    writer.close();

    final CompressionInputStream inputStream = subject.createInputStream(new ByteArrayInputStream(baos
            .toByteArray()));
    final StringWriter contentsTester = new StringWriter();
    IOUtils.copy(inputStream, contentsTester);
    inputStream.close();
    contentsTester.flush();
    contentsTester.close();

    Assert.assertEquals(testString, contentsTester.toString());
}
 
Example #7
Source File: SequenceFile.java    From big-c with Apache License 2.0
/** Read a compressed buffer */
private synchronized void readBuffer(DataInputBuffer buffer, 
                                     CompressionInputStream filter) throws IOException {
  // Read data into a temporary buffer
  DataOutputBuffer dataBuffer = new DataOutputBuffer();

  try {
    int dataBufferLength = WritableUtils.readVInt(in);
    dataBuffer.write(in, dataBufferLength);
  
    // Set up 'buffer' connected to the input-stream
    buffer.reset(dataBuffer.getData(), 0, dataBuffer.getLength());
  } finally {
    dataBuffer.close();
  }

  // Reset the codec
  filter.resetState();
}
 
Example #8
Source File: SequenceFile.java    From hadoop with Apache License 2.0
/** Read a compressed buffer */
private synchronized void readBuffer(DataInputBuffer buffer, 
                                     CompressionInputStream filter) throws IOException {
  // Read data into a temporary buffer
  DataOutputBuffer dataBuffer = new DataOutputBuffer();

  try {
    int dataBufferLength = WritableUtils.readVInt(in);
    dataBuffer.write(in, dataBufferLength);
  
    // Set up 'buffer' connected to the input-stream
    buffer.reset(dataBuffer.getData(), 0, dataBuffer.getLength());
  } finally {
    dataBuffer.close();
  }

  // Reset the codec
  filter.resetState();
}
 
Example #9
Source File: CellBlockBuilder.java    From hbase with Apache License 2.0
private ByteBuffer decompress(CompressionCodec compressor, InputStream cellBlockStream,
    int osInitialSize) throws IOException {
  // GZIPCodec fails w/ NPE if no configuration.
  if (compressor instanceof Configurable) {
    ((Configurable) compressor).setConf(this.conf);
  }
  Decompressor poolDecompressor = CodecPool.getDecompressor(compressor);
  CompressionInputStream cis = compressor.createInputStream(cellBlockStream, poolDecompressor);
  ByteBufferOutputStream bbos;
  try {
    // TODO: This is ugly. The buffer will be resized on us if we guess wrong.
    // TODO: Reuse buffers.
    bbos = new ByteBufferOutputStream(osInitialSize);
    IOUtils.copy(cis, bbos);
    bbos.close();
    return bbos.getByteBuffer();
  } finally {
    CodecPool.returnDecompressor(poolDecompressor);
  }
}
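Note the finally block: a Decompressor obtained from CodecPool.getDecompressor must always be handed back via CodecPool.returnDecompressor, even when the copy fails; otherwise the pooled decompressor is never reused.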
 
Example #10
Source File: FileReaderWriterFactoryTest.java    From secor with Apache License 2.0
private void mockDelimitedTextFileWriter(boolean isCompressed) throws Exception {
    PowerMockito.mockStatic(FileSystem.class);
    FileSystem fs = Mockito.mock(FileSystem.class);
    Mockito.when(
            FileSystem.get(Mockito.any(URI.class),
                    Mockito.any(Configuration.class))).thenReturn(fs);

    Path fsPath = (!isCompressed) ? new Path(PATH) : new Path(PATH_GZ);

    GzipCodec codec = PowerMockito.mock(GzipCodec.class);
    PowerMockito.whenNew(GzipCodec.class).withNoArguments()
            .thenReturn(codec);

    FSDataInputStream fileInputStream = Mockito
            .mock(FSDataInputStream.class);
    FSDataOutputStream fileOutputStream = Mockito
            .mock(FSDataOutputStream.class);

    Mockito.when(fs.open(fsPath)).thenReturn(fileInputStream);
    Mockito.when(fs.create(fsPath)).thenReturn(fileOutputStream);

    CompressionInputStream inputStream = Mockito
            .mock(CompressionInputStream.class);
    CompressionOutputStream outputStream = Mockito
            .mock(CompressionOutputStream.class);
    Mockito.when(codec.createInputStream(Mockito.any(InputStream.class)))
            .thenReturn(inputStream);

    Mockito.when(codec.createOutputStream(Mockito.any(OutputStream.class)))
            .thenReturn(outputStream);
}
 
Example #11
Source File: AbstractFileOutputOperatorTest.java    From attic-apex-malhar with Apache License 2.0
@Test
public void testSnappyCompressionSimple() throws IOException
{
  if (checkNativeSnappy()) {
    return;
  }

  File snappyFile = new File(testMeta.getDir(), "snappyTestFile.snappy");

  BufferedOutputStream os = new BufferedOutputStream(new FileOutputStream(snappyFile));
  Configuration conf = new Configuration();
  CompressionCodec codec = (CompressionCodec)ReflectionUtils.newInstance(SnappyCodec.class, conf);
  FilterStreamCodec.SnappyFilterStream filterStream = new FilterStreamCodec.SnappyFilterStream(
      codec.createOutputStream(os));

  int ONE_MB = 1024 * 1024;

  String testStr = "TestSnap-16bytes";
  for (int i = 0; i < ONE_MB; i++) { // write 16 MBs
    filterStream.write(testStr.getBytes());
  }
  filterStream.flush();
  filterStream.close();

  CompressionInputStream is = codec.createInputStream(new FileInputStream(snappyFile));

  byte[] recovered = new byte[testStr.length()];
  int bytesRead = is.read(recovered);
  is.close();
  assertEquals(testStr, new String(recovered));
}
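One caveat in the read-back above: a single InputStream.read(byte[]) call may legally return fewer bytes than the buffer holds. A more defensive variant (a sketch, reusing the test's is and recovered variables) would loop until the buffer is full or the stream ends:

int off = 0;
while (off < recovered.length) {
    int n = is.read(recovered, off, recovered.length - off);
    if (n < 0) {
        break; // end of stream before the buffer was filled
    }
    off += n;
}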
 
Example #12
Source File: MainframeVBRecordReader.java    From Cobol-to-Hive with Apache License 2.0
public void initialize(Configuration job, long splitStart,
		long splitLength, Path file) throws IOException {

	start = splitStart;
	end = start + splitLength;
	LOG.info("Start of the split:" + start + "-End of split:" + end);
	LOG.debug("VLR initialize started: start pos:" + start + "endpos:"
			+ end);

	// open the file and seek to the start of the split
	final FileSystem fs = file.getFileSystem(job);
	fileIn = fs.open(file);

	CompressionCodec codec = new CompressionCodecFactory(job)
			.getCodec(file);
	if (null != codec) {
		isCompressedInput = true;
		decompressor = CodecPool.getDecompressor(codec);
		CompressionInputStream cIn = codec.createInputStream(fileIn,
				decompressor);
		filePosition = (Seekable) cIn;
		inputStream = cIn;
		LOG.info("Compressed input; cannot compute number of records in the split");
	} else {
		fileIn.seek(start);
		filePosition = fileIn;
		inputStream = fileIn;
		numBytesRemainingInSplit = splitLength;
		LOG.info("Variable length input; cannot compute number of records in the split");

	}
	this.pos = start;
}
 
Example #13
Source File: Compression.java    From hbase with Apache License 2.0
public InputStream createDecompressionStream(
    InputStream downStream, Decompressor decompressor,
    int downStreamBufferSize) throws IOException {
  CompressionCodec codec = getCodec(conf);
  // Set the internal buffer size to read from down stream.
  if (downStreamBufferSize > 0) {
    ((Configurable)codec).getConf().setInt("io.file.buffer.size",
        downStreamBufferSize);
  }
  CompressionInputStream cis =
      codec.createInputStream(downStream, decompressor);
  BufferedInputStream bis2 = new BufferedInputStream(cis, DATA_IBUF_SIZE);
  return bis2;

}
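The cast to Configurable lets the caller set io.file.buffer.size, the buffer size Hadoop codecs use when reading from the downstream stream. The synchronized Compression.java variants further down follow the same pattern.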
 
Example #14
Source File: BGZFEnhancedGzipCodec.java    From Hadoop-BAM with MIT License
@Override
public SplitCompressionInputStream createInputStream(InputStream seekableIn, Decompressor decompressor, long start, long end, READ_MODE readMode) throws IOException {
  if (!(seekableIn instanceof Seekable)) {
    throw new IOException("seekableIn must be an instance of " +
        Seekable.class.getName());
  }
  if (!BlockCompressedInputStream.isValidFile(new BufferedInputStream(seekableIn))) {
    // data is regular gzip, not BGZF
    ((Seekable)seekableIn).seek(0);
    final CompressionInputStream compressionInputStream = createInputStream(seekableIn,
        decompressor);
    return new SplitCompressionInputStream(compressionInputStream, start, end) {
      @Override
      public int read(byte[] b, int off, int len) throws IOException {
        return compressionInputStream.read(b, off, len);
      }
      @Override
      public void resetState() throws IOException {
        compressionInputStream.resetState();
      }
      @Override
      public int read() throws IOException {
        return compressionInputStream.read();
      }
    };
  }
  BGZFSplitGuesser splitGuesser = new BGZFSplitGuesser(seekableIn);
  long adjustedStart = splitGuesser.guessNextBGZFBlockStart(start, end);
  ((Seekable)seekableIn).seek(adjustedStart);
  return new BGZFSplitCompressionInputStream(seekableIn, adjustedStart, end);
}
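The codec first checks whether the stream is BGZF, the blocked gzip variant used by BAM files, whose block boundaries make splitting possible. Plain gzip cannot be split, so in that case the whole range is served through an anonymous SplitCompressionInputStream that simply delegates to a regular decompression stream; for BGZF, a BGZFSplitGuesser finds the next block boundary at or after the requested start offset.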
 
Example #15
Source File: Compression.java    From hadoop-gpu with Apache License 2.0
@Override
public synchronized InputStream createDecompressionStream(
    InputStream downStream, Decompressor decompressor,
    int downStreamBufferSize) throws IOException {
  // Set the internal buffer size to read from down stream.
  if (downStreamBufferSize > 0) {
    codec.getConf().setInt("io.file.buffer.size", downStreamBufferSize);
  }
  CompressionInputStream cis =
      codec.createInputStream(downStream, decompressor);
  BufferedInputStream bis2 = new BufferedInputStream(cis, DATA_IBUF_SIZE);
  return bis2;
}
 
Example #16
Source File: RasterWritable.java    From mrgeo with Apache License 2.0
public static MrGeoRaster toMrGeoRaster(RasterWritable writable,
    CompressionCodec codec, Decompressor decompressor) throws IOException
{
  decompressor.reset();
  ByteArrayInputStream bis = new ByteArrayInputStream(writable.bytes, 0, writable.getSize());
  CompressionInputStream gis = codec.createInputStream(bis, decompressor);
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  IOUtils.copyBytes(gis, baos, 1024 * 1024 * 2, true);

  return toMrGeoRaster(new RasterWritable(baos.toByteArray()));
}
 
Example #17
Source File: SnappyFramedCodec.java    From kangaroo with Apache License 2.0
/**
 * {@inheritDoc}
 */
@Override
public CompressionInputStream createInputStream(final InputStream in, final Decompressor decompressor)
        throws IOException {
    LOG.debug("Creating decompressor stream");
    return new IStreamDelegatingDecompressorStream(new SnappyFramedInputStream(in));
}
 
Example #18
Source File: Compression.java    From RDFS with Apache License 2.0
@Override
public synchronized InputStream createDecompressionStream(
    InputStream downStream, Decompressor decompressor,
    int downStreamBufferSize) throws IOException {
  // Set the internal buffer size to read from down stream.
  if (downStreamBufferSize > 0) {
    codec.getConf().setInt("io.file.buffer.size", downStreamBufferSize);
  }
  CompressionInputStream cis =
      codec.createInputStream(downStream, decompressor);
  BufferedInputStream bis2 = new BufferedInputStream(cis, DATA_IBUF_SIZE);
  return bis2;
}
 
Example #19
Source File: TextInput.java    From Bats with Apache License 2.0
/**
 * Creates a new instance with the mandatory characters for handling newlines
 * transparently. The line separator is the sequence of characters that
 * represents a newline, as defined in {@link Format#getLineSeparator()};
 * the normalized line separator is the normalized newline character (as
 * defined in {@link Format#getNormalizedNewline()}) that is used to replace
 * any line separator sequence found in the input.
 */
public TextInput(TextParsingSettings settings, InputStream input, DrillBuf readBuffer, long startPos, long endPos) {
  this.lineSeparator = settings.getNewLineDelimiter();
  byte normalizedLineSeparator = settings.getNormalizedNewLine();
  Preconditions.checkArgument(input instanceof Seekable, "Text input only supports an InputStream that supports Seekable.");
  boolean isCompressed = input instanceof CompressionInputStream;
  Preconditions.checkArgument(!isCompressed || startPos == 0, "Cannot use split on compressed stream.");

  // splits aren't allowed with compressed data.  The split length will be the compressed size which means we'll normally end prematurely.
  if (isCompressed && endPos > 0) {
    endPos = Long.MAX_VALUE;
  }

  this.input = input;
  this.seekable = (Seekable) input;
  this.settings = settings;

  if (input instanceof FSDataInputStream) {
    this.inputFS = (FSDataInputStream) input;
    this.bufferReadable = inputFS.getWrappedStream() instanceof ByteBufferReadable;
  } else {
    this.inputFS = null;
    this.bufferReadable = false;
  }

  this.startPos = startPos;
  this.endPos = endPos;

  this.normalizedLineSeparator = normalizedLineSeparator;

  this.buffer = readBuffer;
  this.bStart = buffer.memoryAddress();
  this.bStartMinus1 = bStart - 1;
  this.underlyingBuffer = buffer.nioBuffer(0, buffer.capacity());
}
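Because a CompressionInputStream reports positions in the compressed stream, split offsets are meaningless once the data is compressed: the constructor therefore rejects a non-zero start position for compressed input and widens endPos to Long.MAX_VALUE so the reader consumes the stream to its end.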
 
Example #20
Source File: HadoopDecompressor.java    From presto with Apache License 2.0
@Override
public void decompress(Slice compressed, Slice uncompressed)
        throws RcFileCorruptionException
{
    checkState(!destroyed, "Codec has been destroyed");
    decompressor.reset();
    try (CompressionInputStream decompressorStream = codec.createInputStream(compressed.getInput(), decompressor)) {
        uncompressed.setBytes(0, decompressorStream, uncompressed.length());
    }
    catch (IndexOutOfBoundsException | IOException e) {
        throw new RcFileCorruptionException(e, "Compressed stream is truncated");
    }
}
 
Example #21
Source File: AircompressorDecompressor.java    From presto with Apache License 2.0
@Override
public void decompress(Slice compressed, Slice uncompressed)
        throws RcFileCorruptionException
{
    try (CompressionInputStream decompressorStream = codec.createInputStream(compressed.getInput())) {
        uncompressed.setBytes(0, decompressorStream, uncompressed.length());
    }
    catch (IndexOutOfBoundsException | IOException e) {
        throw new RcFileCorruptionException(e, "Compressed stream is truncated");
    }
}
 
Example #22
Source File: Compression.java    From big-c with Apache License 2.0
@Override
public synchronized InputStream createDecompressionStream(
    InputStream downStream, Decompressor decompressor,
    int downStreamBufferSize) throws IOException {
  // Set the internal buffer size to read from down stream.
  if (downStreamBufferSize > 0) {
    codec.getConf().setInt("io.file.buffer.size", downStreamBufferSize);
  }
  CompressionInputStream cis =
      codec.createInputStream(downStream, decompressor);
  BufferedInputStream bis2 = new BufferedInputStream(cis, DATA_IBUF_SIZE);
  return bis2;
}
 
Example #23
Source File: Compression.java    From hadoop with Apache License 2.0
@Override
public synchronized InputStream createDecompressionStream(
    InputStream downStream, Decompressor decompressor,
    int downStreamBufferSize) throws IOException {
  // Set the internal buffer size to read from down stream.
  if (downStreamBufferSize > 0) {
    codec.getConf().setInt("io.file.buffer.size", downStreamBufferSize);
  }
  CompressionInputStream cis =
      codec.createInputStream(downStream, decompressor);
  BufferedInputStream bis2 = new BufferedInputStream(cis, DATA_IBUF_SIZE);
  return bis2;
}
 
Example #24
Source File: TestSnappyCodec.java    From parquet-mr with Apache License 2.0
@Test
public void TestSnappyStream() throws IOException {
  SnappyCodec codec = new SnappyCodec();
  codec.setConf(new Configuration());
  
  int blockSize = 1024;
  int inputSize = blockSize * 1024;
 
  byte[] input = new byte[inputSize];
  for (int i = 0; i < inputSize; ++i) {
    input[i] = (byte)i;
  }

  ByteArrayOutputStream compressedStream = new ByteArrayOutputStream();
  
  CompressionOutputStream compressor = codec.createOutputStream(compressedStream);
  int bytesCompressed = 0;
  while (bytesCompressed < inputSize) {
    int len = Math.min(inputSize - bytesCompressed, blockSize);
    compressor.write(input, bytesCompressed, len);
    bytesCompressed += len;
  }
  compressor.finish();
  
  byte[] rawCompressed = Snappy.compress(input);
  byte[] codecCompressed = compressedStream.toByteArray();
  
  // Validate that the result from the codec is the same as if we compressed the 
  // buffer directly.
  assertArrayEquals(rawCompressed, codecCompressed);

  ByteArrayInputStream inputStream = new ByteArrayInputStream(codecCompressed);    
  CompressionInputStream decompressor = codec.createInputStream(inputStream);
  byte[] codecDecompressed = new byte[inputSize];
  int bytesDecompressed = 0;
  int numBytes;
  while ((numBytes = decompressor.read(codecDecompressed, bytesDecompressed, blockSize)) != 0) {
    bytesDecompressed += numBytes;
    if (bytesDecompressed == inputSize) break;
  }
  
  byte[] rawDecompressed = Snappy.uncompress(rawCompressed);
  
  assertArrayEquals(input, rawDecompressed);
  assertArrayEquals(input, codecDecompressed);
}
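A subtle point in the decompression loop: CompressionInputStream.read returns -1 at end of stream, which the != 0 condition alone would never treat as termination, so the loop actually relies on the inner break once all input bytes have been recovered.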
 
Example #25
Source File: TestShuffleUtils.java    From tez with Apache License 2.0
@Override
public CompressionInputStream createInputStream(InputStream arg0, Decompressor arg1)
    throws IOException {
  return null;
}
 
Example #26
Source File: TestShuffleUtils.java    From tez with Apache License 2.0
@Override
public CompressionInputStream createInputStream(InputStream arg0) throws IOException {
  return null;
}
 
Example #27
Source File: NotSoNiceCodec.java    From pxf with Apache License 2.0
@Override
public CompressionInputStream createInputStream(InputStream in)
        throws IOException {
    return null;
}
 
Example #28
Source File: NotSoNiceCodec.java    From pxf with Apache License 2.0
@Override
public CompressionInputStream createInputStream(InputStream in,
                                                Decompressor decompressor) throws IOException {
    return null;
}
 
Example #29
Source File: NoneCodec.java    From sylph with Apache License 2.0
@Override
public CompressionInputStream createInputStream(InputStream in)
        throws IOException
{
    return null;
}
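The stub implementations above return null deliberately: the tez overrides appear to belong to the ConfigurableCodecForTest helper mocked in the earlier shuffle test, the pxf NotSoNiceCodec appears to play a misbehaving codec in tests, and sylph's NoneCodec presumably stands for "no compression", so its compression streams are never meant to be created.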