org.apache.hadoop.io.compress.Compressor Java Examples

The following examples show how to use org.apache.hadoop.io.compress.Compressor. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: Compression.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the output stream chain used to write compressed data to
 * {@code downStream}.
 *
 * @param downStream the stream that receives the compressed bytes
 * @param compressor the compressor handed to the codec's output stream
 * @param downStreamBufferSize size of the buffer placed in front of
 *          {@code downStream}; no buffer is added when it is not positive
 * @return a buffered stream of {@code DATA_OBUF_SIZE} bytes wrapping a
 *         {@code FinishOnFlushCompressionStream} over the codec's stream
 * @throws IOException declared by the codec's stream creation
 */
@Override
public synchronized OutputStream createCompressionStream(
    OutputStream downStream, Compressor compressor,
    int downStreamBufferSize) throws IOException {
  // Only buffer the raw sink when the caller asked for a positive size.
  final OutputStream sink = downStreamBufferSize > 0
      ? new BufferedOutputStream(downStream, downStreamBufferSize)
      : downStream;
  // The codec configuration is updated before the codec stream is created.
  codec.getConf().setInt("io.file.buffer.size", 32 * 1024);
  final CompressionOutputStream compressed =
      codec.createOutputStream(sink, compressor);
  return new BufferedOutputStream(
      new FinishOnFlushCompressionStream(compressed), DATA_OBUF_SIZE);
}
 
Example #2
Source File: TestZlibCompressorDecompressor.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the setDictionary null-pointer and array-index checks against the
 * zlib compressor and decompressor when native zlib is loaded; otherwise
 * asserts on the native-library check itself.
 */
@Test
public void testZlibCompressorDecompressorSetDictionary() {
  final Configuration conf = new Configuration();
  conf.setBoolean(CommonConfigurationKeys.IO_NATIVE_LIB_AVAILABLE_KEY, true);

  if (!ZlibFactory.isNativeZlibLoaded(conf)) {
    // Native zlib was requested but is not reported as loaded: assert on
    // the same check so the message below is reported.
    assertTrue("ZlibFactory is using native libs against request",
        ZlibFactory.isNativeZlibLoaded(conf));
    return;
  }

  final Compressor zlibCompressor = ZlibFactory.getZlibCompressor(conf);
  final Decompressor zlibDecompressor = ZlibFactory.getZlibDecompressor(conf);

  checkSetDictionaryNullPointerException(zlibCompressor);
  checkSetDictionaryNullPointerException(zlibDecompressor);

  checkSetDictionaryArrayIndexOutOfBoundsException(zlibDecompressor);
  checkSetDictionaryArrayIndexOutOfBoundsException(zlibCompressor);
}
 
Example #3
Source File: Anonymizer.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Opens {@code path} for writing and returns a pretty-printing, UTF-8 JSON
 * generator on top of it. When a compression codec is resolved for the path,
 * the file stream is wrapped in that codec's output stream.
 *
 * @param conf configuration used to resolve the file system and the codec
 * @param path the file to create
 * @return a JSON generator writing to the (possibly compressed) file
 * @throws IOException declared by the file-system and codec calls
 */
private JsonGenerator createJsonGenerator(Configuration conf, Path path) 
throws IOException {
  final FileSystem outFS = path.getFileSystem(conf);
  final CompressionCodec codec =
    new CompressionCodecFactory(conf).getCodec(path);

  final OutputStream output;
  if (codec == null) {
    // No codec matched the path: write the file uncompressed.
    output = outFS.create(path);
  } else {
    // NOTE(review): the compressor taken from CodecPool here is not handed
    // back to the pool anywhere in this method.
    final Compressor pooled = CodecPool.getCompressor(codec);
    output = codec.createOutputStream(outFS.create(path), pooled);
  }

  final JsonGenerator generator =
    outFactory.createJsonGenerator(output, JsonEncoding.UTF8);
  generator.useDefaultPrettyPrinter();
  return generator;
}
 
Example #4
Source File: Compression.java    From RDFS with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the LZO compression stream chain on top of {@code downStream}.
 *
 * @param downStream the stream that receives the compressed bytes
 * @param compressor the compressor handed to the codec's output stream
 * @param downStreamBufferSize size of the buffer placed in front of
 *          {@code downStream}; no buffer is added when it is not positive
 * @return a buffered stream of {@code DATA_OBUF_SIZE} bytes wrapping a
 *         {@code FinishOnFlushCompressionStream} over the codec's stream
 * @throws IOException if {@code isSupported()} is false, or as declared by
 *           the codec's stream creation
 */
@Override
public synchronized OutputStream createCompressionStream(
    OutputStream downStream, Compressor compressor,
    int downStreamBufferSize) throws IOException {
  if (!isSupported()) {
    throw new IOException(
        "LZO codec class not specified. Did you forget to set property "
            + CONF_LZO_CLASS + "?");
  }

  final OutputStream sink;
  if (downStreamBufferSize <= 0) {
    sink = downStream;
  } else {
    sink = new BufferedOutputStream(downStream, downStreamBufferSize);
  }

  // The buffer-size property is written before the codec stream is created.
  conf.setInt("io.compression.codec.lzo.buffersize", 64 * 1024);
  final CompressionOutputStream lzoStream =
      codec.createOutputStream(sink, compressor);
  return new BufferedOutputStream(
      new FinishOnFlushCompressionStream(lzoStream), DATA_OBUF_SIZE);
}
 
Example #5
Source File: Compression.java    From hadoop-gpu with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the compressing output stream that writes into {@code downStream}.
 *
 * @param downStream the stream that receives the compressed bytes
 * @param compressor the compressor passed to {@code codec.createOutputStream}
 * @param downStreamBufferSize when positive, {@code downStream} is first
 *          wrapped in a buffer of this size
 * @return a {@code DATA_OBUF_SIZE}-byte buffered stream over a
 *         {@code FinishOnFlushCompressionStream}
 * @throws IOException declared by the codec's stream creation
 */
@Override
public synchronized OutputStream createCompressionStream(
    OutputStream downStream, Compressor compressor,
    int downStreamBufferSize) throws IOException {
  OutputStream target = downStream;
  if (downStreamBufferSize > 0) {
    target = new BufferedOutputStream(downStream, downStreamBufferSize);
  }

  codec.getConf().setInt("io.file.buffer.size", 32 * 1024);

  CompressionOutputStream codecStream =
      codec.createOutputStream(target, compressor);
  OutputStream finishing = new FinishOnFlushCompressionStream(codecStream);
  return new BufferedOutputStream(finishing, DATA_OBUF_SIZE);
}
 
Example #6
Source File: Compression.java    From hadoop-gpu with Apache License 2.0 6 votes vote down vote up
/**
 * Borrows a compressor for this algorithm's codec from {@code CodecPool}.
 *
 * @return the compressor obtained from the pool after it has been reset, or
 *         {@code null} when {@code getCodec()} or the pool returns
 *         {@code null}
 * @throws IOException declared for callers; see {@code getCodec()}
 */
public Compressor getCompressor() throws IOException {
  CompressionCodec codec = getCodec();
  if (codec == null) {
    return null;
  }

  Compressor compressor = CodecPool.getCompressor(codec);
  if (compressor != null) {
    if (compressor.finished()) {
      // Somebody returns the compressor to CodecPool but is still using
      // it.
      LOG.warn("Compressor obtained from CodecPool already finished()");
    } else if (LOG.isDebugEnabled()) {
      // Guarded so the message is only built when debug logging is on.
      LOG.debug("Got a compressor: " + compressor.hashCode());
    }
    /**
     * Following statement is necessary to get around bugs in 0.18 where a
     * compressor is referenced after returned back to the codec pool.
     */
    compressor.reset();
  }
  return compressor;
}
 
Example #7
Source File: Compression.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Assembles the stream stack used for compressed writes to
 * {@code downStream}: optional buffer, codec stream,
 * {@code FinishOnFlushCompressionStream}, and an outer buffer of
 * {@code DATA_OBUF_SIZE} bytes.
 *
 * @param downStream the stream that receives the compressed bytes
 * @param compressor the compressor handed to the codec's output stream
 * @param downStreamBufferSize buffer size for {@code downStream}; values
 *          that are not positive disable that buffer
 * @return the outermost buffered stream
 * @throws IOException declared by the codec's stream creation
 */
@Override
public synchronized OutputStream createCompressionStream(
    OutputStream downStream, Compressor compressor,
    int downStreamBufferSize) throws IOException {
  final boolean bufferDownStream = downStreamBufferSize > 0;
  final OutputStream inner = bufferDownStream
      ? new BufferedOutputStream(downStream, downStreamBufferSize)
      : downStream;

  codec.getConf().setInt("io.file.buffer.size", 32 * 1024);

  final CompressionOutputStream compressing =
      codec.createOutputStream(inner, compressor);
  final BufferedOutputStream outer = new BufferedOutputStream(
      new FinishOnFlushCompressionStream(compressing), DATA_OBUF_SIZE);
  return outer;
}
 
Example #8
Source File: Compression.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Borrows a compressor for this algorithm's codec from {@code CodecPool}.
 *
 * @return the pooled compressor after {@code reset()}, or {@code null} when
 *         either {@code getCodec()} or the pool yields {@code null}
 * @throws IOException declared for callers; see {@code getCodec()}
 */
public Compressor getCompressor() throws IOException {
  final CompressionCodec codec = getCodec();
  if (codec == null) {
    return null;
  }

  final Compressor pooled = CodecPool.getCompressor(codec);
  if (pooled == null) {
    return pooled;
  }

  if (pooled.finished()) {
    // Somebody returns the compressor to CodecPool but is still using
    // it.
    LOG.warn("Compressor obtained from CodecPool already finished()");
  } else if (LOG.isDebugEnabled()) {
    LOG.debug("Got a compressor: " + pooled.hashCode());
  }
  // Following statement is necessary to get around bugs in 0.18 where a
  // compressor is referenced after returned back to the codec pool.
  pooled.reset();
  return pooled;
}
 
Example #9
Source File: TestZlibCompressorDecompressor.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Round-trips generated data through the zlib compressor/decompressor
 * several times and then re-initialises the compressor with the
 * configuration. Any exception is turned into a test failure. When native
 * zlib is not loaded the test asserts on the native-library check instead.
 */
@Test
public void testZlibCompressorDecompressorWithConfiguration() {
  final Configuration conf = new Configuration();
  conf.setBoolean(CommonConfigurationKeys.IO_NATIVE_LIB_AVAILABLE_KEY, true);

  if (!ZlibFactory.isNativeZlibLoaded(conf)) {
    assertTrue("ZlibFactory is using native libs against request",
        ZlibFactory.isNativeZlibLoaded(conf));
    return;
  }

  final int rounds = 5;
  final int payloadSize = 10 * 1024;
  final Compressor zlibCompressor = ZlibFactory.getZlibCompressor(conf);
  final Decompressor zlibDecompressor = ZlibFactory.getZlibDecompressor(conf);
  final byte[] rawData = generate(payloadSize);
  try {
    // The casts stay inside the try so a ClassCastException is reported
    // through fail() like any other exception.
    final ZlibCompressor nativeCompressor = (ZlibCompressor) zlibCompressor;
    final ZlibDecompressor nativeDecompressor =
        (ZlibDecompressor) zlibDecompressor;
    for (int round = 0; round < rounds; round++) {
      compressDecompressZlib(rawData, nativeCompressor, nativeDecompressor);
    }
    zlibCompressor.reinit(conf);
  } catch (Exception ex) {
    fail("testZlibCompressorDecompressorWithConfiguration ex error " + ex);
  }
}
 
Example #10
Source File: TestZlibCompressorDecompressor.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Exercises the setDictionary argument checks (null-pointer and
 * array-index helpers) on both the zlib compressor and decompressor. If
 * native zlib is not reported as loaded, the test asserts on that check.
 */
@Test
public void testZlibCompressorDecompressorSetDictionary() {
  Configuration conf = new Configuration();
  conf.setBoolean(CommonConfigurationKeys.IO_NATIVE_LIB_AVAILABLE_KEY, true);

  final boolean nativeLoaded = ZlibFactory.isNativeZlibLoaded(conf);
  if (nativeLoaded) {
    Compressor compressorUnderTest = ZlibFactory.getZlibCompressor(conf);
    Decompressor decompressorUnderTest = ZlibFactory.getZlibDecompressor(conf);

    checkSetDictionaryNullPointerException(compressorUnderTest);
    checkSetDictionaryNullPointerException(decompressorUnderTest);

    checkSetDictionaryArrayIndexOutOfBoundsException(decompressorUnderTest);
    checkSetDictionaryArrayIndexOutOfBoundsException(compressorUnderTest);
    return;
  }

  // Native zlib was requested above; the check is evaluated again inside
  // the assertion, as in the loaded-path test above it.
  assertTrue("ZlibFactory is using native libs against request",
      ZlibFactory.isNativeZlibLoaded(conf));
}
 
Example #11
Source File: Compression.java    From hbase with Apache License 2.0 6 votes vote down vote up
/**
 * Borrows a compressor for this algorithm's codec from {@code CodecPool}
 * and resets it before handing it out.
 *
 * @return the reset compressor, or {@code null} when {@code getCodec(conf)}
 *         or the pool returns {@code null}
 */
public Compressor getCompressor() {
  final CompressionCodec codec = getCodec(conf);
  if (codec == null) {
    return null;
  }

  final Compressor pooled = CodecPool.getCompressor(codec);
  if (LOG.isTraceEnabled()) {
    LOG.trace("Retrieved compressor " + pooled + " from pool.");
  }
  if (pooled == null) {
    return pooled;
  }

  if (pooled.finished()) {
    // Somebody returns the compressor to CodecPool but is still using it.
    LOG.warn("Compressor obtained from CodecPool is already finished()");
  }
  pooled.reset();
  return pooled;
}
 
Example #12
Source File: Compression.java    From hbase with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a buffered compression stream on top of {@code downStream} using
 * {@code createPlainCompressionStream}.
 *
 * @param downStream the stream that receives the compressed bytes
 * @param compressor the compressor passed on to the plain compression stream
 * @param downStreamBufferSize when positive, {@code downStream} is wrapped
 *          in a buffer of this size first
 * @return a {@code DATA_OBUF_SIZE}-byte buffered stream over a
 *         {@code FinishOnFlushCompressionStream}
 * @throws IOException declared by {@code createPlainCompressionStream}
 */
public OutputStream createCompressionStream(
    OutputStream downStream, Compressor compressor, int downStreamBufferSize)
    throws IOException {
  final OutputStream sink = downStreamBufferSize > 0
      ? new BufferedOutputStream(downStream, downStreamBufferSize)
      : downStream;
  final CompressionOutputStream plain =
      createPlainCompressionStream(sink, compressor);
  return new BufferedOutputStream(
      new FinishOnFlushCompressionStream(plain), DATA_OBUF_SIZE);
}
 
Example #13
Source File: Anonymizer.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the file at {@code path} and returns a UTF-8 JSON generator with
 * the default pretty printer writing to it. If a codec is found for the
 * path, output goes through that codec's compression stream.
 *
 * @param conf configuration used for the file system and codec lookup
 * @param path the output file
 * @return the configured JSON generator
 * @throws IOException declared by the file-system and codec calls
 */
private JsonGenerator createJsonGenerator(Configuration conf, Path path) 
throws IOException {
  FileSystem outFS = path.getFileSystem(conf);
  CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);
  CompressionCodec codec = codecFactory.getCodec(path);

  OutputStream output;
  if (codec != null) {
    // NOTE(review): this compressor is borrowed from CodecPool and never
    // returned within this method.
    Compressor borrowed = CodecPool.getCompressor(codec);
    output = codec.createOutputStream(outFS.create(path), borrowed);
  } else {
    output = outFS.create(path);
  }

  JsonGenerator jsonOut =
    outFactory.createJsonGenerator(output, JsonEncoding.UTF8);
  jsonOut.useDefaultPrettyPrinter();
  return jsonOut;
}
 
Example #14
Source File: Compression.java    From RDFS with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the LZMA compression stream chain on top of {@code downStream}.
 *
 * @param downStream the stream that receives the compressed bytes
 * @param compressor the compressor handed to the codec's output stream
 * @param downStreamBufferSize size of the buffer placed in front of
 *          {@code downStream}; no buffer is added when it is not positive
 * @return a buffered stream of {@code DATA_OBUF_SIZE} bytes wrapping a
 *         {@code FinishOnFlushCompressionStream} over the codec's stream
 * @throws IOException if {@code isSupported()} is false, or as declared by
 *           the codec's stream creation
 */
@Override
public synchronized OutputStream createCompressionStream(
    OutputStream downStream, Compressor compressor,
    int downStreamBufferSize) throws IOException {
  if (!isSupported()) {
    throw new IOException(
        "LZMA codec cannot be loaded. " +
        "You may want to check LD_LIBRARY_PATH.");
  }

  final OutputStream sink = downStreamBufferSize > 0
      ? new BufferedOutputStream(downStream, downStreamBufferSize)
      : downStream;

  // The buffer-size property is written before the codec stream is created.
  conf.setInt("io.compression.codec.lzma.buffersize", 64 * 1024);
  final CompressionOutputStream lzmaStream =
      codec.createOutputStream(sink, compressor);
  return new BufferedOutputStream(
      new FinishOnFlushCompressionStream(lzmaStream), DATA_OBUF_SIZE);
}
 
Example #15
Source File: Compression.java    From RDFS with Apache License 2.0 6 votes vote down vote up
/**
 * Returns a stream that compresses everything written to it into
 * {@code downStream}.
 *
 * @param downStream the stream that receives the compressed bytes
 * @param compressor the compressor passed to {@code codec.createOutputStream}
 * @param downStreamBufferSize buffer size for {@code downStream}; the
 *          stream is used unbuffered when this is not positive
 * @return a {@code DATA_OBUF_SIZE}-byte buffered stream over a
 *         {@code FinishOnFlushCompressionStream}
 * @throws IOException declared by the codec's stream creation
 */
@Override
public synchronized OutputStream createCompressionStream(
    OutputStream downStream, Compressor compressor,
    int downStreamBufferSize) throws IOException {
  final OutputStream rawSink;
  if (downStreamBufferSize <= 0) {
    rawSink = downStream;
  } else {
    rawSink = new BufferedOutputStream(downStream, downStreamBufferSize);
  }

  codec.getConf().setInt("io.file.buffer.size", 32 * 1024);

  final CompressionOutputStream codecOut =
      codec.createOutputStream(rawSink, compressor);
  return new BufferedOutputStream(
      new FinishOnFlushCompressionStream(codecOut), DATA_OBUF_SIZE);
}
 
Example #16
Source File: Compression.java    From hadoop-gpu with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the LZO compressing stream stack over {@code downStream}.
 *
 * @param downStream the stream that receives the compressed bytes
 * @param compressor the compressor passed to {@code codec.createOutputStream}
 * @param downStreamBufferSize when positive, {@code downStream} is wrapped
 *          in a buffer of this size first
 * @return a {@code DATA_OBUF_SIZE}-byte buffered stream over a
 *         {@code FinishOnFlushCompressionStream}
 * @throws IOException when {@code isSupported()} reports false, or as
 *           declared by the codec's stream creation
 */
@Override
public synchronized OutputStream createCompressionStream(
    OutputStream downStream, Compressor compressor,
    int downStreamBufferSize) throws IOException {
  final boolean supported = isSupported();
  if (!supported) {
    throw new IOException(
        "LZO codec class not specified. Did you forget to set property "
            + CONF_LZO_CLASS + "?");
  }

  OutputStream target = downStream;
  if (downStreamBufferSize > 0) {
    target = new BufferedOutputStream(downStream, downStreamBufferSize);
  }

  conf.setInt("io.compression.codec.lzo.buffersize", 64 * 1024);

  CompressionOutputStream codecStream =
      codec.createOutputStream(target, compressor);
  OutputStream finishing = new FinishOnFlushCompressionStream(codecStream);
  return new BufferedOutputStream(finishing, DATA_OBUF_SIZE);
}
 
Example #17
Source File: Compression.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Assembles the LZO stream stack for compressed writes to
 * {@code downStream}: optional buffer, codec stream,
 * {@code FinishOnFlushCompressionStream}, and an outer buffer of
 * {@code DATA_OBUF_SIZE} bytes.
 *
 * @param downStream the stream that receives the compressed bytes
 * @param compressor the compressor handed to the codec's output stream
 * @param downStreamBufferSize buffer size for {@code downStream}; values
 *          that are not positive disable that buffer
 * @return the outermost buffered stream
 * @throws IOException if {@code isSupported()} is false, or as declared by
 *           the codec's stream creation
 */
@Override
public synchronized OutputStream createCompressionStream(
    OutputStream downStream, Compressor compressor,
    int downStreamBufferSize) throws IOException {
  if (!isSupported()) {
    throw new IOException(
        "LZO codec class not specified. Did you forget to set property "
            + CONF_LZO_CLASS + "?");
  }

  final boolean bufferDownStream = downStreamBufferSize > 0;
  final OutputStream inner = bufferDownStream
      ? new BufferedOutputStream(downStream, downStreamBufferSize)
      : downStream;

  conf.setInt("io.compression.codec.lzo.buffersize", 64 * 1024);

  final CompressionOutputStream compressing =
      codec.createOutputStream(inner, compressor);
  final BufferedOutputStream outer = new BufferedOutputStream(
      new FinishOnFlushCompressionStream(compressing), DATA_OBUF_SIZE);
  return outer;
}
 
Example #18
Source File: Compression.java    From RDFS with Apache License 2.0 5 votes vote down vote up
/**
 * Pass-through variant: no compression stream is created and
 * {@code compressor} is not used.
 *
 * @param downStream the stream to write to
 * @param compressor unused by this implementation
 * @param downStreamBufferSize when positive, the size of the buffer wrapped
 *          around {@code downStream}
 * @return {@code downStream} itself, or a buffered wrapper around it
 * @throws IOException declared by the overridden signature
 */
@Override
public synchronized OutputStream createCompressionStream(
    OutputStream downStream, Compressor compressor,
    int downStreamBufferSize) throws IOException {
  return downStreamBufferSize > 0
      ? new BufferedOutputStream(downStream, downStreamBufferSize)
      : downStream;
}
 
Example #19
Source File: CompressorStream.java    From RDFS with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a compressor stream over {@code out} with an internal buffer of
 * {@code bufferSize} bytes.
 *
 * @param out the underlying output stream
 * @param compressor the compressor stored on this stream
 * @param bufferSize size of the internal byte buffer; must be positive
 * @throws NullPointerException if {@code out} or {@code compressor} is null
 * @throws IllegalArgumentException if {@code bufferSize} is not positive
 */
public CompressorStream(OutputStream out, Compressor compressor, int bufferSize) {
  super(out);

  // Argument checks run after super(out); the null check takes precedence
  // over the buffer-size check.
  if (out == null || compressor == null) {
    throw new NullPointerException();
  }
  if (bufferSize <= 0) {
    throw new IllegalArgumentException("Illegal bufferSize");
  }

  this.compressor = compressor;
  this.buffer = new byte[bufferSize];
}
 
Example #20
Source File: CodecPool.java    From tajo with Apache License 2.0 5 votes vote down vote up
/**
 * Hands a {@link Compressor} back to the pool.
 * <p>
 * Nothing happens for a <code>null</code> compressor or for one whose class
 * carries the {@link DoNotPool} annotation; any other compressor is reset
 * before being paid back to the compressor pool.
 *
 * @param compressor
 *          the <code>Compressor</code> to be returned to the pool
 */
public static void returnCompressor(Compressor compressor) {
  // Short-circuit keeps getClass() from being called on a null reference.
  final boolean notPoolable = compressor == null
      || compressor.getClass().isAnnotationPresent(DoNotPool.class);
  if (notPoolable) {
    return;
  }
  compressor.reset();
  payback(COMPRESSOR_POOL, compressor);
}
 
Example #21
Source File: Compression.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the downstream unchanged or buffered; this implementation never
 * touches {@code compressor} and adds no compression layer.
 *
 * @param downStream the stream to write to
 * @param compressor unused by this implementation
 * @param downStreamBufferSize buffer size; a buffer is added only when this
 *          is positive
 * @return a buffered wrapper around {@code downStream}, or
 *         {@code downStream} itself
 * @throws IOException declared by the overridden signature
 */
@Override
public synchronized OutputStream createCompressionStream(
    OutputStream downStream, Compressor compressor,
    int downStreamBufferSize) throws IOException {
  if (downStreamBufferSize <= 0) {
    return downStream;
  }
  return new BufferedOutputStream(downStream, downStreamBufferSize);
}
 
Example #22
Source File: Compression.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Uncompressed variant: optionally buffers {@code downStream} and returns
 * it. The {@code compressor} argument is ignored.
 *
 * @param downStream the stream to write to
 * @param compressor unused by this implementation
 * @param downStreamBufferSize when positive, the size of the buffer wrapped
 *          around {@code downStream}
 * @return the stream callers should write to
 * @throws IOException declared by the overridden signature
 */
@Override
public synchronized OutputStream createCompressionStream(
    OutputStream downStream, Compressor compressor,
    int downStreamBufferSize) throws IOException {
  OutputStream result = downStream;
  if (downStreamBufferSize > 0) {
    result = new BufferedOutputStream(downStream, downStreamBufferSize);
  }
  return result;
}
 
Example #23
Source File: CodecPool.java    From incubator-tajo with Apache License 2.0 5 votes vote down vote up
/**
 * Obtain a {@link Compressor} for the given {@link CompressionCodec},
 * borrowing one from the pool when available and creating one otherwise.
 * <p>
 * A borrowed compressor is re-initialised with <code>conf</code>; a newly
 * created one comes straight from <code>codec.createCompressor()</code>.
 *
 * @param codec
 *          the <code>CompressionCodec</code> for which to get the
 *          <code>Compressor</code>
 * @param conf the <code>Configuration</code> object passed to
 *          <code>reinit</code> on a recycled compressor
 * @return <code>Compressor</code> for the given <code>CompressionCodec</code>
 *         from the pool or a new one
 */
public static Compressor getCompressor(CompressionCodec codec, Configuration conf) {
  final Compressor recycled = borrow(COMPRESSOR_POOL, codec.getCompressorType());
  if (recycled != null) {
    recycled.reinit(conf);
    if (LOG.isDebugEnabled()) {
      LOG.debug("Got recycled compressor");
    }
    return recycled;
  }

  final Compressor fresh = codec.createCompressor();
  LOG.info("Got brand-new compressor ["+codec.getDefaultExtension()+"]");
  return fresh;
}
 
Example #24
Source File: CodecPool.java    From incubator-tajo with Apache License 2.0 5 votes vote down vote up
/**
 * Give the {@link Compressor} back to the pool.
 * <p>
 * A <code>null</code> argument is ignored, as is a compressor whose class is
 * annotated with {@link DoNotPool}. Otherwise the compressor is reset and
 * then paid back to the compressor pool.
 *
 * @param compressor
 *          the <code>Compressor</code> to be returned to the pool
 */
public static void returnCompressor(Compressor compressor) {
  if (compressor != null
      && !compressor.getClass().isAnnotationPresent(DoNotPool.class)) {
    // Reset happens before the compressor goes back into the pool.
    compressor.reset();
    payback(COMPRESSOR_POOL, compressor);
  }
}
 
Example #25
Source File: Compression.java    From hbase with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the codec's compression stream directly on {@code downStream},
 * without wrapping it in any buffering streams.
 *
 * @param downStream the stream that receives the compressed bytes
 * @param compressor the compressor passed to the codec's output stream
 * @return the stream returned by {@code codec.createOutputStream}
 * @throws IOException declared by the codec's stream creation
 */
public CompressionOutputStream createPlainCompressionStream(
    OutputStream downStream, Compressor compressor) throws IOException {
  final CompressionCodec codec = getCodec(conf);
  // The codec is cast to Configurable so its configuration can be updated
  // before the stream is created.
  final Configurable configurableCodec = (Configurable) codec;
  configurableCodec.getConf().setInt("io.file.buffer.size", 32 * 1024);
  return codec.createOutputStream(downStream, compressor);
}
 
Example #26
Source File: CodecPool.java    From tajo with Apache License 2.0 5 votes vote down vote up
/**
 * Fetch a {@link Compressor} for the given {@link CompressionCodec}: reuse
 * one from the pool if there is one, otherwise ask the codec for a new one.
 * <p>
 * Only a pooled compressor is re-initialised with <code>conf</code>.
 *
 * @param codec
 *          the <code>CompressionCodec</code> for which to get the
 *          <code>Compressor</code>
 * @param conf the <code>Configuration</code> object passed to
 *          <code>reinit</code> on a recycled compressor
 * @return <code>Compressor</code> for the given <code>CompressionCodec</code>
 *         from the pool or a new one
 */
public static Compressor getCompressor(CompressionCodec codec, Configuration conf) {
  Compressor result = borrow(COMPRESSOR_POOL, codec.getCompressorType());
  final boolean poolWasEmpty = result == null;
  if (poolWasEmpty) {
    result = codec.createCompressor();
    LOG.info("Got brand-new compressor ["+codec.getDefaultExtension()+"]");
    return result;
  }

  result.reinit(conf);
  if (LOG.isDebugEnabled()) {
    LOG.debug("Got recycled compressor");
  }
  return result;
}
 
Example #27
Source File: Compression.java    From hbase with Apache License 2.0 5 votes vote down vote up
/**
 * No-compression variant: the result is {@code downStream}, buffered when a
 * positive buffer size is given. {@code compressor} is not used.
 *
 * @param downStream the stream to write to
 * @param compressor unused by this implementation
 * @param downStreamBufferSize buffer size; a buffer is added only when this
 *          is positive
 * @return {@code downStream} or a buffered wrapper around it
 * @throws IOException declared by the overridden signature
 */
@Override
public synchronized OutputStream createCompressionStream(
    OutputStream downStream, Compressor compressor,
    int downStreamBufferSize) throws IOException {
  final boolean wantsBuffer = downStreamBufferSize > 0;
  if (!wantsBuffer) {
    return downStream;
  }
  return new BufferedOutputStream(downStream, downStreamBufferSize);
}
 
Example #28
Source File: Compression.java    From hadoop-gpu with Apache License 2.0 5 votes vote down vote up
/**
 * Returns a stream for uncompressed output; {@code compressor} plays no
 * part in this implementation.
 *
 * @param downStream the stream to write to
 * @param compressor unused by this implementation
 * @param downStreamBufferSize when positive, {@code downStream} is wrapped
 *          in a buffer of this size
 * @return the buffered wrapper, or {@code downStream} when no buffer is
 *         requested
 * @throws IOException declared by the overridden signature
 */
@Override
public synchronized OutputStream createCompressionStream(
    OutputStream downStream, Compressor compressor,
    int downStreamBufferSize) throws IOException {
  final OutputStream out;
  if (downStreamBufferSize > 0) {
    out = new BufferedOutputStream(downStream, downStreamBufferSize);
  } else {
    out = downStream;
  }
  return out;
}
 
Example #29
Source File: CompressorStream.java    From big-c with Apache License 2.0 5 votes vote down vote up
public CompressorStream(OutputStream out, Compressor compressor, int bufferSize) {
  super(out);

  if (out == null || compressor == null) {
    throw new NullPointerException();
  } else if (bufferSize <= 0) {
    throw new IllegalArgumentException("Illegal bufferSize");
  }

  this.compressor = compressor;
  buffer = new byte[bufferSize];
}
 
Example #30
Source File: EncodedDataBlock.java    From hbase with Apache License 2.0 5 votes vote down vote up
/**
 * Find the size of compressed data assuming that buffer will be compressed
 * using given algorithm.
 * <p>
 * The data is compressed into a null sink; the size is the byte count
 * reported by the {@link DataOutputStream} sitting in front of that sink
 * after the compressing stream has been flushed.
 *
 * @param algo compression algorithm
 * @param compressor compressor already requested from codec; it is reset
 *          before use when it is not null
 * @param inputBuffer Array to be compressed.
 * @param offset Offset to beginning of the data.
 * @param length Length to be compressed.
 * @return Size of compressed data in bytes.
 * @throws IOException if creating, writing to, flushing or closing one of
 *           the streams fails
 */
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value="NP_NULL_ON_SOME_PATH_EXCEPTION",
     justification="No sure what findbugs wants but looks to me like no NPE")
public static int getCompressedSize(Algorithm algo, Compressor compressor,
    byte[] inputBuffer, int offset, int length) throws IOException {

  // Create streams
  // Storing them so we can close them
  final IOUtils.NullOutputStream nullOutputStream = new IOUtils.NullOutputStream();
  final DataOutputStream compressedStream = new DataOutputStream(nullOutputStream);
  OutputStream compressingStream = null;

  try {
    if (compressor != null) {
      compressor.reset();
    }

    compressingStream = algo.createCompressionStream(compressedStream, compressor, 0);

    compressingStream.write(inputBuffer, offset, length);
    // Flush so everything written has reached the counting stream.
    compressingStream.flush();

    return compressedStream.size();
  } finally {
    // Close from the outermost stream inwards so a wrapper never flushes
    // into a stream that was already closed, and nest the closes so that a
    // failure in one does not skip the remaining ones.
    try {
      if (compressingStream != null) {
        compressingStream.close();
      }
    } finally {
      try {
        compressedStream.close();
      } finally {
        nullOutputStream.close();
      }
    }
  }
}