Java Code Examples for org.apache.hadoop.io.compress.Compressor

The following examples show how to use org.apache.hadoop.io.compress.Compressor. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: hadoop   Source File: Anonymizer.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Opens {@code path} for writing and wraps it in a pretty-printing UTF-8 JSON
 * generator. When the {@link CompressionCodecFactory} resolves a codec for
 * the path, the output is compressed with a compressor taken from the
 * {@link CodecPool}.
 *
 * <p>If any step fails, the stream opened so far is closed and the borrowed
 * compressor is handed back to the pool before the exception propagates. On
 * success the compressor stays checked out, because the returned generator
 * still writes through it.
 *
 * @param conf configuration used to resolve the file system and the codec
 * @param path destination file; also used to look up the codec
 * @return a generator writing to {@code path} with the default pretty printer
 * @throws IOException if the file or one of the wrapping streams cannot be
 *                     created
 */
private JsonGenerator createJsonGenerator(Configuration conf, Path path) 
throws IOException {
  FileSystem outFS = path.getFileSystem(conf);
  CompressionCodec codec =
    new CompressionCodecFactory(conf).getCodec(path);
  Compressor compressor = null;
  OutputStream output = null;
  try {
    if (codec != null) {
      compressor = CodecPool.getCompressor(codec);
      // Track the raw stream first so it is still closed if wrapping fails.
      output = outFS.create(path);
      output = codec.createOutputStream(output, compressor);
    } else {
      output = outFS.create(path);
    }

    JsonGenerator outGen = outFactory.createJsonGenerator(output,
                                                          JsonEncoding.UTF8);
    outGen.useDefaultPrettyPrinter();

    return outGen;
  } catch (IOException | RuntimeException e) {
    // Undo partial setup: release the stream, then give the compressor back.
    if (output != null) {
      try {
        output.close();
      } catch (IOException closeFailure) {
        e.addSuppressed(closeFailure);
      }
    }
    if (compressor != null) {
      CodecPool.returnCompressor(compressor);
    }
    throw e;
  }
}
 
Example 2
Source Project: hadoop-gpu   Source File: Compression.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds the compressing output pipeline on top of {@code downStream}.
 *
 * <p>The sink is buffered only when {@code downStreamBufferSize} is positive.
 * The codec's {@code io.file.buffer.size} is set to 32 KiB before the codec
 * stream is created, and the result is wrapped in a
 * {@code FinishOnFlushCompressionStream} behind a {@code DATA_OBUF_SIZE}
 * buffer.
 *
 * @param downStream           stream that receives the compressed bytes
 * @param compressor           compressor handed to the codec
 * @param downStreamBufferSize buffer size for {@code downStream}; values
 *                             {@code <= 0} disable that buffer
 * @return the buffered stream callers write uncompressed data to
 * @throws IOException if the codec cannot create its output stream
 */
@Override
public synchronized OutputStream createCompressionStream(
    OutputStream downStream, Compressor compressor,
    int downStreamBufferSize) throws IOException {
  final OutputStream sink = downStreamBufferSize > 0
      ? new BufferedOutputStream(downStream, downStreamBufferSize)
      : downStream;
  codec.getConf().setInt("io.file.buffer.size", 32 * 1024);
  final CompressionOutputStream compressed =
      codec.createOutputStream(sink, compressor);
  return new BufferedOutputStream(
      new FinishOnFlushCompressionStream(compressed), DATA_OBUF_SIZE);
}
 
Example 3
Source Project: hadoop   Source File: Compression.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Wraps {@code downStream} in the codec's compression stream.
 *
 * <p>Steps: optionally buffer the sink, force the codec's
 * {@code io.file.buffer.size} to 32 KiB, create the codec stream, then add a
 * finish-on-flush wrapper and a {@code DATA_OBUF_SIZE} output buffer.
 *
 * @param downStream           destination for compressed bytes
 * @param compressor           compressor passed through to the codec
 * @param downStreamBufferSize size of the optional sink buffer; not used
 *                             when zero or negative
 * @return stream accepting uncompressed data
 * @throws IOException if the codec stream cannot be created
 */
@Override
public synchronized OutputStream createCompressionStream(
    OutputStream downStream, Compressor compressor,
    int downStreamBufferSize) throws IOException {
  OutputStream target = downStream;
  if (downStreamBufferSize > 0) {
    target = new BufferedOutputStream(downStream, downStreamBufferSize);
  }
  codec.getConf().setInt("io.file.buffer.size", 32 * 1024);
  CompressionOutputStream codecStream =
      codec.createOutputStream(target, compressor);
  FinishOnFlushCompressionStream finishing =
      new FinishOnFlushCompressionStream(codecStream);
  return new BufferedOutputStream(finishing, DATA_OBUF_SIZE);
}
 
Example 4
Source Project: hadoop   Source File: Compression.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Borrows a compressor for this algorithm's codec from the {@link CodecPool}
 * and resets it before handing it out.
 *
 * @return a reset compressor, or {@code null} when there is no codec or the
 *         pool returns no compressor
 * @throws IOException declared by the signature; not thrown by the
 *                     statements visible in this method
 */
public Compressor getCompressor() throws IOException {
  final CompressionCodec codec = getCodec();
  if (codec == null) {
    return null;
  }

  final Compressor pooled = CodecPool.getCompressor(codec);
  if (pooled == null) {
    return null;
  }

  if (pooled.finished()) {
    // Somebody returned the compressor to CodecPool but is still using it.
    LOG.warn("Compressor obtained from CodecPool already finished()");
  } else if (LOG.isDebugEnabled()) {
    LOG.debug("Got a compressor: " + pooled.hashCode());
  }

  // Reset unconditionally: works around bugs in 0.18 where a compressor is
  // referenced after it was returned to the codec pool.
  pooled.reset();
  return pooled;
}
 
Example 5
Source Project: hadoop   Source File: TestZlibCompressorDecompressor.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Round-trips a generated buffer through the zlib compressor/decompressor
 * pair several times, then re-initialises the compressor from the
 * configuration. When native zlib is not loaded, the test asserts that it is,
 * so the failure is reported with an explanatory message.
 */
@Test
public void testZlibCompressorDecompressorWithConfiguration() {
  Configuration conf = new Configuration();
  conf.setBoolean(CommonConfigurationKeys.IO_NATIVE_LIB_AVAILABLE_KEY, true);

  if (!ZlibFactory.isNativeZlibLoaded(conf)) {
    assertTrue("ZlibFactory is using native libs against request",
        ZlibFactory.isNativeZlibLoaded(conf));
    return;
  }

  final int rounds = 5;
  final int payloadSize = 10 * 1024;
  Compressor zlibCompressor = ZlibFactory.getZlibCompressor(conf);
  Decompressor zlibDecompressor = ZlibFactory.getZlibDecompressor(conf);
  byte[] payload = generate(payloadSize);
  try {
    for (int round = 0; round < rounds; round++) {
      compressDecompressZlib(payload, (ZlibCompressor) zlibCompressor,
          (ZlibDecompressor) zlibDecompressor);
    }
    zlibCompressor.reinit(conf);
  } catch (Exception ex) {
    fail("testZlibCompressorDecompressorWithConfiguration ex error " + ex);
  }
}
 
Example 6
Source Project: hadoop   Source File: TestZlibCompressorDecompressor.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Runs the null-pointer and array-bounds {@code setDictionary} checks against
 * the zlib compressor and decompressor. When native zlib is not loaded, the
 * test asserts that it is, which fails with an explanatory message.
 */
@Test
public void testZlibCompressorDecompressorSetDictionary() {
  Configuration conf = new Configuration();
  conf.setBoolean(CommonConfigurationKeys.IO_NATIVE_LIB_AVAILABLE_KEY, true);

  if (!ZlibFactory.isNativeZlibLoaded(conf)) {
    assertTrue("ZlibFactory is using native libs against request",
        ZlibFactory.isNativeZlibLoaded(conf));
    return;
  }

  Compressor zlibCompressor = ZlibFactory.getZlibCompressor(conf);
  Decompressor zlibDecompressor = ZlibFactory.getZlibDecompressor(conf);

  // Null dictionary: compressor first, then decompressor.
  checkSetDictionaryNullPointerException(zlibCompressor);
  checkSetDictionaryNullPointerException(zlibDecompressor);

  // Out-of-range offsets: decompressor first, then compressor.
  checkSetDictionaryArrayIndexOutOfBoundsException(zlibDecompressor);
  checkSetDictionaryArrayIndexOutOfBoundsException(zlibCompressor);
}
 
Example 7
Source Project: big-c   Source File: Anonymizer.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a pretty-printing UTF-8 JSON generator that writes to {@code path},
 * compressing the output when a codec is registered for the path.
 *
 * <p>Cleanup on failure: any stream already opened is closed and a compressor
 * borrowed from the {@link CodecPool} is returned to it, so a failed call
 * leaks neither. After a successful call the compressor remains borrowed,
 * because the returned generator keeps writing through it.
 *
 * @param conf configuration used for the file system and codec lookup
 * @param path file to create; the codec is chosen from this path
 * @return generator bound to the newly created (possibly compressed) stream
 * @throws IOException if the file system, stream, or generator setup fails
 */
private JsonGenerator createJsonGenerator(Configuration conf, Path path) 
throws IOException {
  FileSystem outFS = path.getFileSystem(conf);
  CompressionCodec codec =
    new CompressionCodecFactory(conf).getCodec(path);
  OutputStream output = null;
  Compressor compressor = null;
  try {
    if (codec != null) {
      compressor = CodecPool.getCompressor(codec);
      // Keep a handle on the raw stream in case the codec wrapper fails.
      output = outFS.create(path);
      output = codec.createOutputStream(output, compressor);
    } else {
      output = outFS.create(path);
    }

    JsonGenerator outGen = outFactory.createJsonGenerator(output,
                                                          JsonEncoding.UTF8);
    outGen.useDefaultPrettyPrinter();

    return outGen;
  } catch (IOException | RuntimeException e) {
    if (output != null) {
      try {
        output.close();
      } catch (IOException closeFailure) {
        // Keep the original failure primary; attach the close error to it.
        e.addSuppressed(closeFailure);
      }
    }
    if (compressor != null) {
      CodecPool.returnCompressor(compressor);
    }
    throw e;
  }
}
 
Example 8
Source Project: RDFS   Source File: Compression.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds the LZMA compression pipeline over {@code downStream}.
 *
 * <p>Fails fast when {@code isSupported()} is false. Otherwise it optionally
 * buffers the sink, sets {@code io.compression.codec.lzma.buffersize} to
 * 64 KiB on {@code conf}, and wraps the codec stream in a finish-on-flush
 * stream behind a {@code DATA_OBUF_SIZE} buffer.
 *
 * @param downStream           destination of the compressed bytes
 * @param compressor           compressor handed to the codec
 * @param downStreamBufferSize sink buffer size; {@code <= 0} means unbuffered
 * @return stream that accepts uncompressed data
 * @throws IOException if the codec is not supported or its stream cannot be
 *                     created
 */
@Override
public synchronized OutputStream createCompressionStream(
    OutputStream downStream, Compressor compressor,
    int downStreamBufferSize) throws IOException {
  if (!isSupported()) {
    throw new IOException(
        "LZMA codec cannot be loaded. " +
        "You may want to check LD_LIBRARY_PATH.");
  }
  final OutputStream sink = (downStreamBufferSize > 0)
      ? new BufferedOutputStream(downStream, downStreamBufferSize)
      : downStream;
  conf.setInt("io.compression.codec.lzma.buffersize", 64 * 1024);
  final CompressionOutputStream lzmaStream =
      codec.createOutputStream(sink, compressor);
  return new BufferedOutputStream(
      new FinishOnFlushCompressionStream(lzmaStream), DATA_OBUF_SIZE);
}
 
Example 9
Source Project: hadoop-gpu   Source File: Compression.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds the LZO compression pipeline over {@code downStream}.
 *
 * <p>Throws when {@code isSupported()} reports false. Otherwise the sink is
 * optionally buffered, {@code io.compression.codec.lzo.buffersize} is set to
 * 64 KiB on {@code conf}, and the codec stream is wrapped in a
 * finish-on-flush stream plus a {@code DATA_OBUF_SIZE} buffer.
 *
 * @param downStream           destination of the compressed bytes
 * @param compressor           compressor handed to the codec
 * @param downStreamBufferSize sink buffer size; {@code <= 0} means unbuffered
 * @return stream that accepts uncompressed data
 * @throws IOException if the codec is not supported or its stream cannot be
 *                     created
 */
@Override
public synchronized OutputStream createCompressionStream(
    OutputStream downStream, Compressor compressor,
    int downStreamBufferSize) throws IOException {
  if (!isSupported()) {
    throw new IOException(
        "LZO codec class not specified. Did you forget to set property "
            + CONF_LZO_CLASS + "?");
  }
  OutputStream sink = downStream;
  if (downStreamBufferSize > 0) {
    sink = new BufferedOutputStream(downStream, downStreamBufferSize);
  }
  conf.setInt("io.compression.codec.lzo.buffersize", 64 * 1024);
  CompressionOutputStream lzoStream =
      codec.createOutputStream(sink, compressor);
  OutputStream finishing = new FinishOnFlushCompressionStream(lzoStream);
  return new BufferedOutputStream(finishing, DATA_OBUF_SIZE);
}
 
Example 10
Source Project: big-c   Source File: Compression.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates the stream used to write LZO-compressed data to {@code downStream}.
 *
 * <p>Requires {@code isSupported()} to be true. Sets
 * {@code io.compression.codec.lzo.buffersize} to 64 KiB on {@code conf}
 * before the codec stream is created.
 *
 * @param downStream           underlying stream for compressed output
 * @param compressor           compressor passed to the codec
 * @param downStreamBufferSize when positive, {@code downStream} is wrapped in
 *                             a buffer of this size
 * @return a {@code DATA_OBUF_SIZE}-buffered stream over a finish-on-flush
 *         wrapper of the codec stream
 * @throws IOException if the codec is not supported or stream creation fails
 */
@Override
public synchronized OutputStream createCompressionStream(
    OutputStream downStream, Compressor compressor,
    int downStreamBufferSize) throws IOException {
  final boolean supported = isSupported();
  if (!supported) {
    throw new IOException(
        "LZO codec class not specified. Did you forget to set property "
            + CONF_LZO_CLASS + "?");
  }
  final boolean bufferSink = downStreamBufferSize > 0;
  final OutputStream sink = bufferSink
      ? new BufferedOutputStream(downStream, downStreamBufferSize)
      : downStream;
  conf.setInt("io.compression.codec.lzo.buffersize", 64 * 1024);
  final CompressionOutputStream compressed =
      codec.createOutputStream(sink, compressor);
  return new BufferedOutputStream(
      new FinishOnFlushCompressionStream(compressed), DATA_OBUF_SIZE);
}
 
Example 11
Source Project: big-c   Source File: Compression.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates the stream used to write compressed data to {@code downStream}.
 *
 * <p>Sets {@code io.file.buffer.size} to 32 KiB on the codec's configuration
 * before creating the codec stream.
 *
 * @param downStream           underlying stream for compressed output
 * @param compressor           compressor passed to the codec
 * @param downStreamBufferSize when positive, {@code downStream} is wrapped in
 *                             a buffer of this size
 * @return a {@code DATA_OBUF_SIZE}-buffered stream over a finish-on-flush
 *         wrapper of the codec stream
 * @throws IOException if the codec stream cannot be created
 */
@Override
public synchronized OutputStream createCompressionStream(
    OutputStream downStream, Compressor compressor,
    int downStreamBufferSize) throws IOException {
  final OutputStream sink;
  if (downStreamBufferSize <= 0) {
    sink = downStream;
  } else {
    sink = new BufferedOutputStream(downStream, downStreamBufferSize);
  }
  codec.getConf().setInt("io.file.buffer.size", 32 * 1024);
  final CompressionOutputStream compressed =
      codec.createOutputStream(sink, compressor);
  final OutputStream finishing =
      new FinishOnFlushCompressionStream(compressed);
  return new BufferedOutputStream(finishing, DATA_OBUF_SIZE);
}
 
Example 12
Source Project: big-c   Source File: TestZlibCompressorDecompressor.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Verifies the {@code setDictionary} argument checks (null input and
 * out-of-range indices) on the zlib compressor and decompressor. If native
 * zlib is unavailable, the test asserts availability so that it fails with a
 * descriptive message.
 */
@Test
public void testZlibCompressorDecompressorSetDictionary() {
  Configuration conf = new Configuration();
  conf.setBoolean(CommonConfigurationKeys.IO_NATIVE_LIB_AVAILABLE_KEY, true);

  final boolean nativeLoaded = ZlibFactory.isNativeZlibLoaded(conf);
  if (nativeLoaded) {
    Compressor compressor = ZlibFactory.getZlibCompressor(conf);
    Decompressor decompressor = ZlibFactory.getZlibDecompressor(conf);

    checkSetDictionaryNullPointerException(compressor);
    checkSetDictionaryNullPointerException(decompressor);

    checkSetDictionaryArrayIndexOutOfBoundsException(decompressor);
    checkSetDictionaryArrayIndexOutOfBoundsException(compressor);
    return;
  }

  // Re-query rather than reuse the flag, as the original assertion does.
  assertTrue("ZlibFactory is using native libs against request",
      ZlibFactory.isNativeZlibLoaded(conf));
}
 
Example 13
Source Project: hadoop-gpu   Source File: Compression.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Borrows a compressor for this algorithm's codec from the {@link CodecPool}
 * and resets it before returning it.
 *
 * @return a reset compressor, or {@code null} when there is no codec or the
 *         pool returns no compressor
 * @throws IOException declared by the signature; not thrown by the
 *                     statements visible in this method
 */
public Compressor getCompressor() throws IOException {
  CompressionCodec codec = getCodec();
  if (codec != null) {
    Compressor compressor = CodecPool.getCompressor(codec);
    if (compressor != null) {
      if (compressor.finished()) {
        // Somebody returns the compressor to CodecPool but is still using
        // it.
        LOG.warn("Compressor obtained from CodecPool already finished()");
      } else {
        // Guard the debug call so the message string is only built when
        // debug logging is actually enabled.
        if (LOG.isDebugEnabled()) {
          LOG.debug("Got a compressor: " + compressor.hashCode());
        }
      }
      // Following statement is necessary to get around bugs in 0.18 where a
      // compressor is referenced after returned back to the codec pool.
      compressor.reset();
    }
    return compressor;
  }
  return null;
}
 
Example 14
Source Project: RDFS   Source File: Compression.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Assembles the output chain that compresses data into {@code downStream}.
 *
 * <p>The codec's {@code io.file.buffer.size} is forced to 32 KiB before the
 * codec stream is created.
 *
 * @param downStream           receives the compressed bytes
 * @param compressor           compressor supplied to the codec
 * @param downStreamBufferSize buffer size applied to {@code downStream} when
 *                             greater than zero
 * @return buffered ({@code DATA_OBUF_SIZE}) finish-on-flush compression
 *         stream
 * @throws IOException if the codec fails to create its stream
 */
@Override
public synchronized OutputStream createCompressionStream(
    OutputStream downStream, Compressor compressor,
    int downStreamBufferSize) throws IOException {
  OutputStream sink = (downStreamBufferSize > 0)
      ? new BufferedOutputStream(downStream, downStreamBufferSize)
      : downStream;
  codec.getConf().setInt("io.file.buffer.size", 32 * 1024);
  CompressionOutputStream compressed =
      codec.createOutputStream(sink, compressor);
  return new BufferedOutputStream(
      new FinishOnFlushCompressionStream(compressed), DATA_OBUF_SIZE);
}
 
Example 15
Source Project: hbase   Source File: Compression.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a buffered, finish-on-flush compression stream over
 * {@code downStream}, delegating creation of the raw codec stream to
 * {@code createPlainCompressionStream}.
 *
 * @param downStream           receives the compressed bytes
 * @param compressor           compressor forwarded to the plain stream factory
 * @param downStreamBufferSize when positive, {@code downStream} is buffered
 *                             with this size first
 * @return stream that accepts uncompressed data, buffered with
 *         {@code DATA_OBUF_SIZE}
 * @throws IOException if the plain compression stream cannot be created
 */
public OutputStream createCompressionStream(
    OutputStream downStream, Compressor compressor, int downStreamBufferSize)
    throws IOException {
  final OutputStream sink = downStreamBufferSize > 0
      ? new BufferedOutputStream(downStream, downStreamBufferSize)
      : downStream;
  final CompressionOutputStream plain =
      createPlainCompressionStream(sink, compressor);
  return new BufferedOutputStream(
      new FinishOnFlushCompressionStream(plain), DATA_OBUF_SIZE);
}
 
Example 16
Source Project: hbase   Source File: Compression.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Fetches a compressor for the configured codec from the {@link CodecPool},
 * resetting it before it is handed out.
 *
 * @return a reset compressor, or {@code null} if no codec is available or
 *         the pool yields no compressor
 */
public Compressor getCompressor() {
  final CompressionCodec codec = getCodec(conf);
  if (codec == null) {
    return null;
  }

  final Compressor pooled = CodecPool.getCompressor(codec);
  if (LOG.isTraceEnabled()) {
    LOG.trace("Retrieved compressor " + pooled + " from pool.");
  }
  if (pooled == null) {
    return null;
  }

  if (pooled.finished()) {
    // Somebody returned the compressor to CodecPool but is still using it.
    LOG.warn("Compressor obtained from CodecPool is already finished()");
  }
  pooled.reset();
  return pooled;
}
 
Example 17
Source Project: RDFS   Source File: Compression.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Assembles the LZO output chain that compresses data into
 * {@code downStream}.
 *
 * <p>Rejects the call when {@code isSupported()} is false, and sets
 * {@code io.compression.codec.lzo.buffersize} to 64 KiB on {@code conf}
 * before creating the codec stream.
 *
 * @param downStream           receives the compressed bytes
 * @param compressor           compressor supplied to the codec
 * @param downStreamBufferSize buffer size applied to {@code downStream} when
 *                             greater than zero
 * @return buffered ({@code DATA_OBUF_SIZE}) finish-on-flush compression
 *         stream
 * @throws IOException if the codec is not supported or fails to create its
 *                     stream
 */
@Override
public synchronized OutputStream createCompressionStream(
    OutputStream downStream, Compressor compressor,
    int downStreamBufferSize) throws IOException {
  if (!isSupported()) {
    throw new IOException(
        "LZO codec class not specified. Did you forget to set property "
            + CONF_LZO_CLASS + "?");
  }
  final OutputStream sink;
  if (downStreamBufferSize > 0) {
    sink = new BufferedOutputStream(downStream, downStreamBufferSize);
  } else {
    sink = downStream;
  }
  conf.setInt("io.compression.codec.lzo.buffersize", 64 * 1024);
  final FinishOnFlushCompressionStream finishing =
      new FinishOnFlushCompressionStream(
          codec.createOutputStream(sink, compressor));
  return new BufferedOutputStream(finishing, DATA_OBUF_SIZE);
}
 
Example 18
Source Project: RDFS   Source File: CompressorStream.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a compressor stream with an internal buffer of
 * {@code bufferSize} bytes.
 *
 * @param out        underlying output stream; passed to the superclass
 *                   constructor before it is null-checked here
 * @param compressor compressor stored on this stream
 * @param bufferSize size of the internal byte buffer; must be positive
 * @throws NullPointerException     if {@code out} or {@code compressor} is
 *                                  {@code null}
 * @throws IllegalArgumentException if {@code bufferSize <= 0}
 */
public CompressorStream(OutputStream out, Compressor compressor, int bufferSize) {
  super(out);

  if (out == null) {
    throw new NullPointerException();
  }
  if (compressor == null) {
    throw new NullPointerException();
  }
  if (bufferSize <= 0) {
    throw new IllegalArgumentException("Illegal bufferSize");
  }

  this.compressor = compressor;
  this.buffer = new byte[bufferSize];
}
 
Example 19
Source Project: hadoop   Source File: Compression.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Pass-through variant: no compression stream is created and
 * {@code compressor} is not used.
 *
 * @param downStream           stream to return, optionally buffered
 * @param compressor           unused by this implementation
 * @param downStreamBufferSize when positive, {@code downStream} is wrapped in
 *                             a buffer of this size
 * @return {@code downStream} itself, or a {@link BufferedOutputStream}
 *         around it
 * @throws IOException declared by the signature; not thrown by this body
 */
@Override
public synchronized OutputStream createCompressionStream(
    OutputStream downStream, Compressor compressor,
    int downStreamBufferSize) throws IOException {
  return downStreamBufferSize > 0
      ? new BufferedOutputStream(downStream, downStreamBufferSize)
      : downStream;
}
 
Example 20
Source Project: big-c   Source File: CompressorStream.java    License: Apache License 2.0 5 votes vote down vote up
public CompressorStream(OutputStream out, Compressor compressor, int bufferSize) {
  super(out);

  if (out == null || compressor == null) {
    throw new NullPointerException();
  } else if (bufferSize <= 0) {
    throw new IllegalArgumentException("Illegal bufferSize");
  }

  this.compressor = compressor;
  buffer = new byte[bufferSize];
}
 
Example 21
Source Project: hadoop-gpu   Source File: Compression.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns the down stream without adding compression; {@code compressor} is
 * ignored.
 *
 * @param downStream           stream handed back to the caller
 * @param compressor           not used here
 * @param downStreamBufferSize buffer size; a buffer is added only when this
 *                             is greater than zero
 * @return the (optionally buffered) {@code downStream}
 * @throws IOException declared by the signature; not thrown by this body
 */
@Override
public synchronized OutputStream createCompressionStream(
    OutputStream downStream, Compressor compressor,
    int downStreamBufferSize) throws IOException {
  if (downStreamBufferSize <= 0) {
    return downStream;
  }
  return new BufferedOutputStream(downStream, downStreamBufferSize);
}
 
Example 22
Source Project: big-c   Source File: Compression.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * No-compression implementation: hands back {@code downStream}, buffered on
 * request, and never touches {@code compressor}.
 *
 * @param downStream           stream to write through
 * @param compressor           unused
 * @param downStreamBufferSize positive values request a buffer of that size
 * @return {@code downStream} or a buffered wrapper around it
 * @throws IOException declared by the signature; not thrown by this body
 */
@Override
public synchronized OutputStream createCompressionStream(
    OutputStream downStream, Compressor compressor,
    int downStreamBufferSize) throws IOException {
  OutputStream result = downStream;
  if (downStreamBufferSize > 0) {
    result = new BufferedOutputStream(downStream, downStreamBufferSize);
  }
  return result;
}
 
Example 23
Source Project: tajo   Source File: CodecPool.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Obtains a {@link Compressor} for the given {@link CompressionCodec},
 * reusing one from the pool when available and creating one otherwise.
 *
 * <p>A recycled compressor is re-initialised with {@code conf}; a newly
 * created one comes straight from {@code codec.createCompressor()}.
 *
 * @param codec
 *          the <code>CompressionCodec</code> for which to get the
 *          <code>Compressor</code>
 * @param conf the <code>Configuration</code> applied via <code>reinit</code>
 *          when a pooled compressor is reused
 * @return <code>Compressor</code> for the given <code>CompressionCodec</code>
 *         from the pool or a new one
 */
public static Compressor getCompressor(CompressionCodec codec, Configuration conf) {
  final Compressor recycled = borrow(COMPRESSOR_POOL, codec.getCompressorType());
  if (recycled != null) {
    recycled.reinit(conf);
    if (LOG.isDebugEnabled()) {
      LOG.debug("Got recycled compressor");
    }
    return recycled;
  }

  final Compressor fresh = codec.createCompressor();
  LOG.info("Got brand-new compressor [" + codec.getDefaultExtension() + "]");
  return fresh;
}
 
Example 24
Source Project: tajo   Source File: CodecPool.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Hands a {@link Compressor} back to the pool after resetting it.
 *
 * <p>Nothing happens for a {@code null} argument or for a compressor whose
 * class carries the {@code DoNotPool} annotation.
 *
 * @param compressor
 *          the <code>Compressor</code> to be returned to the pool
 */
public static void returnCompressor(Compressor compressor) {
  // Skip missing compressors and those that must not be reused.
  final boolean poolable = compressor != null
      && !compressor.getClass().isAnnotationPresent(DoNotPool.class);
  if (poolable) {
    compressor.reset();
    payback(COMPRESSOR_POOL, compressor);
  }
}
 
Example 25
Source Project: RDFS   Source File: Compression.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Identity implementation: no codec is involved and {@code compressor} is
 * not used.
 *
 * @param downStream           stream returned to the caller
 * @param compressor           ignored
 * @param downStreamBufferSize if positive, size of the buffer placed in front
 *                             of {@code downStream}
 * @return {@code downStream}, wrapped in a {@link BufferedOutputStream} when
 *         a positive buffer size is given
 * @throws IOException declared by the signature; not thrown by this body
 */
@Override
public synchronized OutputStream createCompressionStream(
    OutputStream downStream, Compressor compressor,
    int downStreamBufferSize) throws IOException {
  final boolean wantsBuffer = downStreamBufferSize > 0;
  return wantsBuffer
      ? new BufferedOutputStream(downStream, downStreamBufferSize)
      : downStream;
}
 
Example 26
Source Project: hbase   Source File: Compression.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Uncompressed variant: returns {@code downStream} directly, or buffered when
 * a positive buffer size is supplied. The {@code compressor} argument is not
 * read.
 *
 * @param downStream           target stream
 * @param compressor           unused
 * @param downStreamBufferSize buffer size; non-positive disables buffering
 * @return the target stream, optionally buffered
 * @throws IOException declared by the signature; not thrown by this body
 */
@Override
public synchronized OutputStream createCompressionStream(
    OutputStream downStream, Compressor compressor,
    int downStreamBufferSize) throws IOException {
  if (!(downStreamBufferSize > 0)) {
    return downStream;
  }

  final OutputStream buffered =
      new BufferedOutputStream(downStream, downStreamBufferSize);
  return buffered;
}
 
Example 27
Source Project: hbase   Source File: Compression.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates the codec's compression stream directly over {@code downStream},
 * with no extra buffering streams around it.
 *
 * <p>Before creating the stream, {@code io.file.buffer.size} is set to 32 KiB
 * on the codec's configuration; the codec is cast to {@code Configurable} to
 * reach it.
 *
 * @param downStream stream that receives the compressed bytes
 * @param compressor compressor handed to the codec
 * @return the codec's compression output stream
 * @throws IOException if the codec cannot create the stream
 */
public CompressionOutputStream createPlainCompressionStream(
    OutputStream downStream, Compressor compressor) throws IOException {
  final CompressionCodec plainCodec = getCodec(conf);
  final Configurable configurable = (Configurable) plainCodec;
  configurable.getConf().setInt("io.file.buffer.size", 32 * 1024);
  return plainCodec.createOutputStream(downStream, compressor);
}
 
Example 28
Source Project: hbase   Source File: EncodedDataBlock.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Find the size of compressed data assuming that buffer will be compressed
 * using given algorithm.
 *
 * <p>The data is compressed into a counting {@link DataOutputStream} backed
 * by a null sink, so nothing is stored; only the number of bytes written
 * after the flush is reported. A non-null {@code compressor} is reset before
 * use.
 *
 * @param algo compression algorithm
 * @param compressor compressor already requested from codec
 * @param inputBuffer Array to be compressed.
 * @param offset Offset to beginning of the data.
 * @param length Length to be compressed.
 * @return Size of compressed data in bytes.
 * @throws IOException if creating, writing to, flushing, or closing one of
 *         the streams fails
 */
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value="NP_NULL_ON_SOME_PATH_EXCEPTION",
     justification="No sure what findbugs wants but looks to me like no NPE")
public static int getCompressedSize(Algorithm algo, Compressor compressor,
    byte[] inputBuffer, int offset, int length) throws IOException {

  // Create streams
  // Storing them so we can close them
  final IOUtils.NullOutputStream nullOutputStream = new IOUtils.NullOutputStream();
  final DataOutputStream compressedStream = new DataOutputStream(nullOutputStream);
  OutputStream compressingStream = null;


  try {
    if (compressor != null) {
      compressor.reset();
    }

    compressingStream = algo.createCompressionStream(compressedStream, compressor, 0);

    compressingStream.write(inputBuffer, offset, length);
    compressingStream.flush();

    return compressedStream.size();
  } finally {
    // Close outermost-first so the compressing stream never writes into an
    // already-closed sink, and nest the closes so a failure in one does not
    // skip the others.
    try {
      if (compressingStream != null) {
        compressingStream.close();
      }
    } finally {
      try {
        compressedStream.close();
      } finally {
        nullOutputStream.close();
      }
    }
  }
}
 
Example 29
Source Project: incubator-tajo   Source File: CodecPool.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns a {@link Compressor} for the given {@link CompressionCodec}: a
 * pooled instance re-initialised with {@code conf} if one can be borrowed,
 * otherwise a new one created by the codec.
 *
 * @param codec
 *          the <code>CompressionCodec</code> for which to get the
 *          <code>Compressor</code>
 * @param conf the <code>Configuration</code> passed to <code>reinit</code>
 *          on a recycled compressor
 * @return <code>Compressor</code> for the given <code>CompressionCodec</code>
 *         from the pool or a new one
 */
public static Compressor getCompressor(CompressionCodec codec, Configuration conf) {
  Compressor result = borrow(COMPRESSOR_POOL, codec.getCompressorType());
  final boolean fromPool = (result != null);
  if (fromPool) {
    result.reinit(conf);
    if (LOG.isDebugEnabled()) {
      LOG.debug("Got recycled compressor");
    }
  } else {
    result = codec.createCompressor();
    LOG.info("Got brand-new compressor [" + codec.getDefaultExtension() + "]");
  }
  return result;
}
 
Example 30
Source Project: incubator-tajo   Source File: CodecPool.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Resets the given {@link Compressor} and gives it back to the pool.
 *
 * <p>A {@code null} compressor, or one whose class is annotated with
 * {@code DoNotPool}, is left untouched and not pooled.
 *
 * @param compressor
 *          the <code>Compressor</code> to be returned to the pool
 */
public static void returnCompressor(Compressor compressor) {
  // Nothing to return, or the compressor's class opts out of pooling.
  if (compressor == null
      || compressor.getClass().isAnnotationPresent(DoNotPool.class)) {
    return;
  }
  compressor.reset();
  payback(COMPRESSOR_POOL, compressor);
}