Java Code Examples for org.apache.hadoop.io.compress.CodecPool#getCompressor()

The following examples show how to use org.apache.hadoop.io.compress.CodecPool#getCompressor(). Each example is drawn from an open-source project; the source project, file, and license are listed above the code.
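All of the examples below follow the same borrow/use/return pattern: obtain a Compressor from the shared pool with CodecPool.getCompressor(codec), pass it to codec.createOutputStream(...), and hand it back with CodecPool.returnCompressor(...) once the stream is closed (as Example 6 does in its finally block). Here is a minimal, self-contained sketch of that pattern; the output path and the codec resolved from its .gz extension are illustrative assumptions, not taken from any of the projects below.

import java.io.FileOutputStream;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CodecPool;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.Compressor;

public class CodecPoolSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Resolve a codec from the file extension (.gz -> GzipCodec); the path is hypothetical.
    CompressionCodec codec =
        new CompressionCodecFactory(conf).getCodec(new Path("data.json.gz"));
    Compressor compressor = null;
    try {
      // Borrow a pooled compressor; the pool may hand back null for some codecs,
      // which createOutputStream tolerates.
      compressor = CodecPool.getCompressor(codec);
      OutputStream out =
          codec.createOutputStream(new FileOutputStream("data.json.gz"), compressor);
      out.write("{\"hello\":\"world\"}".getBytes(StandardCharsets.UTF_8));
      out.close();
    } finally {
      // Return the compressor so other writers can reuse it.
      if (compressor != null) {
        CodecPool.returnCompressor(compressor);
      }
    }
  }
}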
Example 1
Source Project: hadoop    File: Anonymizer.java    License: Apache License 2.0
private JsonGenerator createJsonGenerator(Configuration conf, Path path) 
throws IOException {
  FileSystem outFS = path.getFileSystem(conf);
  CompressionCodec codec =
    new CompressionCodecFactory(conf).getCodec(path);
  OutputStream output;
  Compressor compressor = null;
  if (codec != null) {
    compressor = CodecPool.getCompressor(codec);
    output = codec.createOutputStream(outFS.create(path), compressor);
  } else {
    output = outFS.create(path);
  }

  JsonGenerator outGen = outFactory.createJsonGenerator(output, 
                                                        JsonEncoding.UTF8);
  outGen.useDefaultPrettyPrinter();
  
  return outGen;
}
 
Example 2
Source Project: hadoop    File: Compression.java    License: Apache License 2.0
public Compressor getCompressor() throws IOException {
  CompressionCodec codec = getCodec();
  if (codec != null) {
    Compressor compressor = CodecPool.getCompressor(codec);
    if (compressor != null) {
      if (compressor.finished()) {
        // Somebody returns the compressor to CodecPool but is still using
        // it.
        LOG.warn("Compressor obtained from CodecPool already finished()");
      } else {
        if(LOG.isDebugEnabled()) {
          LOG.debug("Got a compressor: " + compressor.hashCode());
        }
      }
      /**
       * Following statement is necessary to get around bugs in 0.18 where a
       * compressor is referenced after returned back to the codec pool.
       */
      compressor.reset();
    }
    return compressor;
  }
  return null;
}
 
Example 3
Source Project: tez    File: IFile.java    License: Apache License 2.0
void setupOutputStream(CompressionCodec codec) throws IOException {
  this.checksumOut = new IFileOutputStream(this.rawOut);
  if (codec != null) {
    this.compressor = CodecPool.getCompressor(codec);
    if (this.compressor != null) {
      this.compressor.reset();
      this.compressedOut = codec.createOutputStream(checksumOut, compressor);
      this.out = new FSDataOutputStream(this.compressedOut,  null);
      this.compressOutput = true;
    } else {
      LOG.warn("Could not obtain compressor from CodecPool");
      this.out = new FSDataOutputStream(checksumOut,null);
    }
  } else {
    this.out = new FSDataOutputStream(checksumOut,null);
  }
}
 
Example 4
Source Project: big-c    File: Anonymizer.java    License: Apache License 2.0
private JsonGenerator createJsonGenerator(Configuration conf, Path path) 
throws IOException {
  FileSystem outFS = path.getFileSystem(conf);
  CompressionCodec codec =
    new CompressionCodecFactory(conf).getCodec(path);
  OutputStream output;
  Compressor compressor = null;
  if (codec != null) {
    compressor = CodecPool.getCompressor(codec);
    output = codec.createOutputStream(outFS.create(path), compressor);
  } else {
    output = outFS.create(path);
  }

  JsonGenerator outGen = outFactory.createJsonGenerator(output, 
                                                        JsonEncoding.UTF8);
  outGen.useDefaultPrettyPrinter();
  
  return outGen;
}
 
Example 5
Source Project: big-c    File: Compression.java    License: Apache License 2.0
public Compressor getCompressor() throws IOException {
  CompressionCodec codec = getCodec();
  if (codec != null) {
    Compressor compressor = CodecPool.getCompressor(codec);
    if (compressor != null) {
      if (compressor.finished()) {
        // Somebody returns the compressor to CodecPool but is still using
        // it.
        LOG.warn("Compressor obtained from CodecPool already finished()");
      } else {
        if(LOG.isDebugEnabled()) {
          LOG.debug("Got a compressor: " + compressor.hashCode());
        }
      }
      /**
       * Following statement is necessary to get around bugs in 0.18 where a
       * compressor is referenced after returned back to the codec pool.
       */
      compressor.reset();
    }
    return compressor;
  }
  return null;
}
 
Example 6
Source Project: hbase    File: CellBlockBuilder.java    License: Apache License 2.0
private void encodeCellsTo(OutputStream os, CellScanner cellScanner, Codec codec,
    CompressionCodec compressor) throws IOException {
  Compressor poolCompressor = null;
  try {
    if (compressor != null) {
      if (compressor instanceof Configurable) {
        ((Configurable) compressor).setConf(this.conf);
      }
      poolCompressor = CodecPool.getCompressor(compressor);
      os = compressor.createOutputStream(os, poolCompressor);
    }
    Codec.Encoder encoder = codec.getEncoder(os);
    while (cellScanner.advance()) {
      encoder.write(cellScanner.current());
    }
    encoder.flush();
  } catch (BufferOverflowException | IndexOutOfBoundsException e) {
    throw new DoNotRetryIOException(e);
  } finally {
    os.close();
    if (poolCompressor != null) {
      CodecPool.returnCompressor(poolCompressor);
    }
  }
}
 
Example 7
Source Project: RDFS    File: Compression.java    License: Apache License 2.0
public Compressor getCompressor() throws IOException {
  CompressionCodec codec = getCodec();
  if (codec != null) {
    Compressor compressor = CodecPool.getCompressor(codec);
    if (compressor != null) {
      if (compressor.finished()) {
        // Somebody returns the compressor to CodecPool but is still using
        // it.
        LOG.warn("Compressor obtained from CodecPool already finished()");
      } else {
        LOG.debug("Got a compressor: " + compressor.hashCode());
      }
      /**
       * Following statement is necessary to get around bugs in 0.18 where a
       * compressor is referenced after returned back to the codec pool.
       */
      compressor.reset();
    }
    return compressor;
  }
  return null;
}
 
Example 8
Source Project: hadoop    File: IFile.java    License: Apache License 2.0
public Writer(Configuration conf, FSDataOutputStream out, 
    Class<K> keyClass, Class<V> valueClass,
    CompressionCodec codec, Counters.Counter writesCounter,
    boolean ownOutputStream)
    throws IOException {
  this.writtenRecordsCounter = writesCounter;
  this.checksumOut = new IFileOutputStream(out);
  this.rawOut = out;
  this.start = this.rawOut.getPos();
  if (codec != null) {
    this.compressor = CodecPool.getCompressor(codec);
    if (this.compressor != null) {
      this.compressor.reset();
      this.compressedOut = codec.createOutputStream(checksumOut, compressor);
      this.out = new FSDataOutputStream(this.compressedOut,  null);
      this.compressOutput = true;
    } else {
      LOG.warn("Could not obtain compressor from CodecPool");
      this.out = new FSDataOutputStream(checksumOut,null);
    }
  } else {
    this.out = new FSDataOutputStream(checksumOut,null);
  }
  
  this.keyClass = keyClass;
  this.valueClass = valueClass;

  if (keyClass != null) {
    SerializationFactory serializationFactory = 
      new SerializationFactory(conf);
    this.keySerializer = serializationFactory.getSerializer(keyClass);
    this.keySerializer.open(buffer);
    this.valueSerializer = serializationFactory.getSerializer(valueClass);
    this.valueSerializer.open(buffer);
  }
  this.ownOutputStream = ownOutputStream;
}
 
Example 9
Source Project: hadoop    File: DefaultOutputter.java    License: Apache License 2.0
@Override
public void init(Path path, Configuration conf) throws IOException {
  FileSystem fs = path.getFileSystem(conf);
  CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(path);
  OutputStream output;
  if (codec != null) {
    compressor = CodecPool.getCompressor(codec);
    output = codec.createOutputStream(fs.create(path), compressor);
  } else {
    output = fs.create(path);
  }
  writer = new JsonObjectMapperWriter<T>(output, 
      conf.getBoolean("rumen.output.pretty.print", true));
}
 
Example 10
Source Project: hadoop-gpu    File: SequenceFile.java    License: Apache License 2.0
/** Initialize. */
@SuppressWarnings("unchecked")
void init(Path name, Configuration conf, FSDataOutputStream out,
          Class keyClass, Class valClass,
          boolean compress, CompressionCodec codec, Metadata metadata) 
  throws IOException {
  this.conf = conf;
  this.out = out;
  this.keyClass = keyClass;
  this.valClass = valClass;
  this.compress = compress;
  this.codec = codec;
  this.metadata = metadata;
  SerializationFactory serializationFactory = new SerializationFactory(conf);
  this.keySerializer = serializationFactory.getSerializer(keyClass);
  this.keySerializer.open(buffer);
  this.uncompressedValSerializer = serializationFactory.getSerializer(valClass);
  this.uncompressedValSerializer.open(buffer);
  if (this.codec != null) {
    ReflectionUtils.setConf(this.codec, this.conf);
    this.compressor = CodecPool.getCompressor(this.codec);
    this.deflateFilter = this.codec.createOutputStream(buffer, compressor);
    this.deflateOut = 
      new DataOutputStream(new BufferedOutputStream(deflateFilter));
    this.compressedValSerializer = serializationFactory.getSerializer(valClass);
    this.compressedValSerializer.open(deflateOut);
  }
}
 
Example 11
Source Project: big-c    File: IFile.java    License: Apache License 2.0
public Writer(Configuration conf, FSDataOutputStream out, 
    Class<K> keyClass, Class<V> valueClass,
    CompressionCodec codec, Counters.Counter writesCounter,
    boolean ownOutputStream)
    throws IOException {
  this.writtenRecordsCounter = writesCounter;
  this.checksumOut = new IFileOutputStream(out);
  this.rawOut = out;
  this.start = this.rawOut.getPos();
  if (codec != null) {
    this.compressor = CodecPool.getCompressor(codec);
    if (this.compressor != null) {
      this.compressor.reset();
      this.compressedOut = codec.createOutputStream(checksumOut, compressor);
      this.out = new FSDataOutputStream(this.compressedOut,  null);
      this.compressOutput = true;
    } else {
      LOG.warn("Could not obtain compressor from CodecPool");
      this.out = new FSDataOutputStream(checksumOut,null);
    }
  } else {
    this.out = new FSDataOutputStream(checksumOut,null);
  }
  
  this.keyClass = keyClass;
  this.valueClass = valueClass;

  if (keyClass != null) {
    SerializationFactory serializationFactory = 
      new SerializationFactory(conf);
    this.keySerializer = serializationFactory.getSerializer(keyClass);
    this.keySerializer.open(buffer);
    this.valueSerializer = serializationFactory.getSerializer(valueClass);
    this.valueSerializer.open(buffer);
  }
  this.ownOutputStream = ownOutputStream;
}
 
Example 12
Source Project: incubator-tez    File: IFile.java    License: Apache License 2.0
public Writer(Configuration conf, FSDataOutputStream outputStream,
    Class keyClass, Class valueClass,
    CompressionCodec codec, TezCounter writesCounter, TezCounter serializedBytesCounter,
    boolean rle) throws IOException {
  this.rawOut = outputStream;
  this.writtenRecordsCounter = writesCounter;
  this.serializedUncompressedBytes = serializedBytesCounter;
  this.checksumOut = new IFileOutputStream(outputStream);
  this.start = this.rawOut.getPos();
  this.rle = rle;
  if (codec != null) {
    this.compressor = CodecPool.getCompressor(codec);
    if (this.compressor != null) {
      this.compressor.reset();
      this.compressedOut = codec.createOutputStream(checksumOut, compressor);
      this.out = new FSDataOutputStream(this.compressedOut,  null);
      this.compressOutput = true;
    } else {
      LOG.warn("Could not obtain compressor from CodecPool");
      this.out = new FSDataOutputStream(checksumOut,null);
    }
  } else {
    this.out = new FSDataOutputStream(checksumOut,null);
  }
  writeHeader(outputStream);
  this.keyClass = keyClass;
  this.valueClass = valueClass;

  if (keyClass != null) {
    SerializationFactory serializationFactory =
      new SerializationFactory(conf);
    this.keySerializer = serializationFactory.getSerializer(keyClass);
    this.keySerializer.open(buffer);
    this.valueSerializer = serializationFactory.getSerializer(valueClass);
    this.valueSerializer.open(buffer);
  }
}
 
Example 13
Source Project: hadoop-gpu    File: IFile.java    License: Apache License 2.0
public Writer(Configuration conf, FSDataOutputStream out, 
    Class<K> keyClass, Class<V> valueClass,
    CompressionCodec codec, Counters.Counter writesCounter)
    throws IOException {
  this.writtenRecordsCounter = writesCounter;
  this.checksumOut = new IFileOutputStream(out);
  this.rawOut = out;
  this.start = this.rawOut.getPos();
  
  if (codec != null) {
    this.compressor = CodecPool.getCompressor(codec);
    this.compressor.reset();
    this.compressedOut = codec.createOutputStream(checksumOut, compressor);
    this.out = new FSDataOutputStream(this.compressedOut,  null);
    this.compressOutput = true;
  } else {
    this.out = new FSDataOutputStream(checksumOut,null);
  }
  
  this.keyClass = keyClass;
  this.valueClass = valueClass;
  SerializationFactory serializationFactory = new SerializationFactory(conf);
  this.keySerializer = serializationFactory.getSerializer(keyClass);
  this.keySerializer.open(buffer);
  this.valueSerializer = serializationFactory.getSerializer(valueClass);
  this.valueSerializer.open(buffer);
}
 
Example 14
Source Project: parquet-mr    File: CodecFactory.java    License: Apache License 2.0
HeapBytesCompressor(CompressionCodecName codecName) {
  this.codecName = codecName;
  this.codec = getCodec(codecName);
  if (codec != null) {
    this.compressor = CodecPool.getCompressor(codec);
    this.compressedOutBuffer = new ByteArrayOutputStream(pageSize);
  } else {
    this.compressor = null;
    this.compressedOutBuffer = null;
  }
}
 
Example 15
Source Project: RDFS    File: IFile.java    License: Apache License 2.0
public Writer(Configuration conf, FSDataOutputStream out, 
    Class<K> keyClass, Class<V> valueClass,
    CompressionCodec codec, Counters.Counter writesCounter)
    throws IOException {
  this.writtenRecordsCounter = writesCounter;
  this.checksumOut = new IFileOutputStream(out);
  this.rawOut = out;
  this.start = this.rawOut.getPos();
  
  if (codec != null) {
    this.compressor = CodecPool.getCompressor(codec);
    this.compressor.reset();
    this.compressedOut = codec.createOutputStream(checksumOut, compressor);
    this.out = new FSDataOutputStream(this.compressedOut,  null);
    this.compressOutput = true;
  } else {
    this.out = new FSDataOutputStream(checksumOut,null);
  }
  
  this.keyClass = keyClass;
  this.valueClass = valueClass;
  SerializationFactory serializationFactory = new SerializationFactory(conf);
  this.keySerializer = serializationFactory.getSerializer(keyClass);
  this.keySerializer.open(buffer);
  this.valueSerializer = serializationFactory.getSerializer(valueClass);
  this.valueSerializer.open(buffer);
}
 
Example 16
Source Project: RDFS    File: SequenceFile.java    License: Apache License 2.0
/** Initialize. */
@SuppressWarnings("unchecked")
void init(Path name, Configuration conf, FSDataOutputStream out,
          Class keyClass, Class valClass,
          boolean compress, CompressionCodec codec, Metadata metadata) 
  throws IOException {
  this.conf = conf;
  this.out = out;
  this.keyClass = keyClass;
  this.valClass = valClass;
  this.compress = compress;
  this.codec = codec;
  this.metadata = metadata;
  SerializationFactory serializationFactory = new SerializationFactory(conf);
  this.keySerializer = serializationFactory.getSerializer(keyClass);
  this.keySerializer.open(buffer);
  this.uncompressedValSerializer = serializationFactory.getSerializer(valClass);
  this.uncompressedValSerializer.open(buffer);
  if (this.codec != null) {
    ReflectionUtils.setConf(this.codec, this.conf);
    this.compressor = CodecPool.getCompressor(this.codec);
    this.deflateFilter = this.codec.createOutputStream(buffer, compressor);
    this.deflateOut = 
      new DataOutputStream(new BufferedOutputStream(deflateFilter));
    this.compressedValSerializer = serializationFactory.getSerializer(valClass);
    this.compressedValSerializer.open(deflateOut);
  }
}
 
Example 17
Source Project: presto    File: HadoopCompressor.java    License: Apache License 2.0
public HadoopCompressedSliceOutputSupplier(CompressionCodec codec, int minChunkSize, int maxChunkSize)
{
    this.codec = requireNonNull(codec, "codec is null");
    this.compressor = CodecPool.getCompressor(requireNonNull(codec, "codec is null"));
    this.bufferedOutput = new ChunkedSliceOutput(minChunkSize, maxChunkSize);
}
 
Example 18
Source Project: hadoop    File: SequenceFile.java    License: Apache License 2.0
/** Initialize. */
@SuppressWarnings("unchecked")
void init(Configuration conf, FSDataOutputStream out, boolean ownStream,
          Class keyClass, Class valClass,
          CompressionCodec codec, Metadata metadata) 
  throws IOException {
  this.conf = conf;
  this.out = out;
  this.ownOutputStream = ownStream;
  this.keyClass = keyClass;
  this.valClass = valClass;
  this.codec = codec;
  this.metadata = metadata;
  SerializationFactory serializationFactory = new SerializationFactory(conf);
  this.keySerializer = serializationFactory.getSerializer(keyClass);
  if (this.keySerializer == null) {
    throw new IOException(
        "Could not find a serializer for the Key class: '"
            + keyClass.getCanonicalName() + "'. "
            + "Please ensure that the configuration '" +
            CommonConfigurationKeys.IO_SERIALIZATIONS_KEY + "' is "
            + "properly configured, if you're using"
            + "custom serialization.");
  }
  this.keySerializer.open(buffer);
  this.uncompressedValSerializer = serializationFactory.getSerializer(valClass);
  if (this.uncompressedValSerializer == null) {
    throw new IOException(
        "Could not find a serializer for the Value class: '"
            + valClass.getCanonicalName() + "'. "
            + "Please ensure that the configuration '" +
            CommonConfigurationKeys.IO_SERIALIZATIONS_KEY + "' is "
            + "properly configured, if you're using"
            + "custom serialization.");
  }
  this.uncompressedValSerializer.open(buffer);
  if (this.codec != null) {
    ReflectionUtils.setConf(this.codec, this.conf);
    this.compressor = CodecPool.getCompressor(this.codec);
    this.deflateFilter = this.codec.createOutputStream(buffer, compressor);
    this.deflateOut = 
      new DataOutputStream(new BufferedOutputStream(deflateFilter));
    this.compressedValSerializer = serializationFactory.getSerializer(valClass);
    if (this.compressedValSerializer == null) {
      throw new IOException(
          "Could not find a serializer for the Value class: '"
              + valClass.getCanonicalName() + "'. "
              + "Please ensure that the configuration '" +
              CommonConfigurationKeys.IO_SERIALIZATIONS_KEY + "' is "
              + "properly configured, if you're using"
              + "custom serialization.");
    }
    this.compressedValSerializer.open(deflateOut);
  }
  writeFileHeader();
}
 
Example 19
Source Project: big-c    File: SequenceFile.java    License: Apache License 2.0
/** Initialize. */
@SuppressWarnings("unchecked")
void init(Configuration conf, FSDataOutputStream out, boolean ownStream,
          Class keyClass, Class valClass,
          CompressionCodec codec, Metadata metadata) 
  throws IOException {
  this.conf = conf;
  this.out = out;
  this.ownOutputStream = ownStream;
  this.keyClass = keyClass;
  this.valClass = valClass;
  this.codec = codec;
  this.metadata = metadata;
  SerializationFactory serializationFactory = new SerializationFactory(conf);
  this.keySerializer = serializationFactory.getSerializer(keyClass);
  if (this.keySerializer == null) {
    throw new IOException(
        "Could not find a serializer for the Key class: '"
            + keyClass.getCanonicalName() + "'. "
            + "Please ensure that the configuration '" +
            CommonConfigurationKeys.IO_SERIALIZATIONS_KEY + "' is "
            + "properly configured, if you're using"
            + "custom serialization.");
  }
  this.keySerializer.open(buffer);
  this.uncompressedValSerializer = serializationFactory.getSerializer(valClass);
  if (this.uncompressedValSerializer == null) {
    throw new IOException(
        "Could not find a serializer for the Value class: '"
            + valClass.getCanonicalName() + "'. "
            + "Please ensure that the configuration '" +
            CommonConfigurationKeys.IO_SERIALIZATIONS_KEY + "' is "
            + "properly configured, if you're using"
            + "custom serialization.");
  }
  this.uncompressedValSerializer.open(buffer);
  if (this.codec != null) {
    ReflectionUtils.setConf(this.codec, this.conf);
    this.compressor = CodecPool.getCompressor(this.codec);
    this.deflateFilter = this.codec.createOutputStream(buffer, compressor);
    this.deflateOut = 
      new DataOutputStream(new BufferedOutputStream(deflateFilter));
    this.compressedValSerializer = serializationFactory.getSerializer(valClass);
    if (this.compressedValSerializer == null) {
      throw new IOException(
          "Could not find a serializer for the Value class: '"
              + valClass.getCanonicalName() + "'. "
              + "Please ensure that the configuration '" +
              CommonConfigurationKeys.IO_SERIALIZATIONS_KEY + "' is "
              + "properly configured, if you're using"
              + "custom serialization.");
    }
    this.compressedValSerializer.open(deflateOut);
  }
  writeFileHeader();
}
 
Example 20
Source Project: geowave    File: DBScanJobRunner.java    License: Apache License 2.0
@Override
public void configure(final Job job) throws Exception {
  super.configure(job);
  job.setMapperClass(NNMapReduce.NNMapper.class);
  job.setReducerClass(DBScanMapReduce.DBScanMapHullReducer.class);
  job.setMapOutputKeyClass(PartitionDataWritable.class);
  job.setMapOutputValueClass(AdapterWithObjectWritable.class);
  job.setOutputKeyClass(GeoWaveInputKey.class);
  job.setOutputValueClass(ObjectWritable.class);
  job.setSpeculativeExecution(false);
  final Configuration conf = job.getConfiguration();
  conf.set("mapreduce.map.java.opts", "-Xmx" + memInMB + "m");
  conf.set("mapreduce.reduce.java.opts", "-Xmx" + memInMB + "m");
  conf.setLong("mapred.task.timeout", 2000000);
  conf.setInt("mapreduce.task.io.sort.mb", 250);
  job.getConfiguration().setBoolean("mapreduce.reduce.speculative", false);

  Class<? extends CompressionCodec> bestCodecClass =
      org.apache.hadoop.io.compress.DefaultCodec.class;
  int rank = 0;
  for (final Class<? extends CompressionCodec> codecClass : CompressionCodecFactory.getCodecClasses(
      conf)) {
    int r = 1;
    for (final String codecs : CodecsRank) {
      if (codecClass.getName().contains(codecs)) {
        break;
      }
      r++;
    }
    if ((rank < r) && (r <= CodecsRank.length)) {
      try {
        final CompressionCodec codec = codecClass.newInstance();
        if (Configurable.class.isAssignableFrom(codecClass)) {
          ((Configurable) codec).setConf(conf);
        }
        // throws an exception if not configurable in this context
        CodecPool.getCompressor(codec);
        bestCodecClass = codecClass;
        rank = r;
      } catch (final Throwable ex) {
        // occurs when codec is not installed.
        LOGGER.info("Not configuable in this context", ex);
      }
    }
  }
  LOGGER.warn("Compression with " + bestCodecClass.toString());

  conf.setClass("mapreduce.map.output.compress.codec", bestCodecClass, CompressionCodec.class);
  conf.setBoolean("mapreduce.map.output.compress", true);
  conf.setBooleanIfUnset("first.iteration", firstIteration);
}