Java Code Examples for org.apache.hadoop.io.compress.CodecPool#getCompressor()

The following examples show how to use org.apache.hadoop.io.compress.CodecPool#getCompressor(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: Compression.java    From RDFS with Apache License 2.0 6 votes vote down vote up
/**
 * Borrows a {@link Compressor} from the {@link CodecPool} for the codec
 * returned by {@code getCodec()}.
 *
 * @return the borrowed compressor after it has been reset, or {@code null}
 *         when there is no codec or the pool hands back no compressor
 * @throws IOException declared for callers; nothing in the visible body
 *         throws it directly
 */
public Compressor getCompressor() throws IOException {
  CompressionCodec codec = getCodec();
  if (codec == null) {
    return null;
  }

  Compressor compressor = CodecPool.getCompressor(codec);
  if (compressor != null) {
    if (compressor.finished()) {
      // Somebody returned the compressor to CodecPool but is still using it.
      LOG.warn("Compressor obtained from CodecPool already finished()");
    } else if (LOG.isDebugEnabled()) {
      // Only build the message when debug logging is actually enabled.
      LOG.debug("Got a compressor: " + compressor.hashCode());
    }
    /**
     * Following statement is necessary to get around bugs in 0.18 where a
     * compressor is referenced after returned back to the codec pool.
     */
    compressor.reset();
  }
  return compressor;
}
 
Example 2
Source File: Anonymizer.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Opens {@code path} for writing and wraps it in a pretty-printing UTF-8
 * {@link JsonGenerator}. When a compression codec is registered for the
 * path, the file stream is wrapped in the codec's output stream using a
 * compressor borrowed from the {@link CodecPool}.
 *
 * If anything fails after the file has been opened, the stream is closed and
 * the borrowed compressor is handed back to the pool before the exception
 * propagates, so a failed call leaves no open stream behind.
 *
 * NOTE(review): on success the borrowed compressor is not returned to the
 * pool by this method, and nothing visible here returns it later.
 *
 * @param conf configuration used to resolve the file system and the codec
 * @param path destination file
 * @return a generator writing to {@code path}
 * @throws IOException if the file or the generator cannot be created
 */
private JsonGenerator createJsonGenerator(Configuration conf, Path path) 
throws IOException {
  FileSystem outFS = path.getFileSystem(conf);
  CompressionCodec codec =
    new CompressionCodecFactory(conf).getCodec(path);
  OutputStream output = null;
  Compressor compressor = null;
  boolean created = false;
  try {
    if (codec != null) {
      compressor = CodecPool.getCompressor(codec);
      // Track the raw stream first so it is closed if wrapping it fails.
      output = outFS.create(path);
      output = codec.createOutputStream(output, compressor);
    } else {
      output = outFS.create(path);
    }

    JsonGenerator outGen = outFactory.createJsonGenerator(output, 
                                                          JsonEncoding.UTF8);
    outGen.useDefaultPrettyPrinter();
    created = true;
    return outGen;
  } finally {
    if (!created) {
      if (output != null) {
        try {
          output.close();
        } catch (IOException ignored) {
          // Best effort: the original failure is the one worth reporting.
        }
      }
      if (compressor != null) {
        CodecPool.returnCompressor(compressor);
      }
    }
  }
}
 
Example 3
Source File: Compression.java    From hadoop with Apache License 2.0 6 votes vote down vote up
/**
 * Fetches a compressor from the {@link CodecPool} for the codec returned by
 * {@code getCodec()} and resets it before handing it out.
 *
 * @return the reset compressor, or {@code null} if there is no codec or the
 *         pool returned no compressor
 * @throws IOException declared for callers
 */
public Compressor getCompressor() throws IOException {
  final CompressionCodec activeCodec = getCodec();
  if (activeCodec == null) {
    return null;
  }

  final Compressor pooled = CodecPool.getCompressor(activeCodec);
  if (pooled == null) {
    return null;
  }

  if (!pooled.finished()) {
    if (LOG.isDebugEnabled()) {
      LOG.debug("Got a compressor: " + pooled.hashCode());
    }
  } else {
    // A previous user gave the compressor back to the pool while still
    // holding on to it.
    LOG.warn("Compressor obtained from CodecPool already finished()");
  }

  // Works around bugs in 0.18 where a compressor is referenced after it was
  // returned to the codec pool.
  pooled.reset();
  return pooled;
}
 
Example 4
Source File: IFile.java    From tez with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the output stream chain on top of {@code rawOut}: always a
 * checksumming {@link IFileOutputStream}, plus a compressing layer when a
 * codec is given and the pool can supply a compressor for it.
 *
 * @param codec compression codec to use, or {@code null} for no compression
 * @throws IOException if one of the streams cannot be created
 */
void setupOutputStream(CompressionCodec codec) throws IOException {
  this.checksumOut = new IFileOutputStream(this.rawOut);

  // No codec: write checksummed but uncompressed data.
  if (codec == null) {
    this.out = new FSDataOutputStream(checksumOut, null);
    return;
  }

  this.compressor = CodecPool.getCompressor(codec);

  // Codec given but no compressor available: fall back to uncompressed.
  if (this.compressor == null) {
    LOG.warn("Could not obtain compressor from CodecPool");
    this.out = new FSDataOutputStream(checksumOut, null);
    return;
  }

  this.compressor.reset();
  this.compressedOut = codec.createOutputStream(checksumOut, compressor);
  this.out = new FSDataOutputStream(this.compressedOut, null);
  this.compressOutput = true;
}
 
Example 5
Source File: Anonymizer.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Opens {@code path} for writing and wraps it in a pretty-printing UTF-8
 * {@link JsonGenerator}. When a compression codec is registered for the
 * path, the file stream is wrapped in the codec's output stream using a
 * compressor borrowed from the {@link CodecPool}.
 *
 * If anything fails after the file has been opened, the stream is closed and
 * the borrowed compressor is handed back to the pool before the exception
 * propagates, so a failed call leaves no open stream behind.
 *
 * NOTE(review): on success the borrowed compressor is not returned to the
 * pool by this method, and nothing visible here returns it later.
 *
 * @param conf configuration used to resolve the file system and the codec
 * @param path destination file
 * @return a generator writing to {@code path}
 * @throws IOException if the file or the generator cannot be created
 */
private JsonGenerator createJsonGenerator(Configuration conf, Path path) 
throws IOException {
  FileSystem outFS = path.getFileSystem(conf);
  CompressionCodec codec =
    new CompressionCodecFactory(conf).getCodec(path);
  OutputStream output = null;
  Compressor compressor = null;
  boolean created = false;
  try {
    if (codec != null) {
      compressor = CodecPool.getCompressor(codec);
      // Track the raw stream first so it is closed if wrapping it fails.
      output = outFS.create(path);
      output = codec.createOutputStream(output, compressor);
    } else {
      output = outFS.create(path);
    }

    JsonGenerator outGen = outFactory.createJsonGenerator(output, 
                                                          JsonEncoding.UTF8);
    outGen.useDefaultPrettyPrinter();
    created = true;
    return outGen;
  } finally {
    if (!created) {
      if (output != null) {
        try {
          output.close();
        } catch (IOException ignored) {
          // Best effort: the original failure is the one worth reporting.
        }
      }
      if (compressor != null) {
        CodecPool.returnCompressor(compressor);
      }
    }
  }
}
 
Example 6
Source File: CellBlockBuilder.java    From hbase with Apache License 2.0 6 votes vote down vote up
/**
 * Writes every cell produced by {@code cellScanner} to {@code os} using the
 * encoder obtained from {@code codec}. When {@code compressor} is non-null
 * the stream is first wrapped in that codec's compressing output stream,
 * using a compressor borrowed from the {@link CodecPool}.
 *
 * The stream is always closed before returning, and the borrowed compressor
 * is always handed back to the pool, even when closing the stream fails.
 *
 * @param os destination stream; closed by this method
 * @param cellScanner source of the cells to encode
 * @param codec cell codec that supplies the encoder
 * @param compressor compression codec, or {@code null} for no compression
 * @throws IOException on write failure; buffer overflow and index errors are
 *         rethrown as {@link DoNotRetryIOException}
 */
private void encodeCellsTo(OutputStream os, CellScanner cellScanner, Codec codec,
    CompressionCodec compressor) throws IOException {
  Compressor poolCompressor = null;
  try {
    if (compressor != null) {
      if (compressor instanceof Configurable) {
        ((Configurable) compressor).setConf(this.conf);
      }
      poolCompressor = CodecPool.getCompressor(compressor);
      os = compressor.createOutputStream(os, poolCompressor);
    }
    Codec.Encoder encoder = codec.getEncoder(os);
    while (cellScanner.advance()) {
      encoder.write(cellScanner.current());
    }
    encoder.flush();
  } catch (BufferOverflowException | IndexOutOfBoundsException e) {
    throw new DoNotRetryIOException(e);
  } finally {
    try {
      os.close();
    } finally {
      // Must run even if close() throws, otherwise the compressor is lost
      // to the pool.
      if (poolCompressor != null) {
        CodecPool.returnCompressor(poolCompressor);
      }
    }
  }
}
 
Example 7
Source File: Compression.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Obtains a pooled compressor for the codec returned by {@code getCodec()}.
 * The compressor is reset before it is returned to the caller.
 *
 * @return a reset compressor, or {@code null} when no codec exists or the
 *         {@link CodecPool} yields no compressor
 * @throws IOException declared for callers
 */
public Compressor getCompressor() throws IOException {
  final CompressionCodec currentCodec = getCodec();
  final Compressor borrowed =
      (currentCodec == null) ? null : CodecPool.getCompressor(currentCodec);
  if (borrowed == null) {
    return null;
  }

  final boolean alreadyFinished = borrowed.finished();
  if (alreadyFinished) {
    // Someone handed this compressor back to the pool yet keeps using it.
    LOG.warn("Compressor obtained from CodecPool already finished()");
  } else if (LOG.isDebugEnabled()) {
    LOG.debug("Got a compressor: " + borrowed.hashCode());
  }

  // Needed to get around bugs in 0.18 where a compressor is referenced after
  // being returned to the codec pool.
  borrowed.reset();
  return borrowed;
}
 
Example 8
Source File: IFile.java    From hadoop with Apache License 2.0 5 votes vote down vote up
public Writer(Configuration conf, FSDataOutputStream out, 
    Class<K> keyClass, Class<V> valueClass,
    CompressionCodec codec, Counters.Counter writesCounter,
    boolean ownOutputStream)
    throws IOException {
  this.writtenRecordsCounter = writesCounter;
  this.checksumOut = new IFileOutputStream(out);
  this.rawOut = out;
  this.start = this.rawOut.getPos();
  if (codec != null) {
    this.compressor = CodecPool.getCompressor(codec);
    if (this.compressor != null) {
      this.compressor.reset();
      this.compressedOut = codec.createOutputStream(checksumOut, compressor);
      this.out = new FSDataOutputStream(this.compressedOut,  null);
      this.compressOutput = true;
    } else {
      LOG.warn("Could not obtain compressor from CodecPool");
      this.out = new FSDataOutputStream(checksumOut,null);
    }
  } else {
    this.out = new FSDataOutputStream(checksumOut,null);
  }
  
  this.keyClass = keyClass;
  this.valueClass = valueClass;

  if (keyClass != null) {
    SerializationFactory serializationFactory = 
      new SerializationFactory(conf);
    this.keySerializer = serializationFactory.getSerializer(keyClass);
    this.keySerializer.open(buffer);
    this.valueSerializer = serializationFactory.getSerializer(valueClass);
    this.valueSerializer.open(buffer);
  }
  this.ownOutputStream = ownOutputStream;
}
 
Example 9
Source File: SequenceFile.java    From RDFS with Apache License 2.0 5 votes vote down vote up
/**
 * Initializes the writer state: stores the arguments, opens the key and
 * uncompressed-value serializers on {@code buffer}, and, when a codec is
 * set, prepares the compressing stream and the compressed-value serializer.
 */
@SuppressWarnings("unchecked")
void init(Path name, Configuration conf, FSDataOutputStream out,
          Class keyClass, Class valClass,
          boolean compress, CompressionCodec codec, Metadata metadata) 
  throws IOException {
  // Plain state copied from the arguments.
  this.conf = conf;
  this.out = out;
  this.keyClass = keyClass;
  this.valClass = valClass;
  this.compress = compress;
  this.codec = codec;
  this.metadata = metadata;

  // Serializers that write into the in-memory buffer.
  final SerializationFactory serializers = new SerializationFactory(conf);
  this.keySerializer = serializers.getSerializer(keyClass);
  this.keySerializer.open(buffer);
  this.uncompressedValSerializer = serializers.getSerializer(valClass);
  this.uncompressedValSerializer.open(buffer);

  if (this.codec == null) {
    return;
  }

  // Compression path: values go through the codec's stream into the buffer.
  ReflectionUtils.setConf(this.codec, this.conf);
  this.compressor = CodecPool.getCompressor(this.codec);
  this.deflateFilter = this.codec.createOutputStream(buffer, this.compressor);
  final BufferedOutputStream bufferedDeflate =
      new BufferedOutputStream(this.deflateFilter);
  this.deflateOut = new DataOutputStream(bufferedDeflate);
  this.compressedValSerializer = serializers.getSerializer(valClass);
  this.compressedValSerializer.open(this.deflateOut);
}
 
Example 10
Source File: IFile.java    From RDFS with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a writer on top of an already opened stream.
 *
 * The stream is wrapped in a checksumming {@link IFileOutputStream}; when a
 * codec is supplied, a compressing layer created by the codec is added.
 *
 * @param conf configuration handed to the serialization factory
 * @param out underlying stream; its current position becomes the start
 * @param keyClass key class used to look up the key serializer
 * @param valueClass value class used to look up the value serializer
 * @param codec compression codec, or {@code null} for no compression
 * @param writesCounter counter stored for written records
 * @throws IOException if a stream or serializer cannot be set up
 */
public Writer(Configuration conf, FSDataOutputStream out, 
    Class<K> keyClass, Class<V> valueClass,
    CompressionCodec codec, Counters.Counter writesCounter)
    throws IOException {
  this.writtenRecordsCounter = writesCounter;
  this.checksumOut = new IFileOutputStream(out);
  this.rawOut = out;
  this.start = this.rawOut.getPos();
  
  if (codec != null) {
    this.compressor = CodecPool.getCompressor(codec);
    // The pool may hand back null (presumably when the codec has no
    // Compressor implementation to offer - TODO confirm); avoid an NPE here
    // and let the codec decide how to build its stream in that case.
    if (this.compressor != null) {
      this.compressor.reset();
    }
    this.compressedOut = codec.createOutputStream(checksumOut, compressor);
    this.out = new FSDataOutputStream(this.compressedOut,  null);
    this.compressOutput = true;
  } else {
    this.out = new FSDataOutputStream(checksumOut,null);
  }
  
  this.keyClass = keyClass;
  this.valueClass = valueClass;
  SerializationFactory serializationFactory = new SerializationFactory(conf);
  this.keySerializer = serializationFactory.getSerializer(keyClass);
  this.keySerializer.open(buffer);
  this.valueSerializer = serializationFactory.getSerializer(valueClass);
  this.valueSerializer.open(buffer);
}
 
Example 11
Source File: CodecFactory.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
HeapBytesCompressor(CompressionCodecName codecName) {
  this.codecName = codecName;
  this.codec = getCodec(codecName);
  if (codec != null) {
    this.compressor = CodecPool.getCompressor(codec);
    this.compressedOutBuffer = new ByteArrayOutputStream(pageSize);
  } else {
    this.compressor = null;
    this.compressedOutBuffer = null;
  }
}
 
Example 12
Source File: IFile.java    From hadoop-gpu with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a writer on top of an already opened stream.
 *
 * The stream is wrapped in a checksumming {@link IFileOutputStream}; when a
 * codec is supplied, a compressing layer created by the codec is added.
 *
 * @param conf configuration handed to the serialization factory
 * @param out underlying stream; its current position becomes the start
 * @param keyClass key class used to look up the key serializer
 * @param valueClass value class used to look up the value serializer
 * @param codec compression codec, or {@code null} for no compression
 * @param writesCounter counter stored for written records
 * @throws IOException if a stream or serializer cannot be set up
 */
public Writer(Configuration conf, FSDataOutputStream out, 
    Class<K> keyClass, Class<V> valueClass,
    CompressionCodec codec, Counters.Counter writesCounter)
    throws IOException {
  this.writtenRecordsCounter = writesCounter;
  this.checksumOut = new IFileOutputStream(out);
  this.rawOut = out;
  this.start = this.rawOut.getPos();
  
  if (codec != null) {
    this.compressor = CodecPool.getCompressor(codec);
    // The pool may hand back null (presumably when the codec has no
    // Compressor implementation to offer - TODO confirm); avoid an NPE here
    // and let the codec decide how to build its stream in that case.
    if (this.compressor != null) {
      this.compressor.reset();
    }
    this.compressedOut = codec.createOutputStream(checksumOut, compressor);
    this.out = new FSDataOutputStream(this.compressedOut,  null);
    this.compressOutput = true;
  } else {
    this.out = new FSDataOutputStream(checksumOut,null);
  }
  
  this.keyClass = keyClass;
  this.valueClass = valueClass;
  SerializationFactory serializationFactory = new SerializationFactory(conf);
  this.keySerializer = serializationFactory.getSerializer(keyClass);
  this.keySerializer.open(buffer);
  this.valueSerializer = serializationFactory.getSerializer(valueClass);
  this.valueSerializer.open(buffer);
}
 
Example 13
Source File: IFile.java    From incubator-tez with Apache License 2.0 5 votes vote down vote up
public Writer(Configuration conf, FSDataOutputStream outputStream,
    Class keyClass, Class valueClass,
    CompressionCodec codec, TezCounter writesCounter, TezCounter serializedBytesCounter,
    boolean rle) throws IOException {
  this.rawOut = outputStream;
  this.writtenRecordsCounter = writesCounter;
  this.serializedUncompressedBytes = serializedBytesCounter;
  this.checksumOut = new IFileOutputStream(outputStream);
  this.start = this.rawOut.getPos();
  this.rle = rle;
  if (codec != null) {
    this.compressor = CodecPool.getCompressor(codec);
    if (this.compressor != null) {
      this.compressor.reset();
      this.compressedOut = codec.createOutputStream(checksumOut, compressor);
      this.out = new FSDataOutputStream(this.compressedOut,  null);
      this.compressOutput = true;
    } else {
      LOG.warn("Could not obtain compressor from CodecPool");
      this.out = new FSDataOutputStream(checksumOut,null);
    }
  } else {
    this.out = new FSDataOutputStream(checksumOut,null);
  }
  writeHeader(outputStream);
  this.keyClass = keyClass;
  this.valueClass = valueClass;

  if (keyClass != null) {
    SerializationFactory serializationFactory =
      new SerializationFactory(conf);
    this.keySerializer = serializationFactory.getSerializer(keyClass);
    this.keySerializer.open(buffer);
    this.valueSerializer = serializationFactory.getSerializer(valueClass);
    this.valueSerializer.open(buffer);
  }
}
 
Example 14
Source File: IFile.java    From big-c with Apache License 2.0 5 votes vote down vote up
public Writer(Configuration conf, FSDataOutputStream out, 
    Class<K> keyClass, Class<V> valueClass,
    CompressionCodec codec, Counters.Counter writesCounter,
    boolean ownOutputStream)
    throws IOException {
  this.writtenRecordsCounter = writesCounter;
  this.checksumOut = new IFileOutputStream(out);
  this.rawOut = out;
  this.start = this.rawOut.getPos();
  if (codec != null) {
    this.compressor = CodecPool.getCompressor(codec);
    if (this.compressor != null) {
      this.compressor.reset();
      this.compressedOut = codec.createOutputStream(checksumOut, compressor);
      this.out = new FSDataOutputStream(this.compressedOut,  null);
      this.compressOutput = true;
    } else {
      LOG.warn("Could not obtain compressor from CodecPool");
      this.out = new FSDataOutputStream(checksumOut,null);
    }
  } else {
    this.out = new FSDataOutputStream(checksumOut,null);
  }
  
  this.keyClass = keyClass;
  this.valueClass = valueClass;

  if (keyClass != null) {
    SerializationFactory serializationFactory = 
      new SerializationFactory(conf);
    this.keySerializer = serializationFactory.getSerializer(keyClass);
    this.keySerializer.open(buffer);
    this.valueSerializer = serializationFactory.getSerializer(valueClass);
    this.valueSerializer.open(buffer);
  }
  this.ownOutputStream = ownOutputStream;
}
 
Example 15
Source File: SequenceFile.java    From hadoop-gpu with Apache License 2.0 5 votes vote down vote up
/**
 * Sets up this writer: records the supplied arguments, opens the key and
 * uncompressed-value serializers on {@code buffer}, and, if a codec is
 * present, wires up the compressing stream and compressed-value serializer.
 */
@SuppressWarnings("unchecked")
void init(Path name, Configuration conf, FSDataOutputStream out,
          Class keyClass, Class valClass,
          boolean compress, CompressionCodec codec, Metadata metadata) 
  throws IOException {
  // Remember the arguments.
  this.conf = conf;
  this.out = out;
  this.keyClass = keyClass;
  this.valClass = valClass;
  this.compress = compress;
  this.codec = codec;
  this.metadata = metadata;

  // Key and plain value serializers both write into the buffer.
  final SerializationFactory factory = new SerializationFactory(conf);
  this.keySerializer = factory.getSerializer(keyClass);
  this.keySerializer.open(buffer);
  this.uncompressedValSerializer = factory.getSerializer(valClass);
  this.uncompressedValSerializer.open(buffer);

  final boolean codecPresent = this.codec != null;
  if (!codecPresent) {
    return;
  }

  // Compressed values are routed through the codec's stream.
  ReflectionUtils.setConf(this.codec, this.conf);
  this.compressor = CodecPool.getCompressor(this.codec);
  this.deflateFilter = this.codec.createOutputStream(buffer, this.compressor);
  this.deflateOut =
      new DataOutputStream(new BufferedOutputStream(this.deflateFilter));
  this.compressedValSerializer = factory.getSerializer(valClass);
  this.compressedValSerializer.open(this.deflateOut);
}
 
Example 16
Source File: DefaultOutputter.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Opens {@code path} for writing and creates the JSON writer on top of it.
 * If a compression codec is registered for the path, the file stream is
 * wrapped in the codec's output stream using a compressor borrowed from the
 * {@link CodecPool}; the compressor is kept in the {@code compressor} field.
 *
 * @param path destination file
 * @param conf configuration; also supplies "rumen.output.pretty.print"
 * @throws IOException if the file or the writer cannot be created
 */
@Override
public void init(Path path, Configuration conf) throws IOException {
  final FileSystem fs = path.getFileSystem(conf);
  final CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(path);

  final OutputStream output;
  if (codec == null) {
    output = fs.create(path);
  } else {
    compressor = CodecPool.getCompressor(codec);
    output = codec.createOutputStream(fs.create(path), compressor);
  }

  // Pretty printing defaults to on.
  final boolean prettyPrint = conf.getBoolean("rumen.output.pretty.print", true);
  writer = new JsonObjectMapperWriter<T>(output, prettyPrint);
}
 
Example 17
Source File: SequenceFile.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Initializes the writer: stores the arguments, opens the key and
 * uncompressed-value serializers on {@code buffer}, prepares the compressing
 * stream and compressed-value serializer when a codec is set, and finally
 * writes the file header.
 *
 * @throws IOException if no serializer is found for the key or value class,
 *         or if stream setup or writing the header fails
 */
@SuppressWarnings("unchecked")
void init(Configuration conf, FSDataOutputStream out, boolean ownStream,
          Class keyClass, Class valClass,
          CompressionCodec codec, Metadata metadata) 
  throws IOException {
  this.conf = conf;
  this.out = out;
  this.ownOutputStream = ownStream;
  this.keyClass = keyClass;
  this.valClass = valClass;
  this.codec = codec;
  this.metadata = metadata;
  SerializationFactory serializationFactory = new SerializationFactory(conf);
  this.keySerializer = serializationFactory.getSerializer(keyClass);
  if (this.keySerializer == null) {
    throw new IOException(
        "Could not find a serializer for the Key class: '"
            + keyClass.getCanonicalName() + "'. "
            + "Please ensure that the configuration '" +
            CommonConfigurationKeys.IO_SERIALIZATIONS_KEY + "' is "
            + "properly configured, if you're using "
            + "custom serialization.");
  }
  this.keySerializer.open(buffer);
  this.uncompressedValSerializer = serializationFactory.getSerializer(valClass);
  if (this.uncompressedValSerializer == null) {
    throw new IOException(
        "Could not find a serializer for the Value class: '"
            + valClass.getCanonicalName() + "'. "
            + "Please ensure that the configuration '" +
            CommonConfigurationKeys.IO_SERIALIZATIONS_KEY + "' is "
            + "properly configured, if you're using "
            + "custom serialization.");
  }
  this.uncompressedValSerializer.open(buffer);
  if (this.codec != null) {
    // Compressed values are written through the codec's stream into buffer.
    ReflectionUtils.setConf(this.codec, this.conf);
    this.compressor = CodecPool.getCompressor(this.codec);
    this.deflateFilter = this.codec.createOutputStream(buffer, compressor);
    this.deflateOut = 
      new DataOutputStream(new BufferedOutputStream(deflateFilter));
    this.compressedValSerializer = serializationFactory.getSerializer(valClass);
    if (this.compressedValSerializer == null) {
      throw new IOException(
          "Could not find a serializer for the Value class: '"
              + valClass.getCanonicalName() + "'. "
              + "Please ensure that the configuration '" +
              CommonConfigurationKeys.IO_SERIALIZATIONS_KEY + "' is "
              + "properly configured, if you're using "
              + "custom serialization.");
    }
    this.compressedValSerializer.open(deflateOut);
  }
  writeFileHeader();
}
 
Example 18
Source File: DBScanJobRunner.java    From geowave with Apache License 2.0 4 votes vote down vote up
/**
 * Configures the job's mapper, reducer, key/value classes and memory and
 * timeout settings, then picks a codec for map output compression.
 *
 * Every codec class known to the configuration is ranked by its position in
 * {@code CodecsRank} (a later match ranks higher; classes matching no entry
 * are skipped). A candidate only wins if a compressor can actually be
 * obtained for it; {@code DefaultCodec} is used when no candidate qualifies.
 *
 * @param job the job to configure
 * @throws Exception if the superclass configuration fails
 */
@Override
public void configure(final Job job) throws Exception {
  super.configure(job);
  job.setMapperClass(NNMapReduce.NNMapper.class);
  job.setReducerClass(DBScanMapReduce.DBScanMapHullReducer.class);
  job.setMapOutputKeyClass(PartitionDataWritable.class);
  job.setMapOutputValueClass(AdapterWithObjectWritable.class);
  job.setOutputKeyClass(GeoWaveInputKey.class);
  job.setOutputValueClass(ObjectWritable.class);
  job.setSpeculativeExecution(false);
  final Configuration conf = job.getConfiguration();
  conf.set("mapreduce.map.java.opts", "-Xmx" + memInMB + "m");
  conf.set("mapreduce.reduce.java.opts", "-Xmx" + memInMB + "m");
  conf.setLong("mapred.task.timeout", 2000000);
  conf.setInt("mapreduce.task.io.sort.mb", 250);
  job.getConfiguration().setBoolean("mapreduce.reduce.speculative", false);

  Class<? extends CompressionCodec> bestCodecClass =
      org.apache.hadoop.io.compress.DefaultCodec.class;
  int rank = 0;
  for (final Class<? extends CompressionCodec> codecClass : CompressionCodecFactory.getCodecClasses(
      conf)) {
    // r is the 1-based position of the first matching rank entry, or
    // CodecsRank.length + 1 when nothing matches.
    int r = 1;
    for (final String codecs : CodecsRank) {
      if (codecClass.getName().contains(codecs)) {
        break;
      }
      r++;
    }
    if ((rank < r) && (r <= CodecsRank.length)) {
      try {
        final CompressionCodec codec = codecClass.newInstance();
        if (Configurable.class.isAssignableFrom(codecClass)) {
          ((Configurable) codec).setConf(conf);
        }
        // throws an exception if not configurable in this context;
        // the compressor is only a probe, so hand it straight back to the
        // pool instead of leaving it on loan.
        CodecPool.returnCompressor(CodecPool.getCompressor(codec));
        bestCodecClass = codecClass;
        rank = r;
      } catch (final Throwable ex) {
        // occurs when codec is not installed.
        LOGGER.info("Not configurable in this context", ex);
      }
    }
  }
  LOGGER.warn("Compression with " + bestCodecClass.toString());

  conf.setClass("mapreduce.map.output.compress.codec", bestCodecClass, CompressionCodec.class);
  conf.setBoolean("mapreduce.map.output.compress", true);
  conf.setBooleanIfUnset("first.iteration", firstIteration);
}
 
Example 19
Source File: SequenceFile.java    From hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Initializes the writer: stores the arguments, opens the key and
 * uncompressed-value serializers on {@code buffer}, prepares the compressing
 * stream and compressed-value serializer when a codec is set, and finally
 * writes the file header.
 *
 * @throws IOException if no serializer is found for the key or value class,
 *         or if stream setup or writing the header fails
 */
@SuppressWarnings("unchecked")
void init(Configuration conf, FSDataOutputStream out, boolean ownStream,
          Class keyClass, Class valClass,
          CompressionCodec codec, Metadata metadata) 
  throws IOException {
  this.conf = conf;
  this.out = out;
  this.ownOutputStream = ownStream;
  this.keyClass = keyClass;
  this.valClass = valClass;
  this.codec = codec;
  this.metadata = metadata;
  SerializationFactory serializationFactory = new SerializationFactory(conf);
  this.keySerializer = serializationFactory.getSerializer(keyClass);
  if (this.keySerializer == null) {
    throw new IOException(
        "Could not find a serializer for the Key class: '"
            + keyClass.getCanonicalName() + "'. "
            + "Please ensure that the configuration '" +
            CommonConfigurationKeys.IO_SERIALIZATIONS_KEY + "' is "
            + "properly configured, if you're using "
            + "custom serialization.");
  }
  this.keySerializer.open(buffer);
  this.uncompressedValSerializer = serializationFactory.getSerializer(valClass);
  if (this.uncompressedValSerializer == null) {
    throw new IOException(
        "Could not find a serializer for the Value class: '"
            + valClass.getCanonicalName() + "'. "
            + "Please ensure that the configuration '" +
            CommonConfigurationKeys.IO_SERIALIZATIONS_KEY + "' is "
            + "properly configured, if you're using "
            + "custom serialization.");
  }
  this.uncompressedValSerializer.open(buffer);
  if (this.codec != null) {
    // Compressed values are written through the codec's stream into buffer.
    ReflectionUtils.setConf(this.codec, this.conf);
    this.compressor = CodecPool.getCompressor(this.codec);
    this.deflateFilter = this.codec.createOutputStream(buffer, compressor);
    this.deflateOut = 
      new DataOutputStream(new BufferedOutputStream(deflateFilter));
    this.compressedValSerializer = serializationFactory.getSerializer(valClass);
    if (this.compressedValSerializer == null) {
      throw new IOException(
          "Could not find a serializer for the Value class: '"
              + valClass.getCanonicalName() + "'. "
              + "Please ensure that the configuration '" +
              CommonConfigurationKeys.IO_SERIALIZATIONS_KEY + "' is "
              + "properly configured, if you're using "
              + "custom serialization.");
    }
    this.compressedValSerializer.open(deflateOut);
  }
  writeFileHeader();
}
 
Example 20
Source File: HadoopCompressor.java    From presto with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a supplier that compresses with the given codec, borrowing a
 * compressor for it from the {@link CodecPool}.
 *
 * @param codec compression codec; must not be null
 * @param minChunkSize minimum chunk size passed to the chunked output
 * @param maxChunkSize maximum chunk size passed to the chunked output
 */
public HadoopCompressedSliceOutputSupplier(CompressionCodec codec, int minChunkSize, int maxChunkSize)
{
    // Validate once, then reuse the checked reference.
    CompressionCodec checkedCodec = requireNonNull(codec, "codec is null");
    this.codec = checkedCodec;
    this.compressor = CodecPool.getCompressor(checkedCodec);
    this.bufferedOutput = new ChunkedSliceOutput(minChunkSize, maxChunkSize);
}