org.apache.hadoop.io.compress.CodecPool Java Examples

The following examples show how to use org.apache.hadoop.io.compress.CodecPool. They are drawn from a variety of open-source projects; each example lists its source file, originating project, and license.
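CodecPool exists because compressor and decompressor instances (particularly native ones such as zlib) are expensive to allocate, so Hadoop pools them for reuse. The pattern repeated throughout the examples below is: borrow an instance with getCompressor/getDecompressor, wrap a stream with it, and return it to the pool in a finally block. Here is a minimal, self-contained sketch of that pattern, assuming a gzip-compressed file; the path is illustrative and not taken from any example below:

import java.io.IOException;
import java.io.InputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CodecPool;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.Decompressor;

public class CodecPoolSketch {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    // Illustrative path; any extension known to CompressionCodecFactory (.gz, .bz2, ...) works.
    Path path = new Path("/tmp/example.txt.gz");
    CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(path);
    if (codec == null) {
      throw new IOException("No codec matches " + path);
    }
    FileSystem fs = path.getFileSystem(conf);
    // Borrow a decompressor from the pool and wrap the raw stream with it.
    Decompressor decompressor = CodecPool.getDecompressor(codec);
    try (InputStream in = codec.createInputStream(fs.open(path), decompressor)) {
      byte[] buf = new byte[4096];
      for (int n = in.read(buf); n != -1; n = in.read(buf)) {
        System.out.write(buf, 0, n);
      }
    } finally {
      // Always return pooled instances, even on error, so other callers can reuse them.
      CodecPool.returnDecompressor(decompressor);
    }
  }
}

The compressor side is symmetric: CodecPool.getCompressor(codec) pairs with CodecPool.returnCompressor(compressor), as the writer-oriented examples below show.
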
Example #1
Source File: IFile.java    From big-c with Apache License 2.0
public void close() throws IOException {
  // Close the underlying stream
  in.close();
  
  // Release the buffer
  dataIn = null;
  buffer = null;
  if(readRecordsCounter != null) {
    readRecordsCounter.increment(numRecordsRead);
  }

  // Return the decompressor
  if (decompressor != null) {
    decompressor.reset();
    CodecPool.returnDecompressor(decompressor);
    decompressor = null;
  }
}
 
Example #2
Source File: AbstractSpreadSheetDocumentRecordReader.java    From hadoopoffice with Apache License 2.0
@Override
public synchronized void close() throws IOException {
  try {
    if (officeReader != null) {
      officeReader.close();
    }
  } finally {
    if (decompressor != null) { // return this decompressor
      CodecPool.returnDecompressor(decompressor);
      decompressor = null;
    }
    // return the decompressors of linked workbooks
    if (this.currentHFR != null) {
      currentHFR.close();
    }
  }
  // do not close the filesystem! will cause exceptions in Spark
}
 
Example #3
Source File: JSONFileRecordReader.java    From ojai with Apache License 2.0
@Override
public void close() throws IOException {
  try {
    documentStream.close();
  } catch (Exception e) {
    throw new IOException(
        "Error closing document Stream in JsonFileRecordReader", e);
  }
  try {
    if (inputStream != null) {
      inputStream.close();
    }
  } finally {
    if (decompressor != null) {
      CodecPool.returnDecompressor(decompressor);
      decompressor = null;
    }
  }
}
 
Example #4
Source File: IFile.java    From RDFS with Apache License 2.0
public void close() throws IOException {
  // Return the decompressor
  if (decompressor != null) {
    decompressor.reset();
    CodecPool.returnDecompressor(decompressor);
    decompressor = null;
  }
  
  // Close the underlying stream
  in.close();
  
  // Release the buffer
  dataIn = null;
  buffer = null;
  if(readRecordsCounter != null) {
    readRecordsCounter.increment(numRecordsRead);
  }
}
 
Example #5
Source File: Compression.java    From hbase with Apache License 2.0
public Decompressor getDecompressor() {
  CompressionCodec codec = getCodec(conf);
  if (codec != null) {
    Decompressor decompressor = CodecPool.getDecompressor(codec);
    if (LOG.isTraceEnabled()) LOG.trace("Retrieved decompressor " + decompressor + " from pool.");
    if (decompressor != null) {
      if (decompressor.finished()) {
        // Somebody returned the decompressor to CodecPool but is still using it.
        LOG.warn("Decompressor obtained from CodecPool is already finished()");
      }
      decompressor.reset();
    }
    return decompressor;
  }

  return null;
}
 
Example #6
Source File: IFile.java    From hadoop with Apache License 2.0
/**
 * Construct an IFile Reader.
 * 
 * @param conf Configuration File 
 * @param in   The input stream
 * @param length Length of the data in the stream, including the checksum
 *               bytes.
 * @param codec codec
 * @param readsCounter Counter for records read from disk
 * @throws IOException
 */
public Reader(Configuration conf, FSDataInputStream in, long length, 
              CompressionCodec codec,
              Counters.Counter readsCounter) throws IOException {
  readRecordsCounter = readsCounter;
  checksumIn = new IFileInputStream(in, length, conf);
  if (codec != null) {
    decompressor = CodecPool.getDecompressor(codec);
    if (decompressor != null) {
      this.in = codec.createInputStream(checksumIn, decompressor);
    } else {
      LOG.warn("Could not obtain decompressor from CodecPool");
      this.in = checksumIn;
    }
  } else {
    this.in = checksumIn;
  }
  this.dataIn = new DataInputStream(this.in);
  this.fileLength = length;
  
  if (conf != null) {
    bufferSize = conf.getInt("io.file.buffer.size", DEFAULT_BUFFER_SIZE);
  }
}
 
Example #7
Source File: SequenceFile.java    From gemfirexd-oss with Apache License 2.0
/** Close the file. */
@Override
public synchronized void close() throws IOException {
  // Return the decompressors to the pool
  CodecPool.returnDecompressor(keyLenDecompressor);
  CodecPool.returnDecompressor(keyDecompressor);
  CodecPool.returnDecompressor(valLenDecompressor);
  CodecPool.returnDecompressor(valDecompressor);
  keyLenDecompressor = keyDecompressor = null;
  valLenDecompressor = valDecompressor = null;
  
  if (keyDeserializer != null) {
    keyDeserializer.close();
  }
  if (valDeserializer != null) {
    valDeserializer.close();
  }
  
  // Close the input-stream
  in.close();
}
 
Example #8
Source File: IFile.java    From tez with Apache License 2.0
void setupOutputStream(CompressionCodec codec) throws IOException {
  this.checksumOut = new IFileOutputStream(this.rawOut);
  if (codec != null) {
    this.compressor = CodecPool.getCompressor(codec);
    if (this.compressor != null) {
      this.compressor.reset();
      this.compressedOut = codec.createOutputStream(checksumOut, compressor);
      this.out = new FSDataOutputStream(this.compressedOut, null);
      this.compressOutput = true;
    } else {
      LOG.warn("Could not obtain compressor from CodecPool");
      this.out = new FSDataOutputStream(checksumOut, null);
    }
  } else {
    this.out = new FSDataOutputStream(checksumOut, null);
  }
}
 
Example #9
Source File: MapReduceExcelOutputIntegrationTest.java    From hadoopoffice with Apache License 2.0
private InputStream openFile(Path path) throws IOException {
  CompressionCodec codec = new CompressionCodecFactory(miniCluster.getConfig()).getCodec(path);
  FSDataInputStream fileIn = dfsCluster.getFileSystem().open(path);
  // check if compressed
  if (codec == null) { // uncompressed
    return fileIn;
  } else { // compressed
    Decompressor decompressor = CodecPool.getDecompressor(codec);
    this.openDecompressors.add(decompressor); // to be returned later using close
    if (codec instanceof SplittableCompressionCodec) {
      long end = dfsCluster.getFileSystem().getFileStatus(path).getLen();
      final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec)
          .createInputStream(fileIn, decompressor, 0, end, SplittableCompressionCodec.READ_MODE.CONTINUOUS);
      return cIn;
    } else {
      return codec.createInputStream(fileIn, decompressor);
    }
  }
}
 
Example #10
Source File: InMemoryMapOutput.java    From hadoop with Apache License 2.0
public InMemoryMapOutput(Configuration conf, TaskAttemptID mapId,
                         MergeManagerImpl<K, V> merger,
                         int size, CompressionCodec codec,
                         boolean primaryMapOutput) {
  super(mapId, (long)size, primaryMapOutput);
  this.conf = conf;
  this.merger = merger;
  this.codec = codec;
  byteStream = new BoundedByteArrayOutputStream(size);
  memory = byteStream.getBuffer();
  if (codec != null) {
    decompressor = CodecPool.getDecompressor(codec);
  } else {
    decompressor = null;
  }
}
 
Example #11
Source File: Compression.java    From big-c with Apache License 2.0
public Compressor getCompressor() throws IOException {
  CompressionCodec codec = getCodec();
  if (codec != null) {
    Compressor compressor = CodecPool.getCompressor(codec);
    if (compressor != null) {
      if (compressor.finished()) {
        // Somebody returned the compressor to CodecPool but is still using it.
        LOG.warn("Compressor obtained from CodecPool is already finished()");
      } else {
        if(LOG.isDebugEnabled()) {
          LOG.debug("Got a compressor: " + compressor.hashCode());
        }
      }
      /**
       * Following statement is necessary to get around bugs in 0.18 where a
       * compressor is referenced after returned back to the codec pool.
       */
      compressor.reset();
    }
    return compressor;
  }
  return null;
}
 
Example #12
Source File: SequenceFile.java    From big-c with Apache License 2.0
/** Close the file. */
@Override
public synchronized void close() throws IOException {
  // Return the decompressors to the pool
  CodecPool.returnDecompressor(keyLenDecompressor);
  CodecPool.returnDecompressor(keyDecompressor);
  CodecPool.returnDecompressor(valLenDecompressor);
  CodecPool.returnDecompressor(valDecompressor);
  keyLenDecompressor = keyDecompressor = null;
  valLenDecompressor = valDecompressor = null;
  
  if (keyDeserializer != null) {
    keyDeserializer.close();
  }
  if (valDeserializer != null) {
    valDeserializer.close();
  }
  
  // Close the input-stream
  in.close();
}
 
Example #13
Source File: SequenceFile.java    From big-c with Apache License 2.0
/** Close the file. */
@Override
public synchronized void close() throws IOException {
  keySerializer.close();
  uncompressedValSerializer.close();
  if (compressedValSerializer != null) {
    compressedValSerializer.close();
  }

  CodecPool.returnCompressor(compressor);
  compressor = null;
  
  if (out != null) {
    
    // Close the underlying stream iff we own it...
    if (ownOutputStream) {
      out.close();
    } else {
      out.flush();
    }
    out = null;
  }
}
 
Example #14
Source File: Anonymizer.java    From big-c with Apache License 2.0
private JsonGenerator createJsonGenerator(Configuration conf, Path path) 
throws IOException {
  FileSystem outFS = path.getFileSystem(conf);
  CompressionCodec codec =
    new CompressionCodecFactory(conf).getCodec(path);
  OutputStream output;
  Compressor compressor = null;
  if (codec != null) {
    compressor = CodecPool.getCompressor(codec);
    output = codec.createOutputStream(outFS.create(path), compressor);
  } else {
    output = outFS.create(path);
  }

  JsonGenerator outGen = outFactory.createJsonGenerator(output, 
                                                        JsonEncoding.UTF8);
  outGen.useDefaultPrettyPrinter();
  
  return outGen;
}
 
Example #15
Source File: Anonymizer.java    From hadoop with Apache License 2.0
private JsonGenerator createJsonGenerator(Configuration conf, Path path) 
throws IOException {
  FileSystem outFS = path.getFileSystem(conf);
  CompressionCodec codec =
    new CompressionCodecFactory(conf).getCodec(path);
  OutputStream output;
  Compressor compressor = null;
  if (codec != null) {
    compressor = CodecPool.getCompressor(codec);
    output = codec.createOutputStream(outFS.create(path), compressor);
  } else {
    output = outFS.create(path);
  }

  JsonGenerator outGen = outFactory.createJsonGenerator(output, 
                                                        JsonEncoding.UTF8);
  outGen.useDefaultPrettyPrinter();
  
  return outGen;
}
 
Example #16
Source File: SequenceFile.java    From hadoop with Apache License 2.0
/** Close the file. */
@Override
public synchronized void close() throws IOException {
  keySerializer.close();
  uncompressedValSerializer.close();
  if (compressedValSerializer != null) {
    compressedValSerializer.close();
  }

  CodecPool.returnCompressor(compressor);
  compressor = null;
  
  if (out != null) {
    
    // Close the underlying stream iff we own it...
    if (ownOutputStream) {
      out.close();
    } else {
      out.flush();
    }
    out = null;
  }
}
 
Example #17
Source File: HadoopUtils.java    From incubator-hivemall with Apache License 2.0
public static BufferedReader getBufferedReader(File file, MapredContext context)
        throws IOException {
    URI fileuri = file.toURI();
    Path path = new Path(fileuri);

    Configuration conf = context.getJobConf();
    CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
    CompressionCodec codec = ccf.getCodec(path);

    if (codec == null) {
        return new BufferedReader(new FileReader(file));
    } else {
        Decompressor decompressor = CodecPool.getDecompressor(codec);
        FileInputStream fis = new FileInputStream(file);
        CompressionInputStream cis = codec.createInputStream(fis, decompressor);
        BufferedReader br = new BufferedReaderExt(new InputStreamReader(cis), decompressor);
        return br;
    }
}
 
Example #18
Source File: Compression.java    From hadoop with Apache License 2.0
public Decompressor getDecompressor() throws IOException {
  CompressionCodec codec = getCodec();
  if (codec != null) {
    Decompressor decompressor = CodecPool.getDecompressor(codec);
    if (decompressor != null) {
      if (decompressor.finished()) {
        // Somebody returned the decompressor to CodecPool but is still using it.
        LOG.warn("Decompressor obtained from CodecPool is already finished()");
      } else {
        if(LOG.isDebugEnabled()) {
          LOG.debug("Got a decompressor: " + decompressor.hashCode());
        }
      }
      /**
       * Following statement is necessary to get around bugs in 0.18 where a
       * decompressor is referenced after returned back to the codec pool.
       */
      decompressor.reset();
    }
    return decompressor;
  }

  return null;
}
 
Example #19
Source File: InMemoryMapOutput.java    From big-c with Apache License 2.0
public InMemoryMapOutput(Configuration conf, TaskAttemptID mapId,
                         MergeManagerImpl<K, V> merger,
                         int size, CompressionCodec codec,
                         boolean primaryMapOutput) {
  super(mapId, (long)size, primaryMapOutput);
  this.conf = conf;
  this.merger = merger;
  this.codec = codec;
  byteStream = new BoundedByteArrayOutputStream(size);
  memory = byteStream.getBuffer();
  if (codec != null) {
    decompressor = CodecPool.getDecompressor(codec);
  } else {
    decompressor = null;
  }
}
 
Example #20
Source File: SplittableXmlInputFormat.java    From Hive-XML-SerDe with Apache License 2.0
private InputStream getInputStream(JobConf jobConf, FileSplit split) throws IOException, ClassNotFoundException {
    FSDataInputStream fsin = null;

    // open the file and seek to the start of the split
    long splitStart = split.getStart();
    long splitEnd = splitStart + split.getLength();
    Path file = split.getPath();
    FileSystem fs = file.getFileSystem(jobConf);
    fsin = fs.open(split.getPath());
    fsin.seek(splitStart);

    Configuration conf = new Configuration();
    CompressionCodecFactory compressionCodecFactory = new CompressionCodecFactory(conf);
    CompressionCodec codec = compressionCodecFactory.getCodec(split.getPath());
    if (codec == null) {
        // not compressed: return the raw stream positioned at the split start
        return fsin;
    }
    Decompressor decompressor = CodecPool.getDecompressor(codec);
    if (codec instanceof SplittableCompressionCodec) {
        return ((SplittableCompressionCodec) codec).createInputStream(fsin,
            decompressor,
            splitStart,
            splitEnd,
            SplittableCompressionCodec.READ_MODE.BYBLOCK);
    } else {
        return codec.createInputStream(fsin, decompressor);
    }
}
 
Example #21
Source File: IFile.java    From big-c with Apache License 2.0
/**
 * Construct an IFile Reader.
 * 
 * @param conf Configuration File 
 * @param in   The input stream
 * @param length Length of the data in the stream, including the checksum
 *               bytes.
 * @param codec codec
 * @param readsCounter Counter for records read from disk
 * @throws IOException
 */
public Reader(Configuration conf, FSDataInputStream in, long length, 
              CompressionCodec codec,
              Counters.Counter readsCounter) throws IOException {
  readRecordsCounter = readsCounter;
  checksumIn = new IFileInputStream(in, length, conf);
  if (codec != null) {
    decompressor = CodecPool.getDecompressor(codec);
    if (decompressor != null) {
      this.in = codec.createInputStream(checksumIn, decompressor);
    } else {
      LOG.warn("Could not obtain decompressor from CodecPool");
      this.in = checksumIn;
    }
  } else {
    this.in = checksumIn;
  }
  this.dataIn = new DataInputStream(this.in);
  this.fileLength = length;
  
  if (conf != null) {
    bufferSize = conf.getInt("io.file.buffer.size", DEFAULT_BUFFER_SIZE);
  }
}
 
Example #22
Source File: CellBlockBuilder.java    From hbase with Apache License 2.0
private void encodeCellsTo(OutputStream os, CellScanner cellScanner, Codec codec,
    CompressionCodec compressor) throws IOException {
  Compressor poolCompressor = null;
  try {
    if (compressor != null) {
      if (compressor instanceof Configurable) {
        ((Configurable) compressor).setConf(this.conf);
      }
      poolCompressor = CodecPool.getCompressor(compressor);
      os = compressor.createOutputStream(os, poolCompressor);
    }
    Codec.Encoder encoder = codec.getEncoder(os);
    while (cellScanner.advance()) {
      encoder.write(cellScanner.current());
    }
    encoder.flush();
  } catch (BufferOverflowException | IndexOutOfBoundsException e) {
    throw new DoNotRetryIOException(e);
  } finally {
    os.close();
    if (poolCompressor != null) {
      CodecPool.returnCompressor(poolCompressor);
    }
  }
}
 
Example #23
Source File: FlexibleDelimitedFileReaderWriterFactory.java    From secor with Apache License 2.0
public FlexibleDelimitedFileWriter(LogFilePath path, CompressionCodec codec) throws IOException {
  Path fsPath = new Path(path.getLogFilePath());
  FileSystem fs = FileUtil.getFileSystem(path.getLogFilePath());
  this.mCountingStream = new CountingOutputStream(fs.create(fsPath));
  this.mWriter = (codec == null)
      ? new BufferedOutputStream(this.mCountingStream)
      : new BufferedOutputStream(codec.createOutputStream(this.mCountingStream,
          mCompressor = CodecPool.getCompressor(codec)));
}
 
Example #24
Source File: CSVFileRecordReader.java    From components with Apache License 2.0
public synchronized void close() throws IOException {
  try {
    if (in != null) {
      in.close();
    }
  } finally {
    if (decompressor != null) {
      CodecPool.returnDecompressor(decompressor);
      decompressor = null;
    }
  }
}
 
Example #25
Source File: IFile.java    From tez with Apache License 2.0
/**
 * Construct an IFile Reader.
 *
 * @param in   The input stream
 * @param length Length of the data in the stream, including the checksum
 *               bytes.
 * @param codec codec
 * @param readsCounter Counter for records read from disk
 * @throws IOException
 */
public Reader(InputStream in, long length,
              CompressionCodec codec,
              TezCounter readsCounter, TezCounter bytesReadCounter,
              boolean readAhead, int readAheadLength,
              int bufferSize, boolean isCompressed) throws IOException {
  if (in != null) {
    checksumIn = new IFileInputStream(in, length, readAhead,
        readAheadLength/* , isCompressed */);
    if (isCompressed && codec != null) {
      decompressor = CodecPool.getDecompressor(codec);
      if (decompressor != null) {
        this.in = codec.createInputStream(checksumIn, decompressor);
      } else {
        LOG.warn("Could not obtain decompressor from CodecPool");
        this.in = checksumIn;
      }
    } else {
      this.in = checksumIn;
    }
    startPos = checksumIn.getPosition();
  } else {
    this.in = null;
  }

  if (in != null) {
    this.dataIn = new DataInputStream(this.in);
  }
  this.readRecordsCounter = readsCounter;
  this.bytesReadCounter = bytesReadCounter;
  this.fileLength = length;
  this.bufferSize = Math.max(0, bufferSize);
}
 
Example #26
Source File: IFile.java    From incubator-tez with Apache License 2.0
/**
 * Construct an IFile Reader.
 *
 * @param in   The input stream
 * @param length Length of the data in the stream, including the checksum
 *               bytes.
 * @param codec codec
 * @param readsCounter Counter for records read from disk
 * @throws IOException
 */
public Reader(InputStream in, long length,
              CompressionCodec codec,
              TezCounter readsCounter, TezCounter bytesReadCounter,
              boolean readAhead, int readAheadLength,
              int bufferSize, boolean isCompressed) throws IOException {
  this.isCompressed = isCompressed;
  checksumIn = new IFileInputStream(in, length, readAhead, readAheadLength/*, isCompressed*/);
  if (isCompressed && codec != null) {
    decompressor = CodecPool.getDecompressor(codec);
    if (decompressor != null) {
      this.in = codec.createInputStream(checksumIn, decompressor);
    } else {
      LOG.warn("Could not obtain decompressor from CodecPool");
      this.in = checksumIn;
    }
  } else {
    this.in = checksumIn;
  }

  this.dataIn = new DataInputStream(this.in);
  startPos = checksumIn.getPosition();
  this.readRecordsCounter = readsCounter;
  this.bytesReadCounter = bytesReadCounter;
  this.fileLength = length;
  this.bufferSize = Math.max(0, bufferSize);
}
 
Example #27
Source File: DefaultOutputter.java    From big-c with Apache License 2.0
@Override
public void init(Path path, Configuration conf) throws IOException {
  FileSystem fs = path.getFileSystem(conf);
  CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(path);
  OutputStream output;
  if (codec != null) {
    compressor = CodecPool.getCompressor(codec);
    output = codec.createOutputStream(fs.create(path), compressor);
  } else {
    output = fs.create(path);
  }
  writer = new JsonObjectMapperWriter<T>(output, 
      conf.getBoolean("rumen.output.pretty.print", true));
}
 
Example #28
Source File: DelimitedTextFileReaderWriterFactory.java    From secor with Apache License 2.0
public DelimitedTextFileReader(LogFilePath path, CompressionCodec codec) throws IOException {
    Path fsPath = new Path(path.getLogFilePath());
    FileSystem fs = FileUtil.getFileSystem(path.getLogFilePath());
    InputStream inputStream = fs.open(fsPath);
    this.mReader = (codec == null)
            ? new BufferedInputStream(inputStream)
            : new BufferedInputStream(codec.createInputStream(inputStream,
                    mDecompressor = CodecPool.getDecompressor(codec)));
    this.mOffset = path.getOffset();
}
 
Example #29
Source File: TestLineRecordReader.java    From big-c with Apache License 2.0
@Test
public void testMultipleClose() throws IOException {
  URL testFileUrl = getClass().getClassLoader().
      getResource("recordSpanningMultipleSplits.txt.bz2");
  assertNotNull("Cannot find recordSpanningMultipleSplits.txt.bz2",
      testFileUrl);
  File testFile = new File(testFileUrl.getFile());
  Path testFilePath = new Path(testFile.getAbsolutePath());
  long testFileSize = testFile.length();
  Configuration conf = new Configuration();
  conf.setInt(org.apache.hadoop.mapreduce.lib.input.
      LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE);
  FileSplit split = new FileSplit(testFilePath, 0, testFileSize,
      (String[])null);

  LineRecordReader reader = new LineRecordReader(conf, split);
  LongWritable key = new LongWritable();
  Text value = new Text();
  //noinspection StatementWithEmptyBody
  while (reader.next(key, value)) ;
  reader.close();
  reader.close();

  BZip2Codec codec = new BZip2Codec();
  codec.setConf(conf);
  Set<Decompressor> decompressors = new HashSet<Decompressor>();
  for (int i = 0; i < 10; ++i) {
    decompressors.add(CodecPool.getDecompressor(codec));
  }
  assertEquals(10, decompressors.size());
}