org.apache.hadoop.io.compress.bzip2.CBZip2InputStream Java Examples

The following examples show how to use org.apache.hadoop.io.compress.bzip2.CBZip2InputStream. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: BZip2Codec.java    From hadoop with Apache License 2.0 6 votes vote down vote up
public BZip2CompressionInputStream(InputStream in, long start, long end,
    READ_MODE readMode) throws IOException {
  super(in, start, end);
  needsReset = false;
  bufferedIn = new BufferedInputStream(super.in);
  this.startingPos = super.getPos();
  this.readMode = readMode;
  if (this.startingPos == 0) {
    // We only strip header if it is start of file
    bufferedIn = readStreamHeader();
  }
  input = new CBZip2InputStream(bufferedIn, readMode);
  if (this.isHeaderStripped) {
    input.updateReportedByteCount(HEADER_LEN);
  }

  if (this.isSubHeaderStripped) {
    input.updateReportedByteCount(SUB_HEADER_LEN);
  }

  this.updatePos(false);
}
 
Example #2
Source File: BZip2Codec.java    From big-c with Apache License 2.0 6 votes vote down vote up
/**
 * Wraps {@code in} in a BZip2 decompressor.
 *
 * @param in compressed input, passed on to the superclass
 * @param start start offset, passed on to the superclass
 * @param end end offset, passed on to the superclass
 * @param readMode read mode given to the {@code CBZip2InputStream}
 * @throws IOException if any of the underlying stream operations fail
 */
public BZip2CompressionInputStream(InputStream in, long start, long end,
    READ_MODE readMode) throws IOException {
  super(in, start, end);
  needsReset = false;
  bufferedIn = new BufferedInputStream(super.in);
  startingPos = super.getPos();
  this.readMode = readMode;

  // Header stripping applies solely to a stream positioned at offset zero.
  if (0 == startingPos) {
    bufferedIn = readStreamHeader();
  }

  input = new CBZip2InputStream(bufferedIn, readMode);

  // Report each stripped header portion to the decompressor, main header
  // first, then the sub-header.
  if (isHeaderStripped) {
    input.updateReportedByteCount(HEADER_LEN);
  }
  if (isSubHeaderStripped) {
    input.updateReportedByteCount(SUB_HEADER_LEN);
  }

  updatePos(false);
}
 
Example #3
Source File: BZip2Codec.java    From hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Rebuilds the decompressor if a reset has been requested; does nothing
 * otherwise.
 *
 * @throws IOException if reading the stream header or creating the
 *         decompressor fails
 */
private void internalReset() throws IOException {
  if (!needsReset) {
    return;
  }
  // Clear the flag first, exactly as before, so it stays cleared even if
  // one of the calls below throws.
  needsReset = false;
  final BufferedInputStream headerReadIn = readStreamHeader();
  input = new CBZip2InputStream(headerReadIn, this.readMode);
}
 
Example #4
Source File: BZip2Codec.java    From big-c with Apache License 2.0 5 votes vote down vote up
/**
 * Performs a pending reset by re-reading the stream header and replacing
 * the current decompressor. A no-op when no reset is pending.
 *
 * @throws IOException if the header read or decompressor creation fails
 */
private void internalReset() throws IOException {
  if (!needsReset) {
    return;
  }
  needsReset = false;
  // The stream returned by readStreamHeader() feeds the new decompressor
  // directly.
  input = new CBZip2InputStream(readStreamHeader(), this.readMode);
}
 
Example #5
Source File: BZip2Codec.java    From RDFS with Apache License 2.0 5 votes vote down vote up
/**
 * Replaces the decompressor with a fresh one when a reset is pending;
 * returns immediately otherwise.
 *
 * @throws IOException if reading the stream header or constructing the
 *         decompressor fails
 */
private void internalReset() throws IOException {
  if (!needsReset) {
    return;
  }
  // The flag is cleared before any I/O, matching the original ordering.
  needsReset = false;
  final BufferedInputStream headerReadIn = readStreamHeader();
  input = new CBZip2InputStream(headerReadIn);
}
 
Example #6
Source File: BZip2Codec.java    From hadoop-gpu with Apache License 2.0 5 votes vote down vote up
/**
 * Carries out a deferred reset: clears the pending flag, re-reads the
 * stream header and installs a new decompressor. Does nothing if no reset
 * was requested.
 *
 * @throws IOException if the header read or decompressor creation fails
 */
private void internalReset() throws IOException {
  if (!needsReset) {
    return;
  }
  needsReset = false;
  input = new CBZip2InputStream(readStreamHeader());
}
 
Example #7
Source File: BZip2Codec.java    From hadoop with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a {@link SplitCompressionInputStream} that reads uncompressed
 * data in one of the two reading modes, i.e. continuous or blocked.
 *
 * @param seekableIn the compressed input; must also implement
 *                   {@code Seekable}
 * @param decompressor not referenced by this method body
 * @param start the start offset into the compressed stream
 * @param end the end offset into the compressed stream
 * @param readMode controls whether progress is reported continuously or
 *                 only at block boundaries
 *
 * @return CompressionInputStream for BZip2 aligned at block boundaries
 * @throws IOException if {@code seekableIn} is not a {@code Seekable}, or
 *         if seeking or stream construction fails
 */
public SplitCompressionInputStream createInputStream(InputStream seekableIn,
    Decompressor decompressor, long start, long end, READ_MODE readMode)
    throws IOException {

  if (!(seekableIn instanceof Seekable)) {
    throw new IOException("seekableIn must be an instance of " +
        Seekable.class.getName());
  }
  final Seekable seeker = (Seekable) seekableIn;

  // Locate the first BZip2 start-of-block marker, measured from offset 0.
  // A block marker is 6 bytes long; the very first block is normally
  // preceded by "BZh9", giving 10 bytes in the common case. At times,
  // however, a stream may start without the leading "BZ".
  seeker.seek(0);
  final long firstMarkerOffset =
      CBZip2InputStream.numberOfBytesTillNextMarker(seekableIn);

  // Back up from the requested start by that distance, never going below 0.
  final long adjStart = Math.max(0L, start - firstMarkerOffset);
  seeker.seek(adjStart);
  final SplitCompressionInputStream backedUp =
      new BZip2CompressionInputStream(seekableIn, adjStart, end, readMode);

  // The check below handles the following case. Assume this layout of a
  // BZip2 compressed stream, where . represents compressed data:
  // .....[48 bit Block].....[48 bit   Block].....[48 bit Block]...
  // ........................[47 bits][1 bit].....[48 bit Block]...
  // ................................^[Assume a Byte alignment here]
  // ........................................^^[current position of stream]
  // .....................^^[We go back 10 Bytes in stream and find a Block marker]
  // ........................................^^[We align at wrong position!]
  // ...........................................................^^[While this pos is correct]
  if (backedUp.getPos() >= start) {
    return backedUp;
  }

  // Backing up aligned the stream before the requested start, so open a
  // new stream at the requested start itself.
  seeker.seek(start);
  return new BZip2CompressionInputStream(seekableIn, start, end, readMode);
}
 
Example #8
Source File: BZip2Codec.java    From big-c with Apache License 2.0 4 votes vote down vote up
/**
 * Opens a BZip2 {@link SplitCompressionInputStream} over a seekable input,
 * reading in either continuous or blocked mode.
 *
 * @param seekableIn the compressed input stream; it must implement
 *                   {@code Seekable} as well
 * @param decompressor unused within this method body
 * @param start the start offset into the compressed stream
 * @param end the end offset into the compressed stream
 * @param readMode controls whether progress is reported continuously or
 *                 only at block boundaries
 *
 * @return CompressionInputStream for BZip2 aligned at block boundaries
 * @throws IOException when {@code seekableIn} does not implement
 *         {@code Seekable}, or when a seek or stream construction fails
 */
public SplitCompressionInputStream createInputStream(InputStream seekableIn,
    Decompressor decompressor, long start, long end, READ_MODE readMode)
    throws IOException {

  final boolean canSeek = seekableIn instanceof Seekable;
  if (!canSeek) {
    throw new IOException("seekableIn must be an instance of " +
        Seekable.class.getName());
  }
  final Seekable positioner = (Seekable) seekableIn;

  // Rewind and measure how far the first BZip2 block marker lies from the
  // beginning. Block markers are 6 bytes; the first block usually carries
  // "BZh9" as well, for 10 bytes in total. Occasionally a stream starts
  // without the leading "BZ", which is why the distance is measured
  // rather than assumed.
  positioner.seek(0);
  final long bytesToFirstMarker =
      CBZip2InputStream.numberOfBytesTillNextMarker(seekableIn);

  // Step back from the requested start by that distance, clamped at 0.
  final long adjStart = Math.max(0L, start - bytesToFirstMarker);
  positioner.seek(adjStart);
  SplitCompressionInputStream result =
      new BZip2CompressionInputStream(seekableIn, adjStart, end, readMode);

  // Why the stream may have to be reopened at 'start':
  // Assume the following scenario in a BZip2 compressed stream, where
  // . represents compressed data.
  // .....[48 bit Block].....[48 bit   Block].....[48 bit Block]...
  // ........................[47 bits][1 bit].....[48 bit Block]...
  // ................................^[Assume a Byte alignment here]
  // ........................................^^[current position of stream]
  // .....................^^[We go back 10 Bytes in stream and find a Block marker]
  // ........................................^^[We align at wrong position!]
  // ...........................................................^^[While this pos is correct]
  final boolean alignedBeforeStart = result.getPos() < start;
  if (alignedBeforeStart) {
    positioner.seek(start);
    result =
        new BZip2CompressionInputStream(seekableIn, start, end, readMode);
  }

  return result;
}