Java Code Examples for htsjdk.samtools.util.BlockCompressedInputStream#isValidFile()

The following examples show how to use htsjdk.samtools.util.BlockCompressedInputStream#isValidFile(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: SamUtils.java    From rtg-tools with BSD 2-Clause "Simplified" License 6 votes vote down vote up
/**
 * Tests whether a file looks like a BAM file: it must pass the
 * block-compressed check and its decompressed content must begin with the
 * four magic bytes {@code "BAM\1"}.
 *
 * @param file the file to check.
 * @return true if this looks like a BAM file.
 * @throws IOException if an IO Error occurs
 */
public static boolean isBAMFile(final File file) throws IOException {
  try (BufferedInputStream in = new BufferedInputStream(new FileInputStream(file))) {
    if (BlockCompressedInputStream.isValidFile(in)) {
      // Pull at most one maximum-sized compressed block into memory so the
      // magic number can be decompressed from a private copy of the data.
      final int maxBlock = BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE;
      in.mark(maxBlock);
      final byte[] compressed = new byte[maxBlock];
      final int compressedLen = IOUtils.readAmount(in, compressed, 0, maxBlock);
      in.reset();

      final byte[] magic = new byte[4];
      final BlockCompressedInputStream decompressed =
          new BlockCompressedInputStream(new ByteArrayInputStream(compressed, 0, compressedLen));
      final int magicLen = IOUtils.readAmount(decompressed, magic, 0, 4);

      // The expected header is "BAM\1" in ASCII, and all four bytes must have been read.
      final byte[] expected = {'B', 'A', 'M', 1};
      return magicLen == 4 && Arrays.equals(expected, magic);
    }
    return false;
  }
}
 
Example 2
Source File: TabixIndexer.java    From rtg-tools with BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Reports whether the given file is block compressed, as judged by
 * {@code BlockCompressedInputStream.isValidFile} on a buffered view of it.
 * @param file file to check
 * @return true iff file is block compressed
 * @throws IOException if an IO error occurs
 */
public static boolean isBlockCompressed(File file) throws IOException {
  try (BufferedInputStream in = new BufferedInputStream(new FileInputStream(file))) {
    // The stream is closed by try-with-resources after the check returns.
    return BlockCompressedInputStream.isValidFile(in);
  }
}
 
Example 3
Source File: BGZFEnhancedGzipCodec.java    From Hadoop-BAM with MIT License 5 votes vote down vote up
/**
 * Creates a split-aware decompression stream over {@code seekableIn}.
 *
 * <p>If the data passes the BGZF validity check, the stream is positioned at
 * the block start guessed for the range {@code [start, end)} and a
 * {@code BGZFSplitCompressionInputStream} is returned. Otherwise the data is
 * treated as regular gzip: the stream is rewound to offset 0 and wrapped in a
 * {@code SplitCompressionInputStream} that delegates to the ordinary
 * (non-split) compression stream.
 *
 * @param seekableIn the underlying stream; must also implement {@code Seekable}
 * @param decompressor the decompressor passed on to the plain gzip stream
 * @param start requested split start
 * @param end requested split end
 * @param readMode not consulted by this method
 * @return a split compression stream over the input
 * @throws IOException if {@code seekableIn} is not {@code Seekable}, or on an underlying IO error
 */
@Override
public SplitCompressionInputStream createInputStream(InputStream seekableIn, Decompressor decompressor, long start, long end, READ_MODE readMode) throws IOException {
  if (!(seekableIn instanceof Seekable)) {
    throw new IOException("seekableIn must be an instance of " +
        Seekable.class.getName());
  }
  final Seekable seekable = (Seekable) seekableIn;

  final boolean bgzf = BlockCompressedInputStream.isValidFile(new BufferedInputStream(seekableIn));
  if (bgzf) {
    // Move to the block boundary guessed for this split before reading.
    final BGZFSplitGuesser guesser = new BGZFSplitGuesser(seekableIn);
    final long blockStart = guesser.guessNextBGZFBlockStart(start, end);
    seekable.seek(blockStart);
    return new BGZFSplitCompressionInputStream(seekableIn, blockStart, end);
  }

  // data is regular gzip, not BGZF: rewind and read it as a single stream
  seekable.seek(0);
  final CompressionInputStream plainGzip = createInputStream(seekableIn, decompressor);
  return new SplitCompressionInputStream(plainGzip, start, end) {
    @Override
    public int read() throws IOException {
      return plainGzip.read();
    }
    @Override
    public int read(byte[] b, int off, int len) throws IOException {
      return plainGzip.read(b, off, len);
    }
    @Override
    public void resetState() throws IOException {
      plainGzip.resetState();
    }
  };
}
 
Example 4
Source File: VCFInputFormat.java    From Hadoop-BAM with MIT License 5 votes vote down vote up
/**
 * Decides whether the given input file may be split.
 *
 * <p>Files with no codec are splittable. For the BGZF-capable codecs the file
 * itself is inspected and is splittable only if it passes the BGZF validity
 * check; an IO failure during that check is treated as "not splittable". For
 * any other codec the answer is whether the codec is a
 * {@code SplittableCompressionCodec}.
 *
 * @param context job context supplying the configuration
 * @param filename path of the file to examine
 * @return true if the file can be split
 */
@Override
protected boolean isSplitable(JobContext context, Path filename) {
	Configuration conf = context.getConfiguration();
	final CompressionCodecFactory codecFactory = new CompressionCodecFactory(context.getConfiguration());
	final CompressionCodec codec = codecFactory.getCodec(filename);
	if (codec == null) {
		return true;
	}

	final boolean bgzfCapable = codec instanceof BGZFCodec || codec instanceof BGZFEnhancedGzipCodec;
	if (!bgzfCapable) {
		if (codec instanceof GzipCodec) {
			logger.warn("Using GzipCodec, which is not splittable, consider using block compressed gzip (BGZF) and BGZFCodec/BGZFEnhancedGzipCodec.");
		}
		return codec instanceof SplittableCompressionCodec;
	}

	boolean isBgzf;
	try (FSDataInputStream in = filename.getFileSystem(conf).open(filename)) {
		isBgzf = BlockCompressedInputStream.isValidFile(new BufferedInputStream(in));
	} catch (IOException e) {
		// can't determine if BGZF or GZIP, conservatively assume latter
		isBgzf = false;
	}
	if (!isBgzf) {
		logger.warn("{} is not splittable, consider using block-compressed gzip (BGZF)", filename);
	}
	return isBgzf;
}
 
Example 5
Source File: IOUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Creates a decompressing input stream from a zipped stream.
 *
 * @param in the compressed stream to wrap; must not be null
 * @return a block-gzipped input stream if {@code in} passes the block-compression
 *         check, otherwise a plain gzipped input stream
 * @throws IOException if an IO error occurs while inspecting or wrapping the stream
 * @throws ZipException if !in.markSupported() (per the original documentation;
 *         NOTE(review): confirm the exception type against the htsjdk implementation)
 */
public static InputStream makeZippedInputStream(InputStream in) throws IOException {
    Utils.nonNull(in);
    return BlockCompressedInputStream.isValidFile(in)
            ? new BlockCompressedInputStream(in)
            : new GZIPInputStream(in);
}
 
Example 6
Source File: VCFFileMerger.java    From Hadoop-BAM with MIT License 4 votes vote down vote up
/**
 * Reports whether the first part file is block compressed. Only the first
 * element of {@code parts} is examined.
 *
 * @param parts the part files; the first element is opened and checked
 * @return true if the first part passes the block-compression check
 * @throws IOException if the first part cannot be opened or read
 */
private static boolean isBlockCompressed(List<Path> parts) throws IOException {
  final Path firstPart = parts.get(0);
  try (InputStream header = new BufferedInputStream(Files.newInputStream(firstPart))) {
    final boolean blockCompressed = BlockCompressedInputStream.isValidFile(header);
    return blockCompressed;
  }
}
 
Example 7
Source File: VariantsSparkSinkUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 * Reports whether the named output file is block compressed.
 *
 * @param outputFile name of the file, opened via {@code openFile}
 * @return true if the file passes the block-compression check
 * @throws IOException if the file cannot be opened or read
 */
private static boolean isBlockCompressed(String outputFile) throws IOException {
    try (InputStream buffered = new BufferedInputStream(openFile(outputFile))) {
        final boolean blockCompressed = BlockCompressedInputStream.isValidFile(buffered);
        return blockCompressed;
    }
}