htsjdk.samtools.util.BlockCompressedInputStream Java Examples

The following examples show how to use htsjdk.samtools.util.BlockCompressedInputStream. Each example is taken from an open-source project; the source file, originating project, and license are noted above it.
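Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of the core pattern most of them rely on: open a BGZF-compressed file, read some decompressed bytes, record a virtual file offset with getFilePointer(), and seek back to it. The class name and file path are placeholders.
import htsjdk.samtools.util.BlockCompressedInputStream;

import java.io.File;
import java.io.IOException;

public class BlockCompressedBasics {
  public static void main(String[] args) throws IOException {
    final File bgzfFile = new File("example.vcf.gz"); // placeholder path to a BGZF-compressed file
    try (BlockCompressedInputStream in = new BlockCompressedInputStream(bgzfFile)) {
      final byte[] buf = new byte[1024];
      final int n = in.read(buf);                      // reads decompressed bytes, spanning block boundaries as needed
      final long virtualOffset = in.getFilePointer();  // (compressedBlockOffset << 16) | offsetWithinBlock
      in.seek(virtualOffset);                          // jump back to the recorded virtual offset
      System.out.println("read " + n + " bytes; virtual offset " + virtualOffset);
    }
  }
}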
Example #1
Source File: BlockCompressedLineReaderTest.java    From rtg-tools with BSD 2-Clause "Simplified" License
public void testLinearRead() throws IOException {
  final File dir = FileUtils.createTempDir("bclr", "test");
  try {
    final File sam = FileHelper.resourceToFile("com/rtg/sam/resources/readerWindow1.sam.gz", new File(dir, "readerWindow1.sam.gz"));
    try (BlockCompressedLineReader bclr = new BlockCompressedLineReader(new BlockCompressedInputStream(sam))) {
      try (BufferedReader br = new BufferedReader(new InputStreamReader(GzipUtils.createGzipInputStream(new FileInputStream(sam))))) {
        String lineA;
        String lineB;
        while (true) {
          lineA = br.readLine();
          lineB = bclr.readLine();
          if (lineA == null || lineB == null) {
            break;
          }
          assertEquals(lineA, lineB);
        }
        assertNull(lineA);
        assertNull(lineB);
      }
    }
  } finally {
    assertTrue(FileHelper.deleteAll(dir));
  }
}
 
Example #2
Source File: BGZF_FastaIndexer.java    From cramtools with Apache License 2.0
public static void main(String[] args) throws IOException {
	Params params = new Params();
	JCommander jc = new JCommander(params);
	jc.parse(args);

	for (File file : params.files) {
		log.info("Indexing file: " + file.getAbsolutePath());
		BlockCompressedInputStream bcis = new BlockCompressedInputStream(new SeekableFileStream(file));
	bcis.available(); // presumably primes the stream by forcing the first BGZF block to be read
		BGZF_FastaIndexer mli = new BGZF_FastaIndexer(bcis);

		PrintWriter writer = new PrintWriter(file.getAbsolutePath() + ".fai");

		FAIDX_FastaIndexEntry e;
		while (!writer.checkError() && (e = mli.readNext()) != null)
			writer.println(e);

		writer.close();
	}
}
 
Example #3
Source File: BGZF_ReferenceSequenceFile.java    From cramtools with Apache License 2.0
public BGZF_ReferenceSequenceFile(File file) throws FileNotFoundException {
	if (!file.canRead())
		throw new RuntimeException("Cannot find or read fasta file: " + file.getAbsolutePath());

	File indexFile = new File(file.getAbsolutePath() + ".fai");
	if (!indexFile.canRead())
		throw new RuntimeException("Cannot find or read fasta index file: " + indexFile.getAbsolutePath());

	Scanner scanner = new Scanner(indexFile);
	int seqID = 0;
	dictionary = new SAMSequenceDictionary();
	while (scanner.hasNextLine()) {
		String line = scanner.nextLine();
		FAIDX_FastaIndexEntry entry = FAIDX_FastaIndexEntry.fromString(seqID++, line);
		index.put(entry.getName(), entry);
		dictionary.addSequence(new SAMSequenceRecord(entry.getName(), entry.getLen()));
	}
	scanner.close();

	if (index.isEmpty())
		log.warn("No entries in the index: " + indexFile.getAbsolutePath());

	is = new BlockCompressedInputStream(new SeekableFileStream(file));
}
 
Example #4
Source File: CheckTerminatorBlock.java    From picard with MIT License
@Override protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    try {
        final FileTermination term = BlockCompressedInputStream.checkTermination(INPUT);
        System.err.println(term.name());
        if (term == FileTermination.DEFECTIVE) {
            return 100;
        }
        else {
            return 0;
        }
    }
    catch (IOException ioe) {
        throw new PicardException("Exception reading terminator block of file: " + INPUT.getAbsolutePath());
    }
}
 
Example #5
Source File: BclReader.java    From picard with MIT License
public int seek(final List<File> files, final TileIndex tileIndex, final int currentTile) {
    int count = 0;
    int numClustersInTile = 0;
    for (final InputStream inputStream : streams) {
        final TileIndex.TileIndexRecord tileIndexRecord = tileIndex.findTile(currentTile);
        final BclIndexReader bclIndexReader = new BclIndexReader(files.get(count));
        final long virtualFilePointer = bclIndexReader.get(tileIndexRecord.getZeroBasedTileNumber());
        if (!(inputStream instanceof BlockCompressedInputStream)) {
            throw new UnsupportedOperationException("Seeking only allowed on bgzf");
        } else {
            try {
                if (tileIndex.getNumTiles() != bclIndexReader.getNumTiles()) {
                    throw new PicardException(String.format("%s.getNumTiles(%d) != %s.getNumTiles(%d)",
                            tileIndex.getFile().getAbsolutePath(), tileIndex.getNumTiles(), bclIndexReader.getBciFile().getAbsolutePath(), bclIndexReader.getNumTiles()));
                }
                ((BlockCompressedInputStream) inputStream).seek(virtualFilePointer);
                numClustersInTile = tileIndexRecord.getNumClustersInTile();
            } catch (final IOException e) {
                throw new PicardException("Problem seeking to " + virtualFilePointer, e);
            }
        }
        count++;
    }
    return numClustersInTile;
}
 
Example #6
Source File: SamPositionReaderTest.java    From rtg-tools with BSD 2-Clause "Simplified" License
public void testSomeMethod() throws IOException {
  try (InputStream is = Resources.getResourceAsStream("com/rtg/sam/resources/mixed.sam.gz")) {
    try (SamPositionReader spr = new SamPositionReader(new BlockCompressedLineReader(new BlockCompressedInputStream(is)), 0)) {
      int ref = 0;
      int i = 0;
      while (spr.hasNext()) {
        spr.next();
        if (i >= ENTRIES[ref]) {
          i = 0;
          ++ref;
        }
        assertEquals(EXP_REF_NAME[ref], spr.getReferenceName());
        assertEquals(ref, spr.getReferenceId());
        assertEquals(START[ref][i], spr.getStartPosition());
        assertEquals(LENGTH[ref][i], spr.getLengthOnReference());
        assertEquals(BINS[ref], spr.getBinNum());
        assertEquals(VIRTUAL_OFFSETS[ref][i], spr.getVirtualOffset());
        assertEquals(VIRTUAL_OFFSET_ENDS[ref][i], spr.getNextVirtualOffset());
        assertTrue(spr.hasReference());
        assertTrue(spr.hasCoordinates());
        assertFalse(spr.isUnmapped());
        ++i;
      }
    }
  }
}
 
Example #7
Source File: SamUtils.java    From rtg-tools with BSD 2-Clause "Simplified" License
/**
 * @param file the file to check.
 * @return true if this looks like a BAM file.
 * @throws IOException if an IO Error occurs
 */
public static boolean isBAMFile(final File file) throws IOException {
  final boolean result;
  try (BufferedInputStream bis = new BufferedInputStream(new FileInputStream(file))) {
    if (!BlockCompressedInputStream.isValidFile(bis)) {
      return false;
    }
    final int buffSize = BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE;
    bis.mark(buffSize);
    final byte[] buffer = new byte[buffSize];
    final int len = IOUtils.readAmount(bis, buffer, 0, buffSize);
    bis.reset();
    final byte[] magicBuf = new byte[4];
    final int magicLength = IOUtils.readAmount(new BlockCompressedInputStream(new ByteArrayInputStream(buffer, 0, len)), magicBuf, 0, 4);
    //checks we read 4 bytes and they were "BAM\1" in ascii
    result = magicLength == 4 && Arrays.equals(new byte[]{(byte) 66, (byte) 65, (byte) 77, (byte) 1}, magicBuf);

  }
  return result;
}
 
Example #8
Source File: BlockCompressedLineReaderTest.java    From rtg-tools with BSD 2-Clause "Simplified" License
public void test() throws IOException {
  final File dir = FileUtils.createTempDir("bclr", "test");
  try {
    final File sam = FileHelper.resourceToFile("com/rtg/sam/resources/readerWindow1.sam.gz", new File(dir, "readerWindow1.sam.gz"));
    try (BlockCompressedLineReader bclr = new BlockCompressedLineReader(new BlockCompressedInputStream(sam))) {
      final long firstSeekPos = (44947L << 16) | 22870;
      bclr.seek(firstSeekPos);
      assertEquals(firstSeekPos, bclr.getFilePointer());
      final String line = bclr.readLine();
      assertTrue(line.startsWith("857\t147\tsimulatedSequence2\t32834"));
      assertEquals(firstSeekPos, bclr.getLineFilePointer());
      assertEquals(firstSeekPos + line.length() + 1, bclr.getFilePointer());
      final String line2 = bclr.readLine();
      assertTrue(line2.startsWith("251\t99\tsimulatedSequence2\t33229"));
      assertEquals((int) '9', bclr.peek());
      final String line3 = bclr.readLine();
      assertTrue(line3.startsWith("91\t163\tsimulatedSequence2\t33238"));
      assertEquals(3, bclr.getLineNumber());
    }
  } finally {
    assertTrue(FileHelper.deleteAll(dir));
  }
}
 
Example #9
Source File: TabixHeaderTest.java    From rtg-tools with BSD 2-Clause "Simplified" License
public void test() throws IOException {
  final TabixHeader th1;
  try (BlockCompressedInputStream is = new BlockCompressedInputStream(Resources.getResourceAsStream("com/rtg/tabix/resources/tabixmerge1.sam.gz.tbi"))) {
    th1 = TabixHeader.readHeader(is);
    assertEquals(4, th1.getNumSequences());
    checkOptions(th1.getOptions());
    assertTrue(Arrays.equals(new String[]{"simulatedSequence1", "simulatedSequence2", "simulatedSequence3", "simulatedSequence4"}, th1.getSequenceNamesUnpacked()));
  }
  final TabixHeader th2;
  try (BlockCompressedInputStream is2 = new BlockCompressedInputStream(Resources.getResourceAsStream("com/rtg/tabix/resources/tabixmerge2.sam.gz.tbi"))) {
    th2 = TabixHeader.readHeader(is2);
    assertEquals(5, th2.getNumSequences());
    checkOptions(th2.getOptions());
    assertTrue(Arrays.equals(new String[]{"simulatedSequence4", "simulatedSequence5", "simulatedSequence6", "simulatedSequence7", "simulatedSequence8"}, th2.getSequenceNamesUnpacked()));
    final TabixHeader merged = TabixHeader.mergeHeaders(th1, th2);
    assertEquals(8, merged.getNumSequences());
    checkOptions(th2.getOptions());
    assertTrue(Arrays.equals(new String[]{"simulatedSequence1", "simulatedSequence2", "simulatedSequence3", "simulatedSequence4", "simulatedSequence5", "simulatedSequence6", "simulatedSequence7", "simulatedSequence8"}, merged.getSequenceNamesUnpacked()));
  }
}
 
Example #10
Source File: TabixIndexMergeTest.java    From rtg-tools with BSD 2-Clause "Simplified" License
public void testSam() throws Exception {
  final File dir = FileUtils.createTempDir("indexmerge", "test");
  try {
    final ArrayList<File> files = new ArrayList<>();
    final ArrayList<Long> dataFileSizes = new ArrayList<>();
    for (int i = 1; i <= 4; ++i) {
      final String samFileName = String.format(SAM_FILES, i);
      final File samFile = new File(dir, samFileName);
      final File tbiFile = new File(dir, samFileName + ".tbi");
      FileHelper.resourceToFile(String.format("%s/%s", SAM_RESOURCE, samFileName), samFile);
      FileHelper.resourceToFile(String.format("%s/%s.tbi", SAM_RESOURCE, samFileName), tbiFile);
      files.add(tbiFile);
      dataFileSizes.add(samFile.length());
    }
    final File mergedIndex = new File(dir, "merged.sam.gz.tbi");
    TabixIndexMerge.mergeTabixFiles(mergedIndex, files, dataFileSizes);
    try (InputStream fis = new BlockCompressedInputStream(new FileInputStream(mergedIndex))) {
      final String indexDebug = IndexTestUtils.tbiIndexToUniqueString(fis);
      mNano.check("merged.sam.gz.tbi.debug", indexDebug);
    }
  } finally {
    assertTrue(FileHelper.deleteAll(dir));
  }
}
 
Example #11
Source File: VcfDecomposerCliTest.java    From rtg-tools with BSD 2-Clause "Simplified" License
private void runResourceTest(String inResourceLoc, String expResourceLoc, boolean useRef, String... extrArgs) throws IOException {
  try (TestDirectory dir = new TestDirectory()) {
    final File in = FileHelper.resourceToFile(inResourceLoc, new File(dir, new File(Resources.getResource(inResourceLoc).getFile()).getName()));
    final File out = new File(dir, "out.vcf.gz");
    String[] args = {
      "-i", in.getPath(), "-o", out.getPath()
    };
    if (useRef) {
      final File sdf = ReaderTestUtils.getDNASubDir(REF, dir);
      args = Utils.append(args, "-t", sdf.getPath());
    }
    args = Utils.append(args, extrArgs);
    final String output = checkMainInitOk(args);
    mNano.check(expResourceLoc + ".txt", output, true);

    assertEquals(BlockCompressedInputStream.FileTermination.HAS_TERMINATOR_BLOCK, BlockCompressedInputStream.checkTermination(out));

    final String o = StringUtils.grep(FileHelper.gzFileToString(out), "^[^#]").replaceAll("[\r\n]+", "\n");
    mNano.check(expResourceLoc, o, true);
  }
}
 
Example #12
Source File: MendeliannessCheckerTest.java    From rtg-tools with BSD 2-Clause "Simplified" License
public void testOptions() throws IOException {
  try (TestDirectory dir = new TestDirectory("mendelianness")) {
    final File sdf = ReaderTestUtils.getDNADir(">chr21\nacgt", dir);
    final File file1 = FileHelper.resourceToFile("com/rtg/vcf/mendelian/resources/merge.vcf", new File(dir, "merge.vcf"));
    final File inconsistent = new File(dir, "failed.vcf.gz");
    final File consistent = new File(dir, "nonfailed.vcf.gz");
    final File annot = new File(dir, "checked.vcf.gz");
    final MainResult res = MainResult.run(getCli(), "-t", sdf.getPath(), "-i", file1.getPath(), "--all-records", "--output", annot.getPath(), "--output-inconsistent", inconsistent.getPath(), "--output-consistent", consistent.getPath());
    assertEquals(res.err(), 0, res.rc());
    final String s = res.out().replaceAll("Checking: [^\n]*\n", "Checking: \n");
    mNano.check("mendelian.out.txt", s);
    mNano.check("mendelian.annotated.vcf", TestUtils.sanitizeVcfHeader(FileHelper.gzFileToString(annot)));
    mNano.check("mendelian.inconsistent.vcf", TestUtils.sanitizeVcfHeader(FileHelper.gzFileToString(inconsistent)));
    mNano.check("mendelian.consistent.vcf", TestUtils.sanitizeVcfHeader(FileHelper.gzFileToString(consistent)));
    assertEquals(BlockCompressedInputStream.FileTermination.HAS_TERMINATOR_BLOCK, BlockCompressedInputStream.checkTermination(inconsistent));
    assertEquals(BlockCompressedInputStream.FileTermination.HAS_TERMINATOR_BLOCK, BlockCompressedInputStream.checkTermination(consistent));

    final MainResult res2 = MainResult.run(getCli(), "-t", sdf.getPath(), "-i", file1.getPath());
    assertEquals(0, res2.rc());
    final String s3 = res2.out().replaceAll("Checking: [^\n]*\n", "Checking: \n");
    //System.err.println(s3);
    mNano.check("mendelian2.out.txt", s3);
  }

}
 
Example #13
Source File: VcfMergeTest.java    From rtg-tools with BSD 2-Clause "Simplified" License
public void checkMerge(String id, String resourcea, String resourceb, String... argsIn) throws Exception {
  try (final TestDirectory dir = new TestDirectory("vcfmerge")) {
    final File snpsA = BgzipFileHelper.bytesToBgzipFile(FileHelper.resourceToString("com/rtg/vcf/resources/" + resourcea).getBytes(), new File(dir, "fileA.vcf.gz"));
    new TabixIndexer(snpsA, TabixIndexer.indexFileName(snpsA)).saveVcfIndex();
    final File snpsB = BgzipFileHelper.bytesToBgzipFile(FileHelper.resourceToString("com/rtg/vcf/resources/" + resourceb).getBytes(), new File(dir, "fileB.vcf.gz"));
    new TabixIndexer(snpsB, TabixIndexer.indexFileName(snpsB)).saveVcfIndex();
    final File output = new File(dir, "out.vcf.gz");
    final String out = checkMainInit(Utils.append(argsIn, "-o", output.toString(), "--stats", snpsA.toString(), snpsB.toString())).out();
    assertEquals(BlockCompressedInputStream.FileTermination.HAS_TERMINATOR_BLOCK, BlockCompressedInputStream.checkTermination(output));
    assertTrue(new File(dir, output.getName() + ".tbi").isFile());
    mNano.check("vcfmerge_out_" + id + ".vcf", TestUtils.sanitizeVcfHeader(FileHelper.gzFileToString(output)), false);
    mNano.check("vcfmerge_stats_" + id + ".txt", out);

    final File inlist = new File(dir, "infiles.txt");
    FileUtils.stringToFile(snpsA.getAbsolutePath() + StringUtils.LS + snpsB.getAbsolutePath() + StringUtils.LS, inlist);
    final File output2 = new File(dir, "out2.vcf.gz");
    checkMainInit(Utils.append(argsIn, "-o", output2.toString(), "--stats", "-I", inlist.toString()));
    mNano.check("vcfmerge_out_" + id + ".vcf", TestUtils.sanitizeVcfHeader(FileHelper.gzFileToString(output2)), false);
    mNano.check("vcfmerge_stats_" + id + ".txt", out);
  }
}
 
Example #14
Source File: TabixIndexMerge.java    From rtg-tools with BSD 2-Clause "Simplified" License
/**
 * Merge indexes for files that will be concatenated.
 * @param output output index file
 * @param files <code>tabix</code> index files
 * @param dataFileSizes file size of corresponding data files
 * @throws IOException if an IO error occurs
 */
public static void mergeTabixFiles(File output, List<File> files, List<Long> dataFileSizes) throws IOException {
  long pointerAdjust = 0;
  final SequenceIndex[][] indexesSquared = new SequenceIndex[files.size()][];
  final String[][] sequenceNames = new String[files.size()][];
  TabixHeader mergedHeader = null;
  for (int i = 0; i < files.size(); ++i) {
    final File tbiFile = files.get(i);
    try (BlockCompressedInputStream bcis = new BlockCompressedInputStream(tbiFile)) {
      final TabixHeader th = TabixHeader.readHeader(bcis);
      sequenceNames[i] = th.getSequenceNamesUnpacked();
      if (mergedHeader != null) {
        mergedHeader = TabixHeader.mergeHeaders(mergedHeader, th);
      } else {
        mergedHeader = th;
      }
      indexesSquared[i] = loadFileIndexes(bcis, th.getNumSequences(), pointerAdjust);
    }
    pointerAdjust += dataFileSizes.get(i);
  }
  final List<SequenceIndex> indexes = collapseIndexes(indexesSquared, sequenceNames);
  TabixIndexer.mergeChunks(indexes);
  try (BlockCompressedOutputStream fos = new BlockCompressedOutputStream(output)) {
    TabixIndexer.writeIndex(indexes, mergedHeader.getOptions(), Arrays.asList(mergedHeader.getSequenceNamesUnpacked()), fos);
  }
}
 
Example #15
Source File: TabixIndexer.java    From rtg-tools with BSD 2-Clause "Simplified" License
/**
 * Creates a <code>TABIX</code> index for given BED file and saves it.
 * @throws IOException if an IO Error occurs.
 * @throws UnindexableDataException If data cannot be indexed because of properties of the data
 */
public void saveBedIndex() throws IOException, UnindexableDataException {
  int skip = 0;
  // passing true indicates we want to read at least a portion from the start of the input stream without affecting the main run
  mInputHandler.start(true);
  try {
    final BlockCompressedLineReader bcli = new BlockCompressedLineReader(new BlockCompressedInputStream(mInputHandler.getInputStream()));
    String line;
    while ((line = bcli.readLine()) != null) {
      if (BED_SKIP_LINES.matcher(line).matches()) {
        ++skip;
      } else {
        break;
      }
    }
  } finally {
    //resets the stream
    mInputHandler.close();
  }
  saveIndex(new BedIndexerFactory(skip));
}
 
Example #16
Source File: BGZFSplitCompressionInputStream.java    From Hadoop-BAM with MIT License
public BGZFSplitCompressionInputStream(InputStream in, long start, long end)
    throws IOException {
  super(in, start, end);
  bufferedIn = new BufferedInputStream(super.in);
  this.startingPos = super.getPos();
  input = new BlockCompressedInputStream(bufferedIn);
  this.updatePos(false);
}
 
Example #17
Source File: TabixIndexerTest.java    From rtg-tools with BSD 2-Clause "Simplified" License
public void testMixed() throws Exception {
  final File index = file("index");
  final File input = FileHelper.resourceToFile("com/rtg/sam/resources/mixed.sam.gz", file("mixed.sam.gz"));
  new TabixIndexer(input, index).saveSamIndex();

  final String myBai = IndexTestUtils.tbiIndexToUniqueString(new BlockCompressedInputStream(lazyStream(index)));
  final String exp;
  try (InputStream baiIs = new BlockCompressedInputStream(Resources.getResourceAsStream("com/rtg/sam/resources/mixed.sam.gz.tbi"))) {
    exp = IndexTestUtils.tbiIndexToUniqueString(baiIs);
  }

  assertEquals(exp, myBai);
}
 
Example #18
Source File: TabixIndexerTest.java    From rtg-tools with BSD 2-Clause "Simplified" License
public void testLargish() throws Exception {
  final File index = file("index");
  final File input = FileHelper.resourceToFile("com/rtg/sam/resources/mmmm.sam.gz", file("mmmm.sam.gz"));
  new TabixIndexer(input, index).saveSamIndex();
  final String myBai = IndexTestUtils.tbiIndexToUniqueString(new BlockCompressedInputStream(lazyStream(index)));
  final String exp;
  try (InputStream baiIs = new BlockCompressedInputStream(Resources.getResourceAsStream("com/rtg/sam/resources/mmmm.sam.gz.tbi"))) {
    exp = IndexTestUtils.tbiIndexToUniqueString(baiIs);
  }

  assertEquals(exp, myBai);
}
 
Example #19
Source File: TabixIndexerTest.java    From rtg-tools with BSD 2-Clause "Simplified" License
public void testVcf() throws Exception {
  final File index = file("index");
  final File input = FileHelper.resourceToFile("com/rtg/sam/resources/vcf.txt.gz", file("vcf.txt.gz"));
  new TabixIndexer(input, index).saveVcfIndex();
  final String myBai = IndexTestUtils.tbiIndexToUniqueString(new BlockCompressedInputStream(lazyStream(index)));
  final String exp;
  try (InputStream baiIs = new BlockCompressedInputStream(Resources.getResourceAsStream("com/rtg/sam/resources/vcf.txt.gz.tbi"))) {
    exp = IndexTestUtils.tbiIndexToUniqueString(baiIs);
  }

  assertEquals(exp, myBai);
}
 
Example #20
Source File: TabixLineReader.java    From rtg-tools with BSD 2-Clause "Simplified" License
SingleRestrictionLineReader(File input, TabixIndexReader tir, RegionRestriction region) throws IOException {
  if (region == null) {
    throw new NullPointerException();
  }
  mSequence = region.getSequenceName();
  mBeg = region.getStart();
  mEnd = region.getEnd();
  final BlockCompressedLineReader bclr = new BlockCompressedLineReader(new BlockCompressedInputStream(input));
  mBCPositionReader = tir.getOptions().mFormat == TabixIndexer.TabixOptions.FORMAT_VCF ? new VcfPositionReader(bclr, tir.getOptions().mSkip) : new GenericPositionReader(bclr, tir.getOptions());
  mRange = tir.getFilePointers(region);
  if (mRange != null) {
    mBCPositionReader.seek(mRange.start(0));
  }
}
 
Example #21
Source File: BaseBclReader.java    From picard with MIT License
InputStream open(final File file, final boolean seekable, final boolean isGzip, final boolean isBgzf) {
    final String filePath = file.getAbsolutePath();

    try {
        // Open up a buffered stream to read from the file and optionally wrap it in a gzip stream if necessary
        if (isBgzf) {
            // Only BlockCompressedInputStreams can seek, and only if they are fed a SeekableStream.
            return new BlockCompressedInputStream(IOUtil.maybeBufferedSeekableStream(file));
        } else if (isGzip) {
            if (seekable) {
                throw new IllegalArgumentException(
                        String.format("Cannot create a seekable reader for gzip bcl: %s.", filePath)
                );
            }
            return (IOUtil.maybeBufferInputStream(new GZIPInputStream(new FileInputStream(file), Defaults.BUFFER_SIZE / 2),
                    Defaults.BUFFER_SIZE / 2));
        } else {
            if (seekable) {
                throw new IllegalArgumentException(
                        String.format("Cannot create a seekable reader for provided bcl: %s.", filePath)
                );
            }
            return IOUtil.maybeBufferInputStream(new FileInputStream(file));
        }
    } catch (final FileNotFoundException fnfe) {
        throw new PicardException("File not found: (" + filePath + ")", fnfe);
    } catch (final IOException ioe) {
        throw new PicardException("Error reading file: (" + filePath + ")", ioe);
    }
}
 
Example #22
Source File: TabixLineReader.java    From rtg-tools with BSD 2-Clause "Simplified" License
SingleRestrictionLineReader(File input, TabixIndexReader tir) throws IOException {
  mSequence = null;
  mBeg = -1;
  mEnd = -1;
  final BlockCompressedLineReader bclr = new BlockCompressedLineReader(new BlockCompressedInputStream(input));
  mBCPositionReader = tir.getOptions().mFormat == TabixIndexer.TabixOptions.FORMAT_VCF ? new VcfPositionReader(bclr, tir.getOptions().mSkip) : new GenericPositionReader(bclr, tir.getOptions());
  mRange = new VirtualOffsets(0, 0xFFFFFFFFFFFFFFFFL, null);
}
 
Example #23
Source File: SplittingBAMIndexer.java    From Hadoop-BAM with MIT License
private static PtrSkipPair readAlignment(final ByteBuffer byteBuffer,
		final BlockCompressedInputStream in) throws IOException
{
	final long ptr = in.getFilePointer();
	final int read = readBytes(byteBuffer, in, 4);
	if (read != 4) {
		if (read == 0)
			return null;
		ioError(
			"Invalid alignment at virtual offset %#x: "+
			"less than 4 bytes long", in.getFilePointer());
	}
	return new PtrSkipPair(ptr, byteBuffer.getInt(0));
}
 
Example #24
Source File: TabixLineReader.java    From rtg-tools with BSD 2-Clause "Simplified" License
MultiRestrictionLineReader(File input, TabixIndexReader tir, ReferenceRanges<String> ranges) throws IOException {
  if (ranges == null) {
    throw new NullPointerException();
  }
  //Diagnostic.developerLog("Creating MultiRestrictionLineReader");
  final BlockCompressedLineReader bclr = new BlockCompressedLineReader(new BlockCompressedInputStream(new ClosedFileInputStream(input)));
  mReader = tir.getOptions().mFormat == TabixIndexer.TabixOptions.FORMAT_VCF ? new VcfPositionReader(bclr, tir.getOptions().mSkip) : new GenericPositionReader(bclr, tir.getOptions());
  final VirtualOffsets offsets = tir.getFilePointers(ranges);
  mOffsets = offsets == null ? new VirtualOffsets() : offsets;
  mSequenceLookup = tir.mSequenceLookup;
  populateNext(true);
}
 
Example #25
Source File: BGZFEnhancedGzipCodec.java    From Hadoop-BAM with MIT License
@Override
public SplitCompressionInputStream createInputStream(InputStream seekableIn, Decompressor decompressor, long start, long end, READ_MODE readMode) throws IOException {
  if (!(seekableIn instanceof Seekable)) {
    throw new IOException("seekableIn must be an instance of " +
        Seekable.class.getName());
  }
  if (!BlockCompressedInputStream.isValidFile(new BufferedInputStream(seekableIn))) {
    // data is regular gzip, not BGZF
    ((Seekable)seekableIn).seek(0);
    final CompressionInputStream compressionInputStream = createInputStream(seekableIn,
        decompressor);
    return new SplitCompressionInputStream(compressionInputStream, start, end) {
      @Override
      public int read(byte[] b, int off, int len) throws IOException {
        return compressionInputStream.read(b, off, len);
      }
      @Override
      public void resetState() throws IOException {
        compressionInputStream.resetState();
      }
      @Override
      public int read() throws IOException {
        return compressionInputStream.read();
      }
    };
  }
  BGZFSplitGuesser splitGuesser = new BGZFSplitGuesser(seekableIn);
  long adjustedStart = splitGuesser.guessNextBGZFBlockStart(start, end);
  ((Seekable)seekableIn).seek(adjustedStart);
  return new BGZFSplitCompressionInputStream(seekableIn, adjustedStart, end);
}
 
Example #26
Source File: VCFInputFormat.java    From Hadoop-BAM with MIT License
@Override
protected boolean isSplitable(JobContext context, Path filename) {
	Configuration conf = context.getConfiguration();
	final CompressionCodec codec =
			new CompressionCodecFactory(context.getConfiguration()).getCodec(filename);
	if (codec == null) {
		return true;
	}
	if (codec instanceof BGZFCodec || codec instanceof BGZFEnhancedGzipCodec) {
		boolean splittable;
		try {
			try (FSDataInputStream in = filename.getFileSystem(conf).open(filename)) {
				splittable = BlockCompressedInputStream.isValidFile(new BufferedInputStream(in));
			}
		} catch (IOException e) {
			// can't determine if BGZF or GZIP, conservatively assume latter
			splittable = false;
		}
		if (!splittable) {
			logger.warn("{} is not splittable, consider using block-compressed gzip (BGZF)", filename);
		}
		return splittable;
	} else if (codec instanceof GzipCodec) {
		logger.warn("Using GzipCodec, which is not splittable, consider using block compressed gzip (BGZF) and BGZFCodec/BGZFEnhancedGzipCodec.");
	}
	return codec instanceof SplittableCompressionCodec;
}
 
Example #27
Source File: TestBGZFSplitGuesser.java    From Hadoop-BAM with MIT License
private void canReadFromBlockStart(long blockStart) throws IOException {
  BlockCompressedInputStream blockCompressedInputStream = new
      BlockCompressedInputStream(file);
  blockCompressedInputStream.setCheckCrcs(true);
  blockCompressedInputStream.seek(blockStart << 16);
  byte[] b = new byte[100];
  blockCompressedInputStream.read(b);
}
 
Example #28
Source File: TestBAMOutputFormat.java    From Hadoop-BAM with MIT License
private List<SAMRecord> getRecordsAtSplits(File bam, SplittingBAMIndex index) throws IOException {
    List<SAMRecord> records = new ArrayList<>();
    BAMRecordCodec codec = new BAMRecordCodec(samFileHeader);
    BlockCompressedInputStream bci = new BlockCompressedInputStream(bam);
    codec.setInputStream(bci);
    for (Long offset : index.getVirtualOffsets()) {
        bci.seek(offset);
        SAMRecord record = codec.decode();
        if (record != null) {
            records.add(record);
        }
    }
    return records;
}
 
Example #29
Source File: IOUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License
/**
 * creates an input stream from a zipped stream
 * @return a block gzipped input stream if the data is block gzipped, otherwise a plain gzipped input stream
 * @throws ZipException if !in.markSupported()
 */
public static InputStream makeZippedInputStream(InputStream in) throws IOException {
    Utils.nonNull(in);
    if (BlockCompressedInputStream.isValidFile(in)) {
            return new BlockCompressedInputStream(in);
    } else {
        return new GZIPInputStream(in);
    }
}
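A brief usage note for the method above: per the javadoc, the stream must support mark/reset (otherwise a ZipException is thrown), which wrapping the raw file stream in a BufferedInputStream provides. The sketch below is a hypothetical caller, assumed to live in the same class as makeZippedInputStream (so the java.io imports are already available); the method name and file argument are placeholders.
// Hypothetical helper, assumed to sit alongside makeZippedInputStream in the same class.
public static long countDecompressedBytes(final File file) throws IOException {
    // BufferedInputStream supplies the mark/reset support that makeZippedInputStream requires
    try (InputStream in = makeZippedInputStream(new BufferedInputStream(new FileInputStream(file)))) {
        final byte[] buf = new byte[4096];
        long total = 0;
        int n;
        while ((n = in.read(buf)) != -1) {
            total += n; // bytes arrive decompressed, whether the source was BGZF or plain gzip
        }
        return total;
    }
}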
 
Example #30
Source File: CalculateGenotypePosteriorsIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License
@Test //test for https://github.com/broadinstitute/gatk/issues/4346
public void gzipOutputIsGzipped() throws IOException {
    final File out = createTempFile("out", ".vcf.gz");

    final ArgumentsBuilder args = new ArgumentsBuilder();
    args.addOutput(out)
        .addVCF(getTestFile("overlappingVariants.vcf"));

    runCommandLine(args);

    try( final InputStream in = new BufferedInputStream(new FileInputStream(out))) {
        Assert.assertTrue(BlockCompressedInputStream.isValidFile(in));
    }
}