Java Code Examples for htsjdk.samtools.util.BlockCompressedInputStream

The following examples show how to use htsjdk.samtools.util.BlockCompressedInputStream. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: rtg-tools   Source File: SamUtils.java    License: BSD 2-Clause "Simplified" License 6 votes vote down vote up
/**
 * Tests whether a file looks like a BAM file by inspecting the start of its content.
 * @param file the file to check.
 * @return true if the file passes the block compressed validity check and its decompressed
 * content begins with the four BAM magic bytes.
 * @throws IOException if an IO Error occurs
 */
public static boolean isBAMFile(final File file) throws IOException {
  try (BufferedInputStream input = new BufferedInputStream(new FileInputStream(file))) {
    if (!BlockCompressedInputStream.isValidFile(input)) {
      return false;
    }
    // Capture up to one maximum-sized compressed block, then rewind the buffered stream
    final int maxBlock = BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE;
    input.mark(maxBlock);
    final byte[] compressed = new byte[maxBlock];
    final int compressedLength = IOUtils.readAmount(input, compressed, 0, maxBlock);
    input.reset();
    // Decompress the captured bytes far enough to read the magic number
    final byte[] actualMagic = new byte[4];
    final BlockCompressedInputStream decompressed = new BlockCompressedInputStream(new ByteArrayInputStream(compressed, 0, compressedLength));
    final int magicLength = IOUtils.readAmount(decompressed, actualMagic, 0, 4);
    // "BAM\1" in ascii
    final byte[] expectedMagic = {(byte) 66, (byte) 65, (byte) 77, (byte) 1};
    return magicLength == 4 && Arrays.equals(expectedMagic, actualMagic);
  }
}
 
Example 2
Source Project: rtg-tools   Source File: TabixIndexMerge.java    License: BSD 2-Clause "Simplified" License 6 votes vote down vote up
/**
 * Merge indexes for files that will be concatenated.
 * Each index is loaded with its file pointers shifted by the combined size of the data files
 * that precede it, and the headers are merged in the order given.
 * @param output output index file
 * @param files <code>tabix</code> index files, must not be empty
 * @param dataFileSizes file size of corresponding data files, at least one entry per index file
 * @throws IOException if an IO error occurs
 * @throws IllegalArgumentException if no index files are given, or there are fewer data file sizes than index files
 */
public static void mergeTabixFiles(File output, List<File> files, List<Long> dataFileSizes) throws IOException {
  // Fail before touching the output: with no inputs there is no header to write
  if (files.isEmpty()) {
    throw new IllegalArgumentException("No tabix index files supplied to merge");
  }
  if (dataFileSizes.size() < files.size()) {
    throw new IllegalArgumentException("Expected a data file size for each of the " + files.size() + " index files but only got " + dataFileSizes.size());
  }
  long pointerAdjust = 0;
  final SequenceIndex[][] indexesSquared = new SequenceIndex[files.size()][];
  final String[][] sequenceNames = new String[files.size()][];
  TabixHeader mergedHeader = null;
  for (int i = 0; i < files.size(); ++i) {
    final File tbiFile = files.get(i);
    try (BlockCompressedInputStream bcis = new BlockCompressedInputStream(tbiFile)) {
      final TabixHeader th = TabixHeader.readHeader(bcis);
      sequenceNames[i] = th.getSequenceNamesUnpacked();
      mergedHeader = mergedHeader == null ? th : TabixHeader.mergeHeaders(mergedHeader, th);
      indexesSquared[i] = loadFileIndexes(bcis, th.getNumSequences(), pointerAdjust);
    }
    // The next index refers to data that will sit after this file in the concatenation
    pointerAdjust += dataFileSizes.get(i);
  }
  final List<SequenceIndex> indexes = collapseIndexes(indexesSquared, sequenceNames);
  TabixIndexer.mergeChunks(indexes);
  try (BlockCompressedOutputStream fos = new BlockCompressedOutputStream(output)) {
    TabixIndexer.writeIndex(indexes, mergedHeader.getOptions(), Arrays.asList(mergedHeader.getSequenceNamesUnpacked()), fos);
  }
}
 
Example 3
Source Project: rtg-tools   Source File: TabixIndexer.java    License: BSD 2-Clause "Simplified" License 6 votes vote down vote up
/**
 * Creates a <code>TABIX</code> index for given BED file and saves it.
 * The leading lines matching <code>BED_SKIP_LINES</code> are counted first so the indexer
 * can be told how many lines to skip.
 * @throws IOException if an IO Error occurs.
 * @throws UnindexableDataException If data cannot be indexed because of properties of the data
 */
public void saveBedIndex() throws IOException, UnindexableDataException {
  // Passing true asks for the ability to read a portion from the start of the input stream
  // without affecting the main run
  mInputHandler.start(true);
  int skippable = 0;
  try {
    final BlockCompressedLineReader reader = new BlockCompressedLineReader(new BlockCompressedInputStream(mInputHandler.getInputStream()));
    // Stop at end of input or at the first line that is not a skip line
    for (String line = reader.readLine(); line != null && BED_SKIP_LINES.matcher(line).matches(); line = reader.readLine()) {
      ++skippable;
    }
  } finally {
    // resets the stream
    mInputHandler.close();
  }
  saveIndex(new BedIndexerFactory(skippable));
}
 
Example 4
Source Project: rtg-tools   Source File: VcfMergeTest.java    License: BSD 2-Clause "Simplified" License 6 votes vote down vote up
/**
 * Merges two bgzipped, tabix-indexed VCF resources and checks the merged output and statistics,
 * first with the inputs given directly as arguments and then via an input list file.
 * @param id identifier used to select the expected result files
 * @param resourcea name of the first VCF resource
 * @param resourceb name of the second VCF resource
 * @param argsIn additional command line arguments
 * @throws Exception if anything goes wrong while running the test
 */
public void checkMerge(String id, String resourcea, String resourceb, String... argsIn) throws Exception {
  try (final TestDirectory dir = new TestDirectory("vcfmerge")) {
    final String resourceDir = "com/rtg/vcf/resources/";
    final String expectedVcf = "vcfmerge_out_" + id + ".vcf";
    final String expectedStats = "vcfmerge_stats_" + id + ".txt";

    // Materialise and index both inputs
    final File snpsA = BgzipFileHelper.bytesToBgzipFile(FileHelper.resourceToString(resourceDir + resourcea).getBytes(), new File(dir, "fileA.vcf.gz"));
    new TabixIndexer(snpsA, TabixIndexer.indexFileName(snpsA)).saveVcfIndex();
    final File snpsB = BgzipFileHelper.bytesToBgzipFile(FileHelper.resourceToString(resourceDir + resourceb).getBytes(), new File(dir, "fileB.vcf.gz"));
    new TabixIndexer(snpsB, TabixIndexer.indexFileName(snpsB)).saveVcfIndex();

    // First run: inputs supplied directly on the command line
    final File merged = new File(dir, "out.vcf.gz");
    final String out = checkMainInit(Utils.append(argsIn, "-o", merged.toString(), "--stats", snpsA.toString(), snpsB.toString())).out();
    assertEquals(BlockCompressedInputStream.FileTermination.HAS_TERMINATOR_BLOCK, BlockCompressedInputStream.checkTermination(merged));
    assertTrue(new File(dir, merged.getName() + ".tbi").isFile());
    mNano.check(expectedVcf, TestUtils.sanitizeVcfHeader(FileHelper.gzFileToString(merged)), false);
    mNano.check(expectedStats, out);

    // Second run: the same inputs supplied through a list file
    final File inputList = new File(dir, "infiles.txt");
    FileUtils.stringToFile(snpsA.getAbsolutePath() + StringUtils.LS + snpsB.getAbsolutePath() + StringUtils.LS, inputList);
    final File mergedFromList = new File(dir, "out2.vcf.gz");
    checkMainInit(Utils.append(argsIn, "-o", mergedFromList.toString(), "--stats", "-I", inputList.toString()));
    mNano.check(expectedVcf, TestUtils.sanitizeVcfHeader(FileHelper.gzFileToString(mergedFromList)), false);
    // NOTE(review): this re-checks the stdout of the first run; the second run's stdout is never captured - confirm whether that is intended
    mNano.check(expectedStats, out);
  }
}
 
Example 5
/**
 * Runs the command once with all output options and once with the minimum arguments,
 * checking stdout, the produced VCF files, and that the bgzipped outputs are properly terminated.
 * @throws IOException if an IO error occurs
 */
public void testOptions() throws IOException {
  try (TestDirectory dir = new TestDirectory("mendelianness")) {
    final File template = ReaderTestUtils.getDNADir(">chr21\nacgt", dir);
    final File input = FileHelper.resourceToFile("com/rtg/vcf/mendelian/resources/merge.vcf", new File(dir, "merge.vcf"));
    final File inconsistent = new File(dir, "failed.vcf.gz");
    final File consistent = new File(dir, "nonfailed.vcf.gz");
    final File annotated = new File(dir, "checked.vcf.gz");

    // Full run producing annotated, inconsistent and consistent outputs
    final MainResult full = MainResult.run(getCli(), "-t", template.getPath(), "-i", input.getPath(), "--all-records", "--output", annotated.getPath(), "--output-inconsistent", inconsistent.getPath(), "--output-consistent", consistent.getPath());
    assertEquals(full.err(), 0, full.rc());
    // Blank out the remainder of each "Checking:" line before comparing
    final String fullOut = full.out().replaceAll("Checking: [^\n]*\n", "Checking: \n");
    mNano.check("mendelian.out.txt", fullOut);
    mNano.check("mendelian.annotated.vcf", TestUtils.sanitizeVcfHeader(FileHelper.gzFileToString(annotated)));
    mNano.check("mendelian.inconsistent.vcf", TestUtils.sanitizeVcfHeader(FileHelper.gzFileToString(inconsistent)));
    mNano.check("mendelian.consistent.vcf", TestUtils.sanitizeVcfHeader(FileHelper.gzFileToString(consistent)));
    assertEquals(BlockCompressedInputStream.FileTermination.HAS_TERMINATOR_BLOCK, BlockCompressedInputStream.checkTermination(inconsistent));
    assertEquals(BlockCompressedInputStream.FileTermination.HAS_TERMINATOR_BLOCK, BlockCompressedInputStream.checkTermination(consistent));

    // Minimal run with only the template and input
    final MainResult minimal = MainResult.run(getCli(), "-t", template.getPath(), "-i", input.getPath());
    assertEquals(0, minimal.rc());
    final String minimalOut = minimal.out().replaceAll("Checking: [^\n]*\n", "Checking: \n");
    mNano.check("mendelian2.out.txt", minimalOut);
  }
}
 
Example 6
/**
 * Runs the command on an input resource and compares stdout and the non-header output lines
 * against expected results.
 * @param inResourceLoc location of the input resource
 * @param expResourceLoc base name of the expected results
 * @param useRef if true a reference is created from <code>REF</code> and passed with <code>-t</code>
 * @param extrArgs extra command line arguments appended last
 * @throws IOException if an IO error occurs
 */
private void runResourceTest(String inResourceLoc, String expResourceLoc, boolean useRef, String... extrArgs) throws IOException {
  try (TestDirectory dir = new TestDirectory()) {
    // Copy the resource into the test directory under its own file name
    final String inputName = new File(Resources.getResource(inResourceLoc).getFile()).getName();
    final File input = FileHelper.resourceToFile(inResourceLoc, new File(dir, inputName));
    final File result = new File(dir, "out.vcf.gz");

    String[] args = {"-i", input.getPath(), "-o", result.getPath()};
    if (useRef) {
      final File template = ReaderTestUtils.getDNASubDir(REF, dir);
      args = Utils.append(args, "-t", template.getPath());
    }
    args = Utils.append(args, extrArgs);

    final String stdout = checkMainInitOk(args);
    mNano.check(expResourceLoc + ".txt", stdout, true);

    assertEquals(BlockCompressedInputStream.FileTermination.HAS_TERMINATOR_BLOCK, BlockCompressedInputStream.checkTermination(result));

    // Keep only lines not starting with '#' and normalise line endings
    final String records = StringUtils.grep(FileHelper.gzFileToString(result), "^[^#]").replaceAll("[\r\n]+", "\n");
    mNano.check(expResourceLoc, records, true);
  }
}
 
Example 7
/**
 * Merges the tabix indexes of four SAM resources and compares a debug rendering of the merged
 * index with the expected result.
 * @throws Exception if anything goes wrong while running the test
 */
public void testSam() throws Exception {
  final File dir = FileUtils.createTempDir("indexmerge", "test");
  try {
    final ArrayList<File> indexFiles = new ArrayList<>();
    final ArrayList<Long> dataSizes = new ArrayList<>();
    // Copy each data file and its index out of the resources
    for (int fileNumber = 1; fileNumber <= 4; ++fileNumber) {
      final String name = String.format(SAM_FILES, fileNumber);
      final File data = new File(dir, name);
      final File index = new File(dir, name + ".tbi");
      FileHelper.resourceToFile(String.format("%s/%s", SAM_RESOURCE, name), data);
      FileHelper.resourceToFile(String.format("%s/%s.tbi", SAM_RESOURCE, name), index);
      indexFiles.add(index);
      dataSizes.add(data.length());
    }
    final File merged = new File(dir, "merged.sam.gz.tbi");
    TabixIndexMerge.mergeTabixFiles(merged, indexFiles, dataSizes);
    try (InputStream mergedStream = new BlockCompressedInputStream(new FileInputStream(merged))) {
      mNano.check("merged.sam.gz.tbi.debug", IndexTestUtils.tbiIndexToUniqueString(mergedStream));
    }
  } finally {
    assertTrue(FileHelper.deleteAll(dir));
  }
}
 
Example 8
Source Project: rtg-tools   Source File: TabixHeaderTest.java    License: BSD 2-Clause "Simplified" License 6 votes vote down vote up
/**
 * Reads two tabix headers from resources, checks their sequence counts, options and names,
 * then checks the same properties of the header produced by merging them.
 * @throws IOException if an IO error occurs
 */
public void test() throws IOException {
  final TabixHeader th1;
  try (BlockCompressedInputStream is = new BlockCompressedInputStream(Resources.getResourceAsStream("com/rtg/tabix/resources/tabixmerge1.sam.gz.tbi"))) {
    th1 = TabixHeader.readHeader(is);
    assertEquals(4, th1.getNumSequences());
    checkOptions(th1.getOptions());
    assertTrue(Arrays.equals(new String[]{"simulatedSequence1", "simulatedSequence2", "simulatedSequence3", "simulatedSequence4"}, th1.getSequenceNamesUnpacked()));
  }
  final TabixHeader th2;
  try (BlockCompressedInputStream is2 = new BlockCompressedInputStream(Resources.getResourceAsStream("com/rtg/tabix/resources/tabixmerge2.sam.gz.tbi"))) {
    th2 = TabixHeader.readHeader(is2);
    assertEquals(5, th2.getNumSequences());
    checkOptions(th2.getOptions());
    assertTrue(Arrays.equals(new String[]{"simulatedSequence4", "simulatedSequence5", "simulatedSequence6", "simulatedSequence7", "simulatedSequence8"}, th2.getSequenceNamesUnpacked()));
    // simulatedSequence4 appears in both inputs, so 4 + 5 names merge down to 8
    final TabixHeader merged = TabixHeader.mergeHeaders(th1, th2);
    assertEquals(8, merged.getNumSequences());
    // Verify the merged header's own options; th2's options were already checked above
    checkOptions(merged.getOptions());
    assertTrue(Arrays.equals(new String[]{"simulatedSequence1", "simulatedSequence2", "simulatedSequence3", "simulatedSequence4", "simulatedSequence5", "simulatedSequence6", "simulatedSequence7", "simulatedSequence8"}, merged.getSequenceNamesUnpacked()));
  }
}
 
Example 9
/**
 * Seeks a block compressed line reader to a known position and checks the lines read from
 * there, along with the reported file pointers, peeked byte and line number.
 * @throws IOException if an IO error occurs
 */
public void test() throws IOException {
  final File dir = FileUtils.createTempDir("bclr", "test");
  try {
    final File sam = FileHelper.resourceToFile("com/rtg/sam/resources/readerWindow1.sam.gz", new File(dir, "readerWindow1.sam.gz"));
    try (BlockCompressedLineReader reader = new BlockCompressedLineReader(new BlockCompressedInputStream(sam))) {
      // Seek target: 44947 in the bits above the low 16, 22870 in the low 16 bits
      final long seekPos = (44947L << 16) | 22870;
      reader.seek(seekPos);
      assertEquals(seekPos, reader.getFilePointer());

      final String first = reader.readLine();
      assertTrue(first.startsWith("857\t147\tsimulatedSequence2\t32834"));
      assertEquals(seekPos, reader.getLineFilePointer());
      // The pointer advances by the line length plus one
      assertEquals(seekPos + first.length() + 1, reader.getFilePointer());

      final String second = reader.readLine();
      assertTrue(second.startsWith("251\t99\tsimulatedSequence2\t33229"));
      // Peeking returns the first character of the upcoming line
      assertEquals((int) '9', reader.peek());

      final String third = reader.readLine();
      assertTrue(third.startsWith("91\t163\tsimulatedSequence2\t33238"));
      assertEquals(3, reader.getLineNumber());
    }
  } finally {
    assertTrue(FileHelper.deleteAll(dir));
  }
}
 
Example 10
/**
 * Reads the same file through a block compressed line reader and through a plain gzip
 * buffered reader, checking both yield the same lines and end together.
 * @throws IOException if an IO error occurs
 */
public void testLinearRead() throws IOException {
  final File dir = FileUtils.createTempDir("bclr", "test");
  try {
    final File sam = FileHelper.resourceToFile("com/rtg/sam/resources/readerWindow1.sam.gz", new File(dir, "readerWindow1.sam.gz"));
    try (BlockCompressedLineReader bclr = new BlockCompressedLineReader(new BlockCompressedInputStream(sam));
         BufferedReader br = new BufferedReader(new InputStreamReader(GzipUtils.createGzipInputStream(new FileInputStream(sam))))) {
      String expected = br.readLine();
      String actual = bclr.readLine();
      // Walk both readers in lock step until either runs out
      while (expected != null && actual != null) {
        assertEquals(expected, actual);
        expected = br.readLine();
        actual = bclr.readLine();
      }
      // Both must have reached the end at the same time
      assertNull(expected);
      assertNull(actual);
    }
  } finally {
    assertTrue(FileHelper.deleteAll(dir));
  }
}
 
Example 11
/**
 * Iterates a SAM position reader over a resource and checks every record against the
 * expected per-reference tables.
 * @throws IOException if an IO error occurs
 */
public void testSomeMethod() throws IOException {
  try (InputStream is = Resources.getResourceAsStream("com/rtg/sam/resources/mixed.sam.gz");
       SamPositionReader reader = new SamPositionReader(new BlockCompressedLineReader(new BlockCompressedInputStream(is)), 0)) {
    int refIndex = 0;
    int entry = 0;
    while (reader.hasNext()) {
      reader.next();
      // Move to the next reference once all entries of the current one are consumed
      if (entry >= ENTRIES[refIndex]) {
        entry = 0;
        ++refIndex;
      }
      assertEquals(EXP_REF_NAME[refIndex], reader.getReferenceName());
      assertEquals(refIndex, reader.getReferenceId());
      assertEquals(START[refIndex][entry], reader.getStartPosition());
      assertEquals(LENGTH[refIndex][entry], reader.getLengthOnReference());
      assertEquals(BINS[refIndex], reader.getBinNum());
      assertEquals(VIRTUAL_OFFSETS[refIndex][entry], reader.getVirtualOffset());
      assertEquals(VIRTUAL_OFFSET_ENDS[refIndex][entry], reader.getNextVirtualOffset());
      assertTrue(reader.hasReference());
      assertTrue(reader.hasCoordinates());
      assertFalse(reader.isUnmapped());
      ++entry;
    }
  }
}
 
Example 12
Source Project: picard   Source File: BclReader.java    License: MIT License 6 votes vote down vote up
/**
 * Seeks every stream to the position recorded for the given tile in the matching index file.
 *
 * @param files       index files, one per stream and in the same order as the streams
 * @param tileIndex   tile index used to look up the record for {@code currentTile}
 * @param currentTile the tile to seek to
 * @return the number of clusters in the tile as reported by the tile index record, or 0 if there are no streams
 * @throws UnsupportedOperationException if a stream is not a {@link BlockCompressedInputStream}
 * @throws PicardException if the tile counts of the two indexes differ, or if seeking fails
 */
public int seek(final List<File> files, final TileIndex tileIndex, final int currentTile) {
    int fileIdx = 0;
    int clustersInTile = 0;
    for (final InputStream stream : streams) {
        final TileIndex.TileIndexRecord record = tileIndex.findTile(currentTile);
        final BclIndexReader indexReader = new BclIndexReader(files.get(fileIdx));
        final long virtualFilePointer = indexReader.get(record.getZeroBasedTileNumber());
        if (!(stream instanceof BlockCompressedInputStream)) {
            throw new UnsupportedOperationException("Seeking only allowed on bzgf");
        }
        try {
            // The two indexes must agree on how many tiles there are
            if (tileIndex.getNumTiles() != indexReader.getNumTiles()) {
                throw new PicardException(String.format("%s.getNumTiles(%d) != %s.getNumTiles(%d)",
                        tileIndex.getFile().getAbsolutePath(), tileIndex.getNumTiles(), indexReader.getBciFile().getAbsolutePath(), indexReader.getNumTiles()));
            }
            ((BlockCompressedInputStream) stream).seek(virtualFilePointer);
            clustersInTile = record.getNumClustersInTile();
        } catch (final IOException e) {
            throw new PicardException("Problem seeking to " + virtualFilePointer, e);
        }
        fileIdx++;
    }
    return clustersInTile;
}
 
Example 13
Source Project: picard   Source File: CheckTerminatorBlock.java    License: MIT License 6 votes vote down vote up
/**
 * Checks the termination state of {@code INPUT} and prints its name to standard error.
 *
 * @return 100 if the termination state is {@code DEFECTIVE}, otherwise 0
 * @throws PicardException if the terminator block cannot be read; the underlying
 *         {@link IOException} is attached as the cause
 */
@Override protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    try {
        final FileTermination term = BlockCompressedInputStream.checkTermination(INPUT);
        System.err.println(term.name());
        if (term == FileTermination.DEFECTIVE) {
            return 100;
        }
        else {
            return 0;
        }
    }
    catch (IOException ioe) {
        // Chain the original exception so the root cause is not lost
        throw new PicardException("Exception reading terminator block of file: " + INPUT.getAbsolutePath(), ioe);
    }
}
 
Example 14
Source Project: cramtools   Source File: BGZF_ReferenceSequenceFile.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Opens a block compressed fasta file together with its <code>.fai</code> index, which is
 * expected next to the fasta file under the same name plus <code>.fai</code>.
 * Every index line becomes an entry in the index map and a record in the sequence dictionary.
 *
 * @param file the fasta file to open
 * @throws FileNotFoundException if the index file cannot be opened for reading
 * @throws RuntimeException if the fasta file or its index file is not readable
 */
public BGZF_ReferenceSequenceFile(File file) throws FileNotFoundException {
	if (!file.canRead())
		throw new RuntimeException("Cannot find or read fasta file: " + file.getAbsolutePath());

	File indexFile = new File(file.getAbsolutePath() + ".fai");
	if (!indexFile.canRead())
		throw new RuntimeException("Cannot find or read fasta index file: " + indexFile.getAbsolutePath());

	dictionary = new SAMSequenceDictionary();
	// try-with-resources so the scanner is released even if an index line fails to parse
	try (Scanner scanner = new Scanner(indexFile)) {
		int seqID = 0;
		while (scanner.hasNextLine()) {
			String line = scanner.nextLine();
			FAIDX_FastaIndexEntry entry = FAIDX_FastaIndexEntry.fromString(seqID++, line);
			index.put(entry.getName(), entry);
			dictionary.addSequence(new SAMSequenceRecord(entry.getName(), entry.getLen()));
		}
	}

	if (index.isEmpty())
		log.warn("No entries in the index: " + indexFile.getAbsolutePath());

	is = new BlockCompressedInputStream(new SeekableFileStream(file));
}
 
Example 15
Source Project: cramtools   Source File: BGZF_FastaIndexer.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Writes a <code>.fai</code> index next to each block compressed fasta file given on the
 * command line.
 *
 * @param args command line arguments, parsed into {@link Params}
 * @throws IOException if a file cannot be read or its index cannot be created
 */
public static void main(String[] args) throws IOException {
	Params params = new Params();
	JCommander jc = new JCommander(params);
	jc.parse(args);

	for (File file : params.files) {
		log.info("Indexing file: " + file.getAbsolutePath());
		// Both the input stream and the writer are closed even if indexing fails part way
		try (BlockCompressedInputStream bcis = new BlockCompressedInputStream(new SeekableFileStream(file))) {
			// NOTE(review): return value unused; presumably called to prime the stream before indexing - confirm
			bcis.available();
			BGZF_FastaIndexer mli = new BGZF_FastaIndexer(bcis);

			// The index file is only created once the input has been opened successfully
			try (PrintWriter writer = new PrintWriter(file.getAbsolutePath() + ".fai")) {
				FAIDX_FastaIndexEntry e;
				// Stop early if the writer has reported an error
				while (!writer.checkError() && (e = mli.readNext()) != null)
					writer.println(e);
			}
		}
	}
}
 
Example 16
/**
 * Stores the supplied collaborators, resets the iteration state and positions on the first
 * region that has data.
 * @param stream the block compressed stream to read from
 * @param offsets the virtual offsets to iterate over
 * @param reference the reference sequences reader
 * @param header the SAM header
 * @param type the reader type
 * @param label a label for this iterator
 * @throws IOException if an IO error occurs while locating the first region
 */
SamMultiRestrictingIterator(BlockCompressedInputStream stream, VirtualOffsets offsets, SequencesReader reference, SAMFileHeader header, SamReader.Type type, String label) throws IOException {
  // Collaborators supplied by the caller
  mStream = stream;
  mOffsets = offsets;
  mReference = reference;
  mHeader = header;
  mType = type;
  mLabel = label;

  // Nothing has been read yet
  mCurrentTemplate = -1;
  mCurrentOffset = 0;
  mCurrentIt = null;

  // Set up for first region and if it has no data, skip ahead to find one that does
  populateNext(true);
}
 
Example 17
Source Project: rtg-tools   Source File: TabixHeader.java    License: BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Reads a tabix header from the current position of the stream: a fixed size section of
 * little endian integers followed by the packed sequence names.
 * @param is stream positioned at the start of the header
 * @return the header that was read
 * @throws IOException if an IO error occurs, or the header declares a negative sequence names length
 */
static TabixHeader readHeader(BlockCompressedInputStream is) throws IOException {
  final byte[] fixedData = new byte[FIXED_SIZE];
  IOUtils.readFully(is, fixedData, 0, FIXED_SIZE);
  // The first four bytes are not interpreted here; fields are read from offset 4 onwards
  final int numberReferences = ByteArrayIOUtils.bytesToIntLittleEndian(fixedData, 4);
  final int format = ByteArrayIOUtils.bytesToIntLittleEndian(fixedData, 8);
  // Column numbers are stored one higher than the values passed to TabixOptions
  final int seqCol = ByteArrayIOUtils.bytesToIntLittleEndian(fixedData, 12) - 1;
  final int begCol = ByteArrayIOUtils.bytesToIntLittleEndian(fixedData, 16) - 1;
  final int endCol = ByteArrayIOUtils.bytesToIntLittleEndian(fixedData, 20) - 1;
  final int meta = ByteArrayIOUtils.bytesToIntLittleEndian(fixedData, 24);
  final int skip = ByteArrayIOUtils.bytesToIntLittleEndian(fixedData, 28);
  final int sequenceNameLength = ByteArrayIOUtils.bytesToIntLittleEndian(fixedData, 32);
  // A corrupt file could otherwise surface as a NegativeArraySizeException
  if (sequenceNameLength < 0) {
    throw new IOException("Invalid tabix header: negative sequence names length " + sequenceNameLength);
  }
  final byte[] sequenceNames = new byte[sequenceNameLength];
  IOUtils.readFully(is, sequenceNames, 0, sequenceNameLength);
  return new TabixHeader(numberReferences, new TabixIndexer.TabixOptions(format, seqCol, begCol, endCol, meta, skip), sequenceNames);
}
 
Example 18
Source Project: rtg-tools   Source File: TabixLineReader.java    License: BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Creates a reader with no region restriction: no sequence name, begin and end of -1, and a
 * range running from 0 to <code>0xFFFFFFFFFFFFFFFFL</code>.
 * @param input the block compressed file to read
 * @param tir index reader supplying the format options
 * @throws IOException if an IO error occurs
 */
SingleRestrictionLineReader(File input, TabixIndexReader tir) throws IOException {
  mSequence = null;
  mBeg = -1;
  mEnd = -1;
  final BlockCompressedLineReader lineReader = new BlockCompressedLineReader(new BlockCompressedInputStream(input));
  // VCF gets its own position reader, everything else uses the generic one
  if (tir.getOptions().mFormat == TabixIndexer.TabixOptions.FORMAT_VCF) {
    mBCPositionReader = new VcfPositionReader(lineReader, tir.getOptions().mSkip);
  } else {
    mBCPositionReader = new GenericPositionReader(lineReader, tir.getOptions());
  }
  mRange = new VirtualOffsets(0, 0xFFFFFFFFFFFFFFFFL, null);
}
 
Example 19
Source Project: rtg-tools   Source File: TabixLineReader.java    License: BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Creates a reader restricted to a single region, seeking to the start of the region's first
 * file pointer range when the index supplies one.
 * @param input the block compressed file to read
 * @param tir index reader supplying the format options and file pointers
 * @param region the region to restrict to, must not be null
 * @throws IOException if an IO error occurs
 * @throws NullPointerException if <code>region</code> is null
 */
SingleRestrictionLineReader(File input, TabixIndexReader tir, RegionRestriction region) throws IOException {
  if (region == null) {
    throw new NullPointerException();
  }
  mSequence = region.getSequenceName();
  mBeg = region.getStart();
  mEnd = region.getEnd();
  final BlockCompressedLineReader lineReader = new BlockCompressedLineReader(new BlockCompressedInputStream(input));
  // VCF gets its own position reader, everything else uses the generic one
  if (tir.getOptions().mFormat == TabixIndexer.TabixOptions.FORMAT_VCF) {
    mBCPositionReader = new VcfPositionReader(lineReader, tir.getOptions().mSkip);
  } else {
    mBCPositionReader = new GenericPositionReader(lineReader, tir.getOptions());
  }
  mRange = tir.getFilePointers(region);
  // With no range from the index there is nothing to seek to
  if (mRange != null) {
    mBCPositionReader.seek(mRange.start(0));
  }
}
 
Example 20
Source Project: rtg-tools   Source File: TabixLineReader.java    License: BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Creates a reader restricted to multiple ranges and positions it on the first record.
 * @param input the block compressed file to read
 * @param tir index reader supplying the format options, file pointers and sequence lookup
 * @param ranges the ranges to restrict to, must not be null
 * @throws IOException if an IO error occurs
 * @throws NullPointerException if <code>ranges</code> is null
 */
MultiRestrictionLineReader(File input, TabixIndexReader tir, ReferenceRanges<String> ranges) throws IOException {
  if (ranges == null) {
    throw new NullPointerException();
  }
  final BlockCompressedLineReader lineReader = new BlockCompressedLineReader(new BlockCompressedInputStream(new ClosedFileInputStream(input)));
  // VCF gets its own position reader, everything else uses the generic one
  if (tir.getOptions().mFormat == TabixIndexer.TabixOptions.FORMAT_VCF) {
    mReader = new VcfPositionReader(lineReader, tir.getOptions().mSkip);
  } else {
    mReader = new GenericPositionReader(lineReader, tir.getOptions());
  }
  // Fall back to an empty set of offsets when the index has none for these ranges
  final VirtualOffsets indexed = tir.getFilePointers(ranges);
  if (indexed == null) {
    mOffsets = new VirtualOffsets();
  } else {
    mOffsets = indexed;
  }
  mSequenceLookup = tir.mSequenceLookup;
  populateNext(true);
}
 
Example 21
/**
 * Creates a line reader over the given stream with all counters, pointers and buffer
 * positions at zero and the initialised flag unset.
 * @param stream create reader from given stream
 */
public BlockCompressedLineReader(BlockCompressedInputStream stream) {
  mStream = stream;
  mInit = false;
  // Line and file position tracking
  mLineNumber = 0;
  mFilePointer = 0;
  mLineFilePointer = 0;
  // Buffer bookkeeping
  mPos = 0;
  mBufferUsed = 0;
  mLineBufferUsed = 0;
}
 
Example 22
Source Project: rtg-tools   Source File: ExtractCli.java    License: BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Copies the leading header lines of a block compressed file to the output stream. A line
 * belongs to the header if it is empty or starts with the meta character; copying stops at
 * the first other line or at end of file.
 * @param input the block compressed file to read
 * @param metaChar character that marks a header line
 * @param out destination for the header lines, each followed by the line separator
 * @throws IOException if an IO error occurs
 */
private void extractHeader(File input, char metaChar, OutputStream out) throws IOException {
  try (BlockCompressedLineReader reader = new BlockCompressedLineReader(new BlockCompressedInputStream(input))) {
    for (String line = reader.readLine(); line != null; line = reader.readLine()) {
      if (!line.isEmpty() && line.charAt(0) != metaChar) {
        break;
      }
      final String withSeparator = line + StringUtils.LS;
      out.write(withSeparator.getBytes());
    }
  }
}
 
Example 23
Source Project: rtg-tools   Source File: TabixIndexer.java    License: BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Check whether the given file is block compressed, as judged by
 * <code>BlockCompressedInputStream.isValidFile</code>.
 * @param file file to check
 * @return true iff file is block compressed
 * @throws IOException if an IO error occurs
 */
public static boolean isBlockCompressed(File file) throws IOException {
  try (BufferedInputStream buffered = new BufferedInputStream(new FileInputStream(file))) {
    return BlockCompressedInputStream.isValidFile(buffered);
  }
}
 
Example 24
Source Project: rtg-tools   Source File: BedReader.java    License: BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Creates a BED reader over the given line reader. The header is parsed from a separate pass
 * over the block compressed BED file, after which the first record is loaded.
 * @param reader supplies the lines to read
 * @param bedFile the block compressed BED file the header is read from
 * @param minAnnotations value stored as the minimum number of annotations
 * @throws IOException if an IO error occurs
 */
private BedReader(TabixLineReader reader, File bedFile, int minAnnotations) throws IOException {
  mIn = reader;
  mMinAnnotations = minAnnotations;
  try (BrLineReader headerLines = new BrLineReader(new BufferedReader(new InputStreamReader(new BlockCompressedInputStream(bedFile))))) {
    mHeader = parseHeader(headerLines);
  }
  setNext();
}
 
Example 25
Source Project: rtg-tools   Source File: VcfFilterCliTest.java    License: BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Runs the command on an input resource and compares stdout and the non-header output lines
 * against expected results.
 * @param inResourceLoc location of the input resource
 * @param expResourceLoc base name of the expected results
 * @param extraArgs command line arguments placed before the input and output arguments
 * @throws IOException if an IO error occurs
 */
private void runResourceTest(String inResourceLoc, String expResourceLoc, String... extraArgs) throws IOException {
  try (TestDirectory dir = new TestDirectory()) {
    // Copy the resource into the test directory under its own file name
    final String inputName = new File(Resources.getResource(inResourceLoc).getFile()).getName();
    final File input = FileHelper.resourceToFile(inResourceLoc, new File(dir, inputName));
    final File result = new File(dir, "out.vcf.gz");

    final String stdout = checkMainInitOk(Utils.append(extraArgs, "-i", input.getPath(), "-o", result.getPath()));
    mNano.check(expResourceLoc + ".txt", stdout, true);

    assertEquals(BlockCompressedInputStream.FileTermination.HAS_TERMINATOR_BLOCK, BlockCompressedInputStream.checkTermination(result));

    // Keep only lines not starting with '#' and normalise line endings
    final String records = StringUtils.grep(FileHelper.gzFileToString(result), "^[^#]").replaceAll("[\r\n]+", "\n");
    mNano.check(expResourceLoc, records, true);
  }
}
 
Example 26
/**
 * Annotates a small VCF with ids taken from a second VCF and checks the non-header output
 * lines against the expected result.
 * @throws IOException if an IO error occurs
 */
public void testNanoVcfIds() throws IOException {
  try (final TestDirectory dir = new TestDirectory()) {
    final File input = FileUtils.stringToFile(mNano.loadReference("snpAnnotate_small.vcf"), new File(dir, "input.vcf"));
    final File ids = FileUtils.stringToFile(mNano.loadReference("snpAnnotate_small_ids_vcf.vcf"), new File(dir, "id.vcf"));
    final File result = new File(dir, "output.vcf.gz");

    final String stdout = checkMainInitOk("-i", input.getPath(), "--vcf-ids", ids.getPath(), "-o", result.getPath(), "--fill-an-ac", "--annotation", "NAA,ZY,PD");
    // Nothing is expected on stdout
    assertEquals("", stdout);
    assertTrue(result.isFile());
    assertEquals(BlockCompressedInputStream.FileTermination.HAS_TERMINATOR_BLOCK, BlockCompressedInputStream.checkTermination(result));

    // Keep only lines not starting with '#' and normalise line endings
    final String records = StringUtils.grep(FileHelper.gzFileToString(result), "^[^#]").replaceAll("[\r\n]+", "\n");
    mNano.check("snpAnnotate_small_vcf_ids_exp.vcf", records, false);
  }
}
 
Example 27
Source Project: rtg-tools   Source File: VcfSubsetTest.java    License: BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Runs the subset command keeping only the AC and AN info fields and checks the output.
 * @throws Exception if anything goes wrong while running the test
 */
public void testKeepInfoACAN() throws Exception {
  try (TestDirectory td = new TestDirectory()) {
    final File input = FileHelper.resourceToGzFile("com/rtg/vcf/resources/vcfsubset.vcf", new File(td, "vcf.vcf.gz"));
    final File result = new File(td, "out.vcf.gz");

    checkMainInitOk("-i", input.getPath(), "-o", result.getPath(), "--keep-info", "AC", "--keep-info", "AN");
    // The output must end with a terminator block
    assertEquals(BlockCompressedInputStream.FileTermination.HAS_TERMINATOR_BLOCK, BlockCompressedInputStream.checkTermination(result));
    final String sanitized = TestUtils.sanitizeVcfHeader(FileHelper.gzFileToString(result));
    mNano.check("vcfsubset-keepinfoACAN.vcf", sanitized);
  }
}
 
Example 28
Source Project: rtg-tools   Source File: VcfSubsetTest.java    License: BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Runs the subset command twice, keeping the YEA filter and then the PASS filter, and checks
 * each output.
 * @throws Exception if anything goes wrong while running the test
 */
public void testKeepFilter() throws Exception {
  try (TestDirectory td = new TestDirectory()) {
    final File input = FileHelper.resourceToGzFile("com/rtg/vcf/resources/vcfsubset.vcf", new File(td, "vcf.vcf.gz"));

    // Keep only the YEA filter
    final File yeaResult = new File(td, "out.vcf.gz");
    checkMainInitOk("-i", input.getPath(), "-o", yeaResult.getPath(), "--keep-filter", "YEA");
    assertEquals(BlockCompressedInputStream.FileTermination.HAS_TERMINATOR_BLOCK, BlockCompressedInputStream.checkTermination(yeaResult));
    mNano.check("vcfsubset-keepfilter.vcf", TestUtils.sanitizeVcfHeader(FileHelper.gzFileToString(yeaResult)));

    // Keep only the PASS filter
    final File passResult = new File(td, "out2.vcf.gz");
    checkMainInitOk("-i", input.getPath(), "-o", passResult.getPath(), "--keep-filter", "PASS");
    assertEquals(BlockCompressedInputStream.FileTermination.HAS_TERMINATOR_BLOCK, BlockCompressedInputStream.checkTermination(passResult));
    mNano.check("vcfsubset-keepfilter-pass.vcf", TestUtils.sanitizeVcfHeader(FileHelper.gzFileToString(passResult)));
  }
}
 
Example 29
/**
 * Builds a generic position reader over the given text by block compressing it in memory and
 * reading it back.
 * @param contents the text the reader should see
 * @return a position reader over the compressed contents
 * @throws IOException if an IO error occurs
 */
private static GenericPositionReader makeGpr(String contents) throws IOException {
  // Compress in memory; closing the stream completes the compressed data
  final ByteArrayOutputStream compressed = new ByteArrayOutputStream();
  try (final BlockCompressedOutputStream bgzip = new BlockCompressedOutputStream(compressed, (File) null)) {
    bgzip.write(contents.getBytes());
  }
  final ByteArrayInputStream source = new ByteArrayInputStream(compressed.toByteArray());
  final BlockCompressedLineReader lines = new BlockCompressedLineReader(new BlockCompressedInputStream(source));
  final TabixIndexer.TabixOptions options = new TabixIndexer.TabixOptions(TabixIndexer.TabixOptions.FORMAT_GENERIC, 0, 1, 1, '#', 0, false);
  return new GenericPositionReader(lines, options);
}
 
Example 30
Source Project: rtg-tools   Source File: TabixIndexerTest.java    License: BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Builds a tabix index for a SAM resource and compares a debug rendering of it with that of
 * the expected index resource.
 * @throws Exception if anything goes wrong while running the test
 */
public void test() throws Exception {
  final File index = file("index");
  final File sam = FileHelper.resourceToFile("com/rtg/sam/resources/test.sam.gz", file("test.sam.gz"));
  new TabixIndexer(sam, index).saveSamIndex();
  // Close the stream over the generated index as well, so the test does not leak it
  final String myBai;
  try (InputStream generated = new BlockCompressedInputStream(lazyStream(index))) {
    myBai = IndexTestUtils.tbiIndexToUniqueString(generated);
  }
  final String exp;
  try (InputStream baiIs = new BlockCompressedInputStream(Resources.getResourceAsStream("com/rtg/sam/resources/test.sam.gz.tbi"))) {
    exp = IndexTestUtils.tbiIndexToUniqueString(baiIs);
  }

  assertEquals(exp, myBai);
}