htsjdk.samtools.seekablestream.SeekableFileStream Java Examples

The following examples show how to use htsjdk.samtools.seekablestream.SeekableFileStream. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: BGZF_ReferenceSequenceFile.java — From cramtools (Apache License 2.0)
/**
 * Opens a BGZF-compressed FASTA file together with its {@code .fai} index file,
 * populating the sequence dictionary and the per-sequence index map.
 *
 * @param file the BGZF-compressed FASTA file; a sibling {@code file + ".fai"} index must exist
 * @throws FileNotFoundException if the index file cannot be opened by {@link Scanner}
 * @throws RuntimeException if either the FASTA file or its index is missing/unreadable
 */
public BGZF_ReferenceSequenceFile(File file) throws FileNotFoundException {
	if (!file.canRead())
		throw new RuntimeException("Cannot find or read fasta file: " + file.getAbsolutePath());

	File indexFile = new File(file.getAbsolutePath() + ".fai");
	if (!indexFile.canRead())
		throw new RuntimeException("Cannot find or read fasta index file: " + indexFile.getAbsolutePath());

	dictionary = new SAMSequenceDictionary();
	// try-with-resources: the original closed the Scanner only on the happy path,
	// leaking the index file handle if parsing threw mid-loop.
	try (Scanner scanner = new Scanner(indexFile)) {
		int seqID = 0;
		while (scanner.hasNextLine()) {
			String line = scanner.nextLine();
			// seqID is the 0-based ordinal of the sequence within the .fai file
			FAIDX_FastaIndexEntry entry = FAIDX_FastaIndexEntry.fromString(seqID++, line);
			index.put(entry.getName(), entry);
			dictionary.addSequence(new SAMSequenceRecord(entry.getName(), entry.getLen()));
		}
	}

	// An empty index is suspicious but not fatal; warn and continue.
	if (index.isEmpty())
		log.warn("No entries in the index: " + indexFile.getAbsolutePath());

	is = new BlockCompressedInputStream(new SeekableFileStream(file));
}
 
Example #2
Source File: BGZF_FastaIndexer.java — From cramtools (Apache License 2.0)
/**
 * Command-line entry point: builds a {@code .fai} index next to each input BGZF FASTA file.
 *
 * @param args command-line arguments, parsed into {@code Params} by JCommander
 * @throws IOException if reading an input file or writing its index fails
 */
public static void main(String[] args) throws IOException {
	Params params = new Params();
	JCommander jc = new JCommander(params);
	jc.parse(args);

	for (File file : params.files) {
		log.info("Indexing file: " + file.getAbsolutePath());
		// try-with-resources: the original never closed the input stream at all,
		// and leaked the PrintWriter if indexing threw.
		try (BlockCompressedInputStream bcis = new BlockCompressedInputStream(new SeekableFileStream(file));
				PrintWriter writer = new PrintWriter(file.getAbsolutePath() + ".fai")) {
			// NOTE(review): available() appears to prime the first compressed block
			// before indexing starts — confirm against BlockCompressedInputStream docs.
			bcis.available();
			BGZF_FastaIndexer mli = new BGZF_FastaIndexer(bcis);

			// PrintWriter swallows IOExceptions; checkError() is the only way to
			// detect a write failure and stop early.
			FAIDX_FastaIndexEntry e;
			while (!writer.checkError() && (e = mli.readNext()) != null)
				writer.println(e);
		}
	}
}
 
Example #3
Source File: TestVCFRoundTrip.java — From Hadoop-BAM (MIT License)
/**
 * Round-trip test: writes a VCF via a MapReduce job, merges the output parts into a
 * single file, and verifies the merged variants equal the input variants.
 */
@Test
public void testRoundTripWithMerge() throws Exception {
    Path vcfPath = new Path("file://" + testVCFFileName);

    // run a MR job to write out a VCF file
    Path outputPath = doMapReduce(vcfPath, false);

    // merge the output; try-with-resources closes the header stream
    // (the original leaked the SeekableFileStream)
    VCFHeader vcfHeader;
    try (SeekableFileStream headerStream = new SeekableFileStream(new File(testVCFFileName))) {
        vcfHeader = VCFHeaderReader.readHeaderFrom(headerStream);
    }
    final File outFile = File.createTempFile("testVCFWriter",
        testVCFFileName.substring(testVCFFileName.lastIndexOf(".")));
    outFile.deleteOnExit();
    VCFFileMerger.mergeParts(outputPath.toUri().toString(), outFile.toURI().toString(),
        vcfHeader);

    // read back the merged output (VCFFileReader is Closeable — don't leak it)
    List<VariantContext> actualVariants = new ArrayList<>();
    try (VCFFileReader vcfFileReaderActual = parseVcf(outFile)) {
        Iterators.addAll(actualVariants, vcfFileReaderActual.iterator());
    }

    // verify the output is the same as the input
    List<VariantContext> expectedVariants = new ArrayList<>();
    try (VCFFileReader vcfFileReader = parseVcf(new File(testVCFFileName))) {
        Iterators.addAll(expectedVariants, vcfFileReader.iterator());
    }

    // use a VariantContextComparator to check variants are equal
    VariantContextComparator vcfRecordComparator = vcfHeader.getVCFRecordComparator();
    assertEquals(expectedVariants.size(), actualVariants.size());
    for (int i = 0; i < expectedVariants.size(); i++) {
        assertEquals(0, vcfRecordComparator.compare(expectedVariants.get(i),
            actualVariants.get(i)));
    }
}
 
Example #4
Source File: CreateHadoopBamSplittingIndex.java — From gatk (BSD 3-Clause "New" or "Revised" License)
/**
 * Writes a splitting (SBI) index for the given BAM file at the requested granularity.
 *
 * @param inputBam    the BAM file to index (validated by {@code assertIsBam})
 * @param index       destination file for the generated index
 * @param granularity how many reads each index entry spans
 * @throws UserException if reading the BAM or writing the index fails
 */
private static void createOnlySplittingIndex(final File inputBam, final File index, final long granularity) {
    assertIsBam(inputBam);
    // Both streams are closed automatically, even when indexing fails.
    try (final SeekableStream bamStream = new SeekableFileStream(inputBam);
         final BufferedOutputStream indexOut = new BufferedOutputStream(new FileOutputStream(index))) {
        BAMSBIIndexer.createIndex(bamStream, indexOut, granularity);
    } catch (final IOException e) {
        throw new UserException("Couldn't create splitting index", e);
    }
}
 
Example #5
Source File: TestVCFOutputFormat.java — From Hadoop-BAM (MIT License)
/**
 * Reads the VCF header from the {@code test.vcf} classpath resource.
 *
 * @return the parsed {@link VCFHeader}
 * @throws IOException if the resource cannot be read
 */
private VCFHeader readHeader() throws IOException {
    String headerFile = ClassLoader.getSystemClassLoader().getResource("test.vcf").getFile();
    // try-with-resources: the original leaked the SeekableFileStream
    try (SeekableFileStream stream = new SeekableFileStream(new File(headerFile))) {
        return VCFHeaderReader.readHeaderFrom(stream);
    }
}
 
Example #6
Source File: TestVCFRoundTrip.java — From Hadoop-BAM (MIT License)
/**
 * Round-trip test: writes a VCF via a MapReduce job, then verifies the output split
 * files together contain exactly the input variants, and that the split count and
 * compression format match expectations for the codec under test.
 */
@Test
public void testRoundTrip() throws Exception {
    Path vcfPath = new Path("file://" + testVCFFileName);

    // run a MR job to write out a VCF file
    Path outputPath = doMapReduce(vcfPath, true);

    // verify the output is the same as the input
    // (VCFFileReader is Closeable — the original leaked it)
    List<VariantContext> expectedVariants = new ArrayList<>();
    try (VCFFileReader vcfFileReader = parseVcf(new File(testVCFFileName))) {
        Iterators.addAll(expectedVariants, vcfFileReader.iterator());
    }

    int splits = 0;
    List<VariantContext> actualVariants = new ArrayList<>();
    File[] vcfFiles = new File(outputPath.toUri()).listFiles(
        pathname -> (!pathname.getName().startsWith(".") &&
            !pathname.getName().startsWith("_")));
    // listFiles returns null if the path is not a readable directory; fail with a
    // clear message instead of an NPE in Arrays.sort
    assertTrue("MapReduce output directory is missing or unreadable: " + outputPath,
        vcfFiles != null);
    Arrays.sort(vcfFiles); // ensure files are sorted by name
    for (File vcf : vcfFiles) {
        splits++;
        Iterators.addAll(actualVariants, parseVcf(vcf).iterator());
        // close the per-file streams (the original leaked a FileInputStream per assert)
        if (BGZFCodec.class.equals(codecClass)) {
            try (BufferedInputStream in = new BufferedInputStream(new FileInputStream(vcf))) {
                assertTrue(BlockCompressedInputStream.isValidFile(in));
            }
        } else if (BGZFEnhancedGzipCodec.class.equals(codecClass)) {
            try (BufferedInputStream in = new BufferedInputStream(new FileInputStream(vcf))) {
                assertTrue(VCFFormat.isGzip(in));
            }
        }
    }

    switch (expectedSplits) {
        case EXACTLY_ONE:
            assertEquals("Should be exactly one split", 1, splits);
            break;
        case MORE_THAN_ONE:
            assertTrue("Should be more than one split", splits > 1);
            break;
        case ANY:
        default:
            break;
    }

    // use a VariantContextComparator to check variants are equal
    VCFHeader vcfHeader;
    try (SeekableFileStream headerStream = new SeekableFileStream(new File(testVCFFileName))) {
        vcfHeader = VCFHeaderReader.readHeaderFrom(headerStream);
    }
    VariantContextComparator vcfRecordComparator = vcfHeader.getVCFRecordComparator();
    assertEquals(expectedVariants.size(), actualVariants.size());
    for (int i = 0; i < expectedVariants.size(); i++) {
        assertEquals(0, vcfRecordComparator.compare(expectedVariants.get(i),
            actualVariants.get(i)));
    }
}