htsjdk.samtools.SamFiles Java Examples

The following examples show how to use htsjdk.samtools.SamFiles. Each example lists its source file, the project it comes from, and that project's license.
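Before the examples, a minimal sketch of the call they all share (the file name sample.bam is a placeholder, not taken from any of the projects below): SamFiles.findIndex resolves the index file that sits next to a BAM and returns null when none exists.

import htsjdk.samtools.SamFiles;
import java.nio.file.Path;
import java.nio.file.Paths;

public class FindIndexDemo {
    public static void main(String[] args) {
        // Placeholder path; point this at a real coordinate-sorted BAM.
        final Path bam = Paths.get("sample.bam");
        // findIndex checks the standard companion naming conventions
        // (e.g. sample.bai, sample.bam.bai) and returns null when no
        // index sits next to the input.
        final Path index = SamFiles.findIndex(bam);
        System.out.println(index == null ? "no index found" : "index: " + index);
    }
}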
Example #1
Source File: SamUtils.java    From rtg-tools with BSD 2-Clause "Simplified" License
/**
 * Test whether an index file exists.
 * @param file the alignments file to check
 * @return true if the file has an index
 */
public static boolean isIndexed(File file) {
  final SamReader.Type t = getSamType(file);
  if (t == null) {
    return false;
  } else if (t == SamReader.Type.SAM_TYPE) { // We support tabixed block-compressed SAM
    return TabixIndexer.indexFileName(file).exists();
  } else {
    return SamFiles.findIndex(file) != null;
  }
}
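Why the special case for SAM_TYPE: htsjdk's index lookup does not apply to (block-compressed) SAM, which rtg-tools indexes with tabix instead, so the check looks for the tabix companion name reported by TabixIndexer.indexFileName (conventionally the input name plus .tbi) rather than asking SamFiles.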
 
Example #2
Source File: HaplotypeBAMWriterUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License
private Path testWriteToFile
    (
        final String outputFileExtension,
        final List<Haplotype> haplotypes,
        final Locatable genomeLoc,
        final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods,
        final boolean createIndex,
        final boolean createMD5
    ) throws IOException
{
    final Path outPath = GATKBaseTest.createTempFile("haplotypeBamWriterTest", outputFileExtension).toPath();
    final SAMFileDestination fileDest = new SAMFileDestination(outPath, createIndex, createMD5, samHeader, "TestHaplotypeRG");

    try (final HaplotypeBAMWriter haplotypeBAMWriter = new HaplotypeBAMWriter(HaplotypeBAMWriter.WriterType.ALL_POSSIBLE_HAPLOTYPES, fileDest)) {
        haplotypeBAMWriter.writeReadsAlignedToHaplotypes(
                haplotypes,
                genomeLoc,
                haplotypes,
                new HashSet<>(), // called haplotypes
                readLikelihoods);
    }

    Assert.assertEquals(getReadCounts(outPath), 5);

    final File expectedMD5File = new File(outPath.toFile().getAbsolutePath() + ".md5");
    Assert.assertEquals(expectedMD5File.exists(), createMD5);
    if (createIndex) {
        Assert.assertNotNull(SamFiles.findIndex(outPath));
    } else {
        Assert.assertNull(SamFiles.findIndex(outPath));
    }
    return outPath;
}
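The index assertions go through SamFiles.findIndex instead of reconstructing the expected file name by hand: depending on the output extension, htsjdk writers may name the index either by swapping the extension for .bai or by appending it (and .crai for CRAM), and findIndex resolves whichever convention was used.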
 
Example #3
Source File: BAMRecordReader.java    From Hadoop-BAM with MIT License
@Override public void initialize(InputSplit spl, TaskAttemptContext ctx)
           throws IOException
{
	// This method should only be called once (see Hadoop API). However,
	// there seems to be disagreement between implementations that call
	// initialize() and Hadoop-BAM's own code that relies on
	// {@link BAMInputFormat} to call initialize() when the reader is
	// created. Therefore we add this check for the time being. 
	if(isInitialized)
		close();
	isInitialized = true;
	reachedEnd = false;

	final Configuration conf = ctx.getConfiguration();

	final FileVirtualSplit split = (FileVirtualSplit)spl;
	final Path             file  = split.getPath();
	final FileSystem       fs    = file.getFileSystem(conf);

	ValidationStringency stringency = SAMHeaderReader.getValidationStringency(conf);
	boolean useIntelInflater = BAMInputFormat.useIntelInflater(conf);

	java.nio.file.Path index = SamFiles.findIndex(NIOFileUtil.asPath(fs.makeQualified(file).toUri()));
	Path fileIndex = index == null ? null : new Path(index.toUri());
	SeekableStream indexStream = fileIndex == null ? null : WrapSeekable.openPath(fs, fileIndex);
	in = WrapSeekable.openPath(fs, file);
	SamReader samReader = createSamReader(in, indexStream, stringency, useIntelInflater);
	final SAMFileHeader header = samReader.getFileHeader();

	long virtualStart = split.getStartVirtualOffset();

	// BGZF virtual offsets pack the compressed block's file offset into the
	// upper 48 bits and the in-block offset into the lower 16 bits, so the
	// shift recovers the raw byte offset where the first block starts.
	fileStart  = virtualStart >>> 16;
	virtualEnd = split.getEndVirtualOffset();

	SamReader.PrimitiveSamReader primitiveSamReader =
			((SamReader.PrimitiveSamReaderToSamReaderAdapter) samReader).underlyingReader();
	bamFileReader = (BAMFileReader) primitiveSamReader;

	if (logger.isDebugEnabled()) {
		final long recordStart = virtualStart & 0xffff; // offset within the uncompressed block
		logger.debug("Initialized BAMRecordReader; byte offset: {}, record offset: {}",
			fileStart, recordStart);
	}

	if (conf.getBoolean("hadoopbam.bam.keep-paired-reads-together", false)) {
		throw new IllegalArgumentException("Property hadoopbam.bam.keep-paired-reads-together is no longer honored.");
	}

	boolean boundedTraversal = BAMInputFormat.isBoundedTraversal(conf);
	if (boundedTraversal && split.getIntervalFilePointers() != null) {
		// return reads for intervals
		List<Interval> intervals = BAMInputFormat.getIntervals(conf);
		QueryInterval[] queryIntervals = BAMInputFormat.prepareQueryIntervals(intervals, header.getSequenceDictionary());
		iterator = bamFileReader.createIndexIterator(queryIntervals, false, split.getIntervalFilePointers());
	} else if (boundedTraversal && split.getIntervalFilePointers() == null) {
		// return unmapped reads
		iterator = bamFileReader.queryUnmapped();
	} else {
		// return everything
		BAMFileSpan splitSpan = new BAMFileSpan(new Chunk(virtualStart, virtualEnd));
		iterator = bamFileReader.getIterator(splitSpan);
	}
}
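The tail of initialize() selects one of three traversal modes: an index-backed interval query driven by the file pointers the split carries, a query for unmapped reads when bounded traversal was requested without intervals, and otherwise a plain scan over the split's virtual-offset span.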
 
Example #4
Source File: HaplotypeCallerIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License
@Test(dataProvider = "outputFileVariations")
public void testOutputFileArgumentVariations(
        final boolean createBamoutIndex,
        final boolean createBamoutMD5,
        final boolean createVCFOutIndex,
        final boolean createVCFOutMD5) throws IOException {
    Utils.resetRandomGenerator();

    // run on small interval to test index/md5 outputs
    final String testInterval = "20:10000000-10001000";

    final File vcfOutput = createTempFile("testOutputFileArgumentVariations", ".vcf");
    final File bamOutput = createTempFile("testOutputFileArgumentVariations", ".bam");

    ArgumentsBuilder argBuilder = new ArgumentsBuilder();

    argBuilder.addInput(new File(NA12878_20_21_WGS_bam));
    argBuilder.addReference(new File(b37_reference_20_21));
    argBuilder.addOutput(new File(vcfOutput.getAbsolutePath()));
    argBuilder.add("L", testInterval);
    argBuilder.add(AssemblyBasedCallerArgumentCollection.BAM_OUTPUT_SHORT_NAME, bamOutput.getAbsolutePath());
    argBuilder.add("pairHMM", "AVX_LOGLESS_CACHING");
    argBuilder.add(StandardArgumentDefinitions.CREATE_OUTPUT_BAM_INDEX_LONG_NAME, createBamoutIndex);
    argBuilder.add(StandardArgumentDefinitions.CREATE_OUTPUT_BAM_MD5_LONG_NAME, createBamoutMD5);
    argBuilder.add(StandardArgumentDefinitions.CREATE_OUTPUT_VARIANT_INDEX_LONG_NAME, createVCFOutIndex);
    argBuilder.add(StandardArgumentDefinitions.CREATE_OUTPUT_VARIANT_MD5_LONG_NAME, createVCFOutMD5);

    runCommandLine(argBuilder.getArgsArray());

    Assert.assertTrue(vcfOutput.exists(), "No VCF output file was created");

    // validate vcfout companion files
    final File vcfOutFileIndex = new File(vcfOutput.getAbsolutePath() + FileExtensions.TRIBBLE_INDEX);
    final File vcfOutFileMD5 = new File(vcfOutput.getAbsolutePath() + ".md5");
    Assert.assertEquals(vcfOutFileIndex.exists(), createVCFOutIndex, "The index file argument was not honored");
    Assert.assertEquals(vcfOutFileMD5.exists(), createVCFOutMD5, "The md5 file argument was not honored");

    // validate bamout companion files
    if (createBamoutIndex) {
        Assert.assertNotNull(SamFiles.findIndex(bamOutput));
    } else {
        Assert.assertNull(SamFiles.findIndex(bamOutput));
    }

    final File expectedBamoutMD5File = new File(bamOutput.getAbsolutePath() + ".md5");
    Assert.assertEquals(expectedBamoutMD5File.exists(), createBamoutMD5);

    // Check that the output BAM header contains all of the input BAM header Program Records (@PG)
    SamAssertionUtils.assertOutBamContainsInBamProgramRecords(new File(NA12878_20_21_WGS_bam), bamOutput);
}
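The closing assertion is a provenance check: every @PG program record in the input BAM's header is expected to survive into the bamout header, on top of the index and MD5 companion files verified above.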