Java Code Examples for htsjdk.samtools.QueryInterval

The following examples show how to use htsjdk.samtools.QueryInterval. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: hmftools   Source File: FusionFinder.java    License: GNU General Public License v3.0 6 votes vote down vote up
private List<ReadRecord> findMissingReads(final String readId, final String chromosome, int position, boolean expectPair)
{
    final QueryInterval[] queryIntervals = new QueryInterval[1];
    int chrIndex = mSamReader.getFileHeader().getSequenceIndex(chromosome);

    // build a buffer around this position to pick up a second read
    if(expectPair)
    {
        int buffer = mConfig.MaxFragmentLength;
        queryIntervals[0] = new QueryInterval(chrIndex, position - buffer, position + buffer);
    }
    else
    {
        queryIntervals[0] = new QueryInterval(chrIndex, position, position);
    }

    final List<SAMRecord> records = mBamSlicer.slice(mSamReader, queryIntervals);

    return records.stream().filter(x -> x.getReadName().equals(readId)).map(x -> ReadRecord.from(x)).collect(Collectors.toList());
}
 
Example 2
Source Project: hmftools   Source File: BamSlicer.java    License: GNU General Public License v3.0 6 votes vote down vote up
public void slice(@NotNull final SamReader samReader, final List<GenomeRegion> regions, @NotNull final Consumer<SAMRecord> consumer)
{
    mConsumerHalt = false;

    final QueryInterval[] queryIntervals = createIntervals(regions, samReader.getFileHeader());

    try (final SAMRecordIterator iterator = samReader.queryOverlapping(queryIntervals))
    {
        while (!mConsumerHalt && iterator.hasNext())
        {
            final SAMRecord record = iterator.next();

            if (passesFilters(record))
            {
                consumer.accept(record);
            }
        }
    }
}
 
Example 3
Source Project: hmftools   Source File: BamSlicer.java    License: GNU General Public License v3.0 6 votes vote down vote up
public void slice(@NotNull final SamReader samReader, final QueryInterval[] queryIntervals, @NotNull final Consumer<SAMRecord> consumer)
{
    mConsumerHalt = false;

    try (final SAMRecordIterator iterator = samReader.queryOverlapping(queryIntervals))
    {
        while (!mConsumerHalt && iterator.hasNext())
        {
            final SAMRecord record = iterator.next();

            if (passesFilters(record))
            {
                consumer.accept(record);
            }
        }
    }
}
 
Example 4
Source Project: hmftools   Source File: BamSlicer.java    License: GNU General Public License v3.0 6 votes vote down vote up
public List<SAMRecord> slice(@NotNull final SamReader samReader, final QueryInterval[] queryIntervals)
{
    final List<SAMRecord> records = Lists.newArrayList();

    try (final SAMRecordIterator iterator = samReader.queryOverlapping(queryIntervals))
    {
        while (iterator.hasNext())
        {
            final SAMRecord record = iterator.next();

            if (passesFilters(record))
            {
                records.add(record);
            }
        }
    }

    return records;
}
 
Example 5
Source Project: hmftools   Source File: BamSlicer.java    License: GNU General Public License v3.0 6 votes vote down vote up
private static QueryInterval[] createIntervals(@NotNull final List<GenomeRegion> regions, @NotNull final SAMFileHeader header)
{
    final QueryInterval[] queryIntervals = new QueryInterval[regions.size()];

    for (int i = 0; i < regions.size(); ++i)
    {
        final GenomeRegion region = regions.get(i);
        int sequenceIndex = header.getSequenceIndex(region.chromosome());

        if (sequenceIndex < 0)
            return null;


        queryIntervals[i] = new QueryInterval(sequenceIndex, (int) region.start(), (int) region.end());
    }

    return queryIntervals;
}
 
Example 6
private static void sliceFromVCF(@NotNull CommandLine cmd) throws IOException {
    String inputPath = cmd.getOptionValue(INPUT);
    String vcfPath = cmd.getOptionValue(VCF);
    int proximity = Integer.parseInt(cmd.getOptionValue(PROXIMITY, "500"));

    SamReaderFactory readerFactory = createFromCommandLine(cmd);
    SamReader reader = readerFactory.open(new File(inputPath));

    QueryInterval[] intervals = getIntervalsFromVCF(vcfPath, reader.getFileHeader(), proximity);
    CloseableIterator<SAMRecord> iterator = reader.queryOverlapping(intervals);
    SAMFileWriter writer = new SAMFileWriterFactory().setCreateIndex(true)
            .makeBAMWriter(reader.getFileHeader(), true, new File(cmd.getOptionValue(OUTPUT)));

    writeToSlice(writer, iterator);

    writer.close();
    reader.close();
}
 
Example 7
Source Project: Hadoop-BAM   Source File: BAMInputFormat.java    License: MIT License 6 votes vote down vote up
/**
 * Converts a List of SimpleIntervals into the format required by the SamReader query API
 * @param rawIntervals SimpleIntervals to be converted
 * @return A sorted, merged list of QueryIntervals suitable for passing to the SamReader query API
 */
static QueryInterval[] prepareQueryIntervals( final List<Interval>
		rawIntervals, final SAMSequenceDictionary sequenceDictionary ) {
	if ( rawIntervals == null || rawIntervals.isEmpty() ) {
		return null;
	}

	// Convert each SimpleInterval to a QueryInterval
	final QueryInterval[] convertedIntervals =
			rawIntervals.stream()
					.map(rawInterval -> convertSimpleIntervalToQueryInterval(rawInterval, sequenceDictionary))
					.toArray(QueryInterval[]::new);

	// Intervals must be optimized (sorted and merged) in order to use the htsjdk query API
	return QueryInterval.optimizeIntervals(convertedIntervals);
}
 
Example 8
Source Project: Hadoop-BAM   Source File: BAMInputFormat.java    License: MIT License 6 votes vote down vote up
/**
 * Converts an interval in SimpleInterval format into an htsjdk QueryInterval.
 *
 * In doing so, a header lookup is performed to convert from contig name to index
 *
 * @param interval interval to convert
 * @param sequenceDictionary sequence dictionary used to perform the conversion
 * @return an equivalent interval in QueryInterval format
 */
private static QueryInterval convertSimpleIntervalToQueryInterval( final Interval interval,	final SAMSequenceDictionary sequenceDictionary ) {
	if (interval == null) {
		throw new IllegalArgumentException("interval may not be null");
	}
	if (sequenceDictionary == null) {
		throw new IllegalArgumentException("sequence dictionary may not be null");
	}

	final int contigIndex = sequenceDictionary.getSequenceIndex(interval.getContig());
	if ( contigIndex == -1 ) {
		throw new IllegalArgumentException("Contig " + interval.getContig() + " not present in reads sequence " +
				"dictionary");
	}

	return new QueryInterval(contigIndex, interval.getStart(), interval.getEnd());
}
 
Example 9
/**
 * Converts a List of SimpleIntervals into the format required by the SamReader query API
 * @param rawIntervals SimpleIntervals to be converted
 * @return A sorted, merged list of QueryIntervals suitable for passing to the SamReader query API
 */
private QueryInterval[] prepareQueryIntervals( final List<SimpleInterval> rawIntervals ) {
    if ( rawIntervals == null || rawIntervals.isEmpty() ) {
        return null;
    }

    // This might take a while with large interval lists, so log a status message
    logger.debug("Preparing intervals for traversal");

    // Convert each SimpleInterval to a QueryInterval
    final QueryInterval[] convertedIntervals =
            rawIntervals.stream()
            .map(rawInterval -> IntervalUtils.convertSimpleIntervalToQueryInterval(rawInterval, reader.getFileHeader().getSequenceDictionary()))
            .toArray(QueryInterval[]::new);

    // Intervals must be optimized (sorted and merged) in order to use the htsjdk query API
    return QueryInterval.optimizeIntervals(convertedIntervals);
}
 
Example 10
private static void sliceFromURLs(@NotNull URL indexUrl, @NotNull URL bamUrl, @NotNull CommandLine cmd) throws IOException {
    File indexFile = downloadIndex(indexUrl);
    indexFile.deleteOnExit();

    SamReader reader = createFromCommandLine(cmd).open(SamInputResource.of(bamUrl).index(indexFile));

    BAMIndex bamIndex;
    if (indexFile.getPath().contains(".crai")) {
        SeekableStream craiIndex = CRAIIndex.openCraiFileAsBaiStream(indexFile, reader.getFileHeader().getSequenceDictionary());
        bamIndex = new DiskBasedBAMFileIndex(craiIndex, reader.getFileHeader().getSequenceDictionary());
    } else {
        bamIndex = new DiskBasedBAMFileIndex(indexFile, reader.getFileHeader().getSequenceDictionary(), false);
    }

    Optional<Pair<QueryInterval[], BAMFileSpan>> queryIntervalsAndSpan = queryIntervalsAndSpan(reader, bamIndex, cmd);
    Optional<Chunk> unmappedChunk = getUnmappedChunk(bamIndex, HttpUtils.getHeaderField(bamUrl, "Content-Length"), cmd);
    List<Chunk> sliceChunks = sliceChunks(queryIntervalsAndSpan, unmappedChunk);
    SamReader cachingReader = createCachingReader(indexFile, bamUrl, cmd, sliceChunks);

    SAMFileWriter writer = new SAMFileWriterFactory().setCreateIndex(true)
            .makeBAMWriter(reader.getFileHeader(), true, new File(cmd.getOptionValue(OUTPUT)));

    queryIntervalsAndSpan.ifPresent(pair -> {
        LOGGER.info("Slicing bam on bed regions...");
        CloseableIterator<SAMRecord> bedIterator = getIterator(cachingReader, pair.getKey(), pair.getValue().toCoordinateArray());
        writeToSlice(writer, bedIterator);
        LOGGER.info("Done writing bed slices.");
    });

    unmappedChunk.ifPresent(chunk -> {
        LOGGER.info("Slicing unmapped reads...");
        CloseableIterator<SAMRecord> unmappedIterator = cachingReader.queryUnmapped();
        writeToSlice(writer, unmappedIterator);
        LOGGER.info("Done writing unmapped reads.");
    });

    reader.close();
    writer.close();
    cachingReader.close();
}
 
Example 11
@NotNull
private static Optional<Pair<QueryInterval[], BAMFileSpan>> queryIntervalsAndSpan(@NotNull SamReader reader, @NotNull BAMIndex bamIndex,
        @NotNull CommandLine cmd) throws IOException {
    if (cmd.hasOption(BED)) {
        String bedPath = cmd.getOptionValue(BED);
        LOGGER.info("Reading query intervals from BED file: {}", bedPath);
        QueryInterval[] intervals = getIntervalsFromBED(bedPath, reader.getFileHeader());
        BAMFileSpan span = BAMFileReader.getFileSpan(intervals, bamIndex);
        return Optional.of(Pair.of(intervals, span));
    }
    return Optional.empty();
}
 
Example 12
@NotNull
private static List<Chunk> sliceChunks(@NotNull Optional<Pair<QueryInterval[], BAMFileSpan>> queryIntervalsAndSpan,
        @NotNull Optional<Chunk> unmappedChunk) {
    List<Chunk> chunks = Lists.newArrayList();
    chunks.add(HEADER_CHUNK);
    queryIntervalsAndSpan.ifPresent(pair -> {
        chunks.addAll(expandChunks(pair.getValue().getChunks()));
        LOGGER.info("Generated {} query intervals which map to {} bam chunks", pair.getKey().length, chunks.size());
    });
    unmappedChunk.ifPresent(chunks::add);
    return Chunk.optimizeChunkList(chunks, 0);
}
 
Example 13
@NotNull
private static QueryInterval[] getIntervalsFromBED(@NotNull String bedPath, @NotNull SAMFileHeader header) throws IOException {
    Slicer bedSlicer = SlicerFactory.fromBedFile(bedPath);
    List<QueryInterval> queryIntervals = Lists.newArrayList();
    for (GenomeRegion region : bedSlicer.regions()) {
        queryIntervals.add(new QueryInterval(header.getSequenceIndex(region.chromosome()), (int) region.start(), (int) region.end()));
    }
    return QueryInterval.optimizeIntervals(queryIntervals.toArray(new QueryInterval[queryIntervals.size()]));
}
 
Example 14
@NotNull
private static CloseableIterator<SAMRecord> getIterator(@NotNull SamReader reader, @NotNull QueryInterval[] intervals,
        @NotNull final long[] filePointers) {
    if (reader instanceof SamReader.PrimitiveSamReaderToSamReaderAdapter) {
        SamReader.PrimitiveSamReaderToSamReaderAdapter adapter = (SamReader.PrimitiveSamReaderToSamReaderAdapter) reader;
        if (adapter.underlyingReader() instanceof BAMFileReader) {
            BAMFileReader bamReader = (BAMFileReader) adapter.underlyingReader();
            return bamReader.createIndexIterator(intervals, false, filePointers);
        }
    }
    return reader.queryOverlapping(intervals);
}
 
Example 15
Source Project: hmftools   Source File: SAMSlicer.java    License: GNU General Public License v3.0 5 votes vote down vote up
public void slice(@NotNull final SamReader samReader, @NotNull final Consumer<SAMRecord> consumer) {
    final Set<String> processed = Sets.newHashSet();
    final QueryInterval[] queryIntervals = createIntervals(regions, samReader.getFileHeader());

    try (final SAMRecordIterator iterator = samReader.queryOverlapping(queryIntervals)) {
        while (iterator.hasNext()) {
            final SAMRecord record = iterator.next();
            if (samRecordMeetsQualityRequirements(record)) {
                if (processed.add(record.toString())) {
                    consumer.accept(record);
                }
            }
        }
    }
}
 
Example 16
Source Project: hmftools   Source File: SAMSlicer.java    License: GNU General Public License v3.0 5 votes vote down vote up
@NotNull
private static QueryInterval[] createIntervals(@NotNull final Collection<GenomeRegion> regions, @NotNull final SAMFileHeader header) {
    final List<QueryInterval> queryIntervals = Lists.newArrayList();
    for (final GenomeRegion region : regions) {
        int sequenceIndex = header.getSequenceIndex(region.chromosome());
        if (sequenceIndex > -1) {
            queryIntervals.add(new QueryInterval(sequenceIndex, (int) region.start(), (int) region.end()));
        }
    }
    return QueryInterval.optimizeIntervals(queryIntervals.toArray(new QueryInterval[queryIntervals.size()]));
}
 
Example 17
Source Project: hmftools   Source File: SamSlicer.java    License: GNU General Public License v3.0 5 votes vote down vote up
public void slice(@NotNull final SamReader samReader, @NotNull final Consumer<SAMRecord> consumer) {
    final QueryInterval[] queryIntervals = createIntervals(regions, samReader.getFileHeader());

    try (final SAMRecordIterator iterator = samReader.queryOverlapping(queryIntervals)) {
        while (iterator.hasNext()) {
            final SAMRecord record = iterator.next();
            if (samRecordMeetsQualityRequirements(record)) {
                consumer.accept(record);
            }
        }
    }
}
 
Example 18
Source Project: hmftools   Source File: SamSlicer.java    License: GNU General Public License v3.0 5 votes vote down vote up
@NotNull
private static QueryInterval[] createIntervals(@NotNull final Collection<GenomeRegion> regions, @NotNull final SAMFileHeader header) {
    final List<QueryInterval> queryIntervals = Lists.newArrayList();
    for (final GenomeRegion region : regions) {
        int sequenceIndex = header.getSequenceIndex(region.chromosome());
        if (sequenceIndex > -1) {
            queryIntervals.add(new QueryInterval(sequenceIndex, (int) region.start(), (int) region.end()));
        }
    }
    return QueryInterval.optimizeIntervals(queryIntervals.toArray(new QueryInterval[queryIntervals.size()]));
}
 
Example 19
Source Project: hmftools   Source File: MNVValidator.java    License: GNU General Public License v3.0 5 votes vote down vote up
@NotNull
private SAMRecordIterator queryBam(@NotNull final PotentialMNVRegion potentialMnvRegion) {
    final int referenceIndex = getReferenceIndex(potentialMnvRegion);
    final QueryInterval[] queryIntervals =
            new QueryInterval[] { new QueryInterval(referenceIndex, potentialMnvRegion.start(), potentialMnvRegion.end() - 1) };
    return tumorReader().queryOverlapping(queryIntervals);
}
 
Example 20
QueryInterval[] getQueryIntervals(ReferenceRanges<?> ranges) {
  final List<QueryInterval> intervals = new ArrayList<>();
  for (Integer seqId : ranges.sequenceIds()) {
    assert seqId != null;
    for (RangeList.RangeData<?> r : ranges.get(seqId).getRangeList()) {
      intervals.add(new QueryInterval(seqId, r.getStart(), r.getEnd()));
    }
  }
  return intervals.toArray(new QueryInterval[0]);
}
 
Example 21
Source Project: picard   Source File: CollectIndependentReplicateMetrics.java    License: MIT License 5 votes vote down vote up
private SortedMap<QueryInterval, List<Allele>> getQueryIntervalsMap(final File vcf) {

        final Map<String, Integer> contigIndexMap = new HashMap<>();
        final VCFFileReader vcfReader = new VCFFileReader(vcf, false);

        // We want to look at unfiltered SNP sites for which the sample is genotyped as a het
        // with high quality.
        final CompoundFilter compoundFilter = new CompoundFilter(true);
        compoundFilter.add(new SnpFilter());
        compoundFilter.add(new PassingVariantFilter());
        compoundFilter.add(new GenotypeQualityFilter(MINIMUM_GQ, SAMPLE));
        compoundFilter.add(new HeterozygosityFilter(true, SAMPLE));

        final Iterator<VariantContext> hetIterator = new FilteringVariantContextIterator(vcfReader.iterator(), compoundFilter);

        for (final VCFContigHeaderLine vcfContig : vcfReader.getFileHeader().getContigLines()) {
            contigIndexMap.put(vcfContig.getID(), vcfContig.getContigIndex());
        }

        // return a TreeMap since the keys are comparable, and this will use their order in the iteration
        final SortedMap<QueryInterval, List<Allele>> map = new TreeMap<>();

        while (hetIterator.hasNext()) {
            final VariantContext vc = hetIterator.next();
            map.put(new QueryInterval(contigIndexMap.get(vc.getContig()), vc.getStart(), vc.getEnd()), vc.getGenotype(SAMPLE).getAlleles());
        }

        vcfReader.close();

        return map;
    }
 
Example 22
Source Project: gatk   Source File: IntervalUtils.java    License: BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Converts an interval in SimpleInterval format into an htsjdk QueryInterval.
 *
 * In doing so, a header lookup is performed to convert from contig name to index
 *
 * @param interval interval to convert
 * @param sequenceDictionary sequence dictionary used to perform the conversion
 * @return an equivalent interval in QueryInterval format
 */
public static QueryInterval convertSimpleIntervalToQueryInterval( final SimpleInterval interval, final SAMSequenceDictionary sequenceDictionary ) {
    Utils.nonNull(interval);
    Utils.nonNull(sequenceDictionary);

    final int contigIndex = sequenceDictionary.getSequenceIndex(interval.getContig());
    if ( contigIndex == -1 ) {
        throw new UserException("Contig " + interval.getContig() + " not present in reads sequence dictionary");
    }

    return new QueryInterval(contigIndex, interval.getStart(), interval.getEnd());
}
 
Example 23
@Test
public void testConvertSimpleIntervalToQueryInterval() {
    final SAMSequenceRecord contigRecord = new SAMSequenceRecord("1", 100);
    final SAMSequenceDictionary dictionary = new SAMSequenceDictionary(Arrays.asList(contigRecord));
    final SimpleInterval originalInterval = new SimpleInterval("1", 5, 10);

    final QueryInterval convertedInterval = IntervalUtils.convertSimpleIntervalToQueryInterval(originalInterval, dictionary);
    Assert.assertEquals(convertedInterval.referenceIndex, 0);
    Assert.assertEquals(convertedInterval.start, 5);
    Assert.assertEquals(convertedInterval.end, 10);
}
 
Example 24
Source Project: picard   Source File: CollectIndependentReplicateMetrics.java    License: MIT License 4 votes vote down vote up
private static QueryInterval queryIntervalFromSamRecord(final SAMRecord samRecord) {
    return new QueryInterval(samRecord.getReferenceIndex(), samRecord.getStart(), samRecord.getEnd());
}
 
Example 25
Source Project: Hadoop-BAM   Source File: BAMRecordReader.java    License: MIT License 4 votes vote down vote up
@Override public void initialize(InputSplit spl, TaskAttemptContext ctx)
           throws IOException
{
	// This method should only be called once (see Hadoop API). However,
	// there seems to be disagreement between implementations that call
	// initialize() and Hadoop-BAM's own code that relies on
	// {@link BAMInputFormat} to call initialize() when the reader is
	// created. Therefore we add this check for the time being. 
	if(isInitialized)
		close();
	isInitialized = true;
	reachedEnd = false;

	final Configuration conf = ctx.getConfiguration();

	final FileVirtualSplit split = (FileVirtualSplit)spl;
	final Path             file  = split.getPath();
	final FileSystem       fs    = file.getFileSystem(conf);

	ValidationStringency stringency = SAMHeaderReader.getValidationStringency(conf);
	boolean useIntelInflater = BAMInputFormat.useIntelInflater(conf);

	java.nio.file.Path index = SamFiles.findIndex(NIOFileUtil.asPath(fs.makeQualified(file).toUri()));
	Path fileIndex = index == null ? null : new Path(index.toUri());
	SeekableStream indexStream = fileIndex == null ? null : WrapSeekable.openPath(fs, fileIndex);
	in = WrapSeekable.openPath(fs, file);
	SamReader samReader = createSamReader(in, indexStream, stringency, useIntelInflater);
	final SAMFileHeader header = samReader.getFileHeader();

	long virtualStart = split.getStartVirtualOffset();

	fileStart  = virtualStart >>> 16;
	virtualEnd = split.getEndVirtualOffset();

	SamReader.PrimitiveSamReader primitiveSamReader =
			((SamReader.PrimitiveSamReaderToSamReaderAdapter) samReader).underlyingReader();
	bamFileReader = (BAMFileReader) primitiveSamReader;

	if (logger.isDebugEnabled()) {
		final long recordStart = virtualStart & 0xffff;
		logger.debug("Initialized BAMRecordReader; byte offset: {}, record offset: {}",
			fileStart, recordStart);
	}

	if (conf.getBoolean("hadoopbam.bam.keep-paired-reads-together", false)) {
		throw new IllegalArgumentException("Property hadoopbam.bam.keep-paired-reads-together is no longer honored.");
	}

	boolean boundedTraversal = BAMInputFormat.isBoundedTraversal(conf);
	if (boundedTraversal && split.getIntervalFilePointers() != null) {
		// return reads for intervals
		List<Interval> intervals = BAMInputFormat.getIntervals(conf);
		QueryInterval[] queryIntervals = BAMInputFormat.prepareQueryIntervals(intervals, header.getSequenceDictionary());
		iterator = bamFileReader.createIndexIterator(queryIntervals, false, split.getIntervalFilePointers());
	} else if (boundedTraversal && split.getIntervalFilePointers() == null) {
		// return unmapped reads
		iterator = bamFileReader.queryUnmapped();
	} else {
		// return everything
		BAMFileSpan splitSpan = new BAMFileSpan(new Chunk(virtualStart, virtualEnd));
		iterator = bamFileReader.getIterator(splitSpan);
	}
}
 
Example 26
Source Project: picard   Source File: CollectIndependentReplicateMetrics.java    License: MIT License 2 votes vote down vote up
/**
 * a small utility to inform if one interval is cleanly before another, meaning that they do not overlap and
 * the first is prior (in genomic order) to the second
 *
 * @param lhs the "first" {@link QueryInterval}
 * @param rhs the "second" {@link QueryInterval}
 * @return true if the to intervals do not intersect _and_ the first is prior to the second in genomic order
 */
private static boolean isCleanlyBefore(final QueryInterval lhs, final QueryInterval rhs) {
    return !lhs.overlaps(rhs) && lhs.compareTo(rhs) < 0;
}