htsjdk.samtools.util.Locatable Java Examples

The following examples show how to use htsjdk.samtools.util.Locatable. All of them are drawn from the open-source gatk and gatk-protected projects; the source file and license are noted above each example.
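Locatable is htsjdk's minimal interface for anything with a genomic position: getContig(), getStart(), and getEnd(), using 1-based, closed coordinates. A minimal sketch of a custom implementation follows (NamedRegion is a hypothetical name invented for illustration, not an htsjdk class); recent htsjdk versions also layer default methods such as overlaps(Locatable) and contains(Locatable) on top of these three accessors.

import htsjdk.samtools.util.Locatable;

// A minimal Locatable over 1-based, closed coordinates.
final class NamedRegion implements Locatable {
    private final String contig;
    private final int start;
    private final int end;

    NamedRegion(final String contig, final int start, final int end) {
        this.contig = contig;
        this.start = start;
        this.end = end;
    }

    @Override public String getContig() { return contig; }
    @Override public int getStart() { return start; }
    @Override public int getEnd() { return end; }
}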
Example #1
Source File: IntervalUtilsUnitTest.java    From gatk (BSD 3-Clause "New" or "Revised" License)
/**
 * Tests that combining intervals also sorts them correctly, by shuffling the inputs first
 */
@Test(dataProvider = "combineIntervalsTesting")
public void testCombineIntervalsWithShuffling(List<Locatable> locatables1, List<Locatable> locatables2, List<Locatable> gtOutput) {

    if (locatables1 != null) {
        Collections.shuffle(locatables1, new Random(4040));
    }
    if (locatables2 != null) {
        Collections.shuffle(locatables2, new Random(4040));
    }

    final SAMSequenceDictionary dictionary = getSamSequenceDictionaryForCombineIntervalTests();

    final List<Locatable> outputs = IntervalUtils.combineAndSortBreakpoints(locatables1, locatables2, dictionary);
    Assert.assertEquals(outputs.size(), gtOutput.size());
    Assert.assertEquals(outputs, gtOutput);
}
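As a rough illustration of the semantics this test exercises (an assumption inferred from the method name and the shuffling above, not from GATK documentation): combineAndSortBreakpoints appears to union the breakpoints of both inputs into sorted, non-overlapping pieces, which is why shuffling the inputs must not change the output.

// Hypothetical illustration of breakpoint combination:
//   locatables1 = [ 1:1000-2000 ]
//   locatables2 = [ 1:1500-2500 ]
// plausibly combine into the sorted, non-overlapping pieces
//   [ 1:1000-1499, 1:1500-2000, 1:2001-2500 ]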
 
Example #2
Source File: HaplotypeBAMWriterUnitTest.java    From gatk (BSD 3-Clause "New" or "Revised" License)
@Test(dataProvider = "ReadsLikelikhoodData")
public void testDontWriteHaplotypes
    (
        final String haplotypeBaseSignature,
        final List<Haplotype> haplotypes,
        final Locatable genomeLoc,
        final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods
    )
{
    final MockValidatingDestination mockDest = new MockValidatingDestination(haplotypeBaseSignature);

    try (final HaplotypeBAMWriter haplotypeBAMWriter = new HaplotypeBAMWriter(HaplotypeBAMWriter.WriterType.ALL_POSSIBLE_HAPLOTYPES, mockDest)) {
        haplotypeBAMWriter.setWriteHaplotypes(false);

        haplotypeBAMWriter.writeReadsAlignedToHaplotypes(
                haplotypes,
                genomeLoc,
                haplotypes,
                new HashSet<>(), // called haplotypes
                readLikelihoods);
    }

    Assert.assertFalse(mockDest.foundBases);
    Assert.assertEquals(mockDest.readCount, 4); // 4 samples + 0 haplotypes
}
 
Example #3
Source File: SparkSharder.java    From gatk (BSD 3-Clause "New" or "Revised" License)
private static <L extends Locatable, SB extends ShardBoundary> JavaRDD<Shard<L>> shard(JavaSparkContext ctx, JavaRDD<L> locatables, Class<L> locatableClass,
                                                            SAMSequenceDictionary sequenceDictionary, JavaRDD<SB> intervals,
                                                            int maxLocatableLength, boolean useShuffle) {

    JavaRDD<ShardBoundary> paddedIntervals = intervals.map(ShardBoundary::paddedShardBoundary);
    if (useShuffle) {
        throw new UnsupportedOperationException("Shuffle not supported when sharding an RDD of intervals.");
    }
    return joinOverlapping(ctx, locatables, locatableClass, sequenceDictionary, paddedIntervals, maxLocatableLength,
            new MapFunction<Tuple2<ShardBoundary, Iterable<L>>, Shard<L>>() {
                private static final long serialVersionUID = 1L;
                @Override
                public Shard<L> call(Tuple2<ShardBoundary, Iterable<L>> value) {
                    return value._1().createShard(value._2());
                }
            });
}
 
Example #4
Source File: HMMPostProcessor.java    From gatk-protected (BSD 3-Clause "New" or "Revised" License)
/**
 * Calculates the log probability that at least one of the targets in the range
 * [firstTargetIndex, firstTargetIndex + segmentLength) is called as the hidden state {@code call}
 *
 * @param firstTargetIndex segment start target index
 * @param segmentLength number of targets in the segment
 * @param call the called state
 * @param fbResult forward-backward result
 * @param <STATE> type of hidden state
 * @param <TARGET> type of target
 * @return the log probability that at least one target in the segment is called as {@code call}
 */
public static <STATE, TARGET extends Locatable> double logSomeProbability(
        final int firstTargetIndex,
        final int segmentLength,
        final STATE call,
        final ForwardBackwardAlgorithm.Result<?, TARGET, STATE> fbResult) {
    /* trivial case when the segment has length 0 */
    if (segmentLength == 0) {
        return 0;
    } else {
        final Set<STATE> otherStates = new HashSet<>(fbResult.model().hiddenStates());
        otherStates.remove(call);
        final List<Set<STATE>> otherStatesConstraints = Collections.nCopies(segmentLength, otherStates);
        final double logOtherStates = fbResult.logConstrainedProbability(firstTargetIndex, otherStatesConstraints);
        return GATKProtectedMathUtils.logProbComplement(logOtherStates);
    }
}
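The final step relies on a numerically stable log-space complement. Below is a minimal sketch of such a helper, assuming (without having checked the source) that GATKProtectedMathUtils.logProbComplement computes log(1 - p) from log(p):

// Standard "log1mexp" trick: given logP = log(p), return log(1 - p)
// without catastrophic cancellation, branching at -log(2).
static double logProbComplement(final double logP) {
    return logP > -Math.log(2.0)
            ? Math.log(-Math.expm1(logP))   // p close to 1
            : Math.log1p(-Math.exp(logP));  // p close to 0
}

Here log P(at least one target called as the state) = log(1 - P(no target called as it)), and the constrained forward-backward probability above supplies exactly that "none" term.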
 
Example #5
Source File: BasicValidationResult.java    From gatk (BSD 3-Clause "New" or "Revised" License)
@Override
protected BasicValidationResult createRecord(final DataLine dataLine) {
    final String contig = dataLine.get(BasicValidationResultTableColumn.CONTIG);
    final int start = dataLine.getInt(BasicValidationResultTableColumn.START);
    final int end = dataLine.getInt(BasicValidationResultTableColumn.END);
    final Allele ref = Allele.create(dataLine.get(BasicValidationResultTableColumn.REF).getBytes(), true);
    final Allele alt = Allele.create(dataLine.get(BasicValidationResultTableColumn.ALT).getBytes(), false);
    final int discoveryAltCount = dataLine.getInt(BasicValidationResultTableColumn.DISCOVERY_ALT_COVERAGE);
    final int discoveryRefCount = dataLine.getInt(BasicValidationResultTableColumn.DISCOVERY_REF_COVERAGE);
    final int validationAltCount = dataLine.getInt(BasicValidationResultTableColumn.VALIDATION_ALT_COVERAGE);
    final int validationRefCount = dataLine.getInt(BasicValidationResultTableColumn.VALIDATION_REF_COVERAGE);
    final int minValidationReadCount = dataLine.getInt(BasicValidationResultTableColumn.MIN_VAL_COUNT);
    final double power = dataLine.getDouble(BasicValidationResultTableColumn.POWER);
    final boolean isOutOfNoiseFloor = dataLine.getBoolean(BasicValidationResultTableColumn.IS_NOT_NOISE.toString());
    final boolean isEnoughValidationReads = dataLine.getBoolean(BasicValidationResultTableColumn.IS_ENOUGH_VALIDATION_COVERAGE.toString());
    final String filters = dataLine.get(BasicValidationResultTableColumn.DISCOVERY_VCF_FILTER);
    final long numAltSupportingReadsInNormal = dataLine.getLong(BasicValidationResultTableColumn.NUM_ALT_READS_IN_VALIDATION_NORMAL);

    final Locatable interval = new SimpleInterval(contig, start, end);

    return new BasicValidationResult(interval, minValidationReadCount, isEnoughValidationReads, isOutOfNoiseFloor,
            power, validationAltCount, validationRefCount, discoveryAltCount, discoveryRefCount, ref, alt,
            filters, numAltSupportingReadsInNormal);
}
 
Example #6
Source File: HMMSegmentProcessor.java    From gatk-protected (BSD 3-Clause "New" or "Revised" License)
/**
 * Calculates the posterior expectation of the mean of hidden state values on a segment
 *
 * @param firstTargetIndex first target index of the segment
 * @param segmentLength length of the segment
 * @param fbResult result of forward-backward algorithm
 * @param <STATE> type of hidden state; must implement {@link ScalarProducer}
 * @param <TARGET> type of target; must implement {@link Locatable}
 * @throws IllegalArgumentException if segment length is negative
 * @return segment mean, or {@link Double#NaN} for a zero-length segment
 */
public static <STATE extends ScalarProducer, TARGET extends Locatable> double calculateSegmentMean(
        final int firstTargetIndex,
        final int segmentLength,
        final ForwardBackwardAlgorithm.Result<?, TARGET, STATE> fbResult) {
    Utils.validateArg(segmentLength >= 0, "Segment length must be non-negative");
    /* trivial case when the segment has length 0 */
    if (segmentLength == 0) {
        return Double.NaN;
    } else {
        final List<STATE> hiddenStates = fbResult.model().hiddenStates();
        final double[] hiddenStateValues = hiddenStates.stream()
                .mapToDouble(STATE::getScalar)
                .toArray();
        return IntStream.range(firstTargetIndex, firstTargetIndex + segmentLength).boxed()
                .flatMapToDouble(targetIndex -> IntStream.range(0, hiddenStates.size())
                        .mapToDouble(si -> hiddenStateValues[si] *
                                FastMath.exp(fbResult.logProbability(targetIndex, hiddenStates.get(si)))))
                .sum() / segmentLength;
    }
}
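In formula form, the stream above evaluates the posterior expectation mean = (1/L) * sum over targets t in [t0, t0 + L) of sum over states s of v_s * exp(log p(s_t = s | data)), where L is segmentLength, t0 is firstTargetIndex, and v_s is the scalar value of hidden state s.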
 
Example #7
Source File: SparkSharder.java    From gatk (BSD 3-Clause "New" or "Revised" License)
private static <L extends Locatable, I extends Locatable, T> JavaRDD<T> joinOverlapping(JavaSparkContext ctx, JavaRDD<L> locatables, Class<L> locatableClass,
                                                                                        SAMSequenceDictionary sequenceDictionary, JavaRDD<I> intervals,
                                                                                        int maxLocatableLength, MapFunction<Tuple2<I, Iterable<L>>, T> f) {
    return joinOverlapping(ctx, locatables, locatableClass, sequenceDictionary, intervals, maxLocatableLength,
            (FlatMapFunction2<Iterator<L>, Iterator<I>, T>) (locatablesIterator, shardsIterator) -> Iterators.transform(locatablesPerShard(locatablesIterator, shardsIterator, sequenceDictionary, maxLocatableLength), new Function<Tuple2<I,Iterable<L>>, T>() {
                @Nullable
                @Override
                public T apply(@Nullable Tuple2<I, Iterable<L>> input) {
                    try {
                        return f.call(input);
                    } catch (Exception e) {
                        throw new RuntimeException(e);
                    }
                }
            }));
}
 
Example #8
Source File: SparkSharder.java    From gatk (BSD 3-Clause "New" or "Revised" License)
/**
 * Join an RDD of locatables with a set of intervals, and apply a function to process the locatables that overlap each interval.
 * @param ctx the Spark Context
 * @param locatables the locatables RDD, must be coordinate sorted
 * @param locatableClass the class of the locatables, must be a subclass of {@link Locatable}
 * @param sequenceDictionary the sequence dictionary to use to find contig lengths
 * @param intervals the collection of intervals to apply the function to
 * @param maxLocatableLength the maximum length of a {@link Locatable}, if any is larger than this size then an exception will be thrown
 * @param f the function to process intervals and overlapping locatables with
 * @param <L> the {@link Locatable} type
 * @param <I> the interval type
 * @param <T> the return type of <code>f</code>
 * @return an RDD of the values produced by applying <code>f</code> to each interval and its overlapping locatables
 */
private static <L extends Locatable, I extends Locatable, T> JavaRDD<T> joinOverlapping(JavaSparkContext ctx, JavaRDD<L> locatables, Class<L> locatableClass,
                                                                                        SAMSequenceDictionary sequenceDictionary, List<I> intervals,
                                                                                        int maxLocatableLength, MapFunction<Tuple2<I, Iterable<L>>, T> f) {
    return joinOverlapping(ctx, locatables, locatableClass, sequenceDictionary, intervals, maxLocatableLength,
            (FlatMapFunction2<Iterator<L>, Iterator<I>, T>) (locatablesIterator, shardsIterator) -> Iterators.transform(locatablesPerShard(locatablesIterator, shardsIterator, sequenceDictionary, maxLocatableLength), new Function<Tuple2<I,Iterable<L>>, T>() {
                @Nullable
                @Override
                public T apply(@Nullable Tuple2<I, Iterable<L>> input) {
                    try {
                        return f.call(input);
                    } catch (Exception e) {
                        throw new RuntimeException(e);
                    }
                }
            }));
}
 
Example #9
Source File: DepthOfCoverage.java    From gatk (BSD 3-Clause "New" or "Revised" License)
/**
 * @param alignmentContext current alignment context
 * @param referenceContext Reference bases spanning the current locus. Will be an empty, but non-null, context object
 *                         if there is no backing source of reference data (in which case all queries on it will return
 *                         an empty array/iterator). Can request extra bases of context around the current locus
 *                         by invoking {@link ReferenceContext#setWindow} on this object before calling {@link ReferenceContext#getBases}
 * @param featureContext Features spanning the current locus. Will be an empty, but non-null, context object
 *                       if there is no backing source of Feature data (in which case all queries on it will return an
 *                       empty List)
 * @param activeIntervals the tracked intervals that overlap the current locus
 */
@Override
public void apply(AlignmentContext alignmentContext, ReferenceContext referenceContext, FeatureContext featureContext, Set<Locatable> activeIntervals) {
    // TODO evaluate consequences of supporting nonexistent references
    if (includeRefNBases || (hasReference() && BaseUtils.isRegularBase(referenceContext.getBase()))) {
        final Map<DoCOutputType.Partition, Map<String, int[]>> countsByPartition = CoverageUtils.getBaseCountsByPartition(alignmentContext, minBaseQuality, maxBaseQuality, countType, partitionTypes, getHeaderForReads());

        if (!omitDepthOutput) {
            writer.writePerLocusDepthSummary(referenceContext.getInterval(), countsByPartition, globalIdentifierMap, includeDeletions);
        }

        // Update the traversing partitioners with this locus data:
        coverageTotalsForEntireTraversal.addLocusData(countsByPartition);

        // Update all of the active intervals that we are tracking separately with the generated counts
        for (Locatable loc : activeCoveragePartitioner.keySet()) {
            // For genes, we don't want to update the interval for non-exon bases
            if (loc.contains(alignmentContext)) {
                activeCoveragePartitioner.get(loc).addLocusData(countsByPartition);
            }
        }
    }
}
 
Example #10
Source File: HaplotypeBAMWriterUnitTest.java    From gatk (BSD 3-Clause "New" or "Revised" License)
@Test(dataProvider = "ReadsLikelikhoodData")
public void testCalledHaplotypeWriter
    (
        final String haplotypeBaseSignature,
        final List<Haplotype> haplotypes,
        final Locatable genomeLoc,
        final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods
    )
{
    final MockValidatingDestination mockDest = new MockValidatingDestination(haplotypeBaseSignature);

    final Set<Haplotype> calledHaplotypes = new LinkedHashSet<>(haplotypes);

    try (final HaplotypeBAMWriter haplotypeBAMWriter = new HaplotypeBAMWriter(HaplotypeBAMWriter.WriterType.CALLED_HAPLOTYPES, mockDest)) {
        haplotypeBAMWriter.writeReadsAlignedToHaplotypes(
                haplotypes,
                genomeLoc,
                haplotypes,
                calledHaplotypes,
                readLikelihoods);
    }

    Assert.assertTrue(mockDest.foundBases);
    Assert.assertEquals(mockDest.readCount, 5); // 4 samples + 1 haplotype
}
 
Example #11
Source File: SomaticGenotypingEngine.java    From gatk (BSD 3-Clause "New" or "Revised" License)
/**
 * Calculates the log likelihood of the ref/alt het genotype for each alt allele, then subtracts
 * it from the hom ref log likelihood to get the log-odds.
 *
 * @param matrix a matrix of log likelihoods
 * @return the hom-ref-versus-het log-odds for each alt allele
 */
private <EVIDENCE extends Locatable> PerAlleleCollection<Double> diploidAltLogOdds(final LikelihoodMatrix<EVIDENCE, Allele> matrix) {
    final int refIndex = getRefIndex(matrix);
    final int numReads = matrix.evidenceCount();
    final double homRefLogLikelihood = new IndexRange(0, numReads).sum(r -> matrix.get(refIndex,r));

    final PerAlleleCollection<Double> result = new PerAlleleCollection<>(PerAlleleCollection.Type.ALT_ONLY);
    // hom ref likelihood for the ref allele, het likelihood for alt alleles
    IntStream.range(0, matrix.numberOfAlleles()).filter(a -> a != refIndex)
            .forEach( a -> {
                final double hetLogLikelihood = new IndexRange(0, numReads)
                        .sum(r -> NaturalLogUtils.logSumExp(matrix.get(refIndex, r), matrix.get(a,r)) + NaturalLogUtils.LOG_ONE_HALF);
                result.setAlt(matrix.getAllele(a), homRefLogLikelihood - hetLogLikelihood);
            });
    return result;
}
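In log space, a read's likelihood under a ref/alt het genotype is log((L_ref + L_alt) / 2) = logSumExp(log L_ref, log L_alt) + log(1/2), which is the hetLogLikelihood term above; summing those per-read terms and subtracting the total from the hom-ref log likelihood gives the per-allele log-odds that is returned.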
 
Example #12
Source File: HaplotypeBAMWriterUnitTest.java    From gatk (BSD 3-Clause "New" or "Revised" License)
@Test(dataProvider = "ReadsLikelikhoodData")
public void testCreateSAMFromHeader(
        @SuppressWarnings("unused")  final String haplotypeBaseSignature,
        final List<Haplotype> haplotypes,
        final Locatable genomeLoc,
        final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods
) throws IOException {
    final Path outputPath = GATKBaseTest.createTempFile("fromHeaderSAM", ".sam").toPath();

    try (HaplotypeBAMWriter haplotypeBAMWriter = new HaplotypeBAMWriter(HaplotypeBAMWriter.WriterType.ALL_POSSIBLE_HAPLOTYPES, outputPath, false, false, samHeader)) {
        haplotypeBAMWriter.writeReadsAlignedToHaplotypes(
                haplotypes,
                genomeLoc,
                haplotypes,
                new HashSet<>(), // called haplotypes
                readLikelihoods);
    }

    final File expectedFile = new File(expectedFilePath, "fromHeaderSAM.sam");
    IntegrationTestSpec.assertEqualTextFiles(outputPath.toFile(), expectedFile);
}
 
Example #13
Source File: SegmentUtils.java    From gatk-protected (BSD 3-Clause "New" or "Revised" License)
public static <T extends Locatable> List<T> readSegmentFile(final File segmentsFile,
                                                            final TableColumnCollection mandatoryColumns,
                                                            final Function<DataLine, T> dataLineToSegmentFunction) {
    Utils.nonNull(segmentsFile);
    IOUtils.canReadFile(segmentsFile);
    try (final TableReader<T> reader = TableUtils.reader(segmentsFile,
            (columns, formatExceptionFactory) -> {
                TableUtils.checkMandatoryColumns(columns, mandatoryColumns, formatExceptionFactory);
                //return the lambda to translate dataLines into called segments
                return dataLineToSegmentFunction;
            })) {
        return reader.stream().collect(Collectors.toList());
    } catch (final IOException | UncheckedIOException e) {
        throw new UserException.CouldNotReadInputFile(segmentsFile, e);
    }
}
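A hypothetical call site, as a sketch only: the file name, the column names, and the TableColumnCollection constructor used here are assumptions for illustration, not taken from GATK code.

// Hypothetical usage: parse a three-column segment file into SimpleIntervals.
final List<SimpleInterval> segments = SegmentUtils.readSegmentFile(
        new File("segments.seg"),                             // assumed file name
        new TableColumnCollection("CONTIG", "START", "END"),  // assumed columns
        dataLine -> new SimpleInterval(
                dataLine.get("CONTIG"),
                dataLine.getInt("START"),
                dataLine.getInt("END")));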
 
Example #14
Source File: GenomicsDBTestUtils.java    From gatk (BSD 3-Clause "New" or "Revised" License)
/**
 * Create a temporary GenomicsDB containing multiple intervals of data from a single GVCF.
 * The database will be deleted automatically on JVM shutdown.
 * @param gvcf a GVCF to load from
 * @param intervals the list of intervals to load
 * @param mergeIntervals if true, import data over the span of all intervals
 * @return the created workspace folder containing the new GenomicsDB
 */
public static File createTempGenomicsDB(final File gvcf, final List<Locatable> intervals, final boolean mergeIntervals) {
    final File workspaceDir = BaseTest.createTempDir("genomicsDBWorkspace");

    final CommandLineProgramTester importer = GenomicsDBImport.class::getSimpleName;

    final ArgumentsBuilder args = new ArgumentsBuilder();
    args.addVCF(gvcf);

    final String workspace = new File(workspaceDir, "workspace").getAbsolutePath();
    args.add(GenomicsDBImport.WORKSPACE_ARG_LONG_NAME, workspace);
    intervals.forEach(args::addInterval);
    if (mergeIntervals) {
        args.add(GenomicsDBImport.MERGE_INPUT_INTERVALS_LONG_NAME, true);
    }
    importer.runCommandLine(args);
    return new File(workspace);
}
 
Example #15
Source File: HaplotypeBAMWriterUnitTest.java    From gatk (BSD 3-Clause "New" or "Revised" License)
@Test(dataProvider = "ReadsLikelikhoodData")
public void testNoCalledHaplotypes
    (
        final String haplotypeBaseSignature,
        final List<Haplotype> haplotypes,
        final Locatable genomeLoc,
        final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods
    )
{
    final MockValidatingDestination mockDest = new MockValidatingDestination(haplotypeBaseSignature);

    try (final HaplotypeBAMWriter haplotypeBAMWriter = new HaplotypeBAMWriter(HaplotypeBAMWriter.WriterType.CALLED_HAPLOTYPES, mockDest)) {
        haplotypeBAMWriter.writeReadsAlignedToHaplotypes(
                haplotypes,
                genomeLoc,
                haplotypes,
                new LinkedHashSet<>(),
                readLikelihoods);
    }

    Assert.assertEquals(mockDest.readCount, 0); // no called haplotypes, no reads
}
 
Example #16
Source File: ReferenceConfidenceVariantContextMergerUnitTest.java    From gatk-protected (BSD 3-Clause "New" or "Revised" License)
@Test(dataProvider = "referenceConfidenceMergeData")
public void testReferenceConfidenceMerge(final String testID, final List<VariantContext> toMerge, final Locatable loc,
                                         final boolean returnSiteEvenIfMonomorphic, final boolean uniquifySamples, final VariantContext expectedResult) {
    ReferenceConfidenceVariantContextMerger merger = new ReferenceConfidenceVariantContextMerger();
    final VariantContext result = merger.merge(toMerge, loc, returnSiteEvenIfMonomorphic ? (byte) 'A' : null, true, uniquifySamples);
    if ( result == null ) {
        Assert.assertNull(expectedResult);
        return;
    }
    Assert.assertEquals(result.getAlleles(), expectedResult.getAlleles(), testID);
    Assert.assertEquals(result.getNSamples(), expectedResult.getNSamples(), testID);
    for ( final Genotype expectedGenotype : expectedResult.getGenotypes() ) {
        Assert.assertTrue(result.hasGenotype(expectedGenotype.getSampleName()), "Missing " + expectedGenotype.getSampleName());
        VariantContextTestUtils.assertGenotypesAreEqual(result.getGenotype(expectedGenotype.getSampleName()), expectedGenotype);
    }
}
 
Example #17
Source File: FeatureDataSource.java    From gatk (BSD 3-Clause "New" or "Revised" License)
/**
 * Refill our cache from disk after a cache miss. Will prefetch Features overlapping an additional
 * queryLookaheadBases bases after the end of the provided interval, in addition to those overlapping
 * the interval itself.
 * <p>
 * Calling this has the side effect of invalidating (closing) any currently-open iteration over
 * this data source.
 *
 * @param interval the query interval that produced a cache miss
 */
private void refillQueryCache(final Locatable interval) {
    // Tribble documentation states that having multiple iterators open simultaneously over the same FeatureReader
    // results in undefined behavior
    closeOpenIterationIfNecessary();

    // Expand the end of our query by the configured number of bases, in anticipation of probable future
    // queries with slightly larger start/stop positions.
    //
    // Note that it doesn't matter if we go off the end of the contig in the process, since
    // our reader's query operation is not aware of (and does not care about) contig boundaries.
    // Note: we use addExact to blow up on overflow rather than propagate negative results downstream
    final SimpleInterval queryInterval = new SimpleInterval(interval.getContig(), interval.getStart(), Math.addExact(interval.getEnd(), queryLookaheadBases));

    // Query iterator over our reader will be immediately closed after re-populating our cache
    try (final CloseableTribbleIterator<T> queryIter = featureReader.query(queryInterval.getContig(), queryInterval.getStart(), queryInterval.getEnd())) {
        queryCache.fill(queryIter, queryInterval);
    } catch (final IOException e) {
        throw new GATKException("Error querying file " + featureInput + " over interval " + interval, e);
    }
}
 
Example #18
Source File: GenomicsDBTestUtils.java    From gatk (BSD 3-Clause "New" or "Revised" License)
/**
 * Create a temporary GenomicsDB containing a single interval of data from a set of GVCFs.
 * The database will be deleted automatically on JVM shutdown.
 * @param gvcfs a List of GVCFs to load from
 * @param interval the interval to load
 * @return the created workspace folder containing the new GenomicsDB
 */
public static File createTempGenomicsDB(final List<File> gvcfs, final Locatable interval) {
    final File workspaceDir = BaseTest.createTempDir("genomicsDBWorkspace");

    final CommandLineProgramTester importer = GenomicsDBImport.class::getSimpleName;

    final ArgumentsBuilder args = new ArgumentsBuilder();
    gvcfs.forEach(args::addVCF);


    final String workspace = new File(workspaceDir, "workspace").getAbsolutePath();
    args.add(GenomicsDBImport.WORKSPACE_ARG_LONG_NAME, workspace);
    args.addInterval(interval);
    args.add(StandardArgumentDefinitions.ADD_OUTPUT_VCF_COMMANDLINE, "false");
    importer.runCommandLine(args);
    return new File(workspace);
}
 
Example #19
Source File: FeatureDataSource.java    From gatk (BSD 3-Clause "New" or "Revised" License)
/**
 * Returns a List of all Features in this data source that overlap the provided interval.
 * <p>
 * This operation is not affected by intervals provided via {@link #setIntervalsForTraversal(List)}.
 * <p>
 * Requires the backing file to have been indexed using the IndexFeatureFile tool, and to
 * be sorted in increasing order of start position for each contig.
 * <p>
 * Query results are cached to improve the performance of future queries during typical access
 * patterns. See notes to the class as a whole for a description of the caching strategy.
 * <p>
 * Calling this method potentially invalidates (closes) any other open iterator obtained
 * from this data source via a call to {@link #iterator}
 *
 * @param interval retrieve all Features overlapping this interval
 * @return a List of all Features in this data source that overlap the provided interval
 */
public List<T> queryAndPrefetch(final Locatable interval) {
    if (!supportsRandomAccess) {
        throw new UserException("Input " + featureInput.getFeaturePath() + " must support random access to enable queries by interval. " +
                "If it's a file, please index it using the bundled tool " + IndexFeatureFile.class.getSimpleName());
    }

    // If the query can be satisfied using existing cache contents, prepare for retrieval
    // by discarding all Features at the beginning of the cache that end before the start
    // of our query interval.
    if (queryCache.cacheHit(interval)) {
        queryCache.trimToNewStartPosition(interval.getStart());
    }
    // Otherwise, we have a cache miss, so go to disk to refill our cache.
    else {
        refillQueryCache(interval);
    }

    // Return the subset of our cache that overlaps our query interval
    return queryCache.getCachedFeaturesUpToStopPosition(interval.getEnd());
}
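A hedged usage sketch: assuming a VCF-backed FeatureDataSource over an indexed file named sample.vcf.gz (the file name and interval are made up for illustration):

// Query a VCF-backed FeatureDataSource by interval; results past the end of
// the interval are prefetched into the cache for subsequent nearby queries.
try (final FeatureDataSource<VariantContext> source =
         new FeatureDataSource<>(new File("sample.vcf.gz"))) {
    final List<VariantContext> overlapping =
            source.queryAndPrefetch(new SimpleInterval("1", 1_000_000, 1_001_000));
    overlapping.forEach(vc -> System.out.println(vc.getContig() + ":" + vc.getStart()));
}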
 
Example #20
Source File: FuncotatorUtilsUnitTest.java    From gatk (BSD 3-Clause "New" or "Revised" License)
@Test(dataProvider = "provideDataForGetCodonChangeString")
void testGetCodonChangeString( final String refAllele,
                               final String altAllele,
                               final int alleleStart,
                               final int codingSequenceAlleleStart,
                               final int alignedCodingSequenceAlleleStart,
                               final String alignedCodingSeqRefAllele,
                               final String alignedCodingSeqAltAllele,
                               final String alignedAlternateAllele,
                               final int alignedRefAlleleStop,
                               final String contig,
                               final Strand strand,
                               final ReferenceSequence codingSequence,
                               final Locatable startCodon,
                               final String expected ) {

    final SequenceComparison seqComp = new SequenceComparison();
    seqComp.setReferenceAllele(refAllele);
    seqComp.setAlternateAllele(altAllele);
    seqComp.setAlleleStart(alleleStart);
    seqComp.setCodingSequenceAlleleStart(codingSequenceAlleleStart);
    seqComp.setAlignedCodingSequenceAlleleStart(alignedCodingSequenceAlleleStart);
    seqComp.setAlignedCodingSequenceReferenceAllele(alignedCodingSeqRefAllele);
    seqComp.setAlignedCodingSequenceAlternateAllele(alignedCodingSeqAltAllele);
    seqComp.setAlignedAlternateAllele(alignedAlternateAllele);
    seqComp.setAlignedReferenceAlleleStop(alignedRefAlleleStop);
    seqComp.setContig(contig);
    seqComp.setStrand(strand);
    seqComp.setTranscriptCodingSequence(codingSequence);

    Assert.assertEquals(FuncotatorUtils.getCodonChangeString(seqComp, startCodon), expected);
}
 
Example #21
Source File: FuncotatorUtilsUnitTest.java    From gatk (BSD 3-Clause "New" or "Revised" License)
@DataProvider
Object[][] provideForCreateIntronicCDnaString() {

    final List<Locatable> exonList = new ArrayList<>();
    exonList.add( new SimpleInterval("testContig", 20, 30) );
    exonList.add( new SimpleInterval("testContig", 40, 50) );
    exonList.add( new SimpleInterval("testContig", 60, 70) );
    exonList.add( new SimpleInterval("testContig", 80, 90) );
    exonList.add( new SimpleInterval("testContig", 10000, 15000) );
    exonList.add( new SimpleInterval("testContig", 10000000, 15000000) );

    return new Object[][] {
            // No exons:
            helpProvideForCreateIntronicCDnaString(  8, Collections.emptyList(), "A", "C", null, null ),
            // Start before first exon start:
            helpProvideForCreateIntronicCDnaString(  8, exonList,"A", "T", 0, -12 ),
            // Start inside first exon:
            helpProvideForCreateIntronicCDnaString( 25, exonList,"C", "T", 0, 5 ),
            // Start after first exon end:
            helpProvideForCreateIntronicCDnaString( 31, exonList, "G", "T", 0, 1 ),
            // Start after exon start, just before a "break point":
            helpProvideForCreateIntronicCDnaString( 35, exonList, "ATG", "GCT", 0, 5),
            // Start between exons, just after a "break point":
            helpProvideForCreateIntronicCDnaString( 36, exonList, "ATG", "GCT", 1, -4),
            // Start between exons:
            helpProvideForCreateIntronicCDnaString( 500, exonList, "TTTTTTTTT", "AAAAAAAAA", 3, 410),
            // Start after last exon:
            helpProvideForCreateIntronicCDnaString( 50000000, exonList, "A", "GTG", 5, 35000000 ),
    };
}
 
Example #22
Source File: LocusWalkerByIntervalUnitTest.java    From gatk (BSD 3-Clause "New" or "Revised" License)
public TestTransformedLocusWalker(Collection<Locatable> objectsToTestOverlapTo) {
    for (Locatable l : objectsToTestOverlapTo) {
        overlapBases.put(l, 0);
        wasOpened.put(l, false);
        wasClosed.put(l, false);
    }
}
 
Example #23
Source File: HaplotypeBAMWriterUnitTest.java    From gatk (BSD 3-Clause "New" or "Revised" License)
private Path testWriteToFile
    (
        final String outputFileExtension,
        final List<Haplotype> haplotypes,
        final Locatable genomeLoc,
        final AlleleLikelihoods<GATKRead, Haplotype> readLikelihoods,
        final boolean createIndex,
        final boolean createMD5
    ) throws IOException
{
    final Path outPath = GATKBaseTest.createTempFile("haplotypeBamWriterTest", outputFileExtension).toPath();
    final SAMFileDestination fileDest = new SAMFileDestination(outPath, createIndex, createMD5, samHeader, "TestHaplotypeRG");

    try (final HaplotypeBAMWriter haplotypeBAMWriter = new HaplotypeBAMWriter(HaplotypeBAMWriter.WriterType.ALL_POSSIBLE_HAPLOTYPES, fileDest)) {
        haplotypeBAMWriter.writeReadsAlignedToHaplotypes(
                haplotypes,
                genomeLoc,
                haplotypes,
                new HashSet<>(), // called haplotypes
                readLikelihoods);
    }

    Assert.assertEquals(getReadCounts(outPath), 5);

    final File expectedMD5File = new File(outPath.toFile().getAbsolutePath() + ".md5");
    Assert.assertEquals(expectedMD5File.exists(), createMD5);
    if (createIndex) {
        Assert.assertNotNull(SamFiles.findIndex(outPath));
    } else {
        Assert.assertNull(SamFiles.findIndex(outPath));
    }
    return outPath;
}
 
Example #24
Source File: IntervalUtilsUnitTest.java    From gatk (BSD 3-Clause "New" or "Revised" License)
@DataProvider(name = "lexicographicalOrderComparatorData")
public Object[][] lexicographicalOrderComparatorData() {
    final String[] CONTIG_NAMES = new String[] {"A","AA","B","C", null};
    final int[] CONTIG_SIZES = new int[] {200,300,400,500,600};
    final int MIN_INTERVAL_SIZE = 1;
    final int MAX_INTERVAL_SIZE = 100;
    final Random rdn = new Random(1131312131);
    final int CASE_NUMBER = 100;
    final List<Object[]> result = new ArrayList<>(CASE_NUMBER);
    for (int i = 0; i < CASE_NUMBER; i++) {
        final int locatableCount = rdn.nextInt(100) + 1;
        final List<Locatable> locatables = new ArrayList<>(locatableCount);
        for (int j = 0; j < locatableCount; j++) {
            final int contigIdx = rdn.nextInt(CONTIG_NAMES.length);
            final String contig = CONTIG_NAMES[contigIdx];

            final boolean useSimpleInterval = contig != null && rdn.nextBoolean();
            final int intervalSize = rdn.nextInt(MAX_INTERVAL_SIZE - MIN_INTERVAL_SIZE + 1) + MIN_INTERVAL_SIZE;
            final int start = rdn.nextInt(CONTIG_SIZES[contigIdx] - intervalSize) + 1;
            final int end = start + intervalSize - 1;
            final Locatable loc = useSimpleInterval ? new SimpleInterval(contig, start, end) : new SimpleFeature(contig, start, end);
            locatables.add(loc);
        }
        result.add(new Object[] { locatables });
    }
    return result.toArray(new Object[result.size()][]);
}
 
Example #25
Source File: GenotypeGVCFsIntegrationTest.java    From gatk (BSD 3-Clause "New" or "Revised" License)
@Test(dataProvider = "getGVCFsForGenomicsDB", timeOut = 1000000)
public void assertMatchingGenotypesFromGenomicsDB(File input, File expected, Locatable interval, String reference) throws IOException {
    final File tempGenomicsDB = GenomicsDBTestUtils.createTempGenomicsDB(input, interval);
    final String genomicsDBUri = GenomicsDBTestUtils.makeGenomicsDBUri(tempGenomicsDB);
    runGenotypeGVCFSAndAssertSomething(genomicsDBUri, expected, NO_EXTRA_ARGS, VariantContextTestUtils::assertVariantContextsHaveSameGenotypes, reference);

    // The default option with GenomicsDB input uses VCFCodec for decoding, test BCFCodec explicitly
    final List<String> args = new ArrayList<>();
    args.add("--" + GenomicsDBArgumentCollection.USE_BCF_CODEC_LONG_NAME);
    runGenotypeGVCFSAndAssertSomething(genomicsDBUri, expected, args, VariantContextTestUtils::assertVariantContextsHaveSameGenotypes, reference);
}
 
Example #26
Source File: FuncotatorUtilsUnitTest.java    From gatk (BSD 3-Clause "New" or "Revised" License)
@DataProvider
Object[][] provideForGetClosestExonIndex() {

    final List<Locatable> exonList = new ArrayList<>();
    exonList.add( new SimpleInterval("testContig", 20, 30) );
    exonList.add( new SimpleInterval("testContig", 40, 50) );
    exonList.add( new SimpleInterval("testContig", 60, 70) );
    exonList.add( new SimpleInterval("testContig", 80, 90) );
    exonList.add( new SimpleInterval("testContig", 10000, 15000) );
    exonList.add( new SimpleInterval("testContig", 10000000, 15000000) );

    return new Object[][] {
            // No exons:
            {  8, Collections.emptyList(), -1 },
            // Start before first exon start:
            {  8, exonList, 0 },
            // Start inside first exon:
            { 25, exonList, 0 },
            // Start after first exon end:
            { 31, exonList, 0 },
            // Start after exon start, just before a "break point":
            { 35, exonList, 0 },
            // Start between exons, just after a "break point":
            { 36, exonList, 1 },
            // Start between exons:
            { 500, exonList, 3 },
            // Start after last exon:
            { 50000000, exonList, 5 },
    };
}
 
Example #27
Source File: VariantShard.java    From gatk (BSD 3-Clause "New" or "Revised" License)
/**
 * getVariantShardsFromInterval calculates *all* shards overlapping location.
 * @param location the range of sites determines which shards are overlapping
 * @return All overlapping VariantShards
 */
public static List<VariantShard> getVariantShardsFromInterval(final Locatable location) {
    if (location.getContig() == null) {
        // don't feed me unmapped reads!
        throw new GATKException("getVariantShardsFromInterval requires locations to be mapped");
    }
    List<VariantShard> shardList = new ArrayList<>();
    // Get all of the shard numbers that span the start and end of the interval.
    int startShard = location.getStart() / VARIANT_SHARDSIZE;
    int endShard = location.getEnd() / VARIANT_SHARDSIZE;
    for (int i = startShard; i <= endShard; ++i) {
        shardList.add(new VariantShard(i, location.getContig()));
    }
    return shardList;
}
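A worked example, assuming hypothetically that VARIANT_SHARDSIZE is 10_000 (the actual constant is not shown here):

// An interval at 1:25_000-35_000 gives startShard = 25_000 / 10_000 = 2
// and endShard = 35_000 / 10_000 = 3, so it maps to VariantShards 2 and 3
// on contig "1".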
 
Example #28
Source File: AlleleFractionSegmentedData.java    From gatk (BSD 3-Clause "New" or "Revised" License)
AlleleFractionSegmentedData(final AllelicCountCollection allelicCounts,
                            final SimpleIntervalCollection segments) {
    this.allelicCounts = Utils.nonNull(allelicCounts);
    this.segments = Utils.nonNull(segments);

    final List<IndexedAllelicCount> indexedAllelicCounts = new ArrayList<>(allelicCounts.size());
    final List<IndexRange> indexRangesPerSegment = new ArrayList<>(segments.size());

    final OverlapDetector<AllelicCount> allelicCountOverlapDetector = allelicCounts.getOverlapDetector();
    final Comparator<Locatable> comparator = allelicCounts.getComparator();
    int startIndex = 0;
    for (int segmentIndex = 0; segmentIndex < segments.size(); segmentIndex++) {
        final SimpleInterval segment = segments.getRecords().get(segmentIndex);
        final List<AllelicCount> allelicCountsInSegment = allelicCountOverlapDetector.getOverlaps(segment).stream()
                .sorted(comparator)
                .collect(Collectors.toList());
        final int segmentStartIndex = startIndex;
        final int si = segmentIndex;
        IntStream.range(0, allelicCountsInSegment.size()).boxed()
                .map(i -> new IndexedAllelicCount(allelicCountsInSegment.get(i), segmentStartIndex + i, si))
                .forEach(indexedAllelicCounts::add);
        indexRangesPerSegment.add(new IndexRange(segmentStartIndex, segmentStartIndex + allelicCountsInSegment.size()));
        startIndex += allelicCountsInSegment.size();
    }

    this.indexedAllelicCounts = Collections.unmodifiableList(indexedAllelicCounts);
    this.indexRangesPerSegment = Collections.unmodifiableList(indexRangesPerSegment);
}
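The net effect: indexedAllelicCounts holds every allelic count tagged with its global index and its segment index, while indexRangesPerSegment.get(s) records the half-open [from, to) range of global indices belonging to segment s, so per-segment slices can later be taken without consulting the overlap detector again.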
 
Example #29
Source File: BAQUnitTest.java    From gatk (BSD 3-Clause "New" or "Revised" License)
@Test //regression test for https://github.com/broadinstitute/gatk/issues/1234
public void testGetReferenceWindowForReadStopsAtContigStart() throws Exception {
    final int bandwidth = 7;
    GATKRead read = ArtificialReadUtils.createArtificialRead("10M");

    final int start = 2;
    final Locatable pos = new SimpleInterval("1", start, start + read.getLength());
    read.setPosition(pos);
    final SimpleInterval referenceWindowForRead = BAQ.getReferenceWindowForRead(read, bandwidth);
    SimpleInterval refWindow = new SimpleInterval("1", 1, read.getEnd() + (bandwidth / 2));  //start is at 1 because we hit the front end of the reference
    Assert.assertEquals(referenceWindowForRead, refWindow);
}
 
Example #30
Source File: IntervalUtilsUnitTest.java    From gatk (BSD 3-Clause "New" or "Revised" License)
@Test(expectedExceptions = UserException.BadInput.class)
public void testCombineIntervalsErrorOverlapping2() {
    final List<Locatable> input1_1 = new ArrayList<>();
    final List<Locatable> input1_2 = new ArrayList<>();
    input1_1.add(new SimpleInterval("1", 1000, 2000));
    input1_2.add(new SimpleInterval("1", 500, 2500));
    input1_2.add(new SimpleInterval("1", 2501, 3000));
    input1_2.add(new SimpleInterval("1", 4000, 5000));
    input1_2.add(new SimpleInterval("1", 4000, 5000));
    IntervalUtils.combineAndSortBreakpoints(input1_1, input1_2, getSamSequenceDictionaryForCombineIntervalTests());
}