Java Code Examples for htsjdk.samtools.util.IntervalList#add()

The following examples show how to use htsjdk.samtools.util.IntervalList#add() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: IntervalUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Splits an interval list into multiple files.
 * @param fileHeader The sam file header.
 * @param splits Pre-divided genome locs returned by splitFixedIntervals.
 * @param scatterParts The output interval lists to write to.
 */
public static void scatterFixedIntervals(final SAMFileHeader fileHeader, final List<List<GenomeLoc>> splits, final List<File> scatterParts) {
    Utils.nonNull(fileHeader, "fileHeader is null");
    Utils.nonNull(splits, "splits is null");
    Utils.nonNull(scatterParts, "scatterParts is null");
    Utils.containsNoNull(splits, "null split loc");

    if (splits.size() != scatterParts.size()) {
        throw new CommandLineException.BadArgumentValue("splits", String.format("Split points %d does not equal the number of scatter parts %d.", splits.size(), scatterParts.size()));
    }

    int fileIndex = 0;
    int locIndex = 1;
    for (final List<GenomeLoc> split : splits) {
        final IntervalList intervalList = new IntervalList(fileHeader);
        for (final GenomeLoc loc : split) {
            intervalList.add(toInterval(loc, locIndex++));
        }
        intervalList.write(scatterParts.get(fileIndex++));
    }
}
 
Example 2
Source File: IntervalUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test(expectedExceptions={UserException.MalformedFile.class, UserException.MalformedGenomeLoc.class}, dataProvider="invalidIntervalTestData")
public void testLoadIntervalsInvalidPicardIntervalHandling(GenomeLocParser genomeLocParser,
                                              String contig, int intervalStart, int intervalEnd ) throws Exception {

    SAMFileHeader picardFileHeader = new SAMFileHeader();
    picardFileHeader.addSequence(genomeLocParser.getContigInfo("1"));
    // Intentionally add an extra contig not in our genomeLocParser's sequence dictionary
    // so that we can add intervals to the Picard interval file for contigs not in our dictionary
    picardFileHeader.addSequence(new SAMSequenceRecord("2", 100000));

    IntervalList picardIntervals = new IntervalList(picardFileHeader);
    picardIntervals.add(new Interval(contig, intervalStart, intervalEnd, true, "dummyname"));

    File picardIntervalFile = createTempFile("testLoadIntervalsInvalidPicardIntervalHandling", ".intervals");
    picardIntervals.write(picardIntervalFile);

    List<String> intervalArgs = new ArrayList<>(1);
    intervalArgs.add(picardIntervalFile.getAbsolutePath());

    // loadIntervals() will validate all intervals against the sequence dictionary in our genomeLocParser,
    // and should throw for all intervals in our invalidIntervalTestData set
    IntervalUtils.loadIntervals(intervalArgs, IntervalSetRule.UNION, IntervalMergingRule.ALL, 0, genomeLocParser);
}
 
Example 3
Source File: BaitDesigner.java    From picard with MIT License 6 votes vote down vote up
/** Calculates a few statistics about the bait design that can then be output. */
void calculateStatistics(final IntervalList targets, final IntervalList baits) {
    this.TARGET_TERRITORY = (int) targets.getUniqueBaseCount();
    this.TARGET_COUNT = targets.size();
    this.BAIT_TERRITORY = (int) baits.getUniqueBaseCount();
    this.BAIT_COUNT = baits.size();
    this.DESIGN_EFFICIENCY = this.TARGET_TERRITORY / (double) this.BAIT_TERRITORY;

    // Figure out the intersection between all targets and all baits
    final IntervalList tmp = new IntervalList(targets.getHeader());
    final OverlapDetector<Interval> detector = new OverlapDetector<Interval>(0, 0);
    detector.addAll(baits.getIntervals(), baits.getIntervals());

    for (final Interval target : targets) {
        final Collection<Interval> overlaps = detector.getOverlaps(target);

        if (overlaps.isEmpty()) {
            this.ZERO_BAIT_TARGETS++;
        } else {
            for (final Interval i : overlaps) tmp.add(target.intersect(i));
        }
    }

    tmp.uniqued();
    this.BAIT_TARGET_TERRITORY_INTERSECTION = (int) tmp.getBaseCount();
}
 
Example 4
Source File: HetPulldownCalculatorUnitTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@DataProvider(name = "inputGetPileupBaseCount")
public Object[][] inputGetPileupBaseCount() throws IOException {
    try (final SamReader bamReader = SamReaderFactory.makeDefault().open(NORMAL_BAM_FILE)) {
        final IntervalList intervals = new IntervalList(bamReader.getFileHeader());
        intervals.add(new Interval("1", 100, 100));
        intervals.add(new Interval("1", 11000, 11000));
        intervals.add(new Interval("1", 14000, 14000));
        intervals.add(new Interval("1", 14630, 14630));

        final SamLocusIterator locusIterator = new SamLocusIterator(bamReader, intervals);

        final Nucleotide.Counter baseCounts1 = makeBaseCounts(0, 0, 0, 0);
        final Nucleotide.Counter baseCounts2 = makeBaseCounts(0, 9, 0, 0);
        final Nucleotide.Counter baseCounts3 = makeBaseCounts(12, 0, 0, 0);
        final Nucleotide.Counter baseCounts4 = makeBaseCounts(0, 0, 8, 9);

        if (!locusIterator.hasNext()) {
            throw new SAMException("Can't get locus to start iteration. Check that " + NORMAL_BAM_FILE.toString()
                    + " contains 1:0-16000.");
        }
        final SamLocusIterator.LocusInfo locus1 = locusIterator.next();
        final SamLocusIterator.LocusInfo locus2 = locusIterator.next();
        final SamLocusIterator.LocusInfo locus3 = locusIterator.next();
        final SamLocusIterator.LocusInfo locus4 = locusIterator.next();
        locusIterator.close();

        return new Object[][]{
                {locus1, baseCounts1},
                {locus2, baseCounts2},
                {locus3, baseCounts3},
                {locus4, baseCounts4}
        };
    }
}
 
Example 5
Source File: HetPulldownCalculatorUnitTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@DataProvider(name = "inputGetTumorHetPulldownMin15")
public Object[][] inputGetTumorHetPulldown15() {
    final Pulldown tumorHetPulldown = new Pulldown(normalHeader);
    tumorHetPulldown.add(new AllelicCount(new SimpleInterval("1", 14630, 14630), 9, 8));
    tumorHetPulldown.add(new AllelicCount(new SimpleInterval("2", 14689, 14689), 6, 9));

    final IntervalList normalHetIntervals = new IntervalList(tumorHeader);
    normalHetIntervals.add(new Interval("1", 14630, 14630));
    normalHetIntervals.add(new Interval("2", 14689, 14689));

    return new Object[][]{
            {normalHetIntervals, tumorHetPulldown}
    };
}
 
Example 6
Source File: PreprocessIntervals.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private static IntervalList filterBinsContainingOnlyNs(final IntervalList unfilteredBins, final ReferenceDataSource reference) {
    final IntervalList bins = new IntervalList(reference.getSequenceDictionary());
    for (final Interval unfilteredBin : unfilteredBins) {
        if (!Utils.stream(reference.query(new SimpleInterval(unfilteredBin))).allMatch(b -> Nucleotide.decode(b) == Nucleotide.N)) {
            bins.add(unfilteredBin);
        }
    }
    return bins;
}
 
Example 7
Source File: PreprocessIntervals.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private static IntervalList generateBins(final IntervalList preparedIntervalList, final int binLength, final SAMSequenceDictionary sequenceDictionary) {
    if (binLength == 0) {
        return IntervalList.copyOf(preparedIntervalList);
    }
    final IntervalList bins = new IntervalList(sequenceDictionary);
    for (final Interval interval : preparedIntervalList) {
        for (int binStart = interval.getStart(); binStart <= interval.getEnd(); binStart += binLength) {
            final int binEnd = FastMath.min(binStart + binLength - 1, interval.getEnd());
            bins.add(new Interval(interval.getContig(), binStart, binEnd));
        }
    }
    return bins;
}
 
Example 8
Source File: ByIntervalListVariantContextIteratorTest.java    From picard with MIT License 5 votes vote down vote up
@Test
public void testNoVariants() {
    final IntervalList intervalList         = new IntervalList(header);
    intervalList.add(new Interval(this.dict.getSequence(0).getSequenceName(), 1, 100));
    final VCFFileReader reader              = getReader(EMPTY_VCF);
    final Iterator<VariantContext> iterator = new ByIntervalListVariantContextIterator(reader, intervalList);
    Assert.assertFalse(iterator.hasNext());
    reader.close();
}
 
Example 9
Source File: ByIntervalListVariantContextIteratorTest.java    From picard with MIT License 5 votes vote down vote up
@Test
public void testSimpleOverlap() {
    final IntervalList intervalList         = new IntervalList(header);
    intervalList.add(new Interval("2", 167166899, 167166899));
    final VCFFileReader reader              = getReader(CEU_TRIOS_SNPS_VCF);
    final Iterator<VariantContext> iterator = new ByIntervalListVariantContextIterator(reader, intervalList);
    Assert.assertTrue(iterator.hasNext());
    final VariantContext ctx = iterator.next();
    Assert.assertEquals(ctx.getStart(), 167166899);
    Assert.assertFalse(iterator.hasNext());
    reader.close();
}
 
Example 10
Source File: ByIntervalListVariantContextIteratorTest.java    From picard with MIT License 5 votes vote down vote up
@Test
public void testNoOverlapDifferentContig() {
    final IntervalList intervalList         = new IntervalList(header);
    intervalList.add(new Interval("3", 167166899, 167166899));
    final VCFFileReader reader              = getReader(CEU_TRIOS_SNPS_VCF);
    final Iterator<VariantContext> iterator = new ByIntervalListVariantContextIterator(reader, intervalList);
    Assert.assertFalse(iterator.hasNext());
    reader.close();
}
 
Example 11
Source File: ByIntervalListVariantContextIteratorTest.java    From picard with MIT License 5 votes vote down vote up
@Test
public void testSimpleEnclosing() {
    final IntervalList intervalList         = new IntervalList(header);
    intervalList.add(new Interval("12", 68921962, 68921962)); // deletion spans this
    final VCFFileReader reader              = getReader(CEU_TRIOS_INDELS_VCF);
    final Iterator<VariantContext> iterator = new ByIntervalListVariantContextIterator(reader, intervalList);
    Assert.assertTrue(iterator.hasNext());
    final VariantContext ctx = iterator.next();
    Assert.assertEquals(ctx.getStart(), 68921960);
    Assert.assertEquals(ctx.getEnd(), 68921966);
    Assert.assertFalse(iterator.hasNext());
    reader.close();
}
 
Example 12
Source File: ByIntervalListVariantContextIteratorTest.java    From picard with MIT License 5 votes vote down vote up
@Test
public void testVariantOverlappingMultipleIntervalsIsReturnedOnlyOnce() {
    final IntervalList intervalList         = new IntervalList(header);
    intervalList.add(new Interval("12", 68921962, 68921962)); // deletion spans this
    intervalList.add(new Interval("12", 68921964, 68921964)); // deletion spans this
    final VCFFileReader reader              = getReader(CEU_TRIOS_INDELS_VCF);
    final Iterator<VariantContext> iterator = new ByIntervalListVariantContextIterator(reader, intervalList);
    Assert.assertTrue(iterator.hasNext());
    final VariantContext ctx = iterator.next();
    Assert.assertEquals(ctx.getStart(), 68921960);
    Assert.assertEquals(ctx.getEnd(), 68921966);
    Assert.assertFalse(iterator.hasNext());
    reader.close();
}
 
Example 13
Source File: WgsMetricsTest.java    From picard with MIT License 5 votes vote down vote up
private IntervalList buildIntervalList(final int start, final int end) {
    final SAMFileHeader header = new SAMFileHeader();
    header.addSequence(new SAMSequenceRecord("CONTIG", 100000000));
    final IntervalList intervals = new IntervalList(header);
    if (0 < start) intervals.add(new Interval("CONTIG", start, end));
    return intervals;
}
 
Example 14
Source File: CollectHsMetricsTest.java    From picard with MIT License 4 votes vote down vote up
@Test
public void testHsMetricsHandlesIndelsAppropriately() throws IOException {
    final SAMRecordSetBuilder withDeletions = new SAMRecordSetBuilder(true, SortOrder.coordinate);
    final SAMRecordSetBuilder withInsertions = new SAMRecordSetBuilder(true, SortOrder.coordinate);
    final IntervalList targets = new IntervalList(withDeletions.getHeader());
    final IntervalList baits   = new IntervalList(withDeletions.getHeader());
    targets.add(new Interval("chr1", 1000, 1199, false, "t1"));
    baits.add(new Interval("chr1", 950,  1049, false, "b1"));
    baits.add(new Interval("chr1", 1050, 1149, false, "b2"));
    baits.add(new Interval("chr1", 1150, 1249, false, "b3"));

    // Generate 100 reads that fully cover the the target in each BAM
    for (int i=0; i<100; ++i) {
        withDeletions.addFrag( "d" + i, 0, 1000, false, false, "100M20D80M", null, 30);
        withInsertions.addFrag("i" + i, 0, 1000, false, false, "100M50I100M", null, 30);
    }

    // Write things out to file
    final File dir = IOUtil.createTempDir("hsmetrics.", ".test");
    final File bs = new File(dir, "baits.interval_list").getAbsoluteFile();
    final File ts = new File(dir, "targets.interval_list").getAbsoluteFile();
    baits.write(bs);
    targets.write(ts);
    final File withDelBam = writeBam(withDeletions,  new File(dir, "with_del.bam"));
    final File withInsBam = writeBam(withInsertions, new File(dir, "with_ins.bam"));

    // Now run CollectHsMetrics four times
    final File out = Files.createTempFile("hsmetrics.", ".txt").toFile();
    runPicardCommandLine(Arrays.asList("INCLUDE_INDELS=false", "SAMPLE_SIZE=0", "TI="+ts.getPath(), "BI="+bs.getPath(), "O="+out.getPath(), "I="+withDelBam.getAbsolutePath()));
    final HsMetrics delsWithoutIndelHandling = readMetrics(out);
    runPicardCommandLine(Arrays.asList("INCLUDE_INDELS=true", "SAMPLE_SIZE=0", "TI="+ts.getPath(), "BI="+bs.getPath(), "O="+out.getPath(), "I="+withDelBam.getAbsolutePath()));
    final HsMetrics delsWithIndelHandling = readMetrics(out);
    runPicardCommandLine(Arrays.asList("INCLUDE_INDELS=false", "SAMPLE_SIZE=0", "TI="+ts.getPath(), "BI="+bs.getPath(), "O="+out.getPath(), "I="+withInsBam.getAbsolutePath()));
    final HsMetrics insWithoutIndelHandling = readMetrics(out);
    runPicardCommandLine(Arrays.asList("INCLUDE_INDELS=true", "SAMPLE_SIZE=0", "TI="+ts.getPath(), "BI="+bs.getPath(), "O="+out.getPath(), "I="+withInsBam.getAbsolutePath()));
    final HsMetrics insWithIndelHandling = readMetrics(out);

    IOUtil.deleteDirectoryTree(dir);

    Assert.assertEquals(delsWithoutIndelHandling.MEAN_TARGET_COVERAGE, 90.0);  // 100X over 180/200 bases due to deletion
    Assert.assertEquals(delsWithIndelHandling.MEAN_TARGET_COVERAGE, 100.0);    // 100X with counting the deletion

    Assert.assertEquals(insWithoutIndelHandling.PCT_USABLE_BASES_ON_TARGET, 200/250d); // 50/250 inserted bases are not counted as on target
    Assert.assertEquals(insWithIndelHandling.PCT_USABLE_BASES_ON_TARGET,   1.0d);      // inserted bases are counted as on target
}
 
Example 15
Source File: CreateSnpIntervalFromVcf.java    From Drop-seq with MIT License 4 votes vote down vote up
public IntervalList processData(final File vcfFile, final File sdFile, final Set<String> sample, int GQThreshold, final boolean hetSNPsOnly) {

		final VCFFileReader reader = new VCFFileReader(vcfFile, false);
		if (!VCFUtils.GQInHeader(reader)) {
			GQThreshold=-1;
			log.info("Genotype Quality [GQ] not found in header.  Disabling GQ_THRESHOLD parameter");
		}
		
		final VCFHeader inputVcfHeader = new VCFHeader(reader.getFileHeader().getMetaDataInInputOrder());
		SAMSequenceDictionary sequenceDictionary = inputVcfHeader.getSequenceDictionary();
		Set<String> sampleListFinal = sample;
		if (sample==null || sample.isEmpty()) {
			ArrayList<String> s = reader.getFileHeader().getSampleNamesInOrder();
			sampleListFinal=new TreeSet<String>(s);
		}

		if (sdFile != null)
			sequenceDictionary = getSequenceDictionary(sdFile);

		final ProgressLogger progress = new ProgressLogger(this.log, 500000);

		final SAMFileHeader samHeader = new SAMFileHeader();
		samHeader.setSequenceDictionary(sequenceDictionary);
		IntervalList result = new IntervalList(samHeader);

		// Go through the input, find sites we want to keep.
		final PeekableIterator<VariantContext> iterator = new PeekableIterator<>(reader.iterator());

		validateRequestedSamples (iterator, sampleListFinal);

		while (iterator.hasNext()) {
			final VariantContext site = iterator.next();
			progress.record(site.getContig(), site.getStart());
			// for now drop any filtered site.
			if (site.isFiltered())
				continue;
			// move onto the next record if the site is not a SNP or the samples aren't all heterozygous.
			if (!site.isSNP())
				continue;
			if (!sitePassesFilters(site, sampleListFinal, GQThreshold, hetSNPsOnly))
				continue;
			Interval varInt = new Interval(site.getContig(), site.getStart(),
					site.getEnd(), true, site.getID());

			// final Interval site = findHeterozygousSites(full, SAMPLE);
			result.add(varInt);

		}

		CloserUtil.close(iterator);
		CloserUtil.close(reader);
		return (result);
	}
 
Example 16
Source File: CollectRnaSeqMetricsTest.java    From picard with MIT License 4 votes vote down vote up
@Test
public void testPctRibosomalBases() throws Exception {
    final String sequence = "chr1";
    final String ignoredSequence = "chrM";

    // Create some alignments that hit the ribosomal sequence, various parts of the gene, and intergenic.
    final SAMRecordSetBuilder builder = new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.coordinate);
    // Set seed so that strandedness is consistent among runs.
    builder.setRandomSeed(0);
    final int sequenceIndex = builder.getHeader().getSequenceIndex(sequence);

    builder.addPair("rrnaPair", sequenceIndex, 400, 500, false, false, "35I1M", "35I1M", true, true, -1);
    builder.addFrag("frag1", sequenceIndex, 150, true, false, "34I2M", "*", -1);
    builder.addFrag("frag2", sequenceIndex, 190, true, false, "35I1M", "*", -1);
    builder.addFrag("ignoredFrag", builder.getHeader().getSequenceIndex(ignoredSequence), 1, false);
    final File samFile = File.createTempFile("tmp.collectRnaSeqMetrics.", ".sam");
    samFile.deleteOnExit();
    try (SAMFileWriter samWriter = new SAMFileWriterFactory().makeSAMWriter(builder.getHeader(), false, samFile)) {
        for (final SAMRecord rec : builder.getRecords()) {
            samWriter.addAlignment(rec);
        }
    }

    // Create an interval list with one ribosomal interval.
    final Interval rRnaInterval = new Interval(sequence, 300, 520, true, "rRNA");
    final IntervalList rRnaIntervalList = new IntervalList(builder.getHeader());
    rRnaIntervalList.add(rRnaInterval);
    final File rRnaIntervalsFile = File.createTempFile("tmp.rRna.", ".interval_list");
    rRnaIntervalsFile.deleteOnExit();
    rRnaIntervalList.write(rRnaIntervalsFile);

    // Generate the metrics.
    final File metricsFile = File.createTempFile("tmp.", ".rna_metrics");
    metricsFile.deleteOnExit();

    final String[] args = new String[]{
            "INPUT=" + samFile.getAbsolutePath(),
            "OUTPUT=" + metricsFile.getAbsolutePath(),
            "REF_FLAT=" + getRefFlatFile(sequence).getAbsolutePath(),
            "RIBOSOMAL_INTERVALS=" + rRnaIntervalsFile.getAbsolutePath(),
            "STRAND_SPECIFICITY=SECOND_READ_TRANSCRIPTION_STRAND",
            "IGNORE_SEQUENCE=" + ignoredSequence
    };
    Assert.assertEquals(runPicardCommandLine(args), 0);

    final MetricsFile<RnaSeqMetrics, Comparable<?>> output = new MetricsFile<>();
    output.read(new FileReader(metricsFile));

    final RnaSeqMetrics metrics = output.getMetrics().get(0);

    Assert.assertEquals(metrics.PCT_RIBOSOMAL_BASES, 0.4);
}
 
Example 17
Source File: LiftOverIntervalList.java    From picard with MIT License 4 votes vote down vote up
/**
 * Do the work after command line has been parsed. RuntimeException may be
 * thrown by this method, and are reported appropriately.
 *
 * @return program exit status.
 */
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsReadable(SEQUENCE_DICTIONARY);
    IOUtil.assertFileIsReadable(CHAIN);
    IOUtil.assertFileIsWritable(OUTPUT);
    if (REJECT != null) IOUtil.assertFileIsWritable(REJECT);

    final LiftOver liftOver = new LiftOver(CHAIN);
    liftOver.setLiftOverMinMatch(MIN_LIFTOVER_PCT);

    final IntervalList intervalList = IntervalList.fromFile(INPUT);
    final IntervalList rejects = new IntervalList(intervalList.getHeader());

    final long baseCount = intervalList.getBaseCount();
    LOG.info("Lifting over " + intervalList.getIntervals().size() + " intervals, encompassing " +
            baseCount + " bases.");

    final SAMFileHeader toHeader = new SAMFileHeader(SAMSequenceDictionaryExtractor.extractDictionary(SEQUENCE_DICTIONARY.toPath()));
    liftOver.validateToSequences(toHeader.getSequenceDictionary());
    final IntervalList toIntervals = new IntervalList(toHeader);
    for (final Interval fromInterval : intervalList) {
        final Interval toInterval = liftOver.liftOver(fromInterval);
        if (toInterval != null) {
            toIntervals.add(toInterval);
        } else {
            rejects.add(fromInterval);
            LOG.warn("Liftover failed for ", fromInterval, " (len ", fromInterval.length(), ")");
            final List<LiftOver.PartialLiftover> partials = liftOver.diagnosticLiftover(fromInterval);
            for (final LiftOver.PartialLiftover partial : partials) {
                LOG.info(partial);
            }
        }
    }

    toIntervals.sorted().write(OUTPUT);

    if (REJECT != null) {
        rejects.write(REJECT);
    }
    final long rejectBaseCount = rejects.getBaseCount();

    LOG.info(String.format("Liftover Complete. \n" +
                    "%d of %d intervals failed (%g%%) to liftover, encompassing %d of %d bases (%g%%).",
            rejects.getIntervals().size(), intervalList.getIntervals().size(),
            100 * rejects.getIntervals().size() / (double) intervalList.getIntervals().size(),
            rejectBaseCount, baseCount,
            100 * rejectBaseCount / (double) baseCount
    ));

    return rejects.getIntervals().isEmpty() ? 0 : 1;
}
 
Example 18
Source File: IntervalListScatter.java    From picard with MIT License 4 votes vote down vote up
@Override
public IntervalList next() {
    intervalsReturned++;
    final IntervalList runningIntervalList = new IntervalList(header);

    while (!intervalQueue.isEmpty() && intervalsReturned < scatterCount ) {
        final Interval interval = intervalQueue.pollFirst();
        final long currentSize = scatterer.listWeight(runningIntervalList);

        // The mean expected size of the remaining divisions
        // Note 1: While this looks like double counting, it isn't. We subtract here the bases that are in the _current_ running intervalList,
        // and when we create a new intervalList (below) we modify weightRemaining.
        // Note 2: The -1 in the denominator is for "runningIntervalList" that isn't yet counted in accumulatedIntervalLists.size()

        final double projectedSizeOfRemainingDivisions = (weightRemaining - scatterer.listWeight(runningIntervalList)) / ((double) scatterCount - intervalsReturned);

        // split current interval into part that will go into current list (first) and
        // other part that will get put back into queue for next list.
        final List<Interval> split = scatterer.takeSome(interval, idealSplitWeight, currentSize, projectedSizeOfRemainingDivisions);
        if (split.size() != 2) {
            throw new IllegalStateException("takeSome should always return exactly 2 (possibly null) intervals.");
        }

        // push second element back to queue (if exists).
        if (split.get(1) != null) {
            intervalQueue.addFirst(split.get(1));
        }

        // if first is null, we are done with the current list, so pack it in.
        if (split.get(0) == null) {
            weightRemaining -= scatterer.listWeight(runningIntervalList);
            //add running list to return value, and create new running list
            return runningIntervalList;
        } else {
            runningIntervalList.add(split.get(0));
        }
    }
    // Flush the remaining intervals into the last list.
    while(!intervalQueue.isEmpty()){
        runningIntervalList.add(intervalQueue.pollFirst());
    }

    if (!runningIntervalList.getIntervals().isEmpty()){
        return runningIntervalList;
    }
    // you asked for a next() when hasNext() was false
    throw new NoSuchElementException();
}
 
Example 19
Source File: FilterIntervalsIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
@DataProvider(name = "dataCountBasedFilters")
public Object[][] dataCountBasedFilters() {
    final int numSamples = 100;
    final int numIntervals = 110;
    final int numIntervalsBelowCountThreshold = 10;
    final int numUncorruptedSamples = 10;
    final int lowCountFilterCountThreshold = 5;
    final double epsilon = 1E-1;
    final double percentageOfSamples = 100. * (numSamples - numUncorruptedSamples) / numSamples - epsilon;

    final File intervalsFile = createTempFile("intervals", ".interval_list");
    final File intervalsWithExtraIntervalFile = createTempFile("intervals-with-extra-interval", ".interval_list");
    final List<File> countFiles = new ArrayList<>(numSamples);
    for (int sampleIndex = 0; sampleIndex < numSamples; sampleIndex++) {
        final String sampleName = String.format("sample-%d", sampleIndex);
        final SampleLocatableMetadata sampleMetadata = new SimpleSampleLocatableMetadata(sampleName, SEQUENCE_DICTIONARY);
        final List<SimpleCount> counts = new ArrayList<>(numIntervals);
        for (int intervalIndex = 0; intervalIndex < numIntervals; intervalIndex++) {
            final SimpleInterval interval = new SimpleInterval("20", 10 * intervalIndex + 1, 10 * (intervalIndex + 1));
            if (intervalIndex < numIntervalsBelowCountThreshold && sampleIndex >= numUncorruptedSamples) {
                //corrupt first numIntervalsBelowCountThreshold intervals in last (numSamples - numUncorruptedSamples) samples with low counts
                counts.add(new SimpleCount(interval, lowCountFilterCountThreshold - 1));
            } else {
                counts.add(new SimpleCount(interval, intervalIndex + lowCountFilterCountThreshold));
            }
        }
        final SimpleCountCollection sampleCounts = new SimpleCountCollection(sampleMetadata, counts);
        final File countFile = createTempFile(sampleName, ".tsv");
        sampleCounts.write(countFile);
        countFiles.add(countFile);

        if (sampleIndex == 0) {
            final IntervalList intervals = new IntervalList(sampleCounts.getMetadata().getSequenceDictionary());
            sampleCounts.getIntervals().forEach(i -> intervals.add(new Interval(i)));
            intervals.write(intervalsFile);

            //test that intersection gets rid of extra intervals in the interval list
            final IntervalList intervalsWithExtraInterval = new IntervalList(sampleCounts.getMetadata().getSequenceDictionary());
            sampleCounts.getIntervals().forEach(i -> intervalsWithExtraInterval.add(new Interval(i)));
            intervalsWithExtraInterval.add(new Interval("20", 100000, 200000));
            intervalsWithExtraInterval.write(intervalsWithExtraIntervalFile);
        }
    }

    return new Object[][]{
            //intervals file, count files, lowCountFilterCountThreshold, lowCountFilterPercentageOfSamples,
            //extremeCountFilterMinimumPercentile, extremeCountFilterMaximumPercentile, extremeCountFilterPercentageOfSamples,
            //expected array of indices of retained intervals
            {intervalsFile, countFiles, lowCountFilterCountThreshold, percentageOfSamples, 1., 99., percentageOfSamples,
                    IntStream.range(numIntervalsBelowCountThreshold + 1, numIntervalsBelowCountThreshold + 99).boxed().collect(Collectors.toList())},
            {intervalsFile, countFiles, lowCountFilterCountThreshold, percentageOfSamples, 1., 90., percentageOfSamples,
                    IntStream.range(numIntervalsBelowCountThreshold + 1, numIntervalsBelowCountThreshold + 90).boxed().collect(Collectors.toList())},
            {intervalsWithExtraIntervalFile, countFiles, lowCountFilterCountThreshold, percentageOfSamples, 1., 99., percentageOfSamples,
                    IntStream.range(numIntervalsBelowCountThreshold + 1, numIntervalsBelowCountThreshold + 99).boxed().collect(Collectors.toList())},
            {intervalsWithExtraIntervalFile, countFiles, lowCountFilterCountThreshold, percentageOfSamples, 1., 90., percentageOfSamples,
                    IntStream.range(numIntervalsBelowCountThreshold + 1, numIntervalsBelowCountThreshold + 90).boxed().collect(Collectors.toList())}};
}
 
Example 20
Source File: SNPUMICellReadIteratorWrapperTest.java    From Drop-seq with MIT License 4 votes vote down vote up
@Test(enabled=true)
public void testGeneSNPSplitting () {
	List<String> cellBarcodeList = ParseBarcodeFile.readCellBarcodeFile(cellBCFile);
	IntervalList loci = IntervalList.fromFile(snpIntervals);
	// make a new SNP that is 10 bases before the first SNP.  Reads in the test data will hit both.
	Interval i1 = loci.getIntervals().iterator().next();
	Interval i2 = new Interval (i1.getContig(), i1.getStart()-10, i1.getStart()-10, true, "testSNP1");
	loci.add(i2);



	//A read that hits only the default SNP: start read at 76227020
	SAMRecord r1 = getReadByName("NS500217:67:H14GMBGXX:1:11308:22039:11268", smallBAMFile);
	Collection<SAMRecord> tempList = processOneRead(r1, loci, cellBarcodeList);
	Assert.assertTrue(tempList.size()==1);
	Assert.assertEquals(1, getNumGenes(tempList));
	Assert.assertEquals(1, getNumSNPs(tempList));

	//A read that hits both the default SNP and the new SNP: start read at 76227000
	r1 = getReadByName("NS500217:67:H14GMBGXX:1:13202:10555:15929", smallBAMFile);
	tempList = processOneRead(r1, loci, cellBarcodeList);
	Assert.assertTrue(tempList.size()==2);
	Assert.assertEquals(1, getNumGenes(tempList));
	Assert.assertEquals(2, getNumSNPs(tempList));


	//  A read that hits only the default SNP: start read at 76227020, has 2 genes.
	r1 = getReadByName("NS500217:67:H14GMBGXX:1:21206:20467:19854", smallBAMFile);
	tempList = processOneRead(r1, loci, cellBarcodeList);
	Assert.assertTrue(tempList.size()==2);
	Assert.assertEquals(2, getNumGenes(tempList));
	Assert.assertEquals(1, getNumSNPs(tempList));


	//A read that hits both the default SNP and the new SNP: start read at 76227000, has 2 genes.
	r1 = getReadByName("NS500217:67:H14GMBGXX:1:22302:3826:3320", smallBAMFile);
	tempList = processOneRead(r1, loci, cellBarcodeList);
	Assert.assertTrue(tempList.size()==4);
	Assert.assertEquals(2, getNumGenes(tempList));
	Assert.assertEquals(2, getNumSNPs(tempList));


}