htsjdk.samtools.util.IntervalList Java Examples

The following examples show how to use htsjdk.samtools.util.IntervalList. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: GenomicsDBImportIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
private void createAndCheckIntervalListFromExistingWorkspace(final String workspace, final Boolean singleInterval) {
    final ArgumentsBuilder args = new ArgumentsBuilder();
    final String outputIntervalList = workspace + "interval_output";
    args.add(GenomicsDBImport.INCREMENTAL_WORKSPACE_ARG_LONG_NAME, workspace);
    args.add(GenomicsDBImport.INTERVAL_LIST_LONG_NAME, outputIntervalList);

    runCommandLine(args);

    String expectedOutput;
    if (singleInterval) {
        expectedOutput = INTERVAL_PICARD_STYLE_EXPECTED;
    } else {
        expectedOutput = MULTIPLE_NON_ADJACENT_INTERVALS_THAT_WORK_WITH_COMBINE_GVCFS_PICARD_STYLE_EXPECTED;
    }
    final IntervalList generatedInterval = IntervalList.fromFile(new File(outputIntervalList));
    final IntervalList expectedInterval = IntervalList.fromFile(new File(expectedOutput));
    Assert.assertTrue(generatedInterval.sorted().equals(expectedInterval.sorted()));
}
 
Example #2
Source File: FilterIntervals.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Override
public Object doWork() {
    validateArguments();

    resolveAndValidateIntervals();

    final SimpleIntervalCollection filteredIntervals = filterIntervals();
    logger.info(String.format("Writing filtered intervals to %s...", outputFilteredIntervalsFile.getAbsolutePath()));
    final IntervalList filteredIntervalList = new IntervalList(filteredIntervals.getMetadata().getSequenceDictionary());
    filteredIntervals.getIntervals().forEach(i -> filteredIntervalList.add(new Interval(i)));
    filteredIntervalList.write(outputFilteredIntervalsFile);

    logger.info(String.format("%s complete.", getClass().getSimpleName()));

    return null;
}
 
Example #3
Source File: TargetMetricsCollector.java    From picard with MIT License 6 votes vote down vote up
public TargetMetricsCollector(final Set<MetricAccumulationLevel> accumulationLevels,
                              final List<SAMReadGroupRecord> samRgRecords,
                              final ReferenceSequenceFile refFile,
                              final File perTargetCoverage,
                              final File perBaseCoverage,
                              final IntervalList targetIntervals,
                              final IntervalList probeIntervals,
                              final String probeSetName,
                              final int nearProbeDistance,
                              final int minimumMappingQuality,
                              final int minimumBaseQuality,
                              final boolean clipOverlappingReads,
                              final int coverageCap,
                              final int sampleSize) {
    this(accumulationLevels, samRgRecords, refFile, perTargetCoverage, perBaseCoverage, targetIntervals, probeIntervals, probeSetName, nearProbeDistance, minimumMappingQuality, minimumBaseQuality, clipOverlappingReads, false, false, coverageCap, sampleSize);
}
 
Example #4
Source File: IntervalUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test(expectedExceptions={UserException.CouldNotReadInputFile.class, UserException.MalformedGenomeLoc.class, UserException.MalformedFile.class}, dataProvider="invalidIntervalTestData")
public void testIntervalFileToListInvalidPicardIntervalHandling(GenomeLocParser genomeLocParser,
                                   String contig, int intervalStart, int intervalEnd ) throws Exception {

    final SAMFileHeader picardFileHeader = new SAMFileHeader();
    picardFileHeader.addSequence(genomeLocParser.getContigInfo("1"));
    picardFileHeader.addSequence(new SAMSequenceRecord("2", 100000));

    final IntervalList picardIntervals = new IntervalList(picardFileHeader);
    picardIntervals.add(new Interval(contig, intervalStart, intervalEnd, true, "dummyname"));

    final File picardIntervalFile = createTempFile("testIntervalFileToListInvalidPicardIntervalHandling", ".interval_list");
    picardIntervals.write(picardIntervalFile);

    // loadIntervals() will validate all intervals against the sequence dictionary in our genomeLocParser,
    // and should throw for all intervals in our invalidIntervalTestData set
    IntervalUtils.parseIntervalArguments(genomeLocParser, picardIntervalFile.getAbsolutePath());
}
 
Example #5
Source File: IntervalListTools.java    From picard with MIT License 6 votes vote down vote up
/**
 * Method to scatter an interval list by locus.
 *
 * @param list The list of intervals to scatter
 * @return The scattered intervals, represented as a {@link List} of {@link IntervalList}
 */
private  ScatterSummary writeScatterIntervals(final IntervalList list) {
    final IntervalListScatterer scatterer = SUBDIVISION_MODE.make();
    final IntervalListScatter scatter = new IntervalListScatter(scatterer, list, SCATTER_COUNT);

    final DecimalFormat fileNameFormatter = new DecimalFormat("0000");
    int fileIndex = 1;
    final ScatterSummary summary = new ScatterSummary();
    for (final IntervalList intervals : scatter) {
        summary.size++;
        summary.baseCount += intervals.getBaseCount();
        summary.intervalCount += intervals.getIntervals().size();
        intervals.write(createDirectoryAndGetScatterFile(OUTPUT, SCATTER_COUNT, fileNameFormatter.format(fileIndex++)));
    }

    return summary;
}
 
Example #6
Source File: BaitDesigner.java    From picard with MIT License 6 votes vote down vote up
/** Calculates a few statistics about the bait design that can then be output. */
void calculateStatistics(final IntervalList targets, final IntervalList baits) {
    this.TARGET_TERRITORY = (int) targets.getUniqueBaseCount();
    this.TARGET_COUNT = targets.size();
    this.BAIT_TERRITORY = (int) baits.getUniqueBaseCount();
    this.BAIT_COUNT = baits.size();
    this.DESIGN_EFFICIENCY = this.TARGET_TERRITORY / (double) this.BAIT_TERRITORY;

    // Figure out the intersection between all targets and all baits
    final IntervalList tmp = new IntervalList(targets.getHeader());
    final OverlapDetector<Interval> detector = new OverlapDetector<Interval>(0, 0);
    detector.addAll(baits.getIntervals(), baits.getIntervals());

    for (final Interval target : targets) {
        final Collection<Interval> overlaps = detector.getOverlaps(target);

        if (overlaps.isEmpty()) {
            this.ZERO_BAIT_TARGETS++;
        } else {
            for (final Interval i : overlaps) tmp.add(target.intersect(i));
        }
    }

    tmp.uniqued();
    this.BAIT_TARGET_TERRITORY_INTERSECTION = (int) tmp.getBaseCount();
}
 
Example #7
Source File: MultiCellDigitalAlleleCountsTest.java    From Drop-seq with MIT License 6 votes vote down vote up
@Test
public void getMetaAnalysis() {
	List<String> cellBarcodes = ParseBarcodeFile.readCellBarcodeFile(cellBCFile);
	IntervalList snpIntervals = IntervalList.fromFile(snpIntervalsFile);
	int baseQualityThreshold=10;

	SNPUMIBasePileupIterator sbpi = new SNPUMIBasePileupIterator(
			smallBAMFile, snpIntervals, GENE_NAME_TAG, GENE_STRAND_TAG, GENE_FUNCTION_TAG,
			LOCUS_FUNCTION_LIST, STRAND_STRATEGY, cellBarcodeTag,
			molBCTag, snpTag, functionTag, readMQ, assignReadsToAllGenes, cellBarcodes);

	MultiCellDigitalAlleleCountsIterator multiIter = new MultiCellDigitalAlleleCountsIterator(new DigitalAlleleCountsIterator(sbpi, baseQualityThreshold));
	MultiCellDigitalAlleleCounts r = multiIter.next();

	DigitalAlleleCounts c = r.getMetaAnalysis();
	c.toString();
	Assert.assertNotNull(c);
}
 
Example #8
Source File: MaskReferenceSequence.java    From Drop-seq with MIT License 6 votes vote down vote up
private void processByPartialContig (final ReferenceSequenceFile ref, final FastaSequenceFileWriter writer, final File intervalListFile) {
	SAMSequenceDictionary sd = ref.getSequenceDictionary();
	// validate that the intervals and the reference have the same sequence dictionary.
	IntervalList iList = IntervalList.fromFile(intervalListFile);
	iList.getHeader().getSequenceDictionary().assertSameDictionary(sd);
	// map the intervals to a map to each contig.
	Map<String, List<Interval>> intervalsPerContig = getIntervalsForContig(iList);

	for (SAMSequenceRecord r: sd.getSequences()) {
		String contig = r.getSequenceName();
		log.info("Processing partial contig " + contig);
		// this list can be null.
		List<Interval> intervalsToMask = intervalsPerContig.get(contig);
		ReferenceSequence rs = ref.getSequence(contig);
		writeSequence(rs, intervalsToMask, writer);
	}
}
 
Example #9
Source File: HetPulldownCalculatorUnitTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@DataProvider(name = "inputGetTumorHetPulldown")
public Object[][] inputGetTumorHetPulldown() {
    final Pulldown tumorHetPulldown = new Pulldown(normalHeader);
    tumorHetPulldown.add(new AllelicCount(new SimpleInterval("1", 11522, 11522), 7, 4));
    tumorHetPulldown.add(new AllelicCount(new SimpleInterval("1", 12098, 12098), 8, 6));
    tumorHetPulldown.add(new AllelicCount(new SimpleInterval("1", 14630, 14630), 9, 8));
    tumorHetPulldown.add(new AllelicCount(new SimpleInterval("2", 14689, 14689), 6, 9));
    tumorHetPulldown.add(new AllelicCount(new SimpleInterval("2", 14982, 14982), 6, 5));

    final IntervalList normalHetIntervals = new IntervalList(tumorHeader);
    normalHetIntervals.add(new Interval("1", 11522, 11522));
    normalHetIntervals.add(new Interval("1", 12098, 12098));
    normalHetIntervals.add(new Interval("1", 14630, 14630));
    normalHetIntervals.add(new Interval("2", 14689, 14689));
    normalHetIntervals.add(new Interval("2", 14982, 14982));

    return new Object[][]{
            {normalHetIntervals, tumorHetPulldown}
    };
}
 
Example #10
Source File: IntervalUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Splits an interval list into multiple files.
 * @param fileHeader The sam file header.
 * @param splits Pre-divided genome locs returned by splitFixedIntervals.
 * @param scatterParts The output interval lists to write to.
 */
public static void scatterFixedIntervals(final SAMFileHeader fileHeader, final List<List<GenomeLoc>> splits, final List<File> scatterParts) {
    Utils.nonNull(fileHeader, "fileHeader is null");
    Utils.nonNull(splits, "splits is null");
    Utils.nonNull(scatterParts, "scatterParts is null");
    Utils.containsNoNull(splits, "null split loc");

    if (splits.size() != scatterParts.size()) {
        throw new CommandLineException.BadArgumentValue("splits", String.format("Split points %d does not equal the number of scatter parts %d.", splits.size(), scatterParts.size()));
    }

    int fileIndex = 0;
    int locIndex = 1;
    for (final List<GenomeLoc> split : splits) {
        final IntervalList intervalList = new IntervalList(fileHeader);
        for (final GenomeLoc loc : split) {
            intervalList.add(toInterval(loc, locIndex++));
        }
        intervalList.write(scatterParts.get(fileIndex++));
    }
}
 
Example #11
Source File: HsMetricCollector.java    From picard with MIT License 6 votes vote down vote up
public HsMetricCollector(final Set<MetricAccumulationLevel> accumulationLevels,
                         final List<SAMReadGroupRecord> samRgRecords,
                         final ReferenceSequenceFile refFile,
                         final File perTargetCoverage,
                         final File perBaseCoverage,
                         final IntervalList targetIntervals,
                         final IntervalList probeIntervals,
                         final String probeSetName,
                         final int nearProbeDistance,
                         final int minimumMappingQuality,
                         final int minimumBaseQuality,
                         final boolean clipOverlappingReads,
                         final boolean noSideEffects,
                         final boolean includeIndels,
                         final int coverageCap,
                         final int sampleSize) {
    super(accumulationLevels, samRgRecords, refFile, perTargetCoverage, perBaseCoverage, targetIntervals, probeIntervals, probeSetName, nearProbeDistance, minimumMappingQuality, minimumBaseQuality, clipOverlappingReads, noSideEffects, includeIndels, coverageCap, sampleSize);
}
 
Example #12
Source File: DbSnpBitSetUtil.java    From picard with MIT License 6 votes vote down vote up
/** Factory method to create both a SNP bitmask and an indel bitmask in a single pass of the VCF.
 * If intervals are given, consider only SNP and indel sites that overlap the intervals.  If log is given,
 * progress loading the variants will be written to the log. */
public static DbSnpBitSets createSnpAndIndelBitSets(final File dbSnpFile,
                                                    final SAMSequenceDictionary sequenceDictionary,
                                                    final IntervalList intervals,
                                                    final Optional<Log> log) {

    final DbSnpBitSets sets = new DbSnpBitSets();
    sets.snps   = new DbSnpBitSetUtil();
    sets.indels = new DbSnpBitSetUtil();

    final Map<DbSnpBitSetUtil, Set<VariantType>> map = new HashMap<>();
    map.put(sets.snps,   EnumSet.of(VariantType.SNP));
    map.put(sets.indels, EnumSet.of(VariantType.insertion, VariantType.deletion));
    loadVcf(dbSnpFile, sequenceDictionary, map, intervals, log);
    return sets;
}
 
Example #13
Source File: VcfToIntervalListTest.java    From picard with MIT License 6 votes vote down vote up
@Test(dataProvider="VcfToIntervalListData")
public void testExcludingFiltered(
        final File inputFile,
        final boolean includeFiltered,
        final int expectedIntervalsSize) throws IOException
{
    final File outputFile = File.createTempFile("vcftointervallist_", ".interval_list");
    outputFile.deleteOnExit();
    final List<String> arguments = new ArrayList<>();
    arguments.add("I=" + inputFile.getAbsolutePath());
    arguments.add("O=" + outputFile.getAbsolutePath());
    if (includeFiltered) {
        arguments.add(VcfToIntervalList.INCLUDE_FILTERED_SHORT_NAME + "=true");
    }
    runPicardCommandLine(arguments);

    Assert.assertTrue(outputFile.exists());

    final List<Interval> intervals = IntervalList.fromFile(outputFile).getIntervals();

    Assert.assertEquals(intervals.size(), expectedIntervalsSize);
}
 
Example #14
Source File: DigitalAlleleCountsIteratorTest.java    From Drop-seq with MIT License 6 votes vote down vote up
@Test
public void testGatherDACs() {
	List<String> cellBarcodes = ParseBarcodeFile.readCellBarcodeFile(cellBCFile);
	IntervalList snpIntervals = IntervalList.fromFile(snpIntervalsFile);
	int baseQualityThreshold=10;

	SNPUMIBasePileupIterator sbpi = new SNPUMIBasePileupIterator(
			smallBAMFile, snpIntervals, GENE_NAME_TAG, GENE_STRAND_TAG, GENE_FUNCTION_TAG,
			LOCUS_FUNCTION_LIST,STRAND_STRATEGY, cellBarcodeTag, molBCTag, snpTag,
			GeneFunctionCommandLineBase.DEFAULT_FUNCTION_TAG, readMQ, assignReadsToAllGenes,
			cellBarcodes);

	DigitalAlleleCountsIterator daci = new DigitalAlleleCountsIterator(sbpi, baseQualityThreshold);
	int counter=0;
	while (daci.hasNext()) {
		DigitalAlleleCounts dac = daci.next();
		checkAnswerBQ10(dac);
		counter++;
	}
	// assert that you saw 2 dacs, for the 2 cells.
	Assert.assertEquals(2, counter);
	daci.close();
}
 
Example #15
Source File: CollectSamErrorMetrics.java    From picard with MIT License 6 votes vote down vote up
/**
 * Interprets intervals from the input INTERVALS, if there's a file with that name, it opens the file, otherwise
 * it tries to parse it, checks that their dictionaries all agree with the input SequenceDictionary and returns the
 * intersection of all the lists.
 *
 * @param sequenceDictionary a {@link SAMSequenceDictionary} to parse the intervals against.
 * @return the intersection of the interval lists pointed to by the input parameter INTERVALS
 */
private IntervalList getIntervals(final SAMSequenceDictionary sequenceDictionary) {

    IntervalList regionOfInterest = null;

    for (final File intervalListFile : INTERVALS) {

        if (!intervalListFile.exists()) {
            throw new IllegalArgumentException("Input file " + intervalListFile + " doesn't seem to exist. ");
        }

        log.info("Reading IntervalList ", intervalListFile, ".");
        final IntervalList temp = IntervalList.fromFile(intervalListFile);
        sequenceDictionary.assertSameDictionary(temp.getHeader().getSequenceDictionary());

        if (regionOfInterest == null) {
            regionOfInterest = temp;
        } else {
            log.info("Intersecting interval_list: ", intervalListFile, ".");
            regionOfInterest = IntervalList.intersection(regionOfInterest, temp);
        }
    }
    return regionOfInterest;
}
 
Example #16
Source File: ByIntervalListVariantContextIteratorTest.java    From picard with MIT License 5 votes vote down vote up
@Test
public void testSimpleEnclosing() {
    final IntervalList intervalList         = new IntervalList(header);
    intervalList.add(new Interval("12", 68921962, 68921962)); // deletion spans this
    final VCFFileReader reader              = getReader(CEU_TRIOS_INDELS_VCF);
    final Iterator<VariantContext> iterator = new ByIntervalListVariantContextIterator(reader, intervalList);
    Assert.assertTrue(iterator.hasNext());
    final VariantContext ctx = iterator.next();
    Assert.assertEquals(ctx.getStart(), 68921960);
    Assert.assertEquals(ctx.getEnd(), 68921966);
    Assert.assertFalse(iterator.hasNext());
    reader.close();
}
 
Example #17
Source File: FingerprintChecker.java    From picard with MIT License 5 votes vote down vote up
/**
 * Takes a set of fingerprints and returns an IntervalList containing all the loci that
 * can be productively examined in sequencing data to compare to one or more of the
 * fingerprints.
 */
public IntervalList getLociToGenotype(final Collection<Fingerprint> fingerprints) {
    final IntervalList intervals = new IntervalList(this.haplotypes.getHeader());

    for (final Fingerprint fp : fingerprints) {
        for (final HaplotypeProbabilities genotype : fp.values()) {
            final HaplotypeBlock h = genotype.getHaplotype();
            for (final Snp snp : h.getSnps()) {
                intervals.add(new Interval(snp.getChrom(), snp.getPos(), snp.getPos(), false, snp.getName()));
            }
        }
    }

    return intervals.uniqued();
}
 
Example #18
Source File: FilterIntervalsIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test(dataProvider = "dataAnnotationBasedFilters")
public void testAnnotationBasedFilters(final File intervalsFile,
                                       final List<String> excludedIntervals,
                                       final File annotatedIntervalsFile,
                                       final double minimumGCContent,
                                       final double maximumGCContent,
                                       final double minimumMappability,
                                       final double maximumMappability,
                                       final double minimumSegmentalDuplicationContent,
                                       final double maximumSegmentalDuplicationContent,
                                       final List<Integer> expectedIndices) {
    final File outputFile = createTempFile("filter-intervals-test", ".interval_list");
    final ArgumentsBuilder argsBuilder = new ArgumentsBuilder()
            .add(StandardArgumentDefinitions.INTERVALS_LONG_NAME, intervalsFile.getAbsolutePath())
            .add(CopyNumberStandardArgument.ANNOTATED_INTERVALS_FILE_LONG_NAME, annotatedIntervalsFile.getAbsolutePath())
            .add(FilterIntervals.MINIMUM_GC_CONTENT_LONG_NAME, Double.toString(minimumGCContent))
            .add(FilterIntervals.MAXIMUM_GC_CONTENT_LONG_NAME, Double.toString(maximumGCContent))
            .add(FilterIntervals.MINIMUM_MAPPABILITY_LONG_NAME, Double.toString(minimumMappability))
            .add(FilterIntervals.MAXIMUM_MAPPABILITY_LONG_NAME, Double.toString(maximumMappability))
            .add(FilterIntervals.MINIMUM_SEGMENTAL_DUPLICATION_CONTENT_LONG_NAME, Double.toString(minimumSegmentalDuplicationContent))
            .add(FilterIntervals.MAXIMUM_SEGMENTAL_DUPLICATION_CONTENT_LONG_NAME, Double.toString(maximumSegmentalDuplicationContent))
            .add(IntervalArgumentCollection.INTERVAL_MERGING_RULE_LONG_NAME, IntervalMergingRule.OVERLAPPING_ONLY.toString())
            .addOutput(outputFile);
    excludedIntervals.forEach(i -> argsBuilder.add(IntervalArgumentCollection.EXCLUDE_INTERVALS_LONG_NAME, i));
    runCommandLine(argsBuilder);
    final IntervalList result = IntervalList.fromFile(outputFile);
    final IntervalList all = IntervalList.fromFile(intervalsFile);
    final List<Interval> allIntervals = all.getIntervals();
    final IntervalList expected = new IntervalList(all.getHeader().getSequenceDictionary());
    expectedIndices.stream().map(allIntervals::get).map(Interval::new).forEach(expected::add);
    Assert.assertEquals(result, expected);
    Assert.assertNotSame(result, expected);
}
 
Example #19
Source File: IntervalListToolsTest.java    From picard with MIT License 5 votes vote down vote up
@Test(dataProvider = "actionAndTotalBasesData")
public void testActions(final IntervalListTools.Action action, final long bases, final int intervals) throws IOException {
    final IntervalList il = tester(action);
    Assert.assertEquals(il.getBaseCount(), bases, "unexpected number of bases found.");
    Assert.assertEquals(il.getIntervals().size(), intervals, "unexpected number of intervals found.");

    Assert.assertEquals(testerCountOutput(action, IntervalListTools.Output.BASES), bases, "unexpected number of bases written to count_output file.");
    Assert.assertEquals(testerCountOutput(action, IntervalListTools.Output.INTERVALS), intervals, "unexpected number of intervals written to count_output file.");
}
 
Example #20
Source File: VcfToIntervalList.java    From picard with MIT License 5 votes vote down vote up
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    final IntervalList intervalList = VCFFileReader.fromVcf(INPUT, INCLUDE_FILTERED);

    // Sort and write the output
    intervalList.uniqued().write(OUTPUT);
    return 0;
}
 
Example #21
Source File: Pulldown.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/** Returns a new instance of an IntervalList, constructed from the intervals of the internally held
 * AllelicCounts.  This IntervalList is modifiable and does not change with the state of the Pulldown.   */
public IntervalList getIntervals() {
    final IntervalList intervals = new IntervalList(header);
    intervals.addall(getCounts().stream().map(AllelicCount::getInterval)
            .map(si -> new Interval(si.getContig(), si.getStart(), si.getEnd())).collect(Collectors.toList()));
    return intervals;
}
 
Example #22
Source File: ByIntervalListVariantContextIteratorTest.java    From picard with MIT License 5 votes vote down vote up
@Test
public void testVariantOverlappingMultipleIntervalsIsReturnedOnlyOnce() {
    final IntervalList intervalList         = new IntervalList(header);
    intervalList.add(new Interval("12", 68921962, 68921962)); // deletion spans this
    intervalList.add(new Interval("12", 68921964, 68921964)); // deletion spans this
    final VCFFileReader reader              = getReader(CEU_TRIOS_INDELS_VCF);
    final Iterator<VariantContext> iterator = new ByIntervalListVariantContextIterator(reader, intervalList);
    Assert.assertTrue(iterator.hasNext());
    final VariantContext ctx = iterator.next();
    Assert.assertEquals(ctx.getStart(), 68921960);
    Assert.assertEquals(ctx.getEnd(), 68921966);
    Assert.assertFalse(iterator.hasNext());
    reader.close();
}
 
Example #23
Source File: GenotypeSperm.java    From Drop-seq with MIT License 5 votes vote down vote up
@Override
protected int doWork() {
	// validation
	IOUtil.assertFileIsReadable(INPUT);
	IOUtil.assertFileIsWritable(OUTPUT);
	IOUtil.assertFileIsReadable(this.CELL_BC_FILE);
	
	List<String> cellBarcodes = ParseBarcodeFile.readCellBarcodeFile(this.CELL_BC_FILE);
	log.info("Using " + cellBarcodes.size() + " cells in analysis");
	IntervalList snpIntervals = IntervalList.fromFile(INTERVALS);
	log.info("Using " + snpIntervals.getIntervals().size() + " SNP intervals in analysis");
	PrintStream out = new ErrorCheckingPrintStream(IOUtil.openFileForWriting(OUTPUT));
	writeHeader(out);

	SamReader reader = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.EAGERLY_DECODE).open(this.INPUT);			
	
	SNPUMIBasePileupIterator sbpi = getIter(reader, snpIntervals, cellBarcodes);
	
	MultiCellDigitalAlleleCountsIterator multiIter = new MultiCellDigitalAlleleCountsIterator(new DigitalAlleleCountsIterator(sbpi, BASE_QUALITY));

	// sort cell barcodes alphabetically for output.
	Collections.sort(cellBarcodes);
	@SuppressWarnings("unused")
	int counter=0;
	while (multiIter.hasNext()) {
		MultiCellDigitalAlleleCounts mcdac = multiIter.next();
		processMCDAC(cellBarcodes, mcdac, out, AUTO_FLUSH_OUTPUTS);
		counter++;
		if (counter%PROGRESS_RATE==0) log.info("Processed " + counter + " SNPs");
	}
	log.info("Processed " + counter +" total SNPs");
	out.close();
	multiIter.close();
	return 0;
}
 
Example #24
Source File: SplitIntervals.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Override
public void onTraversalStart() {
    ParamUtils.isPositive(scatterCount, "scatter-count must be > 0.");

    if (!outputDir.exists() && !outputDir.mkdir()) {
        throw new RuntimeIOException("Unable to create directory: " + outputDir.getAbsolutePath());
    }

    // in general dictionary will be from the reference, but using -I reads.bam or -F variants.vcf
    // to use the sequence dict from a bam or vcf is also supported
    final SAMSequenceDictionary sequenceDictionary = getBestAvailableSequenceDictionary();

    final List<SimpleInterval> intervals = hasUserSuppliedIntervals() ? intervalArgumentCollection.getIntervals(sequenceDictionary)
            : IntervalUtils.getAllIntervalsForReference(sequenceDictionary).stream()
            .filter(contig -> contig.getLengthOnReference() >= minContigSize)
            .collect(Collectors.toList());

    final IntervalList intervalList = new IntervalList(sequenceDictionary);
    intervals.stream().map(si -> new Interval(si.getContig(), si.getStart(), si.getEnd())).forEach(intervalList::add);
    final IntervalListScatterer scatterer = subdivisionMode.make();
    final List<IntervalList> scattered = scatterer.scatter(intervalList, scatterCount);

    // optionally split interval lists that contain intervals from multiple contigs
    final List<IntervalList> scatteredFinal = !dontMixContigs ? scattered :
            scattered.stream().flatMap(il -> il.getIntervals().stream()
                    .collect(Collectors.groupingBy(Interval::getContig)).entrySet().stream()    // group each interval list into sublists
                    .sorted(Comparator.comparingInt(entry -> sequenceDictionary.getSequenceIndex(entry.getKey())))  // sort entries by contig
                    .map(entry -> entry.getValue()) // discard the keys and just keep the lists of intervals
                    .map(list -> {
                        final IntervalList singleContigList = new IntervalList(sequenceDictionary);
                        singleContigList.addall(list);
                        return singleContigList;
                    })  // turn the intervals back into an IntervalList
            ).collect(Collectors.toList());

    final int maxNumberOfPlaces = Math.max((int)Math.floor(Math.log10(scatterCount-1))+1, DEFAULT_NUMBER_OF_DIGITS);
    final String formatString = "%0" + maxNumberOfPlaces + "d";
    IntStream.range(0, scatteredFinal.size()).forEach(n -> scatteredFinal.get(n).write(new File(outputDir, String.format(formatString, n) + extension)));
}
 
Example #25
Source File: IntervalListScatter.java    From picard with MIT License 5 votes vote down vote up
public IntervalListScatter(final IntervalListScatterer scatterer, final IntervalList intervals, final int scatterCount) {
    if (scatterCount < 1) {
        throw new IllegalArgumentException("scatterCount < 1");
    }
    this.scatterer = scatterer;
    this.intervals = scatterer.preprocessIntervalList(intervals);
    this.scatterCount = scatterCount;
}
 
Example #26
Source File: AbstractWgsMetricsCollector.java    From picard with MIT License 5 votes vote down vote up
/**
 * Creates a collector and initializes the inner data structures
 *
 * @param collectWgsMetrics CollectWgsMetrics, that creates this collector
 * @param coverageCap       coverage cap
 */
AbstractWgsMetricsCollector(CollectWgsMetrics collectWgsMetrics, final int coverageCap, final IntervalList intervals) {
    if (coverageCap <= 0) {
        throw new IllegalArgumentException("Coverage cap must be positive.");
    }
    this.collectWgsMetrics = collectWgsMetrics;
    unfilteredDepthHistogramArray = new long[coverageCap + 1];
    highQualityDepthHistogramArray = new long[coverageCap + 1];
    unfilteredBaseQHistogramArray = new long[Byte.MAX_VALUE];
    this.coverageCap    = coverageCap;
    this.intervals      = intervals;
    this.usingStopAfter = collectWgsMetrics.STOP_AFTER > 0;
}
 
Example #27
Source File: IntervalListToolsTest.java    From picard with MIT License 5 votes vote down vote up
@Test(dataProvider = "actionAndTotalBasesWithUniqueData")
public void testActionsWithUnique(final IntervalListTools.Action action, final long bases, final int intervals) throws IOException {
    final IntervalList il = tester(action, false, true);
    Assert.assertEquals(il.getBaseCount(), bases, "unexpected number of bases found.");
    Assert.assertEquals(il.getIntervals().size(), intervals, "unexpected number of intervals found.");

    Assert.assertEquals(testerCountOutput(action, IntervalListTools.Output.BASES, false, true), bases, "unexpected number of bases written to count_output file.");
    Assert.assertEquals(testerCountOutput(action, IntervalListTools.Output.INTERVALS, false, true), intervals, "unexpected number of intervals written to count_output file.");
}
 
Example #28
Source File: GetBayesianHetCoverage.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * The matched norrmal-tumor workflow
 */
private void runMatchedNormalTumor() {
    final BayesianHetPulldownCalculator normalHetPulldownCalculator, tumorHetPulldownCalculator;
    final Pulldown normalHetPulldown, tumorHetPulldown;

    normalHetPulldownCalculator = new BayesianHetPulldownCalculator(REFERENCE_ARGUMENTS.getReferenceFile(),
            IntervalList.fromFile(snpFile), minimumMappingQuality, minimumBaseQuality, readDepthThreshold,
            VALIDATION_STRINGENCY, errorProbabilityAdjustmentFactor,
            new BalancedHeterozygousPileupPriorModel());

    logger.info("Calculating the Het pulldown from the normal BAM file using the BALANCED prior...");
    normalHetPulldown = normalHetPulldownCalculator.getHetPulldown(normalBamFile, hetCallingStringency);

    logger.info("Writing Het pulldown from normal reads to " + normalHetOutputFile.toString());
    normalHetPulldown.write(normalHetOutputFile, AllelicCountTableColumn.AllelicCountTableVerbosity.FULL);

    tumorHetPulldownCalculator = new BayesianHetPulldownCalculator(REFERENCE_ARGUMENTS.getReferenceFile(),
            normalHetPulldown.getIntervals(), minimumMappingQuality, minimumBaseQuality, readDepthThreshold,
            VALIDATION_STRINGENCY, errorProbabilityAdjustmentFactor,
            new BalancedHeterozygousPileupPriorModel());

    logger.info("Calculating the Het pulldown from the tumor BAM file on Hets detected in the normal BAM file...");
    tumorHetPulldown = tumorHetPulldownCalculator.getTumorHetPulldownFromNormalPulldown(tumorBamFile,
            normalHetPulldown);

    logger.info("Writing Het pulldown from tumor reads to " + tumorHetOutputFile.toString());
    tumorHetPulldown.write(tumorHetOutputFile, AllelicCountTableColumn.AllelicCountTableVerbosity.INTERMEDIATE);
}
 
Example #29
Source File: PreprocessIntervals.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private static IntervalList filterBinsContainingOnlyNs(final IntervalList unfilteredBins, final ReferenceDataSource reference) {
    final IntervalList bins = new IntervalList(reference.getSequenceDictionary());
    for (final Interval unfilteredBin : unfilteredBins) {
        if (!Utils.stream(reference.query(new SimpleInterval(unfilteredBin))).allMatch(b -> Nucleotide.decode(b) == Nucleotide.N)) {
            bins.add(unfilteredBin);
        }
    }
    return bins;
}
 
Example #30
Source File: IntervalListToolsTest.java    From picard with MIT License 5 votes vote down vote up
@Test(dataProvider = "testScatterTestcases")
public void testScatterByContent(final IntervalListScattererTest.Testcase tc) throws IOException {

    final List<String> args = new ArrayList<>();

    args.add("ACTION=CONCAT");
    args.add("INPUT=" + tc.file.getAbsolutePath());

    args.add("SUBDIVISION_MODE=" + tc.mode);

    final File ilOutDir = IOUtil.createTempDir("IntervalListTools", "lists");
    dirsToDelete.add(ilOutDir);

    if (tc.scatterWidth == 1) {
        final File subDir = new File(ilOutDir, "temp_1_of_1");
        Assert.assertTrue(subDir.mkdir(), "was unable to create directory");
        args.add("OUTPUT=" + new File(subDir, "scattered.interval_list"));
    } else {
        args.add("OUTPUT=" + ilOutDir);
    }

    args.add("SCATTER_CONTENT=" + tc.mode.make().listWeight(tc.source) / tc.expectedScatter.size());

    Assert.assertEquals(runPicardCommandLine(args), 0);

    final String[] files = ilOutDir.list();
    Assert.assertNotNull(files);
    Arrays.sort(files);

    Assert.assertEquals(files.length, tc.expectedScatter.size());

    final Iterator<IntervalList> intervals = tc.expectedScatter.iterator();

    for (final String fileName : files) {
        final IntervalList intervalList = IntervalList.fromFile(new File(new File(ilOutDir, fileName), "scattered.interval_list"));
        final IntervalList expected = intervals.next();

        Assert.assertEquals(intervalList.getIntervals(), expected);
    }
}