Java Code Examples for htsjdk.samtools.util.IntervalList#fromFile()

The following examples show how to use htsjdk.samtools.util.IntervalList#fromFile(). They are extracted from open-source projects; you can go to the original project or source file by following the links above each example.
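Before the project examples, here is a minimal, self-contained sketch of the basic call pattern. The file path in it is a hypothetical placeholder rather than something taken from the projects below: IntervalList.fromFile() parses a Picard-style interval_list file (a SAM-style header followed by one interval per line) into an in-memory IntervalList whose header carries the sequence dictionary.

import htsjdk.samtools.util.Interval;
import htsjdk.samtools.util.IntervalList;

import java.io.File;

public class IntervalListFromFileSketch {
    public static void main(final String[] args) {
        // Hypothetical input path; any Picard-style .interval_list file would do.
        final File intervalListFile = new File("targets.interval_list");

        // Parse the file (header + intervals) into memory.
        final IntervalList intervals = IntervalList.fromFile(intervalListFile);

        // The header holds the sequence dictionary the intervals are defined against.
        System.out.println("Sequences in dictionary: "
                + intervals.getHeader().getSequenceDictionary().size());

        // Intervals use 1-based, inclusive coordinates.
        for (final Interval interval : intervals) {
            System.out.println(interval.getContig() + ":" + interval.getStart()
                    + "-" + interval.getEnd());
        }
    }
}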
Example 1
Source File: IntervalListToBed.java    From picard with MIT License
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    IntervalList intervals = IntervalList.fromFile(INPUT);
    if (SORT) intervals = intervals.sorted();

    try {
        final BufferedWriter out = IOUtil.openFileForBufferedWriting(OUTPUT);
        for (final Interval i : intervals) {
            final String strand = i.isNegativeStrand() ? "-" : "+";
            final List<?> fields = CollectionUtil.makeList(i.getContig(), i.getStart()-1, i.getEnd(), i.getName(), SCORE, strand);
            out.append(fields.stream().map(String::valueOf).collect(Collectors.joining("\t")));
            out.newLine();
        }

        out.close();
    }
    catch (IOException ioe) {
        throw new RuntimeIOException(ioe);
    }

    return 0;
}
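Note the i.getStart()-1 in the loop above: interval_list records use 1-based, inclusive coordinates while BED uses 0-based, half-open coordinates, so only the start position is shifted down by one when converting; the end position is left unchanged.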
 
Example 2
Source File: CreateSnpIntervalFromVcfTest.java    From Drop-seq with MIT License
@Test
public void testBasic() throws IOException {
    final CreateSnpIntervalFromVcf clp = new CreateSnpIntervalFromVcf();
    clp.INPUT = TEST_FILE;
    clp.OUTPUT = File.createTempFile("CreateSnpIntervalFromVcfTest.", ".intervals");
    clp.OUTPUT.deleteOnExit();
    Assert.assertEquals(clp.doWork(), 0);
    final IntervalList intervals = IntervalList.fromFile(clp.OUTPUT);
    // 6 variants in input, but one is an indel and one is filtered
    Assert.assertEquals(intervals.size(), EXPECTED_START_POS.length);
    final Iterator<Interval> it = intervals.iterator();
    for (final int startPos : EXPECTED_START_POS) {
        Assert.assertTrue(it.hasNext());
        final Interval interval = it.next();
        Assert.assertEquals(interval.getContig(), EXPECTED_CONTIG);
        Assert.assertEquals(interval.getStart(), startPos);
        Assert.assertEquals(interval.length(), 1);
    }
    Assert.assertFalse(it.hasNext());
}
 
Example 3
Source File: MultiCellDigitalAlleleCountsTest.java    From Drop-seq with MIT License
@Test
public void getMetaAnalysis() {
	List<String> cellBarcodes = ParseBarcodeFile.readCellBarcodeFile(cellBCFile);
	IntervalList snpIntervals = IntervalList.fromFile(snpIntervalsFile);
	int baseQualityThreshold=10;

	SNPUMIBasePileupIterator sbpi = new SNPUMIBasePileupIterator(
			smallBAMFile, snpIntervals, GENE_NAME_TAG, GENE_STRAND_TAG, GENE_FUNCTION_TAG,
			LOCUS_FUNCTION_LIST, STRAND_STRATEGY, cellBarcodeTag,
			molBCTag, snpTag, functionTag, readMQ, assignReadsToAllGenes, cellBarcodes);

	MultiCellDigitalAlleleCountsIterator multiIter = new MultiCellDigitalAlleleCountsIterator(new DigitalAlleleCountsIterator(sbpi, baseQualityThreshold));
	MultiCellDigitalAlleleCounts r = multiIter.next();

	DigitalAlleleCounts c = r.getMetaAnalysis();
	c.toString();
	Assert.assertNotNull(c);
}
 
Example 4
Source File: RnaSeqMetricsCollector.java    From picard with MIT License
public static OverlapDetector<Interval> makeOverlapDetector(final File samFile, final SAMFileHeader header, final File ribosomalIntervalsFile, final Log log) {
    final OverlapDetector<Interval> ribosomalSequenceOverlapDetector = new OverlapDetector<Interval>(0, 0);
    if (ribosomalIntervalsFile != null) {
        final IntervalList ribosomalIntervals = IntervalList.fromFile(ribosomalIntervalsFile);
        if (ribosomalIntervals.size() == 0) {
            log.warn("The RIBOSOMAL_INTERVALS file, " + ribosomalIntervalsFile.getAbsolutePath() + " does not contain intervals");
        }
        try {
            SequenceUtil.assertSequenceDictionariesEqual(header.getSequenceDictionary(), ribosomalIntervals.getHeader().getSequenceDictionary());
        } catch (SequenceUtil.SequenceListsDifferException e) {
            throw new PicardException("Sequence dictionaries differ in " + samFile.getAbsolutePath() + " and " + ribosomalIntervalsFile.getAbsolutePath(),
                    e);
        }
        final IntervalList uniquedRibosomalIntervals = ribosomalIntervals.uniqued();
        final List<Interval> intervals = uniquedRibosomalIntervals.getIntervals();
        ribosomalSequenceOverlapDetector.addAll(intervals, intervals);
    }
    return ribosomalSequenceOverlapDetector;
}
 
Example 5
Source File: BayesianHetPulldownCalculatorUnitTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License
@BeforeClass
public void initHetPulldownCalculator() {
    calculator = new BayesianHetPulldownCalculator(REF_FILE, IntervalList.fromFile(SNP_FILE),
            MINIMUM_MAPPING_QUALITY, MINIMUM_BASE_QUALITY, READ_DEPTH_THRESHOLD,
            ValidationStringency.STRICT, ERROR_PROBABILITY_ADJUSTMENT_FACTOR,
            new HeterogeneousHeterozygousPileupPriorModel(MIN_ABNORMAL_FRACTION, MAX_ABNORMAL_FRACTION,
                    MAX_COPY_NUMBER, QUADRATURE_ORDER));
}
 
Example 6
Source File: GetBayesianHetCoverage.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License
/**
 * The matched normal-tumor workflow
 */
private void runMatchedNormalTumor() {
    final BayesianHetPulldownCalculator normalHetPulldownCalculator, tumorHetPulldownCalculator;
    final Pulldown normalHetPulldown, tumorHetPulldown;

    normalHetPulldownCalculator = new BayesianHetPulldownCalculator(REFERENCE_ARGUMENTS.getReferenceFile(),
            IntervalList.fromFile(snpFile), minimumMappingQuality, minimumBaseQuality, readDepthThreshold,
            VALIDATION_STRINGENCY, errorProbabilityAdjustmentFactor,
            new BalancedHeterozygousPileupPriorModel());

    logger.info("Calculating the Het pulldown from the normal BAM file using the BALANCED prior...");
    normalHetPulldown = normalHetPulldownCalculator.getHetPulldown(normalBamFile, hetCallingStringency);

    logger.info("Writing Het pulldown from normal reads to " + normalHetOutputFile.toString());
    normalHetPulldown.write(normalHetOutputFile, AllelicCountTableColumn.AllelicCountTableVerbosity.FULL);

    tumorHetPulldownCalculator = new BayesianHetPulldownCalculator(REFERENCE_ARGUMENTS.getReferenceFile(),
            normalHetPulldown.getIntervals(), minimumMappingQuality, minimumBaseQuality, readDepthThreshold,
            VALIDATION_STRINGENCY, errorProbabilityAdjustmentFactor,
            new BalancedHeterozygousPileupPriorModel());

    logger.info("Calculating the Het pulldown from the tumor BAM file on Hets detected in the normal BAM file...");
    tumorHetPulldown = tumorHetPulldownCalculator.getTumorHetPulldownFromNormalPulldown(tumorBamFile,
            normalHetPulldown);

    logger.info("Writing Het pulldown from tumor reads to " + tumorHetOutputFile.toString());
    tumorHetPulldown.write(tumorHetOutputFile, AllelicCountTableColumn.AllelicCountTableVerbosity.INTERMEDIATE);
}
 
Example 7
Source File: FilterIntervalsIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License
@Test(dataProvider = "dataCountBasedFilters")
public void testCountBasedFilters(final File intervalsFile,
                                  final List<File> countFiles,
                                  final int lowCountFilterCountThreshold,
                                  final double lowCountFilterPercentageOfSamples,
                                  final double extremeCountFilterMinimumPercentile,
                                  final double extremeCountFilterMaximumPercentile,
                                  final double extremeCountFilterPercentageOfSamples,
                                  final List<Integer> expectedIndices) {
    final File outputFile = createTempFile("filter-intervals-test", ".interval_list");
    final ArgumentsBuilder argsBuilder = new ArgumentsBuilder()
            .add(StandardArgumentDefinitions.INTERVALS_LONG_NAME, intervalsFile.getAbsolutePath())
            .add(FilterIntervals.LOW_COUNT_FILTER_COUNT_THRESHOLD_LONG_NAME, Integer.toString(lowCountFilterCountThreshold))
            .add(FilterIntervals.LOW_COUNT_FILTER_PERCENTAGE_OF_SAMPLES_LONG_NAME, Double.toString(lowCountFilterPercentageOfSamples))
            .add(FilterIntervals.EXTREME_COUNT_FILTER_MINIMUM_PERCENTILE_LONG_NAME, Double.toString(extremeCountFilterMinimumPercentile))
            .add(FilterIntervals.EXTREME_COUNT_FILTER_MAXIMUM_PERCENTILE_LONG_NAME, Double.toString(extremeCountFilterMaximumPercentile))
            .add(FilterIntervals.EXTREME_COUNT_FILTER_PERCENTAGE_OF_SAMPLES_LONG_NAME, Double.toString(extremeCountFilterPercentageOfSamples))
            .add(IntervalArgumentCollection.INTERVAL_MERGING_RULE_LONG_NAME, IntervalMergingRule.OVERLAPPING_ONLY.toString())
            .addOutput(outputFile);
    countFiles.forEach(argsBuilder::addInput);
    runCommandLine(argsBuilder);
    final IntervalList result = IntervalList.fromFile(outputFile);
    final IntervalList all = IntervalList.fromFile(intervalsFile);
    final List<Interval> allIntervals = all.getIntervals();
    final IntervalList expected = new IntervalList(all.getHeader().getSequenceDictionary());
    expectedIndices.stream().map(allIntervals::get).map(Interval::new).forEach(expected::add);
    Assert.assertEquals(result, expected);
    Assert.assertNotSame(result, expected);
}
 
Example 8
Source File: SequenceDictionaryIntersectionTest.java    From Drop-seq with MIT License
@DataProvider(name="ssdiDataProvider")
public Object[][] ssdiDataProvider() {
    final ArrayList<Object[]> ret = new ArrayList<>();
    final File leftDictFile = new File(TESTDATA_DIR, CHR_BASENAME + "sam");
    final SamReader leftDict = SamReaderFactory.makeDefault().open(leftDictFile);

    for (final boolean chr: new boolean[]{true, false}) {
        final String basename;
        final int expectedIntersection;
        if (chr) {
            basename = CHR_BASENAME;
            expectedIntersection = NUM_SEQUENCES;
        } else {
            basename = NO_CHR_BASENAME;
            expectedIntersection = NUM_NON_CHR_SEQUENCES;
        }
        final File samFile = new File(TESTDATA_DIR, basename + "sam");
        final SamReader samReader = SamReaderFactory.makeDefault().open(samFile);
        ret.add(new Object[]{leftDict, samReader, leftDictFile.getName(), samFile.getName(), expectedIntersection});
        ret.add(new Object[]{leftDict, samReader.getFileHeader(), leftDictFile.getName(), samFile.getName(), expectedIntersection});
        ret.add(new Object[]{leftDict, samReader.getFileHeader().getSequenceDictionary(), leftDictFile.getName(), samFile.getName(), expectedIntersection});
        final File vcfFile = new File(TESTDATA_DIR, basename + "vcf");
        final VCFFileReader vcfReader = new VCFFileReader(vcfFile, false);
        ret.add(new Object[]{leftDict, vcfReader, leftDictFile.getName(), vcfFile.getName(), expectedIntersection});
        final File intervalListFile = new File(TESTDATA_DIR, basename + "interval_list");
        final IntervalList intervalList = IntervalList.fromFile(intervalListFile);
        ret.add(new Object[]{leftDict, intervalList, leftDictFile.getName(), intervalListFile.getName(), expectedIntersection});
    }

    return ret.toArray(new Object[ret.size()][]);
}
 
Example 9
Source File: IntervalListToolsTest.java    From picard with MIT License
private IntervalList tester(IntervalListTools.Action action, boolean invert, boolean unique) throws IOException {
    final File ilOut = File.createTempFile("IntervalListTools", ".interval_list");
    ilOut.deleteOnExit();

    final List<String> args = new ArrayList<>();

    args.add("ACTION=" + action.toString());
    args.add("INPUT=" + scatterable);

    if (action.takesSecondInput) {
        args.add("SECOND_INPUT=" + secondInput);
    } else {
        args.add("INPUT=" + secondInput);
    }

    if (invert) {
        args.add("INVERT=true");
    }

    if (unique) {
        args.add("UNIQUE=true");
    }

    args.add("OUTPUT=" + ilOut);

    Assert.assertEquals(runPicardCommandLine(args), 0);

    return IntervalList.fromFile(ilOut);
}
 
Example 10
Source File: ExtractSequences.java    From picard with MIT License
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INTERVAL_LIST);
    IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
    IOUtil.assertFileIsWritable(OUTPUT);

    final IntervalList intervals = IntervalList.fromFile(INTERVAL_LIST);
    final ReferenceSequenceFile ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE);
    SequenceUtil.assertSequenceDictionariesEqual(intervals.getHeader().getSequenceDictionary(), ref.getSequenceDictionary());

    final BufferedWriter out = IOUtil.openFileForBufferedWriting(OUTPUT);

    for (final Interval interval : intervals) {
        final ReferenceSequence seq = ref.getSubsequenceAt(interval.getContig(), interval.getStart(), interval.getEnd());
        final byte[] bases = seq.getBases();
        if (interval.isNegativeStrand()) SequenceUtil.reverseComplement(bases);

        try {
            out.write(">");
            out.write(interval.getName());
            out.write("\n");

            for (int i=0; i<bases.length; ++i) {
                if (i > 0 && i % LINE_LENGTH == 0) out.write("\n");
                out.write(bases[i]);
            }

            out.write("\n");
        }
        catch (IOException ioe) {
            throw new PicardException("Error writing to file " + OUTPUT.getAbsolutePath(), ioe);

        }
    }

    CloserUtil.close(out);

    return 0;
}
 
Example 11
Source File: MultiCellDigitalAlleleCountsIteratorTest.java    From Drop-seq with MIT License
/**
 * Using BQ=0 here to make data similar to the ad-hoc analysis that started this.
 * This data set assumes no UMI collapse.
 * 5 cells, 2 SNPs + meta analysis.
 */
@Test(enabled=true)
public void bigDataTest () {
	List<String> cellBarcodes = ParseBarcodeFile.readCellBarcodeFile(cellBCFile);
	IntervalList snpIntervals = IntervalList.fromFile(snpIntervalsFile);
	int baseQualityThreshold=0;

	SNPUMIBasePileupIterator sbpi = new SNPUMIBasePileupIterator(
			largeBAMFile, snpIntervals, GENE_NAME_TAG, GENE_STRAND_TAG, GENE_FUNCTION_TAG,
			LOCUS_FUNCTION_LIST, STRAND_STRATEGY, cellBarcodeTag,
			molBCTag, snpTag, functionTag, readMQ, assignReadsToAllGenes, cellBarcodes);

	MultiCellDigitalAlleleCountsIterator multiIter = new MultiCellDigitalAlleleCountsIterator(new DigitalAlleleCountsIterator(sbpi, baseQualityThreshold));

	int counter=0;
	while (multiIter.hasNext()) {
		MultiCellDigitalAlleleCounts mcdac = multiIter.next();
		for (String cellID: mcdac.getCells()) {
			DigitalAlleleCounts dac = mcdac.getDigitalAlleleCounts(cellID);
			Collection<String> umis = dac.umis();
			int numUMIs=dac.umis().size();
			checkAnswerLargeData(dac);
		}
		DigitalAlleleCounts meta = mcdac.getMetaAnalysis();
		checkAnswerLargeData(meta);
		counter++;
	}
	// assert that you saw 2 gene/SNP MultiCellDACs, each with 0 or more cells.
	Assert.assertEquals(2, counter);
	multiIter.close();
}
 
Example 12
Source File: IntervalListToolsTest.java    From picard with MIT License
@Test(dataProvider = "testScatterTestcases")
public void testScatterByContent(final IntervalListScattererTest.Testcase tc) throws IOException {

    final List<String> args = new ArrayList<>();

    args.add("ACTION=CONCAT");
    args.add("INPUT=" + tc.file.getAbsolutePath());

    args.add("SUBDIVISION_MODE=" + tc.mode);

    final File ilOutDir = IOUtil.createTempDir("IntervalListTools", "lists");
    dirsToDelete.add(ilOutDir);

    if (tc.scatterWidth == 1) {
        final File subDir = new File(ilOutDir, "temp_1_of_1");
        Assert.assertTrue(subDir.mkdir(), "was unable to create directory");
        args.add("OUTPUT=" + new File(subDir, "scattered.interval_list"));
    } else {
        args.add("OUTPUT=" + ilOutDir);
    }

    args.add("SCATTER_CONTENT=" + tc.mode.make().listWeight(tc.source) / tc.expectedScatter.size());

    Assert.assertEquals(runPicardCommandLine(args), 0);

    final String[] files = ilOutDir.list();
    Assert.assertNotNull(files);
    Arrays.sort(files);

    Assert.assertEquals(files.length, tc.expectedScatter.size());

    final Iterator<IntervalList> intervals = tc.expectedScatter.iterator();

    for (final String fileName : files) {
        final IntervalList intervalList = IntervalList.fromFile(new File(new File(ilOutDir, fileName), "scattered.interval_list"));
        final IntervalList expected = intervals.next();

        Assert.assertEquals(intervalList.getIntervals(), expected.getIntervals());
    }
}
 
Example 13
Source File: CollectWgsMetrics.java    From picard with MIT License
/**
 * Creates {@link htsjdk.samtools.util.AbstractLocusIterator} implementation according to {@link #USE_FAST_ALGORITHM} value.
 *
 * @param in inner {@link htsjdk.samtools.SamReader}
 * @return if {@link #USE_FAST_ALGORITHM} is enabled, returns {@link htsjdk.samtools.util.EdgeReadIterator} implementation,
 * otherwise default algorithm is used and {@link htsjdk.samtools.util.SamLocusIterator} is returned.
 */
protected AbstractLocusIterator getLocusIterator(final SamReader in) {
    if (USE_FAST_ALGORITHM) {
        return (INTERVALS != null) ? new EdgeReadIterator(in, IntervalList.fromFile(INTERVALS)) : new EdgeReadIterator(in);
    }
    SamLocusIterator iterator = (INTERVALS != null) ? new SamLocusIterator(in, IntervalList.fromFile(INTERVALS)) : new SamLocusIterator(in);
    iterator.setMaxReadsToAccumulatePerLocus(LOCUS_ACCUMULATION_CAP);
    iterator.setEmitUncoveredLoci(true);
    iterator.setQualityScoreCutoff(0);
    return iterator;
}
 
Example 14
Source File: SNPUMICellReadIteratorWrapperTest.java    From Drop-seq with MIT License
/**
 * Integration style test to see if I can throw null pointers, etc with a bit of real data.
 */
@Test(enabled=true)
public void processReads() {
    List<String> cellBarcodeList = ParseBarcodeFile.readCellBarcodeFile(cellBCFile);
    IntervalList loci = IntervalList.fromFile(snpIntervals);

    SamReader reader = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.EAGERLY_DECODE).open(bamFile);
    // Filter records before sorting, to reduce I/O
    final MissingTagFilteringIterator filteringIterator =
            new MissingTagFilteringIterator(reader.iterator(), GENE_NAME_TAG, cellBarcodeTag, molBCTag);

    MapQualityFilteredIterator filteringIterator2 = new MapQualityFilteredIterator(filteringIterator, readMQ, true);

    GeneFunctionIteratorWrapper gfteratorWrapper = new GeneFunctionIteratorWrapper(filteringIterator2, GENE_NAME_TAG, GENE_STRAND_TAG, GENE_FUNCTION_TAG, false, STRAND_STRATEGY, LOCUS_FUNCTION_LIST);

    SNPUMICellReadIteratorWrapper snpumiCellReadIterator = new SNPUMICellReadIteratorWrapper(gfteratorWrapper, loci, cellBarcodeTag, cellBarcodeList, GENE_NAME_TAG, snpTag, readMQ);

    // create comparators in the order the data should be sorted
    final MultiComparator<SAMRecord> multiComparator = new MultiComparator<>(
            new StringTagComparator(cellBarcodeTag),
            new StringTagComparator(GENE_NAME_TAG),
            new StringTagComparator(molBCTag));

    final CloseableIterator<SAMRecord> sortingIterator =
            SamRecordSortingIteratorFactory.create(reader.getFileHeader(), snpumiCellReadIterator, multiComparator, null);

    Assert.assertTrue(sortingIterator.hasNext());
    SAMRecord nextRead = sortingIterator.next();
    List<SAMTagAndValue> tagValues = nextRead.getAttributes();
    String snpTagValue = nextRead.getStringAttribute(this.snpTag);
    CloserUtil.close(snpumiCellReadIterator);
}
 
Example 15
Source File: CreateIntervalsFilesTest.java    From Drop-seq with MIT License
@Test
public void testCreateIntervalsFiles() throws IOException {
    final CreateIntervalsFiles clp = new CreateIntervalsFiles();
    clp.SEQUENCE_DICTIONARY = new File(METADATA_DIR, REFERENCE_NAME + ".dict");
    clp.REDUCED_GTF = new File(METADATA_DIR, REFERENCE_NAME + ".reduced.gtf.gz");
    clp.OUTPUT = Files.createTempDir();
    clp.PREFIX = REFERENCE_NAME;
    clp.MT_SEQUENCE = MT_SEQUENCE;
    clp.NON_AUTOSOME_SEQUENCE = NON_AUTOSOME_SEQUENCE;

    try {
        Assert.assertEquals(clp.doWork(), 0);

        final IntervalList mtIntervals = IntervalList.fromFile(new File(clp.OUTPUT, REFERENCE_NAME + ".mt.intervals"));
        Assert.assertEquals(mtIntervals.size(), 37);

        final IntervalList nonAutosomeSequences = IntervalList.fromFile(new File(clp.OUTPUT, REFERENCE_NAME + ".non_autosomes.intervals"));
        Assert.assertEquals(nonAutosomeSequences.size(), 0);
        Assert.assertEquals(nonAutosomeSequences.getHeader().getSequenceDictionary().size(), 3);

        final IntervalList rRnaIntervals = IntervalList.fromFile(new File(clp.OUTPUT, REFERENCE_NAME + ".rRNA.intervals"));
        Assert.assertEquals(rRnaIntervals.size(), 355);

        // This is hard to validate, so just confirm its readability.
        IntervalList.fromFile(new File(clp.OUTPUT, REFERENCE_NAME + ".intergenic.intervals"));

        final IntervalList genesIntervals = IntervalList.fromFile(new File(clp.OUTPUT, REFERENCE_NAME + ".genes.intervals"));
        Assert.assertEquals(genesIntervals.size(), 32976);

        final IntervalList exonsIntervals = IntervalList.fromFile(new File(clp.OUTPUT, REFERENCE_NAME + ".exons.intervals"));
        Assert.assertEquals(exonsIntervals.size(), 615275);

        final IntervalList consensusIntronsIntervals = IntervalList.fromFile(new File(clp.OUTPUT, REFERENCE_NAME + ".consensus_introns.intervals"));
        Assert.assertEquals(consensusIntronsIntervals.size(), 396038);

    } finally {
        FileUtils.deleteDirectory(clp.OUTPUT);
    }
}
 
Example 16
Source File: GenotypeSperm.java    From Drop-seq with MIT License
@Override
protected int doWork() {
	// validation
	IOUtil.assertFileIsReadable(INPUT);
	IOUtil.assertFileIsWritable(OUTPUT);
	IOUtil.assertFileIsReadable(this.CELL_BC_FILE);
	
	List<String> cellBarcodes = ParseBarcodeFile.readCellBarcodeFile(this.CELL_BC_FILE);
	log.info("Using " + cellBarcodes.size() + " cells in analysis");
	IntervalList snpIntervals = IntervalList.fromFile(INTERVALS);
	log.info("Using " + snpIntervals.getIntervals().size() + " SNP intervals in analysis");
	PrintStream out = new ErrorCheckingPrintStream(IOUtil.openFileForWriting(OUTPUT));
	writeHeader(out);

	SamReader reader = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.EAGERLY_DECODE).open(this.INPUT);			
	
	SNPUMIBasePileupIterator sbpi = getIter(reader, snpIntervals, cellBarcodes);
	
	MultiCellDigitalAlleleCountsIterator multiIter = new MultiCellDigitalAlleleCountsIterator(new DigitalAlleleCountsIterator(sbpi, BASE_QUALITY));

	// sort cell barcodes alphabetically for output.
	Collections.sort(cellBarcodes);
	@SuppressWarnings("unused")
	int counter=0;
	while (multiIter.hasNext()) {
		MultiCellDigitalAlleleCounts mcdac = multiIter.next();
		processMCDAC(cellBarcodes, mcdac, out, AUTO_FLUSH_OUTPUTS);
		counter++;
		if (counter%PROGRESS_RATE==0) log.info("Processed " + counter + " SNPs");
	}
	log.info("Processed " + counter +" total SNPs");
	out.close();
	multiIter.close();
	return 0;
}
 
Example 17
Source File: SNPUMICellReadIteratorWrapperTest.java    From Drop-seq with MIT License
@Test(enabled=true)
public void testGeneSNPSplitting () {
	List<String> cellBarcodeList = ParseBarcodeFile.readCellBarcodeFile(cellBCFile);
	IntervalList loci = IntervalList.fromFile(snpIntervals);
	// make a new SNP that is 10 bases before the first SNP.  Reads in the test data will hit both.
	Interval i1 = loci.getIntervals().iterator().next();
	Interval i2 = new Interval (i1.getContig(), i1.getStart()-10, i1.getStart()-10, true, "testSNP1");
	loci.add(i2);



	//A read that hits only the default SNP: start read at 76227020
	SAMRecord r1 = getReadByName("NS500217:67:H14GMBGXX:1:11308:22039:11268", smallBAMFile);
	Collection<SAMRecord> tempList = processOneRead(r1, loci, cellBarcodeList);
	Assert.assertTrue(tempList.size()==1);
	Assert.assertEquals(1, getNumGenes(tempList));
	Assert.assertEquals(1, getNumSNPs(tempList));

	//A read that hits both the default SNP and the new SNP: start read at 76227000
	r1 = getReadByName("NS500217:67:H14GMBGXX:1:13202:10555:15929", smallBAMFile);
	tempList = processOneRead(r1, loci, cellBarcodeList);
	Assert.assertTrue(tempList.size()==2);
	Assert.assertEquals(1, getNumGenes(tempList));
	Assert.assertEquals(2, getNumSNPs(tempList));


	//  A read that hits only the default SNP: start read at 76227020, has 2 genes.
	r1 = getReadByName("NS500217:67:H14GMBGXX:1:21206:20467:19854", smallBAMFile);
	tempList = processOneRead(r1, loci, cellBarcodeList);
	Assert.assertTrue(tempList.size()==2);
	Assert.assertEquals(2, getNumGenes(tempList));
	Assert.assertEquals(1, getNumSNPs(tempList));


	//A read that hits both the default SNP and the new SNP: start read at 76227000, has 2 genes.
	r1 = getReadByName("NS500217:67:H14GMBGXX:1:22302:3826:3320", smallBAMFile);
	tempList = processOneRead(r1, loci, cellBarcodeList);
	Assert.assertTrue(tempList.size()==4);
	Assert.assertEquals(2, getNumGenes(tempList));
	Assert.assertEquals(2, getNumSNPs(tempList));


}
 
Example 18
Source File: ViewSamTest.java    From picard with MIT License
/**
 * Confirm that ViewSam only outputs records that overlap intervals in a provided interval file.
 */
@Test
public void testIntervals() throws Exception {
    // a SAM file designed to test intervals against
    final File inputSam = new File("testdata/picard/sam/viewsam_intervals_test.sam");
    // an interval file containing the intervals to run against the SAM
    final File inputIntervalsFile = new File("testdata/picard/sam/viewsam_intervals_test.interval_list");

    // create temp output file that ViewSam call get written to
    final File viewSamOutputFile = File.createTempFile("ViewSamTest.output.", ".sam");
    viewSamOutputFile.deleteOnExit();

    final ViewSam viewSam = new ViewSam();
    viewSam.INPUT = inputSam.getAbsolutePath();
    viewSam.INTERVAL_LIST = inputIntervalsFile;

    // create a print stream to this file
    final PrintStream viewSamPrintStream = new PrintStream(viewSamOutputFile);
    // make sure the command line call exited successfully
    Assert.assertEquals(viewSam.writeSamText(viewSamPrintStream), 0);
    viewSamPrintStream.close();

    // load the interval file
    final IntervalList inputIntervalsList = IntervalList.fromFile(inputIntervalsFile);
    // ViewSam internally utilizes uniqued intervals, so we will compare to the same
    final List<Interval> intervals = inputIntervalsList.uniqued().getIntervals();

    // make a reader that is not using intervals to load the output file we wrote that
    // was written by the call to ViewSam with the given interval file.  This will give us
    // the "filtered" file that we can compare to the intervals and ensure that only
    // overlapped records were written
    final SamReader samReader = SamReaderFactory.makeDefault().open(viewSamOutputFile);

    // make sure the intervals file caused at least one match to be found
    boolean foundMatches = false;

    for (final SAMRecord samRecord : samReader) {
        // make an interval representing this SAM record
        final Interval samRecordInterval = new Interval(samRecord.getContig(), samRecord.getStart(), samRecord.getEnd());
        // go through and look to see whether this SAM interval overlaps a filtering interval
        boolean samRecordIntervalOverlaps = false;
        for (final Interval interval : intervals) {
            if (interval.intersects(samRecordInterval)) {
                samRecordIntervalOverlaps = true;
                // mark that we have found at least one SAM record that overlaps an interval
                foundMatches = true;
                break;
            }
        }
        // if this SAM record does not overlap an interval, it should not have been written
        Assert.assertTrue(samRecordIntervalOverlaps, "SAM record written out was not overlapped by an interval.");
    }

    // we should have at least one SAM record written to ensure interval filtering worked correctly
    Assert.assertTrue(foundMatches, "No SAM records overlapped the given intervals.");
}
 
Example 19
Source File: FilterIntervalsIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License
@Test(dataProvider = "dataAllFilters")
public void testAllFilters(final File intervalsFile,
                           final List<String> excludedIntervals,
                           final File annotatedIntervalsFile,
                           final double minimumGCContent,
                           final double maximumGCContent,
                           final double minimumMappability,
                           final double maximumMappability,
                           final double minimumSegmentalDuplicationContent,
                           final double maximumSegmentalDuplicationContent,
                           final List<File> countFiles,
                           final int lowCountFilterCountThreshold,
                           final double lowCountFilterPercentageOfSamples,
                           final double extremeCountFilterMinimumPercentile,
                           final double extremeCountFilterMaximumPercentile,
                           final double extremeCountFilterPercentageOfSamples,
                           final List<Integer> expectedIndices) {
    final File outputFile = createTempFile("filter-intervals-test", ".interval_list");
    final ArgumentsBuilder argsBuilder = new ArgumentsBuilder()
            .add(StandardArgumentDefinitions.INTERVALS_LONG_NAME, intervalsFile.getAbsolutePath())
            .add(CopyNumberStandardArgument.ANNOTATED_INTERVALS_FILE_LONG_NAME, annotatedIntervalsFile.getAbsolutePath())
            .add(FilterIntervals.MINIMUM_GC_CONTENT_LONG_NAME, Double.toString(minimumGCContent))
            .add(FilterIntervals.MAXIMUM_GC_CONTENT_LONG_NAME, Double.toString(maximumGCContent))
            .add(FilterIntervals.MINIMUM_MAPPABILITY_LONG_NAME, Double.toString(minimumMappability))
            .add(FilterIntervals.MAXIMUM_MAPPABILITY_LONG_NAME, Double.toString(maximumMappability))
            .add(FilterIntervals.MINIMUM_SEGMENTAL_DUPLICATION_CONTENT_LONG_NAME, Double.toString(minimumSegmentalDuplicationContent))
            .add(FilterIntervals.MAXIMUM_SEGMENTAL_DUPLICATION_CONTENT_LONG_NAME, Double.toString(maximumSegmentalDuplicationContent))
            .add(FilterIntervals.LOW_COUNT_FILTER_COUNT_THRESHOLD_LONG_NAME, Integer.toString(lowCountFilterCountThreshold))
            .add(FilterIntervals.LOW_COUNT_FILTER_PERCENTAGE_OF_SAMPLES_LONG_NAME, Double.toString(lowCountFilterPercentageOfSamples))
            .add(FilterIntervals.EXTREME_COUNT_FILTER_MINIMUM_PERCENTILE_LONG_NAME, Double.toString(extremeCountFilterMinimumPercentile))
            .add(FilterIntervals.EXTREME_COUNT_FILTER_MAXIMUM_PERCENTILE_LONG_NAME, Double.toString(extremeCountFilterMaximumPercentile))
            .add(FilterIntervals.EXTREME_COUNT_FILTER_PERCENTAGE_OF_SAMPLES_LONG_NAME, Double.toString(extremeCountFilterPercentageOfSamples))
            .add(IntervalArgumentCollection.INTERVAL_MERGING_RULE_LONG_NAME, IntervalMergingRule.OVERLAPPING_ONLY.toString())
            .addOutput(outputFile);
    excludedIntervals.forEach(i -> argsBuilder.add(IntervalArgumentCollection.EXCLUDE_INTERVALS_LONG_NAME, i));
    countFiles.forEach(argsBuilder::addInput);
    runCommandLine(argsBuilder);
    final IntervalList result = IntervalList.fromFile(outputFile);
    final IntervalList all = IntervalList.fromFile(intervalsFile);
    final List<Interval> allIntervals = all.getIntervals();
    final IntervalList expected = new IntervalList(all.getHeader().getSequenceDictionary());
    expectedIndices.stream().map(allIntervals::get).map(Interval::new).forEach(expected::add);
    Assert.assertEquals(result, expected);
    Assert.assertNotSame(result, expected);
}
 
Example 20
Source File: NonNFastaSize.java    From picard with MIT License
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    // set up the reference and a mask so that we only count the positions requested by the user
    final ReferenceSequenceFile ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(INPUT);
    final ReferenceSequenceMask referenceSequenceMask;
    if (INTERVALS != null) {
        IOUtil.assertFileIsReadable(INTERVALS);
        final IntervalList intervalList = IntervalList.fromFile(INTERVALS);
        referenceSequenceMask = new IntervalListReferenceSequenceMask(intervalList);
    } else {
        final SAMFileHeader header = new SAMFileHeader();
        header.setSequenceDictionary(ref.getSequenceDictionary());
        referenceSequenceMask = new WholeGenomeReferenceSequenceMask(header);
    }

    long nonNbases = 0L;

    for (final SAMSequenceRecord rec : ref.getSequenceDictionary().getSequences()) {
        // pull out the contig and set up the bases
        final ReferenceSequence sequence = ref.getSequence(rec.getSequenceName());
        final byte[] bases = sequence.getBases();
        StringUtil.toUpperCase(bases);

        for (int i = 0; i < bases.length; i++) {
            // only investigate this position if it's within our mask
            if (referenceSequenceMask.get(sequence.getContigIndex(), i+1)) {
                nonNbases += bases[i] == SequenceUtil.N ? 0 : 1;
            }
        }
    }

    try {
        final BufferedWriter out = IOUtil.openFileForBufferedWriting(OUTPUT);
        out.write(nonNbases + "\n");
        out.close();
    }
    catch (IOException ioe) {
        throw new PicardException("Error writing to file " + OUTPUT.getAbsolutePath(), ioe);
    }

    return 0;
}