htsjdk.samtools.util.PeekableIterator Java Examples

The following examples show how to use htsjdk.samtools.util.PeekableIterator. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: MultiCellDigitalAlleleCounts.java    From Drop-seq with MIT License 6 votes vote down vote up
/**
 * Constructs a meta cell containing all of the umi data across all individual cells in the provided list.
 * Allows you to calculate population wide statistics on all cells across a single SNP/gene.
 * This changes UMI names to be a combination of the cell name and the UMI sequence.
 * This is done because the same UMI sequence can exist in two different cells, and umi IDs are only unique within a cell.
 * @return A new DigitalAlleleCounts object that contains the data from all cells added to this collection.
 * If no data was added to the MultiCellDigitalAlleleCounts object, this will return null.
 */
public DigitalAlleleCounts getMetaAnalysis (final String clusterID, final Collection<String> cells) {
	Iterator<DigitalAlleleCounts> iter =  dacMap.values().iterator();
	// short circuit.
	if (!iter.hasNext())
		return null;

	PeekableIterator<DigitalAlleleCounts> pIter= new PeekableIterator<DigitalAlleleCounts>(iter);
	DigitalAlleleCounts first = pIter.peek();
	DigitalAlleleCounts result = new DigitalAlleleCounts(first.getSnpInterval(), first.getGene(), clusterID, first.getBaseQualityThreshold());
	while (pIter.hasNext()) {
		DigitalAlleleCounts next=pIter.next();
		if (cells.contains(next.getCell()))
			result=addDAC(result, next);
	}
	pIter.close();
	return (result);
}
 
Example #2
Source File: SimpleGermlineTagger.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
private static List<AnnotatedInterval> mergedRegionsByAnnotation(final String annotationToMerge, final List<AnnotatedInterval> regions) {
    final List<AnnotatedInterval> mergedSegments = new ArrayList<>();
    final PeekableIterator<AnnotatedInterval> segmentsIterator = new PeekableIterator<>(regions.iterator());
    while (segmentsIterator.hasNext()) {
        AnnotatedInterval normalSegment = segmentsIterator.next();
        AnnotatedInterval nextSegment = segmentsIterator.peek();

        int updatedEndPoint =  normalSegment.getEnd();

        // Merge (if any to merge)
        while (segmentsIterator.hasNext() && isMergeableByAnnotation(annotationToMerge, normalSegment, nextSegment)) {
            updatedEndPoint = nextSegment.getEnd();
            segmentsIterator.next();
            nextSegment = segmentsIterator.peek();
        }
        final AnnotatedInterval segmentToAddToResult = new AnnotatedInterval(
                new SimpleInterval(normalSegment.getContig(), normalSegment.getStart(), updatedEndPoint),
                ImmutableSortedMap.of(annotationToMerge, normalSegment.getAnnotationValue(annotationToMerge)));
        mergedSegments.add(segmentToAddToResult);
    }
    return mergedSegments;
}
 
Example #3
Source File: AnnotatedIntervalUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/** Merge AnnotatedIntervals by whether the regions overlap.  Sorting is performed as well, so input
 * ordering is lost.
 *
 * When two overlapping regions are merged, annotations are merged as follows:
 *  - The annotation appears in one region:  Resulting region will have the annotation with the value of that one region
 *  - The annotation appears in both regions:  Resulting region will have both values separated by the {@code annotationSeparator}
 *  parameter.
 *
 * Only merges overlaps, not abutters.
 *
 * @param initialRegions regions to merge.  Never {@code null}
 * @param dictionary sequence dictionary to use for sorting.  Never {@code null}
 * @param annotationSeparator separator to use in the case of annotation conflicts.  Never {@code null}
 * @param progressUpdater Consuming function that can callback with the latest region that has been processed.
 *                        Never {@code null}.  Use a no-op function if you do not wish to provide a progress update.
 * @return Segments will be sorted by the sequence dictionary
 */
public static List<AnnotatedInterval> mergeRegions(final List<AnnotatedInterval> initialRegions,
                                                   final SAMSequenceDictionary dictionary, final String annotationSeparator,
                                                   final Consumer<Locatable> progressUpdater) {

    Utils.nonNull(initialRegions);
    Utils.nonNull(dictionary);
    Utils.nonNull(annotationSeparator);
    Utils.nonNull(progressUpdater);

    final List<AnnotatedInterval> segments = IntervalUtils.sortLocatablesBySequenceDictionary(initialRegions,
            dictionary);

    final List<AnnotatedInterval> finalSegments = new ArrayList<>();
    final PeekableIterator<AnnotatedInterval> segmentsIterator = new PeekableIterator<>(segments.iterator());
    while (segmentsIterator.hasNext()) {
        AnnotatedInterval currentRegion = segmentsIterator.next();
        while (segmentsIterator.peek() != null && IntervalUtils.overlaps(currentRegion, segmentsIterator.peek())) {
            final AnnotatedInterval toBeMerged = segmentsIterator.next();
            currentRegion = merge(currentRegion, toBeMerged, annotationSeparator);
        }
        progressUpdater.accept(currentRegion);
        finalSegments.add(currentRegion);
    }
    return finalSegments;
}
 
Example #4
Source File: MultiHitAlignedReadIterator.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 *
 * @param querynameOrderIterator
 * @param primaryAlignmentSelectionStrategy Algorithm for selecting primary alignment when it is not clear from
 *                                          the input what should be primary.
 */
MultiHitAlignedReadIterator(final CloseableIterator<SAMRecord> querynameOrderIterator,
                            final PrimaryAlignmentSelectionStrategy primaryAlignmentSelectionStrategy) {
    this.primaryAlignmentSelectionStrategy = primaryAlignmentSelectionStrategy;
    peekIterator = new PeekableIterator<>(new FilteringSamIterator(querynameOrderIterator,
            new SamRecordFilter() {
                // Filter unmapped reads.
                @Override
                public boolean filterOut(final SAMRecord record) {
                    return record.getReadUnmappedFlag() || SAMUtils.cigarMapsNoBasesToRef(record.getCigar());
                }
                @Override
                public boolean filterOut(final SAMRecord first, final SAMRecord second) {
                    return ((first.getReadUnmappedFlag() || SAMUtils.cigarMapsNoBasesToRef(first.getCigar()))
                            && (second.getReadUnmappedFlag() || SAMUtils.cigarMapsNoBasesToRef(second.getCigar())));
                }
            }));


    advance();
}
 
Example #5
Source File: AllLocusIterator.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * @param interval The single interval over whose loci we'll be iterating
 * @param nestedLocusIterator Provider of AlignmentContexts that may lie within the interval. Must return AlignmentContexts
 *                            that are on the same contig as the provided interval.
 */
public AllLocusIterator(final SimpleInterval interval, final Iterator<AlignmentContext> nestedLocusIterator) {
    Utils.nonNull(interval);
    Utils.nonNull(nestedLocusIterator);
    
    this.nestedLocusIterator = new PeekableIterator<>(nestedLocusIterator);
    this.interval = interval;
    this.currentPosition = interval.getStart();

    // Sanity check:
    if ( this.nestedLocusIterator.peek() != null && ! this.nestedLocusIterator.peek().getContig().equals(interval.getContig()) ) {
        throw new IllegalArgumentException("Locus iterator must be over the same contig as the interval provided");
    }

    nextPileup = advance();
}
 
Example #6
Source File: ReadStateManager.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
public ReadStateManager(final Iterator<GATKRead> source,
                        final List<String> samples,
                        final LIBSDownsamplingInfo info,
                        final boolean keepSubmittedReads,
                        final SAMFileHeader header) {
    Utils.nonNull(source, "source");
    Utils.nonNull(samples, "samples");
    Utils.nonNull(info, "downsampling info");
    Utils.nonNull(header, "header");
    this.samples = samples;
    this.iterator = new PeekableIterator<>(source);

    this.keepSubmittedReads = keepSubmittedReads;
    this.submittedReads = new LinkedList<>();

    for (final String sample : samples) {
        // because this is a linked hash map the order of iteration will be in sample order
        readStatesBySample.put(sample, new PerSampleReadStateManager(info));
    }

    samplePartitioner = new SamplePartitioner(info, samples, header);
}
 
Example #7
Source File: QuerySortedReadPairIteratorUtilTest.java    From picard with MIT License 6 votes vote down vote up
@Test
public void testFragmentNoReadPair() {
    SAMRecordSetBuilder builder = new SAMRecordSetBuilder(false, SAMFileHeader.SortOrder.queryname);
    builder.setReadLength(READ_LENGTH);
    builder.addFrag("mapped_frag_a", 1, 1, false);
    builder.addFrag("mapped_frag_b", 1, 1, false);
    PeekableIterator<SAMRecord> iterator = new PeekableIterator<SAMRecord>(builder.iterator());

    QuerySortedReadPairIteratorUtil.ReadPair pair = QuerySortedReadPairIteratorUtil.getNextReadPair(iterator);
    Assert.assertNotNull(pair);
    Assert.assertNotNull(pair.read1);
    Assert.assertNull(pair.read2);
    Assert.assertEquals("mapped_frag_a", pair.read1.getReadName());

    pair = QuerySortedReadPairIteratorUtil.getNextReadPair(iterator);
    Assert.assertNotNull(pair);
    Assert.assertNotNull(pair.read1);
    Assert.assertNull(pair.read2);
    Assert.assertEquals("mapped_frag_b", pair.read1.getReadName());

    pair = QuerySortedReadPairIteratorUtil.getNextReadPair(iterator);
    Assert.assertNull(pair);
}
 
Example #8
Source File: QuerySortedReadPairIteratorUtilTest.java    From picard with MIT License 6 votes vote down vote up
@Test
public void testBasicHalfmappedReadPair() {
    SAMRecordSetBuilder builder = new SAMRecordSetBuilder(false, SAMFileHeader.SortOrder.queryname);
    builder.setReadLength(READ_LENGTH);
    builder.addPair("halfmapped_paired", 1, 1, 31, false, true, "20M", "20M", true, false, 20);
    PeekableIterator<SAMRecord> iterator = new PeekableIterator<SAMRecord>(builder.iterator());

    QuerySortedReadPairIteratorUtil.ReadPair pair = QuerySortedReadPairIteratorUtil.getNextReadPair(iterator);
    Assert.assertNotNull(pair);
    Assert.assertNotNull(pair.read1);
    Assert.assertNotNull(pair.read2);
    Assert.assertEquals("halfmapped_paired", pair.read1.getReadName());
    Assert.assertEquals("halfmapped_paired", pair.read2.getReadName());

    pair = QuerySortedReadPairIteratorUtil.getNextReadPair(iterator);
    Assert.assertNull(pair);
}
 
Example #9
Source File: QuerySortedReadPairIteratorUtilTest.java    From picard with MIT License 6 votes vote down vote up
@Test
public void testBasicUnmappedReadPair() {
    SAMRecordSetBuilder builder = new SAMRecordSetBuilder(false, SAMFileHeader.SortOrder.queryname);
    builder.setReadLength(READ_LENGTH);
    builder.addUnmappedPair("unmapped_paired");
    PeekableIterator<SAMRecord> iterator = new PeekableIterator<SAMRecord>(builder.iterator());

    QuerySortedReadPairIteratorUtil.ReadPair pair = QuerySortedReadPairIteratorUtil.getNextReadPair(iterator);
    Assert.assertNotNull(pair);
    Assert.assertNotNull(pair.read1);
    Assert.assertNotNull(pair.read2);
    Assert.assertEquals("unmapped_paired", pair.read1.getReadName());
    Assert.assertEquals("unmapped_paired", pair.read2.getReadName());

    pair = QuerySortedReadPairIteratorUtil.getNextReadPair(iterator);
    Assert.assertNull(pair);
}
 
Example #10
Source File: QuerySortedReadPairIteratorUtilTest.java    From picard with MIT License 6 votes vote down vote up
@Test
public void testBasicPairedRead() {
    SAMRecordSetBuilder builder = new SAMRecordSetBuilder(false, SAMFileHeader.SortOrder.queryname);
    builder.setReadLength(READ_LENGTH);
    builder.addPair("mapped_paired", 1, 1, 31);
    PeekableIterator<SAMRecord> iterator = new PeekableIterator<SAMRecord>(builder.iterator());

    QuerySortedReadPairIteratorUtil.ReadPair pair = QuerySortedReadPairIteratorUtil.getNextReadPair(iterator);
    Assert.assertNotNull(pair);
    Assert.assertNotNull(pair.read1);
    Assert.assertNotNull(pair.read2);
    Assert.assertEquals("mapped_paired", pair.read1.getReadName());
    Assert.assertEquals("mapped_paired", pair.read2.getReadName());

    pair = QuerySortedReadPairIteratorUtil.getNextReadPair(iterator);
    Assert.assertNull(pair);
}
 
Example #11
Source File: QuerySortedReadPairIteratorUtil.java    From picard with MIT License 6 votes vote down vote up
/**
 * Return the next usable read in the iterator
 *
 * @param iterator the iterator to pull from
 * @param justPeek if true, just peek the next usable read rather than pulling it (note: it may remove unusable reads from the iterator)
 * @return the next read or null if none are left
 */
private static SAMRecord getNextUsableRead(final PeekableIterator<SAMRecord> iterator, final boolean justPeek) {

    while (iterator.hasNext()) {
        // trash the next read if it fails PF, is secondary, or is supplementary
        final SAMRecord nextRead = iterator.peek();
        if (nextRead.getReadFailsVendorQualityCheckFlag() || nextRead.isSecondaryOrSupplementary()) {
            iterator.next();
        }
        // otherwise, return it
        else {
            return justPeek ? nextRead : iterator.next();
        }
    }

    // no good reads left
    return null;
}
 
Example #12
Source File: QuerySortedReadPairIteratorUtil.java    From picard with MIT License 6 votes vote down vote up
/**
 * Get the next read pair (where both have the same read name).
 * If we encounter an unpaired read, the second read in the pair will be set to null.
 *
 * @param iterator iterator of reads
 * @return ReadPair object holding the reads, or null if there are no more reads in the iterator
 */
public static ReadPair getNextReadPair(final PeekableIterator<SAMRecord> iterator) {

    final ReadPair readPair = new ReadPair();
    readPair.read1 = getNextUsableRead(iterator, false);
    if (readPair.read1 == null) {
        return null;
    }

    final SAMRecord peekedNextRead = getNextUsableRead(iterator, true);
    if (peekedNextRead != null && peekedNextRead.getReadName().equals(readPair.read1.getReadName())) {
        readPair.read2 = getNextUsableRead(iterator, false);
    }

    return readPair;
}
 
Example #13
Source File: MultiHitAlignedReadIterator.java    From picard with MIT License 6 votes vote down vote up
/**
 *
 * @param querynameOrderIterator
 * @param primaryAlignmentSelectionStrategy Algorithm for selecting primary alignment when it is not clear from
 *                                          the input what should be primary.
 */
MultiHitAlignedReadIterator(final CloseableIterator<SAMRecord> querynameOrderIterator,
                            final PrimaryAlignmentSelectionStrategy primaryAlignmentSelectionStrategy) {
    this.primaryAlignmentSelectionStrategy = primaryAlignmentSelectionStrategy;
    peekIterator = new PeekableIterator<SAMRecord>(new FilteringSamIterator(querynameOrderIterator,
            new SamRecordFilter() {
                // Filter unmapped reads.
                public boolean filterOut(final SAMRecord record) {
                    return record.getReadUnmappedFlag() || SAMUtils.cigarMapsNoBasesToRef(record.getCigar());
                }
                public boolean filterOut(final SAMRecord first, final SAMRecord second) {
                    return ((first.getReadUnmappedFlag() || SAMUtils.cigarMapsNoBasesToRef(first.getCigar()))
                            && (second.getReadUnmappedFlag() || SAMUtils.cigarMapsNoBasesToRef(second.getCigar())));
                }
            }));


    advance();
}
 
Example #14
Source File: CollapseTagWithContext.java    From Drop-seq with MIT License 6 votes vote down vote up
private void processContext (Iterable<SAMRecord> i, SAMFileWriter writer, boolean verbose, PrintStream outMetrics) {
	PeekableIterator<SAMRecord> iter = new PeekableIterator<>(i.iterator());
   	if (!iter.hasNext()) return;

	// get context.
	String context = getContextString(iter.peek(), this.CONTEXT_TAGS);

	// get barcode counts.
	ObjectCounter<String> barcodeCounts = getBarcodeCounts (iter, this.COLLAPSE_TAG, this.COUNT_TAGS, this.COUNT_TAGS_EDIT_DISTANCE);
	if (this.MIN_COUNT > 1 & !this.MUTATIONAL_COLLAPSE) barcodeCounts.filterByMinCount(this.MIN_COUNT);
	
	Map<String, String> collapseMap = collapseBarcodes(barcodeCounts, this.FIND_INDELS, this.EDIT_DISTANCE, this.ADAPTIVE_ED_MIN, this.ADAPTIVE_ED_MAX, this.MIN_COUNT, this.MUTATIONAL_COLLAPSE_PATH_ED, verbose, outMetrics, context, this.ADAPTIVE_ED_METRICS_ED_LIST);
	iter = new PeekableIterator<>(i.iterator());
	retagBarcodedReads(iter, barcodeCounts, collapseMap, this.DROP_SMALL_COUNTS, writer, this.COLLAPSE_TAG, this.OUT_TAG);        	

}
 
Example #15
Source File: DetectBeadSynthesisErrors.java    From Drop-seq with MIT License 5 votes vote down vote up
private void writeFile (final PeekableIterator <BeadSynthesisErrorData> iter, final PrintStream out) {
	if (!iter.hasNext()) {
		out.close();
		return;
	}

	BeadSynthesisErrorData first = iter.peek();
	int umiLength = first.getBaseLength();
	writeBadBarcodeStatisticsFileHeader(umiLength, out);
	while (iter.hasNext()) {
		BeadSynthesisErrorData bsed = iter.next();
		writeBadBarcodeStatisticsFileEntry(bsed, out);
	}
	out.close();
}
 
Example #16
Source File: AnnotatedIntervalUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Merge AnnotatedIntervals by whether the given annotation values match (all annotations must match).
 *
 * Sorting is performed as well, so input ordering is lost.  Note that the input itself is not changed.
 *
 * When two overlapping regions are merged, annotations are merged as follows:
 *  - The annotation appears in one region:  Resulting region will have the annotation with the value of that one region
 *  - The annotation appears in both regions:  Resulting region will have both values separated by the {@code annotationSeparator}
 *  parameter.
 *
 * @param initialRegions Regions to merge.  Never {@code null}
 * @param dictionary Sequence dictionary to use for sorting.  Never {@code null}
 * @param annotationNames Names of the annotations to use for matching the initialRegions.  Never {@code null}
 * @param progressUpdater Consuming function that can callback with the latest region that has been processed.
 *                        Never {@code null}  Use a no-op function if you do not wish to provide a progress update.
 * @param annotationSeparator In the case of conflicting annotation values (of annotations other than those specified
 *                            in annotationNames), use this string to separate the resulting (merged) value.
 *                            Never {@code null}
 * @param maxDistanceInBp if two segments are farther than this distance, do not merge, even if all annotation values
 *                        match.  Must be >= 0.
 * @return merged annotated intervals.  Empty list if initialRegions was empty.  Never {@code null}
 */
public static List<AnnotatedInterval> mergeRegionsByAnnotation(final List<AnnotatedInterval> initialRegions,
                                                   final SAMSequenceDictionary dictionary, final List<String> annotationNames,
                                                   final Consumer<Locatable> progressUpdater, final String annotationSeparator,
                                                               final int maxDistanceInBp) {

    Utils.nonNull(initialRegions);
    Utils.nonNull(dictionary);
    Utils.nonNull(annotationNames);
    Utils.nonNull(annotationSeparator);
    Utils.nonNull(progressUpdater);
    ParamUtils.isPositiveOrZero(maxDistanceInBp, "Cannot have a negative value for distance.");

    final List<AnnotatedInterval> segments = IntervalUtils.sortLocatablesBySequenceDictionary(initialRegions,
            dictionary);

    final List<AnnotatedInterval> finalSegments = new ArrayList<>();
    final PeekableIterator<AnnotatedInterval> segmentsIterator = new PeekableIterator<>(segments.iterator());
    while (segmentsIterator.hasNext()) {
        AnnotatedInterval currentRegion = segmentsIterator.next();

        while (segmentsIterator.peek() != null && isMergeByAnnotation(currentRegion, segmentsIterator.peek(), maxDistanceInBp, annotationNames)
                ) {
            final AnnotatedInterval toBeMerged = segmentsIterator.next();

            currentRegion = merge(currentRegion, toBeMerged, annotationSeparator);
        }
        progressUpdater.accept(currentRegion);
        finalSegments.add(currentRegion);
    }
    return finalSegments;
}
 
Example #17
Source File: RevertSam.java    From picard with MIT License 5 votes vote down vote up
/**
 * Generates a list by consuming from the iterator in order starting with the first available
 * read and continuing while subsequent reads share the same read name. If there are no reads
 * remaining returns an empty list.
 */
private List<SAMRecord> fetchByReadName(final PeekableIterator<SAMRecord> iterator) {
    final List<SAMRecord> out = new ArrayList<>();

    if (iterator.hasNext()) {
        final SAMRecord first = iterator.next();
        out.add(first);

        while (iterator.hasNext() && iterator.peek().getReadName().equals(first.getReadName())) {
            out.add(iterator.next());
        }
    }

    return out;
}
 
Example #18
Source File: CollectSamErrorMetrics.java    From picard with MIT License 5 votes vote down vote up
/**
 * Initialize the source for {@link VariantContext} information.
 * If running in {@link #INTERVAL_ITERATOR} mode, will initialize the {@link #vcfFileReader}.
 * Otherwise, will initialize the {@link #vcfIterator}.
 */
private void initializeVcfDataSource() throws IOException {
    if ( INTERVAL_ITERATOR ) {
        vcfFileReader = new VCFFileReader(IOUtil.getPath(VCF), false);
        // Make sure we can query our file for interval mode:
        if (!vcfFileReader.isQueryable()) {
            throw new PicardException("Cannot query VCF File!  VCF Files must be queryable!  Please index input VCF and re-run.");
        }
    }
    else {
        vcfIterator = new PeekableIterator<>(
                VCF == null ? Collections.emptyIterator() : new VCFFileReader(IOUtil.getPath(VCF), false).iterator());
    }
}
 
Example #19
Source File: CreateSnpIntervalFromVcf.java    From Drop-seq with MIT License 5 votes vote down vote up
private void validateRequestedSamples (final PeekableIterator<VariantContext> iterator, final Set<String> sample) {
	if (iterator.hasNext()) {
		final VariantContext site = iterator.peek();
		Set<String> vCFSamples = site.getSampleNames();
		for (String s: sample)
			if (!vCFSamples.contains(s))
				throw new IllegalArgumentException("VCF doesn't have the requested sample " + s);
	}
}
 
Example #20
Source File: PairedVariantSubContextIterator.java    From picard with MIT License 5 votes vote down vote up
public PairedVariantSubContextIterator(final Iterator<VariantContext> leftIterator, final String leftSample,
                                final Iterator<VariantContext> rightIterator, final String rightSample,
                                final SAMSequenceDictionary dict) {
    this.leftIterator  = new PeekableIterator<>(leftIterator);
    this.leftSample    = leftSample;
    this.rightIterator = new PeekableIterator<>(rightIterator);
    this.rightSample   = rightSample;
    this.comparator    = new VariantContextComparator(dict);
}
 
Example #21
Source File: CompareDropSeqAlignments.java    From Drop-seq with MIT License 5 votes vote down vote up
private PeekableIterator<List<SAMRecord>> getReadIterator (final List<File> bamFile, final Integer readQuality, boolean removedUnmappedReads) {
       Iterator<SAMRecord> iter = getQueryNameSortedData(bamFile);
	// filter out unmapped reads.
	if (removedUnmappedReads) iter = new UnmappedReadFilter(iter);
	// optionally, filter out reads below a map quality threshold.
	if (readQuality!=null) iter = new MapQualityFilteredIterator(iter, readQuality, false).iterator();
	final GroupingIterator<SAMRecord> groupingIterator = new GroupingIterator<>(iter, READ_NAME_COMPARATOR);
	PeekableIterator<List<SAMRecord>> peekable = new PeekableIterator<>(groupingIterator);
	return peekable;
}
 
Example #22
Source File: FilterBamByTag.java    From Drop-seq with MIT License 5 votes vote down vote up
/**
 *
 * @param in
 * @param out
 * @param values
 */
void processPairedMode (final SamReader in, final SAMFileWriter out, final Set<String> values, final File summaryFile, Integer mapQuality, boolean allowPartial) {
	ProgressLogger progLog = new ProgressLogger(log);
	FilteredReadsMetric m = new FilteredReadsMetric();
	
	PeekableIterator<SAMRecord> iter = new PeekableIterator<>(CustomBAMIterators.getQuerynameSortedRecords(in));
	while (iter.hasNext()) {
		SAMRecord r1 = iter.next();
		progLog.record(r1);
		boolean filterFlag1 = filterRead(r1, this.TAG, values, this.ACCEPT_TAG, mapQuality, allowPartial);
		
		SAMRecord r2 = null;
		if (iter.hasNext()) r2 = iter.peek();
		// check for r2 being null in case the last read is unpaired.
		if (r2!=null && r1.getReadName().equals(r2.getReadName())) {
			// paired read found.
			progLog.record(r2);
			r2=iter.next();
			boolean filterFlag2 = filterRead(r2, this.TAG, values, this.ACCEPT_TAG, mapQuality, allowPartial);
			// if in accept tag mode, if either read shouldn't be filterd accept the pair
			// if in reject mode, if both reads shouldn't be filtered to accept the pair.
			if ((!filterFlag1 || !filterFlag2 & this.ACCEPT_TAG) || (!filterFlag1 && !filterFlag2 & !this.ACCEPT_TAG)) {
				out.addAlignment(r1);
				out.addAlignment(r2);
				m.READS_ACCEPTED+=2;
			} else 
				m.READS_REJECTED+=2;
		} else if (!filterFlag1) {
			out.addAlignment(r1);
			m.READS_ACCEPTED++;
		} else {
			m.READS_REJECTED++;
		}
	}
	CloserUtil.close(in);
	writeSummary(summaryFile, m);
	out.close();
	reportAndCheckFilterResults(m);
}
 
Example #23
Source File: ComputeUMISharing.java    From Drop-seq with MIT License 4 votes vote down vote up
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    // Make sure this is modifiable
    EDIT_DISTANCE = new ArrayList<>(EDIT_DISTANCE);
    while (EDIT_DISTANCE.size() < COUNT_TAG.size()) {
        EDIT_DISTANCE.add(0);
    }
    parentEditDistanceMatcher = new ParentEditDistanceMatcher(this.COUNT_TAG, this.EDIT_DISTANCE, this.FIND_INDELS, this.NUM_THREADS);

    SamReader reader = SamReaderFactory.makeDefault().open(INPUT);
    final ProgressLogger progressLogger = new ProgressLogger(log, 1000000, "Sorting");
    Iterator<SAMRecord> iter = reader.iterator();
    if (LOCUS_FUNCTION_LIST.size() > 0) {
        iter = new GeneFunctionIteratorWrapper(iter, this.GENE_NAME_TAG,
                this.GENE_STRAND_TAG, this.GENE_FUNCTION_TAG, false, this.STRAND_STRATEGY,
                this.LOCUS_FUNCTION_LIST);
    }

    CloseableIterator<SAMRecord> sortedIter = SortingIteratorFactory.create(SAMRecord.class, iter,
            PARENT_CHILD_COMPARATOR, new BAMRecordCodec(reader.getFileHeader()), MAX_RECORDS_IN_RAM,
            (SortingIteratorFactory.ProgressCallback<SAMRecord>) progressLogger::record);

    PeekableIterator<List<SAMRecord>> subgroupIterator =
            new PeekableIterator<List<SAMRecord>>(new GroupingIterator<SAMRecord>(
                    new ProgressLoggingIterator(sortedIter, new ProgressLogger(log, 1000000, "Grouping")),
                    GROUPING_COMPARATOR));

    MetricsFile<UmiSharingMetrics, Integer> outFile = getMetricsFile();
    List<SAMRecord> parentSubgroup = null;
    Set<TagValues> parentTuples = new HashSet<>();

    while (subgroupIterator.hasNext()) {
        if (parentSubgroup == null ||
        !parentSubgroup.get(0).getAttribute(COLLAPSE_TAG).equals(subgroupIterator.peek().get(0).getAttribute(COLLAPSE_TAG))) {
            parentSubgroup = subgroupIterator.next();
            parentTuples = parentEditDistanceMatcher.getValues(parentSubgroup);                
        } else {                
            final List<SAMRecord> childSubgroup = subgroupIterator.next();
            final Set<TagValues> childTuples = parentEditDistanceMatcher.getValues(childSubgroup);                
            final UmiSharingMetrics metrics = new UmiSharingMetrics();
            metrics.PARENT = parentSubgroup.get(0).getAttribute(COLLAPSE_TAG).toString();
            metrics.CHILD = childSubgroup.get(0).getAttribute(UNCOLLAPSED_TAG).toString();
            metrics.NUM_PARENT = parentTuples.size();
            metrics.NUM_CHILD = childTuples.size();
            metrics.NUM_SHARED = parentEditDistanceMatcher.computeNumShared(parentTuples, childTuples);
            metrics.FRAC_SHARED = metrics.NUM_SHARED/(double)metrics.NUM_CHILD;
            outFile.addMetric(metrics);
        }
    }
    BufferedWriter w = IOUtil.openFileForBufferedWriting(OUTPUT);
    outFile.write(w);
    try {
        w.close();
    } catch (IOException e) {
        throw new RuntimeIOException("Problem writing " + OUTPUT.getAbsolutePath(), e);
    }
    CloserUtil.close(reader);
    return 0;
}
 
Example #24
Source File: DetectBeadSynthesisErrors.java    From Drop-seq with MIT License 4 votes vote down vote up
/**
 * Find all the cell barcodes that are biased.
 *
 * This walks through many (perhaps all?) of the cell barcodes, and for cell barcodes that have a sufficient number of UMIs, looks to see if there's an error
 * @param iter The BAM iterator to walk through
 * @param out the verbose output stream.
 * @param outSummary The summary output stream.
 *
 * TODO: A less memory-hog version of this would write out the summary file as it runs.  Could even write this out to a SortingIteratorFactory by implementing a codec...
 * Only need to hang onto errors that are SYNTH_MISSING_BASE, and leave the rest null (and check for that when running barcode repair.)
 *
 * @return A collection of biased cell barcodes.
 */
private BiasedBarcodeCollection findBiasedBarcodes (final UMIIterator iter, final PrintStream out, final File outSummary, final Integer lastUMIBase) {
	log.info("Finding Cell Barcodes with UMI errors");
	// Group the stream of UMICollections into groups with the same cell barcode.
       GroupingIterator<UMICollection> groupingIterator = new GroupingIterator<>(iter,
               new Comparator<UMICollection>() {
                   @Override
                   public int compare(final UMICollection o1, final UMICollection o2) {
                       return o1.getCellBarcode().compareTo(o2.getCellBarcode());
                   }
               });


	// for holding barcodes results.  The key is the cell barcode, the value is the first base to pad.
	// Used for cleanup of BAMs.
	Map<String, BeadSynthesisErrorData> errorBarcodesWithPositions = new HashMap<>();

		// for holding UMI Strings efficiently
	// TODO: evaluate if this is needed this anymore now that the report is written out disk immediately.
	StringInterner  umiStringCache = new StringInterner();

	// a sorting collection so big data can spill to disk before it's sorted and written out as a report.
	SortingCollection<BeadSynthesisErrorData> sortingCollection= SortingCollection.newInstance(BeadSynthesisErrorData.class, new BeadSynthesisErrorDataCodec(), new BeadSynthesisErrorData.SizeComparator(), this.MAX_BARCODE_ERRORS_IN_RAM);

       // gather up summary stats
    	BeadSynthesisErrorsSummaryMetric summary = new BeadSynthesisErrorsSummaryMetric();

    	// track the list of cell barcodes with sufficient UMIs to process.  We'll need them later to find intended sequences.
    	ObjectCounter<String> umisPerCellBarcode = new ObjectCounter<>();

    	// track the UMI Bias at the last base
    	Map<String, Double> umiBias = new HashMap<>();

    	// a log for processing
    	ProgressLogger prog = new ProgressLogger(log, 1000000, "Processed Cell/Gene UMIs");

    	// main data generation loop.
    	// to ease memory usage, after generating the BeadSynthesisErrorData object, use its cell barcode string for registering additional data.
       for (final List<UMICollection> umiCollectionList : groupingIterator) {
           BeadSynthesisErrorData bsed = buildBeadSynthesisErrorData(umiCollectionList, umiStringCache, prog);
           // if the cell has too few UMIs, then go to the next cell and skip all processing.
           if (bsed.getNumTranscripts() < this.MIN_UMIS_PER_CELL)
			// not sure I even want to track this...
           	// summary.LOW_UMI_COUNT++;
           	continue;

           // add the result to the summary
           summary=addDataToSummary(bsed, summary);
           umisPerCellBarcode.incrementByCount(bsed.getCellBarcode(), bsed.getNumTranscripts()); // track the cell barcode if it's sufficiently large to process.

           // explicitly call getting the error type.
           BeadSynthesisErrorType errorType=bsed.getErrorType(this.EXTREME_BASE_RATIO, this.detectPrimerTool, this.EDIT_DISTANCE);

           // gather up the UMI bias at the last base.  Note: this can be over-ridden by supplying a last base position.
           double barcodeUMIBias = bsed.getPolyTFrequencyLastBase();
           if (lastUMIBase!=null)  {
           	// bounds check
           	double freqs [] = bsed.getPolyTFrequency();
           	if (lastUMIBase>freqs.length)
           		throw new IllegalArgumentException("Trying to override UMI last base position with ["+ lastUMIBase +"] but UMI length is [" + freqs.length +"]");
           	barcodeUMIBias=freqs[lastUMIBase-1];
           }

           umiBias.put(bsed.getCellBarcode(), barcodeUMIBias);

           // finalize object so it uses less memory.
           bsed.finalize();
           // only add to the collection if you have UMIs and a repairable error.
           if (bsed.getUMICount()>=this.MIN_UMIS_PER_CELL && errorType==BeadSynthesisErrorType.SYNTH_MISSING_BASE)
           	errorBarcodesWithPositions.put(bsed.getCellBarcode(), bsed);

           // add to sorting collection if you have enough UMIs.
           sortingCollection.add(bsed);
       }

       PeekableIterator<BeadSynthesisErrorData> bsedIter = new PeekableIterator<>(sortingCollection.iterator());

       log.info("Writing Biased UMI reports");
       // write out the records from the sorting collection.
       writeFile(bsedIter, out);
       // write out the summary
       writeSummary(summary, outSummary);
       CloserUtil.close(bsedIter);

       // the error barcodes we want to fix.
       BiasedBarcodeCollection result = new BiasedBarcodeCollection(errorBarcodesWithPositions, umisPerCellBarcode, umiBias);
       return result;
}
 
Example #25
Source File: MultiCellDigitalAlleleCountsIterator.java    From Drop-seq with MIT License 4 votes vote down vote up
public MultiCellDigitalAlleleCountsIterator (DigitalAlleleCountsIterator iter) {
	this.iter=new PeekableIterator<DigitalAlleleCounts>(iter);
}
 
Example #26
Source File: QueryNameJointIterator.java    From Drop-seq with MIT License 4 votes vote down vote up
public QueryNameJointIterator(final PeekableIterator<List<SAMRecord>> iterOne, final PeekableIterator<List<SAMRecord>> iterTwo) {
	this.comp = new SAMRecordQueryNameComparator();
	this.iterOne = iterOne;
	this.iterTwo = iterTwo;
	getNextSet();
}
 
Example #27
Source File: CompareDropSeqAlignments.java    From Drop-seq with MIT License 4 votes vote down vote up
@Override
protected int doWork() {
	ObjectCounter<ContigResult> contigResults = null;
	// key = original gene name, value = collection of mappings.
	// Map<String, GeneResult> geneResults = null;
	INPUT_1=FileListParsingUtils.expandFileList(INPUT_1);
	INPUT_2=FileListParsingUtils.expandFileList(INPUT_2);
	INPUT_1.stream().forEach(x-> IOUtil.assertFileIsReadable(x));
	INPUT_2.stream().forEach(x-> IOUtil.assertFileIsReadable(x));
	// if (this.CONTIG_REPORT==null && this.GENE_REPORT==null) log.error ("What's the point of running me if there's no output defined?");
	if (this.CONTIG_REPORT!=null) {
		IOUtil.assertFileIsWritable(this.CONTIG_REPORT);
		contigResults = new ObjectCounter<>();
	}
	
	PrintStream readWriter=null;
	if (this.READ_REPORT!=null) {
		IOUtil.assertFileIsWritable(this.READ_REPORT);
		readWriter = new ErrorCheckingPrintStream(IOUtil.openFileForWriting(this.READ_REPORT));	
		writeReadHeader(this.TAG_NAMES, readWriter);
	}
	
	/*
	if (this.GENE_REPORT!=null) {
		IOUtil.assertFileIsWritable(this.GENE_REPORT);
		if (this.GENE_EXON_TAG==null) {
			log.error("If GENE_REPORT is specified, a GENE_EXON_TAG must also be specified");
			return 1;
		}

		geneResults = new HashMap<>();
	}
	*/
	
	log.info("Reading INPUT_1");
	PeekableIterator<List<SAMRecord>> oIter = getReadIterator(this.INPUT_1, this.READ_QUALITY, true);
	log.info("Reading INPUT_2");
	PeekableIterator<List<SAMRecord>> nIter = getReadIterator(this.INPUT_2, null, false);

	QueryNameJointIterator qnji = new QueryNameJointIterator(oIter, nIter);

	int counter =0;
	while (qnji.hasNext()) {
		if (counter%1000000==0) log.info("Number of reads processed [" + counter + "]");
		JointResult jr = qnji.next();
		counter++;

		List<SAMRecord> r1 = jr.getOne();
		List<SAMRecord> r2 = jr.getTwo();

		if (contigResults!=null) {
			ContigResult cr = evaluateByContig(r1, r2);
			contigResults.increment(cr);
		}
		if (readWriter!=null) {
			writeReadReportLine(r1, r2, this.TAG_NAMES, readWriter);
		}
		/*
		if (geneResults!=null)
			geneResults=evaluateByGene(r1, r2, geneResults, this.GENE_EXON_TAG);
		*/

	}
	log.info("Number of reads processed [" + counter + "]");

	if (this.CONTIG_REPORT!=null) writeContigReport(this.CONTIG_REPORT, contigResults);
	if (readWriter!=null) readWriter.close();
	
	// if (this.GENE_REPORT!=null) writeGeneReport (this.GENE_REPORT, geneResults);
	return 0;
}
 
Example #28
Source File: AbstractConcordanceWalker.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
protected ConcordanceIterator(final  Iterator<VariantContext> truthIterator, final Iterator<VariantContext> evalIterator) {
    this.truthIterator = new PeekableIterator<>(truthIterator);
    this.evalIterator = new PeekableIterator<>(evalIterator);
}
 
Example #29
Source File: PeekableGroupingIterator.java    From Drop-seq with MIT License 4 votes vote down vote up
public PeekableGroupingIterator(final Iterator<T> underlyingIterator, final Comparator<T> comparator) {
	this.underlyingIterator = new PeekableIterator<>(underlyingIterator);
	this.comparator = comparator;
	this.inGroup=false;
}
 
Example #30
Source File: GroupingIterator.java    From Drop-seq with MIT License 4 votes vote down vote up
public GroupingIterator(Iterator<T> underlyingIterator, Comparator<T> comparator) {
    this.underlyingIterator = new PeekableIterator<>(underlyingIterator);
    this.comparator = comparator;
}