Java Code Examples for htsjdk.samtools.SAMRecord#getDuplicateReadFlag()

The following examples show how to use htsjdk.samtools.SAMRecord#getDuplicateReadFlag() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: BamTagHistogram.java    From Drop-seq with MIT License 6 votes vote down vote up
public ObjectCounter<String> getBamTagCounts (final Iterator<SAMRecord> iterator, final String tag, final int readQuality, final boolean filterPCRDuplicates) {
    ProgressLogger pl = new ProgressLogger(log, 10000000);

    ObjectCounter<String> counter = new ObjectCounter<>();

    for (final SAMRecord r : new IterableAdapter<>(iterator)) {
        pl.record(r);
        if (filterPCRDuplicates && r.getDuplicateReadFlag()) continue;
        if (r.getMappingQuality()<readQuality) continue;
        if (r.isSecondaryOrSupplementary()) continue;
        //String s1 = r.getStringAttribute(tag);
        String s1 = getAnyTagAsString(r, tag);
        if (s1!=null && s1!="") counter.increment(s1); // if the tag doesn't have a value, don't increment it.

    }
    return (counter);
}
 
Example 2
Source File: UnmarkDuplicatesIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
private static int getDuplicateCountForBam(final File bam, final File referenceFile) throws IOException {
    int duplicateCount = 0;
    final SamReaderFactory factory = SamReaderFactory.makeDefault();
    if(referenceFile != null) {
        factory.referenceSequence(referenceFile);
    }
    try ( final SamReader reader = factory.open(bam) ) {
        for ( SAMRecord read : reader ) {
            if ( read.getDuplicateReadFlag() ) {
                ++duplicateCount;
            }
        }
    }

    return duplicateCount;
}
 
Example 3
Source File: ReadQualityMetrics.java    From Drop-seq with MIT License 6 votes vote down vote up
public void addRead (SAMRecord r) {
	// skip secondary of supplemental reads.
	if (r.isSecondaryOrSupplementary()) {
		return;
	}
	
	boolean isDupe = r.getDuplicateReadFlag();
	int mapQuality = r.getMappingQuality();
	boolean unmapped = r.getReadUnmappedFlag();
	if (histogram!=null) {
		histogram.increment(mapQuality);
	}
	
	totalReads++;
	if (!unmapped) {
		mappedReads++;
		if (mapQuality >= this.mapQuality) {
			hqMappedReads++;
			if (!isDupe) {
				hqMappedReadsNoPCRDupes++;					
			}
		}
	}
}
 
Example 4
Source File: SpermSeqMarkDuplicatesTest.java    From Drop-seq with MIT License 5 votes vote down vote up
@Test
// tests which reads are marked as duplicates by the read position strategy.
public void testDetectDuplicatesByReadPositionStrategy() throws IOException {
	String [] duplicateReadNames={"READ1:2", "READ2:3"};
	Set<String> dupes = new HashSet<String>(Arrays.asList(duplicateReadNames));

	SpermSeqMarkDuplicates d = new SpermSeqMarkDuplicates();
	d.INPUT=Arrays.asList(INPUT);
	d.OUTPUT=File.createTempFile("testDetectDuplicatesByReadPositionStrategy.", ".bam");
	d.OUTPUT.deleteOnExit();
	d.OUTPUT_STATS=File.createTempFile("testDetectDuplicatesByReadPositionStrategy.", ".pcr_duplicate_metrics");
	d.OUTPUT_STATS.deleteOnExit();
	Assert.assertEquals(0, d.doWork());

	SamReader inputSam = SamReaderFactory.makeDefault().open(d.OUTPUT);
	for (SAMRecord r: inputSam) {
		boolean duplicateReadFlag = r.getDuplicateReadFlag();
		String readName = r.getReadName();
		if (dupes.contains(readName))
			Assert.assertTrue(duplicateReadFlag);
		else
			Assert.assertFalse(duplicateReadFlag);
	}
	final List<SpermSeqMarkDuplicates.PCRDuplicateMetrics> beans = MetricsFile.readBeans(d.OUTPUT_STATS);
	Assert.assertEquals(1, beans.size());
	Assert.assertEquals(dupes.size(), beans.get(0).NUM_DUPLICATES);
}
 
Example 5
Source File: BamSlicer.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
private boolean passesFilters(@NotNull final SAMRecord record)
{
    if(record.getMappingQuality() < mMinMappingQuality || record.getReadUnmappedFlag())
        return false;

    if(mDropSupplementaries && (record.getSupplementaryAlignmentFlag() || record.isSecondaryAlignment()))
        return false;

    if(mDropDuplicates && record.getDuplicateReadFlag())
        return false;

    return true;
}
 
Example 6
Source File: GcBiasMetricsCollector.java    From picard with MIT License 5 votes vote down vote up
@Override
public void acceptRecord(final GcBiasCollectorArgs args) {
    final SAMRecord rec = args.getRec();
    if (logCounter < 100 && rec.getReadBases().length == 0) {
        log.warn("Omitting read " + rec.getReadName() + " with '*' in SEQ field.");
        if (++logCounter == 100) {
            log.warn("There are more than 100 reads with '*' in SEQ field in file.");
        }
        return;
    }
    if (!rec.getReadUnmappedFlag()) {
        if (referenceIndex != rec.getReferenceIndex() || gc == null) {
            final ReferenceSequence ref = args.getRef();
            refBases = ref.getBases();
            StringUtil.toUpperCase(refBases);
            final int refLength = refBases.length;
            final int lastWindowStart = refLength - scanWindowSize;
            gc = GcBiasUtils.calculateAllGcs(refBases, lastWindowStart, scanWindowSize);
            referenceIndex = rec.getReferenceIndex();
        }

        addReadToGcData(rec, this.gcData);
        if (ignoreDuplicates && !rec.getDuplicateReadFlag()) {
            addReadToGcData(rec, this.gcDataNonDups);
        }
    } else {
        updateTotalClusters(rec, this.gcData);
        if (ignoreDuplicates && !rec.getDuplicateReadFlag()) {
            updateTotalClusters(rec, this.gcDataNonDups);
        }
    }
}
 
Example 7
Source File: InsertSizeMetricsCollector.java    From picard with MIT License 5 votes vote down vote up
@Override
public void acceptRecord(final SAMRecord record, final ReferenceSequence refSeq) {
    if (!record.getReadPairedFlag() ||
            record.getReadUnmappedFlag() ||
            record.getMateUnmappedFlag() ||
            record.getFirstOfPairFlag() ||
            record.isSecondaryOrSupplementary() ||
            (record.getDuplicateReadFlag() && !this.includeDuplicates) ||
            record.getInferredInsertSize() == 0) {
        return;
    }

    super.acceptRecord(record, refSeq);
}
 
Example 8
Source File: CollectDuplicateMetrics.java    From picard with MIT License 5 votes vote down vote up
@Override
protected void acceptRead(final SAMRecord rec, final ReferenceSequence ref) {

    final DuplicationMetrics metrics = AbstractMarkDuplicatesCommandLineProgram.addReadToLibraryMetrics(rec, header, libraryIdGenerator);
    final boolean isDuplicate = rec.getDuplicateReadFlag();

    if (isDuplicate) {
        AbstractMarkDuplicatesCommandLineProgram.addDuplicateReadToMetrics(rec, metrics);
    }
}
 
Example 9
Source File: MultiSamReader.java    From abra2 with MIT License 5 votes vote down vote up
private boolean shouldAssemble(SAMRecord read) {
	return (
		(!read.getDuplicateReadFlag()) && 
		(!read.getReadFailsVendorQualityCheckFlag()) &&
		read.getReadLength() > 0 &&
		(read.getMappingQuality() >= this.minMapqForAssembly || read.getReadUnmappedFlag()) &&
		SAMRecordUtils.isPrimary(read));  // Was previously an id check, so supplemental / secondary alignments could be included
}
 
Example 10
Source File: MNVRegionValidator.java    From hmftools with GNU General Public License v3.0 4 votes vote down vote up
private boolean isEligible(@NotNull final SAMRecord record) {
    return record.getMappingQuality() >= MIN_MAPPING_QUALITY && !(record.getReadUnmappedFlag() || record.getDuplicateReadFlag()
            || record.isSecondaryOrSupplementary());
}
 
Example 11
Source File: MarkDuplicatesTagRepresentativeReadIndexTester.java    From picard with MIT License 4 votes vote down vote up
@Override
public void test() {
    try {
        updateExpectedDuplicationMetrics();
        // Read the output and check the duplicate flag
        int outputRecords = 0;
        int indexInFile = 0;
        final SamReader reader = SamReaderFactory.makeDefault().open(getOutput());
        for (final SAMRecord record : reader) {
            outputRecords++;

            final String key = samRecordToDuplicatesFlagsKey(record);
            Assert.assertTrue(this.duplicateFlags.containsKey(key),"DOES NOT CONTAIN KEY: " + key);
            final boolean value = this.duplicateFlags.get(key);
            this.duplicateFlags.remove(key);
            if (value != record.getDuplicateReadFlag()) {
                System.err.println("Mismatching read:");
                System.err.print(record.getSAMString());
            }
            Assert.assertEquals(record.getDuplicateReadFlag(), value);
            if (testRepresentativeReads) {
                if (expectedRepresentativeIndexMap.containsKey(indexInFile) && expectedSetSizeMap.containsKey(record.getReadName())){
                    Assert.assertEquals(record.getAttribute("DI"), expectedRepresentativeIndexMap.get(indexInFile));
                    Assert.assertEquals(record.getAttribute("DS"), expectedSetSizeMap.get(record.getReadName()));
                }
            }
            indexInFile+=1;
        }
        CloserUtil.close(reader);

        // Ensure the program output the same number of records as were read in
        Assert.assertEquals(outputRecords, this.getNumberOfRecords(), ("saw " + outputRecords + " output records, vs. " + this.getNumberOfRecords() + " input records"));

        // Check the values written to metrics.txt against our input expectations
        final MetricsFile<DuplicationMetrics, Comparable<?>> metricsOutput = new MetricsFile<>();
        try{
            metricsOutput.read(new FileReader(metricsFile));
        }
        catch (final FileNotFoundException ex) {
            throw new PicardException("Metrics file not found: " + ex);
        }
        final List<DuplicationMetrics> g = metricsOutput.getMetrics();
        // expect getMetrics to return a collection with a single duplicateMetrics object
        Assert.assertEquals(metricsOutput.getMetrics().size(), 1);
        final DuplicationMetrics observedMetrics = metricsOutput.getMetrics().get(0);
        Assert.assertEquals(observedMetrics.UNPAIRED_READS_EXAMINED, expectedMetrics.UNPAIRED_READS_EXAMINED, "UNPAIRED_READS_EXAMINED does not match expected");
        Assert.assertEquals(observedMetrics.READ_PAIRS_EXAMINED, expectedMetrics.READ_PAIRS_EXAMINED, "READ_PAIRS_EXAMINED does not match expected");
        Assert.assertEquals(observedMetrics.UNMAPPED_READS, expectedMetrics.UNMAPPED_READS, "UNMAPPED_READS does not match expected");
        Assert.assertEquals(observedMetrics.UNPAIRED_READ_DUPLICATES, expectedMetrics.UNPAIRED_READ_DUPLICATES, "UNPAIRED_READ_DUPLICATES does not match expected");
        Assert.assertEquals(observedMetrics.READ_PAIR_DUPLICATES, expectedMetrics.READ_PAIR_DUPLICATES, "READ_PAIR_DUPLICATES does not match expected");
        Assert.assertEquals(observedMetrics.READ_PAIR_OPTICAL_DUPLICATES, expectedMetrics.READ_PAIR_OPTICAL_DUPLICATES, "READ_PAIR_OPTICAL_DUPLICATES does not match expected");
        Assert.assertEquals(observedMetrics.PERCENT_DUPLICATION, expectedMetrics.PERCENT_DUPLICATION, "PERCENT_DUPLICATION does not match expected");
        Assert.assertEquals(observedMetrics.ESTIMATED_LIBRARY_SIZE, expectedMetrics.ESTIMATED_LIBRARY_SIZE, "ESTIMATED_LIBRARY_SIZE does not match expected");
        Assert.assertEquals(observedMetrics.SECONDARY_OR_SUPPLEMENTARY_RDS, expectedMetrics.SECONDARY_OR_SUPPLEMENTARY_RDS, "SECONDARY_OR_SUPPLEMENTARY_RDS does not match expected");
    } finally {
        IOUtil.recursiveDelete(getOutputDir().toPath());
    }
}
 
Example 12
Source File: CountingDuplicateFilter.java    From picard with MIT License 4 votes vote down vote up
@Override
public boolean reallyFilterOut(final SAMRecord record) { return record.getDuplicateReadFlag(); }
 
Example 13
Source File: SomaticLocusCaller.java    From abra2 with MIT License 4 votes vote down vote up
private Counts getCounts(SamReader reader, LocusInfo locus) {
		
		int depth = 0;
		int altCount = 0;
		int refCount = 0;
		
		CloseableIterator<SAMRecord> iter = reader.queryOverlapping(locus.chromosome, locus.posStart, locus.posStop);
		while (iter.hasNext()) {
			SAMRecord read = iter.next();
			if (!read.getDuplicateReadFlag()) {
				depth += 1;
				
				Object[] baseAndQual = getBaseAndQualAtPosition(read, locus.posStart);
				Character base = (Character) baseAndQual[0];
				int baseQual = (Integer) baseAndQual[1];
				Character refBase = c2r.getSequence(locus.chromosome, locus.posStart, 1).charAt(0);
				
				// Override input with actual reference
//				locus.ref = new String(new char[] { refBase });
				locus.actualRef = new String(new char[] { refBase });
				
				if (locus.isIndel()) {
					if (hasIndel(read, locus)) {
						altCount += 1;
					} else if (!base.equals('N') && base.equals(refBase)) {
						refCount += 1;
					}
				} else if (baseQual >= minBaseQual) {
					
					if (!base.equals('N') && base.equals(refBase)) {
						refCount += 1;
					} else if (base.equals(locus.alt.charAt(0))) {
						altCount += 1;
					}
				}
			}
		}
		
		iter.close();
		
		return new Counts(refCount, altCount, depth);
	}
 
Example 14
Source File: BamTagOfTagCounts.java    From Drop-seq with MIT License 4 votes vote down vote up
public TagOfTagResults<String,String> getResults (final File inputBAM, final String primaryTag, final String secondaryTag, final boolean filterPCRDuplicates, final Integer readQuality) {
	TagOfTagResults<String,String> result = new TagOfTagResults<>();
	SamReader reader = SamReaderFactory.makeDefault().open(inputBAM);
	CloseableIterator<SAMRecord> iter = CustomBAMIterators.getReadsInTagOrder(reader, primaryTag);
	CloserUtil.close(reader);
	String currentTag="";
	Set<String> otherTagCollection=new HashSet<>();

	ProgressLogger progress = new ProgressLogger(log);

	while (iter.hasNext()) {
		SAMRecord r = iter.next();
		progress.record(r);
		// skip reads that don't pass filters.
		boolean discardResult=(filterPCRDuplicates && r.getDuplicateReadFlag()) || r.getMappingQuality()<readQuality || r.isSecondaryOrSupplementary();
		// short circuit if read is below the quality to be considered.
		if (discardResult) continue;

		Object d = r.getAttribute(secondaryTag);
		// short circuit if there's no tag for this read.
		if (d==null) continue;

		String data=null;

		if (d instanceof String)
			data=r.getStringAttribute(secondaryTag);
		else if (d instanceof Integer)
			data=Integer.toString(r.getIntegerAttribute(secondaryTag));


		String newTag = r.getStringAttribute(primaryTag);
		if (newTag==null)
			newTag="";

		// if you see a new tag.
		if (!currentTag.equals(newTag)) {
			// write out tag results, if any.
			if (!currentTag.equals("") && otherTagCollection.size()>0)
				result.addEntries(currentTag, otherTagCollection);
			currentTag=newTag;
			otherTagCollection.clear();
			if (!discardResult) otherTagCollection=addTagToCollection (data,otherTagCollection);

		} else // gather stats
		if (!discardResult) otherTagCollection=addTagToCollection (data,otherTagCollection);
	}
	if (otherTagCollection.size()>0)
		result.addEntries(currentTag, otherTagCollection);

	return (result);
}
 
Example 15
Source File: ChromosomeReadCount.java    From hmftools with GNU General Public License v3.0 4 votes vote down vote up
private boolean isEligible(@NotNull SAMRecord record) {
    return record.getMappingQuality() >= minMappingQuality && !(record.getReadUnmappedFlag() || record.getDuplicateReadFlag()
            || record.isSecondaryOrSupplementary());
}
 
Example 16
Source File: SamSlicer.java    From hmftools with GNU General Public License v3.0 4 votes vote down vote up
private boolean samRecordMeetsQualityRequirements(@NotNull final SAMRecord record) {
    return record.getMappingQuality() >= minMappingQuality && !record.getReadUnmappedFlag() && !record.getDuplicateReadFlag() && !record
            .isSecondaryOrSupplementary();
}
 
Example 17
Source File: SAMSlicer.java    From hmftools with GNU General Public License v3.0 4 votes vote down vote up
private boolean samRecordMeetsQualityRequirements(@NotNull final SAMRecord record) {
    return record.getMappingQuality() >= minMappingQuality && !record.getReadUnmappedFlag()
            && (!record.getDuplicateReadFlag() || !dropDuplicates) && !record.isSecondaryOrSupplementary();
}
 
Example 18
Source File: DuplicateReadTracker.java    From hmftools with GNU General Public License v3.0 4 votes vote down vote up
public boolean checkDuplicates(final SAMRecord record)
{
    if(record.getDuplicateReadFlag())
        return true;

    if(!mMarkDuplicates)
        return false;

    if(mDuplicateReadIds.contains(record.getReadName()))
    {
        mDuplicateReadIds.remove(record.getReadName());
        return true;
    }

    if(!record.getReferenceName().equals(record.getMateReferenceName()) || record.getReadNegativeStrandFlag() == record.getMateNegativeStrandFlag())
        return false;

    int firstStartPos = record.getFirstOfPairFlag() ? record.getStart() : record.getMateAlignmentStart();
    int secondStartPos = record.getFirstOfPairFlag() ? record.getMateAlignmentStart() : record.getStart();
    int readLength = record.getReadLength();
    int insertSize = record.getInferredInsertSize();

    List<int[]> dupDataList = mDuplicateCache.get(firstStartPos);

    if(dupDataList == null)
    {
        dupDataList = Lists.newArrayList();
        mDuplicateCache.put(firstStartPos, dupDataList);
    }
    else
    {
        // search for a match
        if(dupDataList.stream().anyMatch(x -> x[DUP_DATA_SECOND_START] == secondStartPos
                && x[DUP_DATA_READ_LEN] == readLength && insertSize == x[DUP_DATA_INSERT_SIZE]))
        {
            ISF_LOGGER.trace("duplicate fragment: id({}) chr({}) pos({}->{}) otherReadStart({}) insertSize({})",
                    record.getReadName(), record.getReferenceName(), firstStartPos, record.getEnd(), secondStartPos, insertSize);

            // cache so the second read can be identified immediately
            mDuplicateReadIds.add(record.getReadName());
            return true;
        }
    }

    int[] dupData = {secondStartPos, readLength, insertSize};
    dupDataList.add(dupData);

    return false;
}
 
Example 19
Source File: FilterBam.java    From Drop-seq with MIT License 4 votes vote down vote up
private boolean rejectPCRDuplicate (final SAMRecord r) {
	if (this.FILTER_PCR_DUPES && r.getDuplicateReadFlag()) return (true);
	return (false);
}
 
Example 20
Source File: PCRDuplicateFilteringIterator.java    From Drop-seq with MIT License 4 votes vote down vote up
@Override
public boolean filterOut(final SAMRecord rec) {
	return rec.getDuplicateReadFlag();

}