Java Code Examples for htsjdk.samtools.util.SequenceUtil#isNoCall()

The following examples show how to use htsjdk.samtools.util.SequenceUtil#isNoCall() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ReadBaseStratification.java    From picard with MIT License 6 votes vote down vote up
@Override
public Integer stratify(final SAMRecord sam) {
    int numberOfNsInRead = 0;

    // Check to see if we've already seen this record before.  If not, count the number
    // of Ns in the read, and store it in a transient attribute.  If we have seen it
    // before, simply retrieve the value and avoid doing the computation again.
    if (sam.getTransientAttribute(numberOfNsTag) != null) {
        numberOfNsInRead = (Integer) sam.getTransientAttribute(numberOfNsTag);
    } else {
        final byte[] bases = sam.getReadBases();
        for (final byte base : bases) {
            if (SequenceUtil.isNoCall(base)) {
                numberOfNsInRead++;
            }
        }
        sam.setTransientAttribute(numberOfNsTag, numberOfNsInRead);
    }
    return numberOfNsInRead;
}
 
Example 2
Source File: QualityScoreDistributionSpark.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Adds a read to this count object by increasing counts of the base qualities.
 */
Counts addRead(final GATKRead read) {
    final byte[] bases = read.getBases();
    final byte[] quals = read.getBaseQualities();
    final byte[] oq    = ReadUtils.getOriginalBaseQualities(read);

    final int length = quals.length;

    for (int i=0; i<length; ++i) {
        if (includeNoCalls || !SequenceUtil.isNoCall(bases[i])) {
            qCounts[quals[i]]++;
            if (oq != null) {
                oqCounts[oq[i]]++;
            }
        }
    }
    return this;
}
 
Example 3
Source File: QualityScoreDistribution.java    From picard with MIT License 6 votes vote down vote up
@Override
protected void acceptRead(final SAMRecord rec, final ReferenceSequence ref) {
    // Skip unwanted records
    if (PF_READS_ONLY && rec.getReadFailsVendorQualityCheckFlag()) return;
    if (ALIGNED_READS_ONLY && rec.getReadUnmappedFlag()) return;
    if (rec.isSecondaryOrSupplementary()) return;

    final byte[] bases = rec.getReadBases();
    final byte[] quals = rec.getBaseQualities();
    final byte[] oq    = rec.getOriginalBaseQualities();

    final int length = quals.length;

    for (int i=0; i<length; ++i) {
        if (INCLUDE_NO_CALLS || !SequenceUtil.isNoCall(bases[i])) {
            qCounts[quals[i]]++;
            if (oq != null) oqCounts[oq[i]]++;
        }
    }
}
 
Example 4
Source File: SingleBarcodeDistanceMetric.java    From picard with MIT License 6 votes vote down vote up
public int hammingDistance() {
    int numMismatches = 0;
    for (int i = 0; i < barcodeBases.length && i < readBases.length && numMismatches <= maximalInterestingDistance; ++i) {

        if (SequenceUtil.isNoCall(readBases[i])) {
            continue;
        }

        if (!SequenceUtil.basesEqual(barcodeBases[i], readBases[i])) {
            ++numMismatches;
            continue;
        }

        // bases are equal but if quality is low we still penalize.
        // TODO: is it actually useful to penalize barcodes in this way?
        if (readQualities != null && readQualities[i] < minimumBaseQuality) {
            ++numMismatches;
        }
    }
    return numMismatches;
}
 
Example 5
Source File: TrimSequenceTemplate.java    From Drop-seq with MIT License 5 votes vote down vote up
/**
 * Does the testString have part (or all) of the template in it?
 * The test string starting at position 1 may have some portion of the template.
 * For example, ANY bases of the template could have the 8 bases in the middle of a 30 base testString.
 * This is more flexible and slow than getPositionInTemplate, as it will let any portion of the template occur anywhere in the read.
 * @param testString A string to test against the template
 * @param minMatch The number of bases that must match in both
 * @param mismatchesAllowed How many mismatches can there be between the template and the read
 * @return The position in the read, 0 based.
 */
public int getPositionInRead (final String testString, final int minMatch, final int mismatchesAllowed) {
	byte [] read = StringUtil.stringToBytes(testString);
	// If the read's too short we can't possibly match it
       if (read == null || read.length < minMatch) return -1;

       int maxNumMatches=0;
       int bestReadStartPos=0;

       int lastViableReadIndex=read.length-minMatch+1;
       // Walk forwards
       READ_LOOP:  // walks through the read, looking for the template.
       for (int readIndex = 0; readIndex<lastViableReadIndex; readIndex++) {
           int mismatches = 0;
           int numMatches= 0;

           // can only search as far as you have left in the read.
           final int searchLength = Math.min(this.bases.length, read.length-readIndex);
           if (searchLength<minMatch) break;  // if you don't have enough search space left to match a min number of bases you give up.
           for (int templateIndex = 0; templateIndex < searchLength; templateIndex++) {
           	int tempReadIndex=templateIndex+readIndex;
           	char templateBase = (char)this.bases[templateIndex];
           	char readBase = (char) read[tempReadIndex];
               if (SequenceUtil.isNoCall(read[tempReadIndex]) || !SequenceUtil.basesEqual(this.bases[templateIndex], read[tempReadIndex])) {
               	if (++mismatches > mismatchesAllowed) continue READ_LOOP;
               } else
				numMatches++;
               if (numMatches>maxNumMatches) {
               	maxNumMatches=numMatches;
               	bestReadStartPos=readIndex;
               }
           }

       }
       if (maxNumMatches<minMatch) return (-1);
       return bestReadStartPos;
}
 
Example 6
Source File: ReadBaseStratification.java    From picard with MIT License 5 votes vote down vote up
private static Character stratifyReferenceBase(final RecordAndOffset recordAndOffset,
                                               final SAMLocusAndReference locusInfo) {
    final ReadDirection direction = ReadDirection.of(recordAndOffset.getRecord());

    if (SequenceUtil.isNoCall(locusInfo.getReferenceBase())) {
        return null;
    }

    return stratifySequenceBase(locusInfo.getReferenceBase(), direction == ReadDirection.NEGATIVE);
}
 
Example 7
Source File: SimpleErrorCalculator.java    From picard with MIT License 5 votes vote down vote up
/**
 * The function by which new loci are "shown" to the calculator
 **/
@Override
public void addBase(final SamLocusIterator.RecordAndOffset recordAndOffset, final SamLocusAndReferenceIterator.SAMLocusAndReference locusAndRef) {
    super.addBase(recordAndOffset, locusAndRef);
    final byte readBase = recordAndOffset.getReadBase();
    if (!SequenceUtil.isNoCall(readBase) && (readBase != locusAndRef.getReferenceBase())) {
        nMismatchingBases++;
    }
}
 
Example 8
Source File: BaseErrorCalculator.java    From picard with MIT License 5 votes vote down vote up
/**
 * the function by which new loci are "shown" to the calculator
 **/
@Override
public void addBase(final SamLocusIterator.RecordAndOffset recordAndOffset, final SamLocusAndReferenceIterator.SAMLocusAndReference locusAndRef) {
    if (recordAndOffset.getAlignmentType() == AbstractRecordAndOffset.AlignmentType.Match) {
        if (!SequenceUtil.isNoCall(recordAndOffset.getReadBase())) {
            nBases++;
        }
    } else if (recordAndOffset.getAlignmentType() == AbstractRecordAndOffset.AlignmentType.Insertion) {
        final CigarElement cigarElement = ReadBaseStratification.getIndelElement(recordAndOffset);
        if (cigarElement != null) {
            nBases += cigarElement.getLength();
        }
    }
}
 
Example 9
Source File: EstimateLibraryComplexity.java    From picard with MIT License 5 votes vote down vote up
/**
 * Checks that the average quality over the entire read is >= min, and that the first N bases do
 * not contain any no-calls.
 */
boolean passesQualityCheck(final byte[] bases, final byte[] quals, final int seedLength, final int minQuality) {
    if (bases.length < seedLength) return false;

    for (int i = 0; i < seedLength; ++i) {
        if (SequenceUtil.isNoCall(bases[i])) return false;
    }

    final int maxReadLength = (MAX_READ_LENGTH <= 0) ? Integer.MAX_VALUE : MAX_READ_LENGTH;
    final int readLength = Math.min(bases.length, maxReadLength);
    int total = 0;
    for (int i = 0; i < readLength; i++) total += quals[i];
    return total / readLength >= minQuality;
}
 
Example 10
Source File: AdapterTrimTransformer.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Override
public GATKRead apply(final GATKRead read) {
    if (read.getLength() < minClipLength) return read;
    final byte[] bases = read.getBases();

    //Be sure to find the best match in case one adapter ends with another adapter's sequence (within maxMismatches difference)
    int bestNumMismatches = maxMismatches;
    int bestAlignmentStart = bases.length;
    boolean foundAdapter = false;

    //Adapter list loop
    for (final String adapterSequence : adapterSequences) {
        //Start at the end of the read and walk backwards
        int alignmentLength = minClipLength;
        for (int alignmentStart = bases.length - minClipLength; alignmentStart >= 0; alignmentStart--) {
            int numMismatches = 0;
            //Check each base for mismatches
            for (int j = 0; j < alignmentLength; j++) {
                if (!SequenceUtil.isNoCall((byte) adapterSequence.charAt(j)) && bases[alignmentStart + j] != adapterSequence.charAt(j)) {
                    if (++numMismatches > maxMismatches) break;
                }
            }
            if (numMismatches < bestNumMismatches || (numMismatches == bestNumMismatches && alignmentStart < bestAlignmentStart)) {
                //We have a (better/earlier) match
                bestNumMismatches = numMismatches;
                bestAlignmentStart = alignmentStart;
                foundAdapter = true;
            }
            alignmentLength = alignmentLength < adapterSequence.length() ? alignmentLength + 1 : alignmentLength;
        }
    }
    if (foundAdapter) {
        //Hard clip from the beginning of the adapter to the end of the read
        final ReadClipper readClipper = new ReadClipper(read);
        readClipper.addOp(new ClippingOp(bestAlignmentStart, read.getLength()));
        return readClipper.clipRead(ClippingRepresentation.HARDCLIP_BASES);
    }
    return read;
}
 
Example 11
Source File: FastWgsMetricsCollector.java    From picard with MIT License 5 votes vote down vote up
private void processRecord(int position, ReferenceSequence ref, EdgingRecordAndOffset record, Set<EdgingRecordAndOffset> recordsAndOffsetsForName) {
    long processedLoci = counter;
    readsNames.put(record.getReadName(), recordsAndOffsetsForName);
    final byte[] qualities = record.getBaseQualities();
    final byte[] bases = record.getRecord().getReadBases();
    for (int i = 0; i < record.getLength(); i++) {
        final int index = i + record.getRefPos();
        if (isReferenceBaseN(index, ref)) {
            continue;
        }
        final byte quality = qualities[i + record.getOffset()];
        if (quality <= 2) {
            basesExcludedByBaseq++;
        } else {
            if (unfilteredDepthSize.get(index) < coverageCap) {
                unfilteredBaseQHistogramArray[quality]++;
                unfilteredDepthSize.increment(index);
            }
            if (quality < collectWgsMetrics.MINIMUM_BASE_QUALITY || SequenceUtil.isNoCall(bases[i + record.getOffset()])){
                basesExcludedByBaseq++;
            } else {
                final int bsq = excludeByQuality(recordsAndOffsetsForName, index);
                if (recordsAndOffsetsForName.size() - bsq > 0) {
                    basesExcludedByOverlap++;
                } else {
                    pileupSize.increment(index);
                }
            }
        }
        if (isTimeToStop(++processedLoci)) {
            break;
        }
    }
    recordsAndOffsetsForName.add(record);
}
 
Example 12
Source File: CollectWgsMetrics.java    From picard with MIT License 5 votes vote down vote up
@Override
public void addInfo(final AbstractLocusInfo<SamLocusIterator.RecordAndOffset> info, final ReferenceSequence ref, boolean referenceBaseN) {

    if (referenceBaseN) {
        return;
    }
    // Figure out the coverage while not counting overlapping reads twice, and excluding various things
    final HashSet<String> readNames = new HashSet<>(info.getRecordAndOffsets().size());
    int pileupSize = 0;
    int unfilteredDepth = 0;

    for (final SamLocusIterator.RecordAndOffset recs : info.getRecordAndOffsets()) {
        if (recs.getBaseQuality() <= 2) { ++basesExcludedByBaseq;   continue; }

        // we add to the base quality histogram any bases that have quality > 2
        // the raw depth may exceed the coverageCap before the high-quality depth does. So stop counting once we reach the coverage cap.
        if (unfilteredDepth < coverageCap) {
            unfilteredBaseQHistogramArray[recs.getRecord().getBaseQualities()[recs.getOffset()]]++;
            unfilteredDepth++;
        }

        if (recs.getBaseQuality() < collectWgsMetrics.MINIMUM_BASE_QUALITY ||
                SequenceUtil.isNoCall(recs.getReadBase())) {
            ++basesExcludedByBaseq;
            continue;
        }
        if (!readNames.add(recs.getRecord().getReadName())) {
            ++basesExcludedByOverlap;
            continue;
        }
        pileupSize++;
    }
    final int highQualityDepth = Math.min(pileupSize, coverageCap);
    if (highQualityDepth < pileupSize) basesExcludedByCapping += pileupSize - coverageCap;
    highQualityDepthHistogramArray[highQualityDepth]++;
    unfilteredDepthHistogramArray[unfilteredDepth]++;
}
 
Example 13
Source File: AdapterMarker.java    From picard with MIT License 5 votes vote down vote up
/**
 * Truncate to the given length, and in addition truncate any trailing Ns.
 */
private String substringAndRemoveTrailingNs(final String s, int length) {
    length = Math.min(length, s.length());
    final byte[] bytes = StringUtil.stringToBytes(s);
    while (length > 0 && SequenceUtil.isNoCall(bytes[length - 1])) {
        length--;
    }
    return s.substring(0, length);
}
 
Example 14
Source File: SingleBarcodeDistanceMetric.java    From picard with MIT License 5 votes vote down vote up
/**
     * Similar to Hamming distance but this version doesn't penalize matching bases with low quality for the read.

     * @return the edit distance between the barcode(s) and the read(s)
     */
public int lenientHammingDistance() {
    int numMismatches = 0;
    for (int i = 0; i < barcodeBases.length && i < maskedBases.length && numMismatches <= maximalInterestingDistance; ++i) {

        if (SequenceUtil.isNoCall(maskedBases[i])) {
            continue;
        }

        if (!SequenceUtil.basesEqual(barcodeBases[i], maskedBases[i])) {
            ++numMismatches;
        }
    }
    return numMismatches;
}
 
Example 15
Source File: ExtractIlluminaBarcodes.java    From picard with MIT License 4 votes vote down vote up
static BarcodeMatch calculateBarcodeMatch(final byte[][] readSubsequences,
                                          final byte[][] qualityScores,
                                          final Map<String, BarcodeMetric> metrics,
                                          final int maxNoCalls, final int maxMismatches,
                                          final int minMismatchDelta, final int minimumBaseQuality,
                                          final DistanceMetric distanceMode) {
    final BarcodeMatch match;
    BarcodeMetric bestBarcodeMetric = null;
    match = new BarcodeMatch();

    int totalBarcodeReadBases = 0;
    int numNoCalls = 0; // NoCalls are calculated for all the barcodes combined

    for (final byte[] bc : readSubsequences) {
        totalBarcodeReadBases += bc.length;
        for (final byte b : bc) {
            if (SequenceUtil.isNoCall(b)) {
                ++numNoCalls;
            }
        }
    }

    // PIC-506 When forcing all reads to match a single barcode, allow a read to match even if every
    // base is a mismatch.
    int numMismatchesInBestBarcode = totalBarcodeReadBases + 1;
    int numMismatchesInSecondBestBarcode = totalBarcodeReadBases + 1;

    for (final BarcodeMetric barcodeMetric : metrics.values()) {
        // need to add maxMismatches + minMismatchDelta together since the result might get used as numMismatchesInSecondBestBarcode
        final BarcodeEditDistanceQuery barcodeEditDistanceQuery = new BarcodeEditDistanceQuery(barcodeMetric.barcodeBytes, readSubsequences, qualityScores,
                minimumBaseQuality, Math.min(maxMismatches, numMismatchesInBestBarcode) + minMismatchDelta);
        final int numMismatches = distanceMode.distance(barcodeEditDistanceQuery);

        if (numMismatches < numMismatchesInBestBarcode) {
            if (bestBarcodeMetric != null) {
                numMismatchesInSecondBestBarcode = numMismatchesInBestBarcode;
            }
            numMismatchesInBestBarcode = numMismatches;
            bestBarcodeMetric = barcodeMetric;
        } else if (numMismatches < numMismatchesInSecondBestBarcode) {
            numMismatchesInSecondBestBarcode = numMismatches;
        }
    }

    match.matched = bestBarcodeMetric != null &&
            numNoCalls <= maxNoCalls &&
            numMismatchesInBestBarcode <= maxMismatches &&
            numMismatchesInSecondBestBarcode - numMismatchesInBestBarcode >= minMismatchDelta;

    // If we have something that's not a "match" but matches one barcode
    // slightly, we output that matching barcode in lower case
    if (numNoCalls + numMismatchesInBestBarcode < totalBarcodeReadBases && bestBarcodeMetric != null) {
        match.mismatches = numMismatchesInBestBarcode;
        match.mismatchesToSecondBest = numMismatchesInSecondBestBarcode;
        match.barcode = bestBarcodeMetric.BARCODE_WITHOUT_DELIMITER.toLowerCase();
    } else {
        match.mismatches = totalBarcodeReadBases;
        match.barcode = "";
    }

    if (match.matched) {
        match.barcode = bestBarcodeMetric.BARCODE_WITHOUT_DELIMITER;
    }
    return match;
}
 
Example 16
Source File: OverlappingReadsErrorCalculator.java    From picard with MIT License 4 votes vote down vote up
/**
 * The function by which new loci are "shown" to the calculator
 **/
@Override
public void addBase(final SamLocusIterator.RecordAndOffset recordAndOffset, final SamLocusAndReferenceIterator.SAMLocusAndReference locusAndRef) {
    super.addBase(recordAndOffset, locusAndRef);
    final byte readBase = recordAndOffset.getReadBase();
    final SAMRecord record = recordAndOffset.getRecord();

    // by traversing the reads and splitting into sets with the same name we convert a O(N^2) iteration
    // into a O(N) iteration
    updateReadNameSet(locusAndRef.getLocus());

    final SamLocusIterator.RecordAndOffset mate = readNameSets.get(record.getReadName())
            .stream()
            .filter(putative -> areReadsMates(record, putative.getRecord()))
            .findFirst()
            .orElse(null);

    // we are only interested in bases for which the mate read also has a base over the same locus
    if (mate == null) return;
    // both bases need to be called for this error calculation

    final byte mateBase = mate.getReadBase();
    if (SequenceUtil.isNoCall(readBase)) return;
    if (SequenceUtil.isNoCall(mateBase)) return;

    nTotalBasesWithOverlappingReads++;

    // Only bases that disagree with the reference are counted as errors.
    if (SequenceUtil.basesEqual(readBase, locusAndRef.getReferenceBase())) return;

    final boolean agreesWithMate = SequenceUtil.basesEqual(readBase, mateBase);
    final boolean mateAgreesWithRef = SequenceUtil.basesEqual(mateBase, locusAndRef.getReferenceBase());

    if (agreesWithMate) {
        nBothDisagreeWithReference++;
    } else if (mateAgreesWithRef) {
        nDisagreeWithRefAndMate++;
    } else {
        nThreeWaysDisagreement++;
    }
}
 
Example 17
Source File: AbstractWgsMetricsCollector.java    From picard with MIT License 2 votes vote down vote up
/**
 * Checks if reference base at given position is unknown.
 *
 * @param position to check the base
 * @param ref      reference sequence
 * @return true if reference base at position represents a no call, otherwise false
 */
boolean isReferenceBaseN(final int position, final ReferenceSequence ref) {
    final byte base = ref.getBases()[position - 1];
    return SequenceUtil.isNoCall(base);
}