Java Code Examples for htsjdk.samtools.CigarOperator#consumesReadBases()

The following examples show how to use htsjdk.samtools.CigarOperator#consumesReadBases() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: CigarUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * How many bases to the right does a read's alignment start shift given its cigar and the number of left soft clips
 */
public static int alignmentStartShift(final Cigar cigar, final int numClipped) {
    int refBasesClipped = 0;

    int elementStart = 0;   // this and elementEnd are indices in the read's bases
    for (final CigarElement element : cigar.getCigarElements()) {
        final CigarOperator operator = element.getOperator();
        // hard clips consume neither read bases nor reference bases and are irrelevant
        if (operator == CigarOperator.HARD_CLIP) {
            continue;
        }
        final int elementEnd = elementStart + (operator.consumesReadBases() ? element.getLength() : 0);

        if (elementEnd <= numClipped) {  // totally within clipped span -- this includes deletions immediately following clipping
            refBasesClipped += operator.consumesReferenceBases() ? element.getLength() : 0;
        } else if (elementStart < numClipped) { // clip in middle of element, which means the element necessarily consumes read bases
            final int clippedLength = numClipped - elementStart;
            refBasesClipped += operator.consumesReferenceBases() ? clippedLength : 0;
            break;
        }
        elementStart = elementEnd;
    }
    return refBasesClipped;
}
 
Example 2
Source File: XGBoostEvidenceFilter.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
CigarQualityInfo(final BreakpointEvidence evidence) {
    if(evidence instanceof ReadEvidence) {
        int numMatched = 0;
        int refLength = 0;
        final String cigarString = ((ReadEvidence) evidence).getCigarString();
        for (final CigarElement element : TextCigarCodec.decode(cigarString).getCigarElements()) {
            final CigarOperator op = element.getOperator();
            if (op.consumesReferenceBases()) {
                refLength += element.getLength();
                if (op.consumesReadBases()) {
                    numMatched += element.getLength();
                }
            }
        }
        basesMatched = numMatched;
        referenceLength = refLength;
    }
    else {
        basesMatched = NON_READ_CIGAR_LENGTHS;
        referenceLength = NON_READ_CIGAR_LENGTHS;
    }
}
 
Example 3
Source File: AlignmentUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Calculate the number of bases that are soft clipped in read with quality score greater than threshold
 *
 * Handles the case where the cigar is null (i.e., the read is unmapped), returning 0
 *
 * @param read a non-null read.
 * @param qualThreshold consider bases with quals > this value as high quality.  Must be >= 0
 * @return positive integer
 */
public static int countHighQualitySoftClips(final GATKRead read, final byte qualThreshold ) {
    Utils.nonNull(read);
    ParamUtils.isPositiveOrZero(qualThreshold, "Expected qualThreshold to be positive");

    final Cigar cigar = read.getCigar();
    if ( cigar == null ) {  // the read is unmapped
        return 0;
    }

    int numHQSoftClips = 0;
    int alignPos = 0;
    for ( final CigarElement elem : read.getCigarElements() ) {
        final int elementLength = elem.getLength();
        final CigarOperator operator = elem.getOperator();

        if (operator == CigarOperator.SOFT_CLIP) {
            for (int n = 0; n < elementLength; n++) {
                if( read.getBaseQuality(alignPos++) > qualThreshold ) {
                    numHQSoftClips++;
                }
            }
        } else if (operator.consumesReadBases()) {
            alignPos += elementLength;
        }
    }

    return numHQSoftClips;
}
 
Example 4
Source File: SNPBasePileUp.java    From Drop-seq with MIT License 4 votes vote down vote up
/**
 * For a read on this pileup, get the base and quality of the base that is at the same
 * position as the SNP.  If the read does not overlap the interval, then return an empty array.
 * @param r The read
 * @return A length 2 byte array containing the base and quality.  Empty if the read does not overlap the interval.
 */
public byte [] getBaseAndQualityOverlappingInterval (final SAMRecord r) {
	byte [] result = new byte [2];

	List<CigarElement> elements = r.getCigar().getCigarElements();
	Iterator<AlignmentBlock> blocks = r.getAlignmentBlocks().iterator();

	int finalSNPLocalPosition=-1;
	int lengthTraversed=0;

	for (CigarElement ce: elements) {
		CigarOperator co = ce.getOperator();
		// you're in an alignment block
		if (co==CigarOperator.M) {
			// get the next alignment block
			AlignmentBlock b = blocks.next();
			int refStart = b.getReferenceStart();
			int snpLocalPos=this.getPosition() - refStart +1;
			int blockLength=b.getLength();

			// is the local position inside this alignment block?
			// if not, move onto the next block.
			if (snpLocalPos >0 && snpLocalPos<=blockLength) {
				// found it!  Done.
				finalSNPLocalPosition=snpLocalPos+lengthTraversed;
				break;
			}
		}
		// consume the bases if necessary and move on to the next element.
		if (co.consumesReadBases())
			lengthTraversed+=ce.getLength();
	}

	// if the position is assigned, then add to the pileup.
	if (finalSNPLocalPosition!=-1) {
		// arrays 0 based.
		byte [] quals = r.getBaseQualities();
		byte [] bases = r.getReadBases();
		byte base = bases[finalSNPLocalPosition-1];
		// char baseChar = StringUtil.byteToChar(base);
		byte qual = quals[finalSNPLocalPosition-1];
		result[0]=base;
		result[1]=qual;
		return (result);
	}
	return (ArrayUtils.EMPTY_BYTE_ARRAY);
}
 
Example 5
Source File: CigarUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 * Given a cigar string, soft clip up to leftClipEnd and soft clip starting at rightClipBegin
 * @param start initial index to clip within read bases, inclusive
 * @param stop final index to clip within read bases exclusive
 * @param clippingOperator      type of clipping -- must be either hard clip or soft clip
 */
public static Cigar clipCigar(final Cigar cigar, final int start, final int stop, CigarOperator clippingOperator) {
    Utils.validateArg(clippingOperator.isClipping(), "Not a clipping operator");
    final boolean clipLeft = start == 0;

    final CigarBuilder newCigar = new CigarBuilder();

    int elementStart = 0;
    for (final CigarElement element : cigar.getCigarElements()) {
        final CigarOperator operator = element.getOperator();
        // copy hard clips
        if (operator == CigarOperator.HARD_CLIP) {
            newCigar.add(new CigarElement(element.getLength(), element.getOperator()));
            continue;
        }
        final int elementEnd = elementStart + (operator.consumesReadBases() ? element.getLength() : 0);

        // element precedes start or follows end of clip, copy it to new cigar
        if (elementEnd <= start || elementStart >= stop) {
            // edge case: deletions at edge of clipping are meaningless and we skip them
            if (operator.consumesReadBases() || (elementStart != start && elementStart != stop)) {
                newCigar.add(new CigarElement(element.getLength(), operator));
            }
        } else {    // otherwise, some or all of the element is soft-clipped
            final int unclippedLength = clipLeft ? elementEnd - stop : start - elementStart;
            final int clippedLength = element.getLength() - unclippedLength;

            if (unclippedLength <= 0) { // totally clipped
                if (operator.consumesReadBases()) {
                    newCigar.add(new CigarElement(element.getLength(), clippingOperator));
                }
            } else if (clipLeft) {
                newCigar.add(new CigarElement(clippedLength, clippingOperator));
                newCigar.add(new CigarElement(unclippedLength, operator));
            } else {
                newCigar.add(new CigarElement(unclippedLength, operator));
                newCigar.add(new CigarElement(clippedLength, clippingOperator));
            }
        }
        elementStart = elementEnd;
    }

    return newCigar.make();
}
 
Example 6
Source File: AlignmentUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
private static Pair<byte[], byte[]> getBasesAndBaseQualitiesAlignedOneToOne(final GATKRead read, final byte basePadCharacter, final byte qualityPadCharacter) {
    Utils.nonNull(read);
    // As this code is performance sensitive in the HaplotypeCaller, we elect to use the noCopy versions of these getters.
    // We can do this because we don't mutate base or quality arrays in this method or in its accessors
    final byte[] bases = read.getBasesNoCopy();
    final byte[] baseQualities = read.getBaseQualitiesNoCopy();
    final int numCigarElements = read.numCigarElements();
    boolean sawIndel = false;

    // Check if the cigar contains indels
    // Note that we don't call ContainsOperator() here twice to avoid the performance hit of building stream iterators twice
    for (int i = 0; i < numCigarElements; i++) {
        final CigarOperator e = read.getCigarElement(i).getOperator();
        if (e == CigarOperator.INSERTION || e == CigarOperator.DELETION) {
            sawIndel = true;
            break;
        }
    }
    if (!sawIndel) {
        return new ImmutablePair<>(bases, baseQualities);
    }
    else {
        final int numberRefBasesIncludingSoftclips = CigarUtils.countRefBasesAndSoftClips(read.getCigarElements(), 0, numCigarElements);
        final byte[] paddedBases = new byte[numberRefBasesIncludingSoftclips];
        final byte[] paddedBaseQualities = new byte[numberRefBasesIncludingSoftclips];
        int literalPos = 0;
        int paddedPos = 0;
        for ( int i = 0; i < numCigarElements; i++ ) {
            final CigarElement ce = read.getCigarElement(i);
            final CigarOperator co = ce.getOperator();
            if (co.consumesReadBases()) {
                if (!co.consumesReferenceBases()) {
                    literalPos += ce.getLength();  //skip inserted bases
                }
                else {
                    System.arraycopy(bases, literalPos, paddedBases, paddedPos, ce.getLength());
                    System.arraycopy(baseQualities, literalPos, paddedBaseQualities, paddedPos, ce.getLength());
                    literalPos += ce.getLength();
                    paddedPos += ce.getLength();
                }
            }
            else if (co.consumesReferenceBases()) {
                for ( int j = 0; j < ce.getLength(); j++ ) {  //pad deleted bases
                    paddedBases[paddedPos] = basePadCharacter;
                    paddedBaseQualities[paddedPos] = qualityPadCharacter;
                    paddedPos++;
                }
            }
        }
        return new ImmutablePair<>(paddedBases, paddedBaseQualities);
    }
}