Java Code Examples for htsjdk.samtools.CigarOperator#SOFT_CLIP

The following examples show how to use htsjdk.samtools.CigarOperator#SOFT_CLIP . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: CigarBuilder.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
private void advanceSectionAndValidateCigarOrder(CigarOperator operator) {
    if (operator == CigarOperator.HARD_CLIP) {
        if (section == Section.LEFT_SOFT_CLIP || section == Section.MIDDLE || section == Section.RIGHT_SOFT_CLIP) {
            section = Section.RIGHT_HARD_CLIP;
        }
    } else if (operator == CigarOperator.SOFT_CLIP) {
        Utils.validate(section != Section.RIGHT_HARD_CLIP, "cigar has already reached its right hard clip");
        if (section == Section.LEFT_HARD_CLIP) {
            section = Section.LEFT_SOFT_CLIP;
        } else if(section == Section.MIDDLE) {
            section = Section.RIGHT_SOFT_CLIP;
        }
    } else {
        Utils.validate(section != Section.RIGHT_SOFT_CLIP && section != Section.RIGHT_HARD_CLIP, "cigar has already reached right clip");
        if (section == Section.LEFT_HARD_CLIP || section == Section.LEFT_SOFT_CLIP) {
            section = Section.MIDDLE;
        }
    }
}
 
Example 2
Source File: ReadClipper.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Will hard clip every soft clipped bases in the read.
 *
 * @return a new read without the soft clipped bases (Could be an empty, unmapped read if it was all soft and hard clips).
 */
private GATKRead hardClipSoftClippedBases () {
    if (read.isEmpty()) {
        return read;
    }

    int readIndex = 0;
    int cutLeft = -1;            // first position to hard clip (inclusive)
    int cutRight = -1;           // first position to hard clip (inclusive)
    boolean rightTail = false;   // trigger to stop clipping the left tail and start cutting the right tail

    for (final CigarElement cigarElement : read.getCigarElements()) {
        if (cigarElement.getOperator() == CigarOperator.SOFT_CLIP) {
            if (rightTail) {
                cutRight = readIndex;
            }
            else {
                cutLeft = readIndex + cigarElement.getLength() - 1;
            }
        }
        else if (cigarElement.getOperator() != CigarOperator.HARD_CLIP) {
            rightTail = true;
        }

        if (cigarElement.getOperator().consumesReadBases()) {
            readIndex += cigarElement.getLength();
        }
    }

    // It is extremely important that we cut the end first otherwise the read coordinates change.
    if (cutRight >= 0) {
        this.addOp(new ClippingOp(cutRight, read.getLength() - 1));
    }
    if (cutLeft >= 0) {
        this.addOp(new ClippingOp(0, cutLeft));
    }

    return clipRead(ClippingRepresentation.HARDCLIP_BASES);
}
 
Example 3
Source File: CigarUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private static int countRefBasesAndMaybeAlsoClips(final List<CigarElement> elems, final int cigarStartIndex, final int cigarEndIndex, final boolean includeSoftClips, final boolean includeHardClips) {
    Utils.nonNull(elems);
    Utils.validateArg(cigarStartIndex >= 0 && cigarEndIndex <= elems.size() && cigarStartIndex <= cigarEndIndex, () -> "invalid index:" + 0 + " -" + elems.size());

    int result = 0;
    for (final CigarElement elem : elems.subList(cigarStartIndex, cigarEndIndex)) {
        final CigarOperator op = elem.getOperator();
        if (op.consumesReferenceBases() || (includeSoftClips && op == CigarOperator.SOFT_CLIP) || (includeHardClips && op == CigarOperator.HARD_CLIP)) {
            result += elem.getLength();
        }
    }

    return result;
}
 
Example 4
Source File: CigarUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * replace soft clips (S) with match (M) operators, normalizing the result by all the transformations of the {@link CigarBuilder} class:
 * merging consecutive identical operators and removing zero-length elements.  For example 10S10M -> 20M and 10S10M10I10I -> 20M20I.
 */
public static Cigar revertSoftClips(final Cigar originalCigar) {
    final CigarBuilder builder = new CigarBuilder();
    for (final CigarElement element : originalCigar.getCigarElements()) {
        if (element.getOperator() == CigarOperator.SOFT_CLIP) {
            builder.add(new CigarElement(element.getLength(), CigarOperator.MATCH_OR_MISMATCH));
        } else {
            builder.add(element);
        }
    }

    return builder.make();
}
 
Example 5
Source File: AlignmentUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Calculate the number of bases that are soft clipped in read with quality score greater than threshold
 *
 * Handles the case where the cigar is null (i.e., the read is unmapped), returning 0
 *
 * @param read a non-null read.
 * @param qualThreshold consider bases with quals > this value as high quality.  Must be >= 0
 * @return positive integer
 */
public static int countHighQualitySoftClips(final GATKRead read, final byte qualThreshold ) {
    Utils.nonNull(read);
    ParamUtils.isPositiveOrZero(qualThreshold, "Expected qualThreshold to be positive");

    final Cigar cigar = read.getCigar();
    if ( cigar == null ) {  // the read is unmapped
        return 0;
    }

    int numHQSoftClips = 0;
    int alignPos = 0;
    for ( final CigarElement elem : read.getCigarElements() ) {
        final int elementLength = elem.getLength();
        final CigarOperator operator = elem.getOperator();

        if (operator == CigarOperator.SOFT_CLIP) {
            for (int n = 0; n < elementLength; n++) {
                if( read.getBaseQuality(alignPos++) > qualThreshold ) {
                    numHQSoftClips++;
                }
            }
        } else if (operator.consumesReadBases()) {
            alignPos += elementLength;
        }
    }

    return numHQSoftClips;
}
 
Example 6
Source File: ReadClassifier.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private static boolean hasInitialSoftClip( final List<CigarElement> cigarElements, final GATKRead read ) {
    final ListIterator<CigarElement> itr = cigarElements.listIterator();
    if ( !itr.hasNext() ) return false;

    CigarElement firstEle = itr.next();
    if ( firstEle.getOperator() == CigarOperator.HARD_CLIP && itr.hasNext() ) {
        firstEle = itr.next();
    }
    final int clipStart = firstEle.getLength() - MIN_SOFT_CLIP_LEN;
    return firstEle.getOperator() == CigarOperator.SOFT_CLIP &&
            clipStart >= 0 &&
            isHighQualityRegion(read.getBaseQualities(), clipStart);
}
 
Example 7
Source File: ReadClassifier.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private static boolean hasFinalSoftClip( final List<CigarElement> cigarElements, final GATKRead read ) {
    final ListIterator<CigarElement> itr = cigarElements.listIterator(cigarElements.size());
    if ( !itr.hasPrevious() ) return false;

    CigarElement lastEle = itr.previous();
    if ( lastEle.getOperator() == CigarOperator.HARD_CLIP && itr.hasPrevious() ) {
        lastEle = itr.previous();
    }
    return lastEle.getOperator() == CigarOperator.SOFT_CLIP &&
            lastEle.getLength() >= MIN_SOFT_CLIP_LEN &&
            isHighQualityRegion(read.getBaseQualities(), read.getLength() - lastEle.getLength());
}
 
Example 8
Source File: ReadClipperUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
boolean assertHardClippingSoftClips(final CigarCounter clipped) {
    for (final CigarOperator op : counter.keySet()) {
        if (op == CigarOperator.HARD_CLIP || op == CigarOperator.SOFT_CLIP) {
            final int counterTotal = counter.get(CigarOperator.HARD_CLIP) + counter.get(CigarOperator.SOFT_CLIP);
            final int clippedHard = clipped.getCounterForOp(CigarOperator.HARD_CLIP);
            final int clippedSoft = clipped.getCounterForOp(CigarOperator.SOFT_CLIP);

            Assert.assertEquals(counterTotal, clippedHard);
            Assert.assertEquals(clippedSoft, 0);
        } else {
            Assert.assertEquals(counter.get(op), clipped.getCounterForOp(op));
        }
    }
    return true;
}
 
Example 9
Source File: PalindromeArtifactClipReadTransformer.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
@Override
public GATKRead apply(final GATKRead read) {
    final int adaptorBoundary = read.getAdaptorBoundary();
    if (!read.isProperlyPaired() || adaptorBoundary == ReadUtils.CANNOT_COMPUTE_ADAPTOR_BOUNDARY) {
        return read;
    }

    final Cigar cigar = read.getCigar();
    final CigarOperator firstOperator = cigar.getFirstCigarElement().getOperator();
    final CigarOperator lastOperator = cigar.getLastCigarElement().getOperator();
    final boolean readIsUpstreamOfMate = read.getFragmentLength() > 0;

    if ((readIsUpstreamOfMate && firstOperator != CigarOperator.SOFT_CLIP && firstOperator != CigarOperator.INSERTION) ||
            (!readIsUpstreamOfMate && lastOperator != CigarOperator.SOFT_CLIP && lastOperator != CigarOperator.INSERTION)) {
        return read;
    }

    final int potentialArtifactBaseCount = readIsUpstreamOfMate ? cigar.getFirstCigarElement().getLength() :
            cigar.getLastCigarElement().getLength();


    final int numBasesToCompare = Math.min(potentialArtifactBaseCount + minPalindromeSize, read.getLength());

    // the reference position of bases that are the reverse complement of the suspected artifact
    final String contig = read.getContig();
    final int refStart = readIsUpstreamOfMate ? adaptorBoundary - numBasesToCompare : adaptorBoundary + 1;
    final int refEnd = readIsUpstreamOfMate ? adaptorBoundary - 1 : adaptorBoundary + numBasesToCompare;
    
    // it's possible that the read's assigned fragment length / mate start were incompatible with the contig length,
    // so we explicitly check for this case to avoid errors below.  We can't rely on the alignment!
    if (refStart < 1 || refEnd > sequenceDictionary.getSequence(contig).getSequenceLength()) {
        return read;
    }

    // if the reference bases overlap the soft-clipped bases, it's not an artifact.  Note that read.getStart() is the unclipped start
    // this can happen, for a read with a huge soft-clip that overlaps its mate significantly.
    if ( (readIsUpstreamOfMate && refStart < read.getStart()) || (!readIsUpstreamOfMate && read.getEnd() < refEnd)) {
        return read;
    }
    final SimpleInterval refInterval = new SimpleInterval(contig, refStart, refEnd);
    final MutableInt numMatch = new MutableInt(0);

    // we traverse the reference in the forward direction, hence the read in the reverse direction
    final MutableInt readIndex = new MutableInt(readIsUpstreamOfMate ? numBasesToCompare - 1 : read.getLength() - 1);
    referenceDataSource.query(refInterval).forEachRemaining(refBase -> {
        if (BaseUtils.getComplement(refBase) == read.getBase(readIndex.getValue())) {
            numMatch.increment();
        }
        readIndex.decrement();
    });

    if (numMatch.doubleValue() / numBasesToCompare >= MIN_FRACTION_OF_MATCHING_BASES) {
        final ReadClipper readClipper = new ReadClipper(read);
        final ClippingOp clippingOp = readIsUpstreamOfMate ? new ClippingOp(0, potentialArtifactBaseCount - 1) :
                new ClippingOp(read.getLength() - potentialArtifactBaseCount, read.getLength());
        readClipper.addOp(clippingOp);
        return readClipper.clipRead(ClippingRepresentation.HARDCLIP_BASES);
    } else {
        return read;
    }
}
 
Example 10
Source File: PairedEndAndSplitReadEvidenceCollection.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
private boolean isSoftClipped(final GATKRead read) {
    final CigarOperator firstOperator = read.getCigar().getFirstCigarElement().getOperator();
    final CigarOperator lastOperator = read.getCigar().getLastCigarElement().getOperator();
    return (firstOperator == CigarOperator.SOFT_CLIP && lastOperator != CigarOperator.SOFT_CLIP) ||
            (firstOperator != CigarOperator.SOFT_CLIP && lastOperator == CigarOperator.SOFT_CLIP);
}
 
Example 11
Source File: Utils.java    From cramtools with Apache License 2.0 4 votes vote down vote up
/**
 * A rip off samtools bam_md.c
 * 
 * @param record
 * @param ref
 * @param calcMD
 * @param calcNM
 */
public static void calculateMdAndNmTags(SAMRecord record, byte[] ref, boolean calcMD, boolean calcNM) {
	if (!calcMD && !calcNM)
		return;

	Cigar cigar = record.getCigar();
	List<CigarElement> cigarElements = cigar.getCigarElements();
	byte[] seq = record.getReadBases();
	int start = record.getAlignmentStart() - 1;
	int i, x, y, u = 0;
	int nm = 0;
	StringBuffer str = new StringBuffer();

	int size = cigarElements.size();
	for (i = y = 0, x = start; i < size; ++i) {
		CigarElement ce = cigarElements.get(i);
		int j, l = ce.getLength();
		CigarOperator op = ce.getOperator();
		if (op == CigarOperator.MATCH_OR_MISMATCH || op == CigarOperator.EQ || op == CigarOperator.X) {
			for (j = 0; j < l; ++j) {
				int z = y + j;

				if (ref.length <= x + j)
					break; // out of boundary

				int c1 = 0;
				int c2 = 0;
				// try {
				c1 = seq[z];
				c2 = ref[x + j];

				if ((c1 == c2 && c1 != 15 && c2 != 15) || c1 == 0) {
					// a match
					++u;
				} else {
					str.append(u);
					str.appendCodePoint(ref[x + j]);
					u = 0;
					++nm;
				}
			}
			if (j < l)
				break;
			x += l;
			y += l;
		} else if (op == CigarOperator.DELETION) {
			str.append(u);
			str.append('^');
			for (j = 0; j < l; ++j) {
				if (ref[x + j] == 0)
					break;
				str.appendCodePoint(ref[x + j]);
			}
			u = 0;
			if (j < l)
				break;
			x += l;
			nm += l;
		} else if (op == CigarOperator.INSERTION || op == CigarOperator.SOFT_CLIP) {
			y += l;
			if (op == CigarOperator.INSERTION)
				nm += l;
		} else if (op == CigarOperator.SKIPPED_REGION) {
			x += l;
		}
	}
	str.append(u);

	if (calcMD)
		record.setAttribute(SAMTag.MD.name(), str.toString());
	if (calcNM)
		record.setAttribute(SAMTag.NM.name(), nm);
}
 
Example 12
Source File: AlignmentsTags.java    From cramtools with Apache License 2.0 4 votes vote down vote up
/**
 * Same as {@link htsjdk.samtools.util.SequenceUtil#calculateMdAndNmTags}
 * but allows for reference excerpt instead of a full reference sequence.
 * 
 * @param record
 *            SAMRecord to inject NM and MD tags into
 * @param ref
 *            reference sequence bases, may be a subsequence starting at
 *            refOffest
 * @param refOffset
 *            array offset of the reference bases, 0 is the same as the
 *            whole sequence. This value will be subtracted from the
 *            reference array index in calculations.
 * @param calcMD
 *            calculate MD tag if true
 * @param calcNM
 *            calculate NM tag if true
 */
public static void calculateMdAndNmTags(final SAMRecord record, final byte[] ref, final int refOffset,
		final boolean calcMD, final boolean calcNM) {
	if (!calcMD && !calcNM)
		return;

	final Cigar cigar = record.getCigar();
	final List<CigarElement> cigarElements = cigar.getCigarElements();
	final byte[] seq = record.getReadBases();
	final int start = record.getAlignmentStart() - 1;
	int i, x, y, u = 0;
	int nm = 0;
	final StringBuilder str = new StringBuilder();

	final int size = cigarElements.size();
	for (i = y = 0, x = start; i < size; ++i) {
		final CigarElement ce = cigarElements.get(i);
		int j;
		final int length = ce.getLength();
		final CigarOperator op = ce.getOperator();
		if (op == CigarOperator.MATCH_OR_MISMATCH || op == CigarOperator.EQ || op == CigarOperator.X) {
			for (j = 0; j < length; ++j) {
				final int z = y + j;

				if (refOffset + ref.length <= x + j)
					break; // out of boundary

				int c1 = 0;
				int c2 = 0;
				// try {
				c1 = seq[z];
				c2 = ref[x + j - refOffset];

				if ((c1 == c2) || c1 == 0) {
					// a match
					++u;
				} else {
					str.append(u);
					str.appendCodePoint(ref[x + j - refOffset]);
					u = 0;
					++nm;
				}
			}
			if (j < length)
				break;
			x += length;
			y += length;
		} else if (op == CigarOperator.DELETION) {
			str.append(u);
			str.append('^');
			for (j = 0; j < length; ++j) {
				if (ref[x + j - refOffset] == 0)
					break;
				str.appendCodePoint(ref[x + j - refOffset]);
			}
			u = 0;
			if (j < length)
				break;
			x += length;
			nm += length;
		} else if (op == CigarOperator.INSERTION || op == CigarOperator.SOFT_CLIP) {
			y += length;
			if (op == CigarOperator.INSERTION)
				nm += length;
		} else if (op == CigarOperator.SKIPPED_REGION) {
			x += length;
		}
	}
	str.append(u);

	if (calcMD)
		record.setAttribute(SAMTag.MD.name(), str.toString());
	if (calcNM)
		record.setAttribute(SAMTag.NM.name(), nm);
}