Java Code Examples for htsjdk.samtools.CigarOperator#HARD_CLIP

The following examples show how to use htsjdk.samtools.CigarOperator#HARD_CLIP . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: CigarBuilder.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
private void advanceSectionAndValidateCigarOrder(CigarOperator operator) {
    if (operator == CigarOperator.HARD_CLIP) {
        if (section == Section.LEFT_SOFT_CLIP || section == Section.MIDDLE || section == Section.RIGHT_SOFT_CLIP) {
            section = Section.RIGHT_HARD_CLIP;
        }
    } else if (operator == CigarOperator.SOFT_CLIP) {
        Utils.validate(section != Section.RIGHT_HARD_CLIP, "cigar has already reached its right hard clip");
        if (section == Section.LEFT_HARD_CLIP) {
            section = Section.LEFT_SOFT_CLIP;
        } else if(section == Section.MIDDLE) {
            section = Section.RIGHT_SOFT_CLIP;
        }
    } else {
        Utils.validate(section != Section.RIGHT_SOFT_CLIP && section != Section.RIGHT_HARD_CLIP, "cigar has already reached right clip");
        if (section == Section.LEFT_HARD_CLIP || section == Section.LEFT_SOFT_CLIP) {
            section = Section.MIDDLE;
        }
    }
}
 
Example 2
Source File: CigarUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * How many bases to the right does a read's alignment start shift given its cigar and the number of left soft clips
 */
public static int alignmentStartShift(final Cigar cigar, final int numClipped) {
    int refBasesClipped = 0;

    int elementStart = 0;   // this and elementEnd are indices in the read's bases
    for (final CigarElement element : cigar.getCigarElements()) {
        final CigarOperator operator = element.getOperator();
        // hard clips consume neither read bases nor reference bases and are irrelevant
        if (operator == CigarOperator.HARD_CLIP) {
            continue;
        }
        final int elementEnd = elementStart + (operator.consumesReadBases() ? element.getLength() : 0);

        if (elementEnd <= numClipped) {  // totally within clipped span -- this includes deletions immediately following clipping
            refBasesClipped += operator.consumesReferenceBases() ? element.getLength() : 0;
        } else if (elementStart < numClipped) { // clip in middle of element, which means the element necessarily consumes read bases
            final int clippedLength = numClipped - elementStart;
            refBasesClipped += operator.consumesReferenceBases() ? clippedLength : 0;
            break;
        }
        elementStart = elementEnd;
    }
    return refBasesClipped;
}
 
Example 3
Source File: ReadClipper.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Will hard clip every soft clipped bases in the read.
 *
 * @return a new read without the soft clipped bases (Could be an empty, unmapped read if it was all soft and hard clips).
 */
private GATKRead hardClipSoftClippedBases () {
    if (read.isEmpty()) {
        return read;
    }

    int readIndex = 0;
    int cutLeft = -1;            // first position to hard clip (inclusive)
    int cutRight = -1;           // first position to hard clip (inclusive)
    boolean rightTail = false;   // trigger to stop clipping the left tail and start cutting the right tail

    for (final CigarElement cigarElement : read.getCigarElements()) {
        if (cigarElement.getOperator() == CigarOperator.SOFT_CLIP) {
            if (rightTail) {
                cutRight = readIndex;
            }
            else {
                cutLeft = readIndex + cigarElement.getLength() - 1;
            }
        }
        else if (cigarElement.getOperator() != CigarOperator.HARD_CLIP) {
            rightTail = true;
        }

        if (cigarElement.getOperator().consumesReadBases()) {
            readIndex += cigarElement.getLength();
        }
    }

    // It is extremely important that we cut the end first otherwise the read coordinates change.
    if (cutRight >= 0) {
        this.addOp(new ClippingOp(cutRight, read.getLength() - 1));
    }
    if (cutLeft >= 0) {
        this.addOp(new ClippingOp(0, cutLeft));
    }

    return clipRead(ClippingRepresentation.HARDCLIP_BASES);
}
 
Example 4
Source File: CigarUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private static int countRefBasesAndMaybeAlsoClips(final List<CigarElement> elems, final int cigarStartIndex, final int cigarEndIndex, final boolean includeSoftClips, final boolean includeHardClips) {
    Utils.nonNull(elems);
    Utils.validateArg(cigarStartIndex >= 0 && cigarEndIndex <= elems.size() && cigarStartIndex <= cigarEndIndex, () -> "invalid index:" + 0 + " -" + elems.size());

    int result = 0;
    for (final CigarElement elem : elems.subList(cigarStartIndex, cigarEndIndex)) {
        final CigarOperator op = elem.getOperator();
        if (op.consumesReferenceBases() || (includeSoftClips && op == CigarOperator.SOFT_CLIP) || (includeHardClips && op == CigarOperator.HARD_CLIP)) {
            result += elem.getLength();
        }
    }

    return result;
}
 
Example 5
Source File: ReadPosRankSumTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
public static OptionalDouble getReadPosition(final GATKRead read, final VariantContext vc) {
    Utils.nonNull(read);

    // edge case: if a read starts with an insertion then the variant context's end is the reference base BEFORE the read start,
    // which appears like no overlap
    if (read.getStart() == vc.getEnd() + 1 && read.getCigarElements().stream().map(CigarElement::getOperator)
            .filter(o -> !o.isClipping()).findFirst().orElse(null) == CigarOperator.INSERTION) {
        return OptionalDouble.of(0);
    }

    final Pair<Integer, CigarOperator> offset = ReadUtils.getReadIndexForReferenceCoordinate(read, vc.getStart());

    if (offset.getLeft() == ReadUtils.READ_INDEX_NOT_FOUND) {
        return OptionalDouble.empty();
    }

    // hard clips at this point in the code are perfectly good bases that were clipped to make the read fit the assembly region
    final Cigar cigar = read.getCigar();
    final CigarElement firstElement = cigar.getFirstCigarElement();
    final CigarElement lastElement = cigar.getLastCigarElement();
    final int leadingHardClips = firstElement.getOperator() == CigarOperator.HARD_CLIP ? firstElement.getLength() : 0;
    final int trailingHardClips = lastElement.getOperator() == CigarOperator.HARD_CLIP ? lastElement.getLength() : 0;


    final int leftDistance = leadingHardClips + offset.getLeft();
    final int rightDistance = read.getLength() - 1 - offset.getLeft() + trailingHardClips;
    return OptionalDouble.of(Math.min(leftDistance, rightDistance));
}
 
Example 6
Source File: ReadClassifier.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private static boolean hasInitialSoftClip( final List<CigarElement> cigarElements, final GATKRead read ) {
    final ListIterator<CigarElement> itr = cigarElements.listIterator();
    if ( !itr.hasNext() ) return false;

    CigarElement firstEle = itr.next();
    if ( firstEle.getOperator() == CigarOperator.HARD_CLIP && itr.hasNext() ) {
        firstEle = itr.next();
    }
    final int clipStart = firstEle.getLength() - MIN_SOFT_CLIP_LEN;
    return firstEle.getOperator() == CigarOperator.SOFT_CLIP &&
            clipStart >= 0 &&
            isHighQualityRegion(read.getBaseQualities(), clipStart);
}
 
Example 7
Source File: ReadClassifier.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private static boolean hasFinalSoftClip( final List<CigarElement> cigarElements, final GATKRead read ) {
    final ListIterator<CigarElement> itr = cigarElements.listIterator(cigarElements.size());
    if ( !itr.hasPrevious() ) return false;

    CigarElement lastEle = itr.previous();
    if ( lastEle.getOperator() == CigarOperator.HARD_CLIP && itr.hasPrevious() ) {
        lastEle = itr.previous();
    }
    return lastEle.getOperator() == CigarOperator.SOFT_CLIP &&
            lastEle.getLength() >= MIN_SOFT_CLIP_LEN &&
            isHighQualityRegion(read.getBaseQualities(), read.getLength() - lastEle.getLength());
}
 
Example 8
Source File: ReadClipperUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private int leadingCigarElementLength(final Cigar cigar, final CigarOperator operator) {
    for (final CigarElement cigarElement : cigar.getCigarElements()) {
        if (cigarElement.getOperator() == operator)
            return cigarElement.getLength();
        if (cigarElement.getOperator() != CigarOperator.HARD_CLIP)
            break;
    }
    return 0;
}
 
Example 9
Source File: ReadClipperUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
boolean assertHardClippingSoftClips(final CigarCounter clipped) {
    for (final CigarOperator op : counter.keySet()) {
        if (op == CigarOperator.HARD_CLIP || op == CigarOperator.SOFT_CLIP) {
            final int counterTotal = counter.get(CigarOperator.HARD_CLIP) + counter.get(CigarOperator.SOFT_CLIP);
            final int clippedHard = clipped.getCounterForOp(CigarOperator.HARD_CLIP);
            final int clippedSoft = clipped.getCounterForOp(CigarOperator.SOFT_CLIP);

            Assert.assertEquals(counterTotal, clippedHard);
            Assert.assertEquals(clippedSoft, 0);
        } else {
            Assert.assertEquals(counter.get(op), clipped.getCounterForOp(op));
        }
    }
    return true;
}
 
Example 10
Source File: CigarUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 * Given a cigar string, soft clip up to leftClipEnd and soft clip starting at rightClipBegin
 * @param start initial index to clip within read bases, inclusive
 * @param stop final index to clip within read bases exclusive
 * @param clippingOperator      type of clipping -- must be either hard clip or soft clip
 */
public static Cigar clipCigar(final Cigar cigar, final int start, final int stop, CigarOperator clippingOperator) {
    Utils.validateArg(clippingOperator.isClipping(), "Not a clipping operator");
    final boolean clipLeft = start == 0;

    final CigarBuilder newCigar = new CigarBuilder();

    int elementStart = 0;
    for (final CigarElement element : cigar.getCigarElements()) {
        final CigarOperator operator = element.getOperator();
        // copy hard clips
        if (operator == CigarOperator.HARD_CLIP) {
            newCigar.add(new CigarElement(element.getLength(), element.getOperator()));
            continue;
        }
        final int elementEnd = elementStart + (operator.consumesReadBases() ? element.getLength() : 0);

        // element precedes start or follows end of clip, copy it to new cigar
        if (elementEnd <= start || elementStart >= stop) {
            // edge case: deletions at edge of clipping are meaningless and we skip them
            if (operator.consumesReadBases() || (elementStart != start && elementStart != stop)) {
                newCigar.add(new CigarElement(element.getLength(), operator));
            }
        } else {    // otherwise, some or all of the element is soft-clipped
            final int unclippedLength = clipLeft ? elementEnd - stop : start - elementStart;
            final int clippedLength = element.getLength() - unclippedLength;

            if (unclippedLength <= 0) { // totally clipped
                if (operator.consumesReadBases()) {
                    newCigar.add(new CigarElement(element.getLength(), clippingOperator));
                }
            } else if (clipLeft) {
                newCigar.add(new CigarElement(clippedLength, clippingOperator));
                newCigar.add(new CigarElement(unclippedLength, operator));
            } else {
                newCigar.add(new CigarElement(unclippedLength, operator));
                newCigar.add(new CigarElement(clippedLength, clippingOperator));
            }
        }
        elementStart = elementEnd;
    }

    return newCigar.make();
}
 
Example 11
Source File: AlignmentUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 * Aligns reads the haplotype, and then projects this alignment of read -> hap onto the reference
 * via the alignment of haplotype (via its getCigar) method.
 *
 * @param originalRead the read we want to write aligned to the reference genome
 * @param haplotype the haplotype that the read should be aligned to, before aligning to the reference
 * @param referenceStart the start of the reference that haplotype is aligned to.  Provides global coordinate frame.
 * @param isInformative true if the read is differentially informative for one of the haplotypes
 *
 * @param aligner
 * @throws IllegalArgumentException if {@code originalRead} is {@code null} or {@code haplotype} is {@code null} or it
 *   does not have a Cigar or the {@code referenceStart} is invalid (less than 1).
 *
 * @return a GATKRead aligned to reference. Never {@code null}.
 */
public static GATKRead createReadAlignedToRef(final GATKRead originalRead,
                                              final Haplotype haplotype,
                                              final Haplotype refHaplotype,
                                              final int referenceStart,
                                              final boolean isInformative,
                                              final SmithWatermanAligner aligner) {
    Utils.nonNull(originalRead);
    Utils.nonNull(haplotype);
    Utils.nonNull(refHaplotype);
    Utils.nonNull(haplotype.getCigar());
    Utils.nonNull(aligner);
    if ( referenceStart < 1 ) { throw new IllegalArgumentException("reference start much be >= 1 but got " + referenceStart); }

    // compute the smith-waterman alignment of read -> haplotype
    final SmithWatermanAlignment readToHaplotypeSWAlignment = aligner.align(haplotype.getBases(), originalRead.getBases(), CigarUtils.ALIGNMENT_TO_BEST_HAPLOTYPE_SW_PARAMETERS, SWOverhangStrategy.SOFTCLIP);
    if ( readToHaplotypeSWAlignment.getAlignmentOffset() == -1 ) {
        // sw can fail (reasons not clear) so if it happens just don't realign the read
        return originalRead;
    }

    final Cigar swCigar = new CigarBuilder().addAll(readToHaplotypeSWAlignment.getCigar()).make();

    // since we're modifying the read we need to clone it
    final GATKRead read = originalRead.copy();

    // only informative reads are given the haplotype tag to enhance visualization
    if ( isInformative ) {
        read.setAttribute(HAPLOTYPE_TAG, haplotype.hashCode());
    }

    // compute here the read starts w.r.t. the reference from the SW result and the hap -> ref cigar
    final Cigar rightPaddedHaplotypeVsRefCigar = haplotype.getConsolidatedPaddedCigar(1000);

    // this computes the number of reference bases before the read starts, based on the haplotype vs ref cigar
    // This cigar corresponds exactly to the readToRefCigarRaw, below.  One might wonder whether readToRefCigarRaw and
    // readToRefCigarClean ever imply different starts, which could occur if if the former has a leading deletion.  However,
    // according to the logic of applyCigarToCigar, this can only happen if the read has a leading deletion wrt its best haplotype,
    // which our SW aligner won't do, or if the read starts on a haplotype base that is in a deletion wrt to reference, which is nonsensical
    // since a base that exists is not a deletion.  Thus, there is nothing to worry about, in contrast to below where we do check
    // whether left-alignment shifted the start position.
    final int readStartOnReferenceHaplotype = readStartOnReferenceHaplotype(rightPaddedHaplotypeVsRefCigar, readToHaplotypeSWAlignment.getAlignmentOffset());


    //final int readStartOnReference = referenceStart + haplotype.getAlignmentStartHapwrtRef() + readStartOnHaplotype;

    final int readStartOnReference = referenceStart + haplotype.getAlignmentStartHapwrtRef() + readStartOnReferenceHaplotype;

    // compute the read -> ref alignment by mapping read -> hap -> ref from the
    // SW of read -> hap mapped through the given by hap -> ref

    // this is the sub-cigar of the haplotype-to-ref alignment, with cigar elements before the read start removed.  Elements after the read end are kept.
    final Cigar haplotypeToRef = trimCigarByBases(rightPaddedHaplotypeVsRefCigar, readToHaplotypeSWAlignment.getAlignmentOffset(), rightPaddedHaplotypeVsRefCigar.getReadLength() - 1).getCigar();

    final Cigar readToRefCigar = applyCigarToCigar(swCigar, haplotypeToRef);
    final CigarBuilder.Result leftAlignedReadToRefCigarResult = leftAlignIndels(readToRefCigar, refHaplotype.getBases(), originalRead.getBases(), readStartOnReferenceHaplotype);
    final Cigar leftAlignedReadToRefCigar = leftAlignedReadToRefCigarResult.getCigar();
    // it's possible that left-alignment shifted a deletion to the beginning of a read and removed it, shifting the first aligned base to the right
    read.setPosition(read.getContig(), readStartOnReference + leftAlignedReadToRefCigarResult.getLeadingDeletionBasesRemoved());

    // the SW Cigar does not contain the hard clips of the original read
    final Cigar originalCigar = originalRead.getCigar();
    final CigarElement firstElement = originalCigar.getFirstCigarElement();
    final CigarElement lastElement = originalCigar.getLastCigarElement();
    final List<CigarElement> readToRefCigarElementsWithHardClips = new ArrayList<>();
    if (firstElement.getOperator() == CigarOperator.HARD_CLIP) {
        readToRefCigarElementsWithHardClips.add(firstElement);
    }
    readToRefCigarElementsWithHardClips.addAll(leftAlignedReadToRefCigar.getCigarElements());
    if (lastElement.getOperator() == CigarOperator.HARD_CLIP) {
        readToRefCigarElementsWithHardClips.add(lastElement);
    }

    read.setCigar(new Cigar(readToRefCigarElementsWithHardClips));

    if ( leftAlignedReadToRefCigar.getReadLength() != read.getLength() ) {
        throw new GATKException("Cigar " + leftAlignedReadToRefCigar + " with read length " + leftAlignedReadToRefCigar.getReadLength()
                + " != read length " + read.getLength() + " for read " + read.toString() + "\nhapToRef " + haplotypeToRef + " length " + haplotypeToRef.getReadLength() + "/" + haplotypeToRef.getReferenceLength()
                + "\nreadToHap " + swCigar + " length " + swCigar.getReadLength() + "/" + swCigar.getReferenceLength());
    }

    return read;
}
 
Example 12
Source File: ReadClipperUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
private boolean cigarHasElementsDifferentThanInsertionsAndHardClips(final Cigar cigar) {
    for (final CigarElement cigarElement : cigar.getCigarElements())
        if (cigarElement.getOperator() != CigarOperator.INSERTION && cigarElement.getOperator() != CigarOperator.HARD_CLIP)
            return true;
    return false;
}