Java Code Examples for htsjdk.samtools.CigarOperator#DELETION

The following examples show how to use htsjdk.samtools.CigarOperator#DELETION . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: CigarUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Checks if cigar starts with a deletion (ignoring any clips at the beginning).
 */
private static boolean startsOrEndsWithDeletionIgnoringClips(final List<CigarElement> elems) {

    for (final boolean leftSide : new boolean[] {true, false}) {
        for (final CigarElement elem : leftSide ? elems : Lists.reverse(elems)) {
            final CigarOperator op = elem.getOperator();
            if (op == CigarOperator.DELETION) { //first non-clipping is deletion
                return true;
            } else if (!op.isClipping()) {  // first non-clipping is non deletion
                break;
            }
        }
    }

    return false;
}
 
Example 2
Source File: ReadClassifier.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
private void checkBigIndel( final List<CigarElement> cigarElements,
                           final GATKRead read,
                           final List<BreakpointEvidence> evidenceList ) {
    int locus = read.getStart();
    for ( final CigarElement ele : cigarElements ) {
        final CigarOperator op = ele.getOperator();
        if ( ele.getLength() >= MIN_INDEL_LEN ) {
            if ( op == CigarOperator.INSERTION ) {
                evidenceList.add(new BreakpointEvidence.LargeIndel(read, readMetadata, locus));
            } else if ( op == CigarOperator.DELETION ) {
                evidenceList.add(new BreakpointEvidence.LargeIndel(read, readMetadata, locus+ele.getLength()/2));
            }
        }
        if ( op.consumesReferenceBases() ) {
            locus += ele.getLength();
        }
    }
}
 
Example 3
Source File: BaseRecalibrationEngine.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
protected static boolean[] calculateKnownSites( final GATKRead read, final Iterable<? extends Locatable> knownSites ) {
    final int readLength = read.getLength();
    final boolean[] knownSitesArray = new boolean[readLength];//initializes to all false
    final Cigar cigar = read.getCigar();
    final int softStart = read.getSoftStart();
    final int softEnd = read.getSoftEnd();
    for ( final Locatable knownSite : knownSites ) {
        if (knownSite.getEnd() < softStart || knownSite.getStart() > softEnd) {
            // knownSite is outside clipping window for the read, ignore
            continue;
        }
        final Pair<Integer, CigarOperator> featureStartAndOperatorOnRead = ReadUtils.getReadIndexForReferenceCoordinate(read, knownSite.getStart());
        int featureStartOnRead = featureStartAndOperatorOnRead.getLeft() == ReadUtils.READ_INDEX_NOT_FOUND ? 0 : featureStartAndOperatorOnRead.getLeft();
        if (featureStartAndOperatorOnRead.getRight() == CigarOperator.DELETION) {
            featureStartOnRead--;
        }

        final Pair<Integer, CigarOperator> featureEndAndOperatorOnRead = ReadUtils.getReadIndexForReferenceCoordinate(read, knownSite.getEnd());
        int featureEndOnRead = featureEndAndOperatorOnRead.getLeft() == ReadUtils.READ_INDEX_NOT_FOUND ? readLength : featureEndAndOperatorOnRead.getLeft();

        if( featureStartOnRead > readLength ) {
            featureStartOnRead = featureEndOnRead = readLength;
        }

        Arrays.fill(knownSitesArray, Math.max(0, featureStartOnRead), Math.min(readLength, featureEndOnRead + 1), true);
    }
    return knownSitesArray;
}
 
Example 4
Source File: CigarBuilder.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
public Cigar make(final boolean allowEmpty) {
    Utils.validate(!(section == Section.LEFT_SOFT_CLIP && cigarElements.get(0).getOperator() == CigarOperator.SOFT_CLIP), "cigar is completely soft-clipped");
    trailingDeletionBasesRemovedInMake = 0;
    if (removeDeletionsAtEnds && lastOperator == CigarOperator.DELETION) {
        trailingDeletionBasesRemovedInMake = cigarElements.get(cigarElements.size() - 1).getLength();
        cigarElements.remove(cigarElements.size() - 1);
    } else if (removeDeletionsAtEnds && lastTwoElementsWereDeletionAndInsertion()) {
        trailingDeletionBasesRemovedInMake = cigarElements.get(cigarElements.size() - 2).getLength();
        cigarElements.remove(cigarElements.size() - 2);
    }
    Utils.validate(allowEmpty || !cigarElements.isEmpty(), "No cigar elements left after removing leading and trailing deletions.");
    return new Cigar(cigarElements);    // removing flanking deletions may cause an empty cigar to be output.  We do not throw an error or return null.
}
 
Example 5
Source File: CigarUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Checks if cigar has consecutive I/D elements.
 */
private static boolean hasConsecutiveIndels(final List<CigarElement> elems) {
    boolean prevIndel = false;
    for (final CigarElement elem : elems) {
        final CigarOperator op = elem.getOperator();
        final boolean isIndel = (op == CigarOperator.INSERTION || op == CigarOperator.DELETION);
        if (prevIndel && isIndel) {
            return true;
        }
        prevIndel = isIndel;
    }
    return false;
}
 
Example 6
Source File: RealignmentEngine.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
public static boolean supportsVariant(final GATKRead read, final VariantContext vc, int indelStartTolerance) {
    final byte[] readBases = read.getBasesNoCopy();

    final Pair<Integer, CigarOperator> offsetAndOperatorInRead = ReadUtils.getReadIndexForReferenceCoordinate(read, vc.getStart());

    if ( offsetAndOperatorInRead.getLeft() == ReadUtils.READ_INDEX_NOT_FOUND) {
        return false;
    } else if (vc.isSNP() && offsetAndOperatorInRead.getRight() == CigarOperator.DELETION) {
        return false;
    }

    final int variantPositionInRead = offsetAndOperatorInRead.getLeft();

    for (final Allele allele : vc.getAlternateAlleles()) {
        final int referenceLength = vc.getReference().length();
        if (allele.length() == referenceLength) {   // SNP or MNP -- check whether read bases match variant allele bases
            if (allele.basesMatch(ArrayUtils.subarray(readBases, variantPositionInRead, Math.min(variantPositionInRead + allele.length(), readBases.length)))) {
                return true;
            }
        } else {    // indel -- check if the read has the right CIGAR operator near position -- we don't require exact an exact match because indel representation is non-unique
            final boolean isDeletion = allele.length() < referenceLength;
            int readPosition = 0;    // offset by 1 because the variant position is one base before the first indel base
            for (final CigarElement cigarElement : read.getCigarElements()) {
                if (Math.abs(readPosition - variantPositionInRead) <= indelStartTolerance) {
                    if (isDeletion && mightSupportDeletion(cigarElement) || !isDeletion && mightSupportInsertion(cigarElement)) {
                        return true;
                    }
                } else {
                    readPosition += cigarElement.getLength();
                }
            }
        }
    }
    return false;
}
 
Example 7
Source File: IndelShifter.java    From abra2 with MIT License 4 votes vote down vote up
public Cigar shiftAllIndelsLeft(int refStart, int refEnd, String chromosome, Cigar cigar, String seq, CompareToReference2 c2r) {
	
	try {
	
		List<CigarElement> elems = new ArrayList<CigarElement>(cigar.getCigarElements());
		
		int elemSize = elems.size();
		
		for (int i=1; i<elemSize-1; i++) {
			
			int refOffset = 0;
			int readOffset = 0;
			
			for (int j=0; j<i-1; j++) {
				refOffset += getRefOffset(elems.get(j));
				readOffset += getReadOffset(elems.get(j));
			}
			
			CigarElement prev = elems.get(i-1);
			CigarElement elem = elems.get(i);
			CigarElement next = elems.get(i+1);
			
			if ((elem.getOperator() == CigarOperator.DELETION || elem.getOperator() == CigarOperator.INSERTION) &&
				prev.getOperator() == CigarOperator.MATCH_OR_MISMATCH && next.getOperator() == CigarOperator.MATCH_OR_MISMATCH) {
				
				// subset cigar here and attempt to shift
				Cigar subCigar = new Cigar(Arrays.asList(prev, elem, next));
				String subSeq = seq.substring(readOffset, readOffset+subCigar.getReadLength());
				Cigar newCigar = shiftIndelsLeft(refStart + refOffset, refStart + refOffset + subCigar.getReferenceLength(),
						chromosome, subCigar, subSeq, c2r);
				
				//TODO: Merge abutting indels if applicable
				elems.set(i-1, newCigar.getCigarElement(0));
				elems.set(i, newCigar.getCigarElement(1));
				if (newCigar.getCigarElements().size() == 3) {
					elems.set(i+1, newCigar.getCigarElement(2));
				} else {
					elems.remove(i+1);
					elemSize -= 1;
				}
			}
		}
		
		mergeAbuttingElements(elems);
		return new Cigar(elems);
	} catch (RuntimeException e) {
		e.printStackTrace();
		Logger.error("Error on: " + refStart + ", " + refEnd + "," + chromosome + ", " + cigar + ", " + seq);
		throw e;
	}
}
 
Example 8
Source File: CigarBuilder.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
private boolean lastTwoElementsWereDeletionAndInsertion() {
    return lastOperator == CigarOperator.INSERTION && cigarElements.size() > 1 && cigarElements.get(cigarElements.size() - 2).getOperator() == CigarOperator.DELETION;
}
 
Example 9
Source File: AlignmentUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
private static Pair<byte[], byte[]> getBasesAndBaseQualitiesAlignedOneToOne(final GATKRead read, final byte basePadCharacter, final byte qualityPadCharacter) {
    Utils.nonNull(read);
    // As this code is performance sensitive in the HaplotypeCaller, we elect to use the noCopy versions of these getters.
    // We can do this because we don't mutate base or quality arrays in this method or in its accessors
    final byte[] bases = read.getBasesNoCopy();
    final byte[] baseQualities = read.getBaseQualitiesNoCopy();
    final int numCigarElements = read.numCigarElements();
    boolean sawIndel = false;

    // Check if the cigar contains indels
    // Note that we don't call ContainsOperator() here twice to avoid the performance hit of building stream iterators twice
    for (int i = 0; i < numCigarElements; i++) {
        final CigarOperator e = read.getCigarElement(i).getOperator();
        if (e == CigarOperator.INSERTION || e == CigarOperator.DELETION) {
            sawIndel = true;
            break;
        }
    }
    if (!sawIndel) {
        return new ImmutablePair<>(bases, baseQualities);
    }
    else {
        final int numberRefBasesIncludingSoftclips = CigarUtils.countRefBasesAndSoftClips(read.getCigarElements(), 0, numCigarElements);
        final byte[] paddedBases = new byte[numberRefBasesIncludingSoftclips];
        final byte[] paddedBaseQualities = new byte[numberRefBasesIncludingSoftclips];
        int literalPos = 0;
        int paddedPos = 0;
        for ( int i = 0; i < numCigarElements; i++ ) {
            final CigarElement ce = read.getCigarElement(i);
            final CigarOperator co = ce.getOperator();
            if (co.consumesReadBases()) {
                if (!co.consumesReferenceBases()) {
                    literalPos += ce.getLength();  //skip inserted bases
                }
                else {
                    System.arraycopy(bases, literalPos, paddedBases, paddedPos, ce.getLength());
                    System.arraycopy(baseQualities, literalPos, paddedBaseQualities, paddedPos, ce.getLength());
                    literalPos += ce.getLength();
                    paddedPos += ce.getLength();
                }
            }
            else if (co.consumesReferenceBases()) {
                for ( int j = 0; j < ce.getLength(); j++ ) {  //pad deleted bases
                    paddedBases[paddedPos] = basePadCharacter;
                    paddedBaseQualities[paddedPos] = qualityPadCharacter;
                    paddedPos++;
                }
            }
        }
        return new ImmutablePair<>(paddedBases, paddedBaseQualities);
    }
}
 
Example 10
Source File: Utils.java    From cramtools with Apache License 2.0 4 votes vote down vote up
/**
 * A rip off samtools bam_md.c
 * 
 * @param record
 * @param ref
 * @param calcMD
 * @param calcNM
 */
public static void calculateMdAndNmTags(SAMRecord record, byte[] ref, boolean calcMD, boolean calcNM) {
	if (!calcMD && !calcNM)
		return;

	Cigar cigar = record.getCigar();
	List<CigarElement> cigarElements = cigar.getCigarElements();
	byte[] seq = record.getReadBases();
	int start = record.getAlignmentStart() - 1;
	int i, x, y, u = 0;
	int nm = 0;
	StringBuffer str = new StringBuffer();

	int size = cigarElements.size();
	for (i = y = 0, x = start; i < size; ++i) {
		CigarElement ce = cigarElements.get(i);
		int j, l = ce.getLength();
		CigarOperator op = ce.getOperator();
		if (op == CigarOperator.MATCH_OR_MISMATCH || op == CigarOperator.EQ || op == CigarOperator.X) {
			for (j = 0; j < l; ++j) {
				int z = y + j;

				if (ref.length <= x + j)
					break; // out of boundary

				int c1 = 0;
				int c2 = 0;
				// try {
				c1 = seq[z];
				c2 = ref[x + j];

				if ((c1 == c2 && c1 != 15 && c2 != 15) || c1 == 0) {
					// a match
					++u;
				} else {
					str.append(u);
					str.appendCodePoint(ref[x + j]);
					u = 0;
					++nm;
				}
			}
			if (j < l)
				break;
			x += l;
			y += l;
		} else if (op == CigarOperator.DELETION) {
			str.append(u);
			str.append('^');
			for (j = 0; j < l; ++j) {
				if (ref[x + j] == 0)
					break;
				str.appendCodePoint(ref[x + j]);
			}
			u = 0;
			if (j < l)
				break;
			x += l;
			nm += l;
		} else if (op == CigarOperator.INSERTION || op == CigarOperator.SOFT_CLIP) {
			y += l;
			if (op == CigarOperator.INSERTION)
				nm += l;
		} else if (op == CigarOperator.SKIPPED_REGION) {
			x += l;
		}
	}
	str.append(u);

	if (calcMD)
		record.setAttribute(SAMTag.MD.name(), str.toString());
	if (calcNM)
		record.setAttribute(SAMTag.NM.name(), nm);
}
 
Example 11
Source File: AlignmentsTags.java    From cramtools with Apache License 2.0 4 votes vote down vote up
/**
 * Same as {@link htsjdk.samtools.util.SequenceUtil#calculateMdAndNmTags}
 * but allows for reference excerpt instead of a full reference sequence.
 * 
 * @param record
 *            SAMRecord to inject NM and MD tags into
 * @param ref
 *            reference sequence bases, may be a subsequence starting at
 *            refOffest
 * @param refOffset
 *            array offset of the reference bases, 0 is the same as the
 *            whole sequence. This value will be subtracted from the
 *            reference array index in calculations.
 * @param calcMD
 *            calculate MD tag if true
 * @param calcNM
 *            calculate NM tag if true
 */
public static void calculateMdAndNmTags(final SAMRecord record, final byte[] ref, final int refOffset,
		final boolean calcMD, final boolean calcNM) {
	if (!calcMD && !calcNM)
		return;

	final Cigar cigar = record.getCigar();
	final List<CigarElement> cigarElements = cigar.getCigarElements();
	final byte[] seq = record.getReadBases();
	final int start = record.getAlignmentStart() - 1;
	int i, x, y, u = 0;
	int nm = 0;
	final StringBuilder str = new StringBuilder();

	final int size = cigarElements.size();
	for (i = y = 0, x = start; i < size; ++i) {
		final CigarElement ce = cigarElements.get(i);
		int j;
		final int length = ce.getLength();
		final CigarOperator op = ce.getOperator();
		if (op == CigarOperator.MATCH_OR_MISMATCH || op == CigarOperator.EQ || op == CigarOperator.X) {
			for (j = 0; j < length; ++j) {
				final int z = y + j;

				if (refOffset + ref.length <= x + j)
					break; // out of boundary

				int c1 = 0;
				int c2 = 0;
				// try {
				c1 = seq[z];
				c2 = ref[x + j - refOffset];

				if ((c1 == c2) || c1 == 0) {
					// a match
					++u;
				} else {
					str.append(u);
					str.appendCodePoint(ref[x + j - refOffset]);
					u = 0;
					++nm;
				}
			}
			if (j < length)
				break;
			x += length;
			y += length;
		} else if (op == CigarOperator.DELETION) {
			str.append(u);
			str.append('^');
			for (j = 0; j < length; ++j) {
				if (ref[x + j - refOffset] == 0)
					break;
				str.appendCodePoint(ref[x + j - refOffset]);
			}
			u = 0;
			if (j < length)
				break;
			x += length;
			nm += length;
		} else if (op == CigarOperator.INSERTION || op == CigarOperator.SOFT_CLIP) {
			y += length;
			if (op == CigarOperator.INSERTION)
				nm += length;
		} else if (op == CigarOperator.SKIPPED_REGION) {
			x += length;
		}
	}
	str.append(u);

	if (calcMD)
		record.setAttribute(SAMTag.MD.name(), str.toString());
	if (calcNM)
		record.setAttribute(SAMTag.NM.name(), nm);
}