Java Code Examples for htsjdk.samtools.CigarOperator#D

The following examples show how to use htsjdk.samtools.CigarOperator#D . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: LocusIteratorByStateUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@DataProvider(name = "IndelLengthAndBasesTest")
public Object[][] makeIndelLengthAndBasesTest() {
    final String EVENT_BASES = "ACGTACGTACGT";
    final List<Object[]> tests = new LinkedList<>();

    for ( int eventSize = 1; eventSize < 10; eventSize++ ) {
        for ( final CigarOperator indel : Arrays.asList(CigarOperator.D, CigarOperator.I) ) {
            final String cigar = String.format("2M%d%s1M", eventSize, indel.toString());
            final String eventBases = indel == CigarOperator.D ? "" : EVENT_BASES.substring(0, eventSize);
            final int readLength = 3 + eventBases.length();

            final GATKRead read = ArtificialReadUtils.createArtificialRead(header, "read", 0, 1, readLength);
            read.setBases(("TT" + eventBases + "A").getBytes());
            final byte[] quals = new byte[readLength];
            for ( int i = 0; i < readLength; i++ )
                quals[i] = (byte)(i % QualityUtils.MAX_SAM_QUAL_SCORE);
            read.setBaseQualities(quals);
            read.setCigar(cigar);

            tests.add(new Object[]{read, indel, eventSize, eventBases.equals("") ? null : eventBases});
        }
    }

    return tests.toArray(new Object[][]{});
}
 
Example 2
Source File: CollectSamErrorMetricsTest.java    From picard with MIT License 6 votes vote down vote up
@Test(dataProvider = "provideForTestHasDeletionBeenProcessed")
public void testHasDeletionBeenProcessed(final String cigarString, final List<Boolean> expected) {
    final Cigar cigar = TextCigarCodec.decode(cigarString);
    final SAMRecord deletionRecord = createRecordFromCigar(cigar.toString());

    final CollectSamErrorMetrics collectSamErrorMetrics = new CollectSamErrorMetrics();
    final SAMSequenceRecord samSequenceRecord = new SAMSequenceRecord("chr1", 2000);

    int position = 100;
    int iExpected = 0;

    for(CigarElement cigarElement : cigar.getCigarElements()) {
        for(int iCigarElementPosition = 0; iCigarElementPosition < cigarElement.getLength(); iCigarElementPosition++) {
            final SamLocusIterator.LocusInfo locusInfo = new SamLocusIterator.LocusInfo(samSequenceRecord, ++position);
            if(cigarElement.getOperator() == CigarOperator.D) {
                Assert.assertEquals(collectSamErrorMetrics.processDeletionLocus(new SamLocusIterator.RecordAndOffset(deletionRecord, 0), locusInfo), (boolean) expected.get(iExpected++));
            }
        }
    }
}
 
Example 3
Source File: SAMRecordUtils.java    From abra2 with MIT License 5 votes vote down vote up
/**
 *  Returns total length of deletions and insertions for the input read. 
 */
public static int getNumIndelBases(SAMRecord read) {
	int numIndelBases = 0;
	
	for (CigarElement element : read.getCigar().getCigarElements()) {
		if ((element.getOperator() == CigarOperator.D) || (element.getOperator() == CigarOperator.I)) {
			numIndelBases += element.getLength();
		}
	}
	
	return numIndelBases;
}
 
Example 4
Source File: AlignmentUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Removing a trailing deletion from the incoming cigar if present
 *
 * @param c the cigar we want to update
 * @return a non-null Cigar
 */
public static Cigar removeTrailingDeletions(final Cigar c) {

    final List<CigarElement> elements = c.getCigarElements();
    if ( elements.get(elements.size() - 1).getOperator() != CigarOperator.D )
        return c;

    return new Cigar(elements.subList(0, elements.size() - 1));
}
 
Example 5
Source File: SmithWatermanJavaAligner.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private static CigarElement makeElement(final State state, final int length) {
    CigarOperator op = null;
    switch (state) {
        case MATCH: op = CigarOperator.M; break;
        case INSERTION: op = CigarOperator.I; break;
        case DELETION: op = CigarOperator.D; break;
        case CLIP: op = CigarOperator.S; break;
    }
    return new CigarElement(length, op);
}
 
Example 6
Source File: AlignmentUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Is the offset inside a deletion?
 *
 * @param cigar         the read's CIGAR -- cannot be null
 * @param offset        the offset into the CIGAR
 * @return true if the offset is inside a deletion, false otherwise
 */
public static boolean isInsideDeletion(final Cigar cigar, final int offset) {
    Utils.nonNull(cigar);
    if ( offset < 0 ) return false;

    // pos counts read bases
    int pos = 0;
    int prevPos = 0;

    for (final CigarElement ce : cigar.getCigarElements()) {

        switch (ce.getOperator()) {
            case I:
            case S:
            case D:
            case M:
            case EQ:
            case X:
                prevPos = pos;
                pos += ce.getLength();
                break;
            case H:
            case P:
            case N:
                break;
            default:
                throw new GATKException("Unsupported cigar operator: " + ce.getOperator());
        }

        // Is the offset inside a deletion?
        if ( prevPos < offset && pos >= offset && ce.getOperator() == CigarOperator.D ) {
            return true;

        }
    }

    return false;
}
 
Example 7
Source File: CigarModifierTest.java    From VarDictJava with MIT License 5 votes vote down vote up
@Test(dataProvider = "cigar")
public void getCigarOperatorTest(Object cigarObject) {
    Cigar cigar = (Cigar) cigarObject;
    CigarOperator[] operators = new CigarOperator[] {
            CigarOperator.M,
            CigarOperator.S,
            CigarOperator.I,
            CigarOperator.D,
            CigarOperator.N,
            CigarOperator.H,
    };
    for (int i = 0; i < cigar.numCigarElements(); i++) {
        Assert.assertEquals(CigarParser.getCigarOperator(cigar, i), operators[i]);
    }
}
 
Example 8
Source File: ReadThreadingGraph.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Actually merge the dangling tail if possible
 *
 * @param danglingTailMergeResult   the result from generating a Cigar for the dangling tail against the reference
 * @return 1 if merge was successful, 0 otherwise
 */
@VisibleForTesting
final int mergeDanglingTail(final DanglingChainMergeHelper danglingTailMergeResult) {

    final List<CigarElement> elements = danglingTailMergeResult.cigar.getCigarElements();
    final CigarElement lastElement = elements.get(elements.size() - 1);
    Utils.validateArg( lastElement.getOperator() == CigarOperator.M, "The last Cigar element must be an M");

    final int lastRefIndex = danglingTailMergeResult.cigar.getReferenceLength() - 1;
    final int matchingSuffix = Math.min(longestSuffixMatch(danglingTailMergeResult.referencePathString, danglingTailMergeResult.danglingPathString, lastRefIndex), lastElement.getLength());
    if ( matchingSuffix == 0 ) {
        return 0;
    }

    final int altIndexToMerge = Math.max(danglingTailMergeResult.cigar.getReadLength() - matchingSuffix - 1, 0);

    // there is an important edge condition that we need to handle here: Smith-Waterman correctly calculates that there is a
    // deletion, that deletion is left-aligned such that the LCA node is part of that deletion, and the rest of the dangling
    // tail is a perfect match to the suffix of the reference path.  In this case we need to push the reference index to merge
    // down one position so that we don't incorrectly cut a base off of the deletion.
    final boolean firstElementIsDeletion = elements.get(0).getOperator() == CigarOperator.D;
    final boolean mustHandleLeadingDeletionCase =  firstElementIsDeletion && (elements.get(0).getLength() + matchingSuffix == lastRefIndex + 1);
    final int refIndexToMerge = lastRefIndex - matchingSuffix + 1 + (mustHandleLeadingDeletionCase ? 1 : 0);

    // another edge condition occurs here: if Smith-Waterman places the whole tail into an insertion then it will try to
    // merge back to the LCA, which results in a cycle in the graph.  So we do not want to merge in such a case.
    if ( refIndexToMerge == 0 ) {
        return 0;
    }

    // it's safe to merge now
    addEdge(danglingTailMergeResult.danglingPath.get(altIndexToMerge), danglingTailMergeResult.referencePath.get(refIndexToMerge), ((MyEdgeFactory)getEdgeFactory()).createEdge(false, 1));

    return 1;
}
 
Example 9
Source File: CadabraProcessor.java    From abra2 with MIT License 5 votes vote down vote up
private IndelInfo checkForIndelAtLocus(int alignmentStart, Cigar cigar, int refPos) {
	
	IndelInfo ret = null;
	
	int readIdx = 0;
	int currRefPos = alignmentStart;
	for (CigarElement element : cigar.getCigarElements()) {
		if (element.getOperator() == CigarOperator.M) {
			readIdx += element.getLength();
			currRefPos += element.getLength();
		} else if (element.getOperator() == CigarOperator.I) {
			if (currRefPos == refPos+1) {
				ret = new IndelInfo(element, readIdx);
				break;
			}
			readIdx += element.getLength();
		} else if (element.getOperator() == CigarOperator.D) {
			if (currRefPos == refPos+1) {
				ret = new IndelInfo(element, readIdx);
				break;
			}				
			currRefPos += element.getLength();
		} else if (element.getOperator() == CigarOperator.S) {
			readIdx += element.getLength();
		} else if (element.getOperator() == CigarOperator.N) {
			currRefPos += element.getLength();
		}
		
		if (currRefPos > refPos+1) {
			break;
		}
	}
	
	return ret;
}
 
Example 10
Source File: LocusIteratorByStateBaseTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private LIBSTest makePermutationTest(final List<CigarElement> elements) {
    CigarElement last = null;
    boolean hasMatch = false;

    // starts with D => bad
    if ( startsWithDeletion(elements) )
        return null;

    // ends with D => bad
    if ( elements.get(elements.size()-1).getOperator() == CigarOperator.D )
        return null;

    // make sure it's valid
    String cigar = "";
    for ( final CigarElement ce : elements ) {
        if ( ce.getOperator() == CigarOperator.N )
            return null; // TODO -- don't support N

        // abort on a bad cigar
        if ( last != null ) {
            if ( ce.getOperator() == last.getOperator() )
                return null;
            if ( isIndel(ce) && isIndel(last) )
                return null;
        }

        cigar += ce.getLength() + ce.getOperator().toString();
        last = ce;
        hasMatch = hasMatch || ce.getOperator() == CigarOperator.M;
    }

    if ( ! hasMatch && elements.size() == 1 &&
            ! (last.getOperator() == CigarOperator.I || last.getOperator() == CigarOperator.S))
        return null;

    return new LIBSTest(cigar);
}
 
Example 11
Source File: NativeAssembler.java    From abra2 with MIT License 5 votes vote down vote up
private int firstIndelLength(Cigar cigar) {
	int len = 0;
	
	for (CigarElement elem : cigar.getCigarElements()) {
		if (elem.getOperator() == CigarOperator.D || elem.getOperator() == CigarOperator.I) {
			len = elem.getLength();
			break;
		}
	}
	
	return len;
}
 
Example 12
Source File: IndelShifter.java    From abra2 with MIT License 5 votes vote down vote up
private int firstIndelOffset(Cigar cigar) {
	int pos = 0;
	
	for (CigarElement elem : cigar.getCigarElements()) {
		if ((elem.getOperator() == CigarOperator.D) || (elem.getOperator() == CigarOperator.I)) {
			return pos;
		} else if (elem.getOperator() != CigarOperator.S) {
			pos += elem.getLength();
		}
	}
	
	throw new IllegalArgumentException("No indel for record: [" + cigar + "]");
}
 
Example 13
Source File: IndelShifter.java    From abra2 with MIT License 5 votes vote down vote up
private boolean containsIndel(Cigar cigar) {
	for (CigarElement elem : cigar.getCigarElements()) {
		if ((elem.getOperator() == CigarOperator.D) || (elem.getOperator() == CigarOperator.I)) {
			return true;
		}
	}
	
	return false;
}
 
Example 14
Source File: SimpleAlleleCounter.java    From abra2 with MIT License 5 votes vote down vote up
private IndelInfo checkForIndelAtLocus(int alignmentStart, Cigar cigar, int refPos) {
	
	IndelInfo ret = null;
	
	int readIdx = 0;
	int currRefPos = alignmentStart;
	for (CigarElement element : cigar.getCigarElements()) {
		if (element.getOperator() == CigarOperator.M) {
			readIdx += element.getLength();
			currRefPos += element.getLength();
		} else if (element.getOperator() == CigarOperator.I) {
			if (currRefPos == refPos+1) {
				ret = new IndelInfo(element, readIdx);
				break;
			}
			readIdx += element.getLength();
		} else if (element.getOperator() == CigarOperator.D) {
			if (currRefPos == refPos+1) {
				ret = new IndelInfo(element, readIdx);
				break;
			}				
			currRefPos += element.getLength();
		} else if (element.getOperator() == CigarOperator.S) {
			readIdx += element.getLength();
		} else if (element.getOperator() == CigarOperator.N) {
			currRefPos += element.getLength();
		}
		
		if (currRefPos > refPos+1) {
			break;
		}
	}
	
	return ret;
}
 
Example 15
Source File: ReAligner.java    From abra2 with MIT License 4 votes vote down vote up
private List<Feature> getExonSkippingJunctions(ContigAlignerResult result, List<Feature> junctions) {

		// Handles special case where an exon skipping junction causes large gaps with a tiny (~1)
		// number of bases mapped somewhere in the gap
		Set<Feature> junctionSet = new HashSet<Feature>(junctions);
		List<Feature> extraJunctions = new ArrayList<Feature>();
		
		Cigar cigar = TextCigarCodec.decode(result.getCigar());
		
		boolean isInGap = false;
		int gapStart = -1;
		int gapLength = -1;
		int numElems = -1;
		int refOffset = 0;
		
		int maxBasesInMiddle = 5;
		int middleBases = -1;
		
		CigarOperator prev = CigarOperator.M;
		
		for (CigarElement elem : cigar.getCigarElements()) {
			if (elem.getOperator() == CigarOperator.D || elem.getOperator() == CigarOperator.N) {
				if (!isInGap) {
					isInGap = true;
					gapStart = refOffset;
					gapLength = elem.getLength();
					numElems = 1;
					middleBases = 0;
				} else {
					gapLength += elem.getLength();
					numElems += 1;
				}
			} else {
				if (isInGap) {
					
					if (elem.getLength() + middleBases < maxBasesInMiddle) {
						middleBases += elem.getLength();
					} else {
						
						if (numElems > 1 && middleBases > 0 && (prev == CigarOperator.D || prev == CigarOperator.N)) {
							long start = result.getGenomicPos() + gapStart;
							long end = start + gapLength;
							
							// Find junction start / end points closest to gap start / end point
							long closestStart = junctions.get(0).getStart();
							long closestEnd = junctions.get(0).getEnd();
							for (Feature junction : junctions) {
								if (Math.abs(junction.getStart()-start) < Math.abs(closestStart-start)) {
									closestStart = junction.getStart();
								}

								if (Math.abs(junction.getEnd()-end) < Math.abs(closestEnd-end)) {
									closestEnd = junction.getEnd();
								}
							}
							
							if (closestEnd > closestStart+1) {
								Feature junc = new Feature(result.getChromosome(), closestStart, closestEnd);
								if (!junctionSet.contains(junc)) {
									Logger.info("Potential exon skipping junction idenfified: %s", junc);
									extraJunctions.add(junc);
								}
							}
						}
						
						isInGap = false;
						gapStart = -1;
						gapLength = -1;
						numElems = -1;
						middleBases = -1;
					}
				}
			}
			
			if (elem.getOperator() == CigarOperator.M || 
				elem.getOperator() == CigarOperator.D || 
				elem.getOperator() == CigarOperator.N ||
				elem.getOperator() == CigarOperator.X ||
				elem.getOperator() == CigarOperator.EQ) {
				
				refOffset += elem.getLength();
			}
			
			prev = elem.getOperator();
		}
		
		return extraJunctions;
	}
 
Example 16
Source File: ReAligner.java    From abra2 with MIT License 4 votes vote down vote up
private List<Feature> getExtraJunctions(ContigAlignerResult result, List<Feature> junctions, List<Feature> junctions2) {
	// Look for junctions with adjacent deletions.
	// Treat these as putative novel junctions.
	Set<Feature> junctionSet = new HashSet<Feature>(junctions);
	junctionSet.addAll(junctions2);
	List<Feature> extraJunctions = new ArrayList<Feature>();
	
	Cigar cigar = TextCigarCodec.decode(result.getCigar());
	
	boolean isInGap = false;
	int gapStart = -1;
	int gapLength = -1;
	int numElems = -1;
	int refOffset = 0;
	
	for (CigarElement elem : cigar.getCigarElements()) {
		if (elem.getOperator() == CigarOperator.D || elem.getOperator() == CigarOperator.N) {
			if (!isInGap) {
				isInGap = true;
				gapStart = refOffset;
				gapLength = elem.getLength();
				numElems = 1;
			} else {
				gapLength += elem.getLength();
				numElems += 1;
			}
		} else {
			if (isInGap) {
				
				if (numElems > 1) {
					long start = result.getGenomicPos() + gapStart;
					long end = start + gapLength;
					Feature junc = new Feature(result.getChromosome(), start, end-1);
					if (!junctionSet.contains(junc)) {
						Logger.info("Extra junction idenfified: %s", junc);
						extraJunctions.add(junc);
					}
				}
				
				isInGap = false;
				gapStart = -1;
				gapLength = -1;
				numElems = -1;
			}
		}
		
		if (elem.getOperator() == CigarOperator.M || 
			elem.getOperator() == CigarOperator.D || 
			elem.getOperator() == CigarOperator.N ||
			elem.getOperator() == CigarOperator.X ||
			elem.getOperator() == CigarOperator.EQ) {
			
			refOffset += elem.getLength();
		}
	}
	
	return extraJunctions;
}
 
Example 17
Source File: RealignmentEngine.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
private static boolean mightSupportDeletion(final CigarElement cigarElement) {
    final CigarOperator cigarOperator = cigarElement.getOperator();
    return cigarOperator == CigarOperator.D || cigarOperator == CigarOperator.S;
}
 
Example 18
Source File: LocusIteratorByState.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 * Creates the next alignment context from the given state.  Note that this is implemented as a
 * lazy load method. nextAlignmentContext MUST BE null in order for this method to advance to the
 * next entry.
 */
private void lazyLoadNextAlignmentContext() {
    while (nextAlignmentContext == null && readStates.hasNext()) {
        readStates.collectPendingReads();

        final Locatable location = getLocation();

        // We don't need to keep the pileup elements separated by sample within this method,
        // since they are just going to get combined into one monolithic pileup anyway
        // when we construct the final ReadPileup below. This optimization speeds up the
        // HaplotypeCaller by quite a bit!
        final List<PileupElement> allPileupElements = new ArrayList<>(100);

        for (final Map.Entry<String, PerSampleReadStateManager> sampleStatePair : readStates) {
            final PerSampleReadStateManager readState = sampleStatePair.getValue();
            final Iterator<AlignmentStateMachine> iterator = readState.iterator();

            while (iterator.hasNext()) {
                // state object with the read/offset information
                final AlignmentStateMachine state = iterator.next();
                final GATKRead read = state.getRead();
                final CigarOperator op = state.getCigarOperator();

                if (!includeReadsWithNsAtLoci && op == CigarOperator.N) {
                    continue;
                }

                if (!dontIncludeReadInPileup(read, location.getStart())) {
                    if (!includeReadsWithDeletionAtLoci && op == CigarOperator.D) {
                        continue;
                    }

                    allPileupElements.add(state.makePileupElement());
                }
            }
        }

        readStates.updateReadStates(); // critical - must be called after we get the current state offsets and location
        if (!allPileupElements.isEmpty()) { // if we got reads with non-D/N over the current position, we are done
            nextAlignmentContext = new AlignmentContext(location, new ReadPileup(location, allPileupElements));
        }
    }
}
 
Example 19
Source File: LocusIteratorByStateBaseTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
private boolean isIndel(final CigarElement ce) {
    return ce.getOperator() == CigarOperator.D || ce.getOperator() == CigarOperator.I;
}
 
Example 20
Source File: SimpleCaller.java    From abra2 with MIT License 4 votes vote down vote up
private BaseInfo getBaseAtReferencePosition(SAMRecord read, int refPos) {
	boolean isNearEdge = false;
	boolean isIndel = false;
	int alignmentStart = read.getAlignmentStart();
	Cigar cigar = read.getCigar();
	
	char base = 'N';
	
	int readIdx = 0;
	int currRefPos = alignmentStart;
	
	for (CigarElement element : cigar.getCigarElements()) {
					
		if (element.getOperator() == CigarOperator.M) {
			readIdx += element.getLength();
			currRefPos += element.getLength();
			
			if (currRefPos > refPos) {  // TODO: double check end of read base here...
				
				int offset = currRefPos - refPos;
				
				if ((offset < 3) || (offset+3 >= element.getLength())) {
					// This position is within 3 bases of start/end of alignment or clipping or indel.
					// Tag this base for evaluation downstream.
					isNearEdge = true;
				}
				
				readIdx -= offset;
				if ((readIdx < 0) || (readIdx >= read.getReadBases().length)) {
					System.err.println("Read index out of bounds for read: " + read.getSAMString());
					break;
				}
				
				if (read.getBaseQualities()[readIdx] >= MIN_BASE_QUALITY) {
					base = (char) read.getReadBases()[readIdx];
				}
				break;
			}
		} else if (element.getOperator() == CigarOperator.I) {
			if (currRefPos == refPos) {
				//TODO: Handle insertions
				isIndel = true;
				break;
			}
			readIdx += element.getLength();
		} else if (element.getOperator() == CigarOperator.D) {
			if (refPos >= currRefPos && refPos <= currRefPos+element.getLength()) {
				//TODO: Handle deletions
				isIndel = true;
				break;
			}				
			currRefPos += element.getLength();
		} else if (element.getOperator() == CigarOperator.S) {
			readIdx += element.getLength();
		}			
	}
	
	return new BaseInfo(Character.toUpperCase(base), isNearEdge, isIndel);
}