Java Code Examples for htsjdk.samtools.CigarOperator#N

The following examples show how to use htsjdk.samtools.CigarOperator#N . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ReadRecord.java    From hmftools with GNU General Public License v3.0 6 votes vote down vote up
public static int calcFragmentLength(final ReadRecord read1, final ReadRecord read2)
{
    int insertSize = abs(read1.fragmentInsertSize());

    if(!read1.containsSplit() && !read2.containsSplit())
        return insertSize;

    // find unique split lengths and remove them

    List<Integer> splitLengths = read1.Cigar.getCigarElements().stream()
            .filter(x -> x.getOperator() == CigarOperator.N).map(x -> x.getLength()).collect(Collectors.toList());

    for(final CigarElement element : read2.Cigar.getCigarElements())
    {
        if(element.getOperator() == CigarOperator.N && !splitLengths.contains(element.getLength()))
            splitLengths.add(element.getLength());
    }

    int totalSplitLength = splitLengths.stream().mapToInt(x -> x).sum();

    return insertSize - totalSplitLength;
}
 
Example 2
Source File: NDNCigarReadTransformer.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Refactor cigar strings that contain N-D-N elements to one N element (with total length of the three refactored elements).
 */
@VisibleForTesting
protected static Cigar refactorNDNtoN(final Cigar originalCigar) {
    final Cigar refactoredCigar = new Cigar();
    final int cigarLength = originalCigar.numCigarElements();
    for(int i = 0; i < cigarLength; i++){
        final CigarElement element = originalCigar.getCigarElement(i);
        if(element.getOperator() == CigarOperator.N && thereAreAtLeast2MoreElements(i,cigarLength)){
            final CigarElement nextElement = originalCigar.getCigarElement(i+1);
            final CigarElement nextNextElement = originalCigar.getCigarElement(i+2);

            // if it is N-D-N replace with N (with the total length) otherwise just add the first N.
            if(nextElement.getOperator() == CigarOperator.D && nextNextElement.getOperator() == CigarOperator.N){
                final int threeElementsLength = element.getLength() + nextElement.getLength() + nextNextElement.getLength();
                final CigarElement refactoredElement = new CigarElement(threeElementsLength,CigarOperator.N);
                refactoredCigar.add(refactoredElement);
                i += 2; //skip the elements that were refactored
            } else {
                refactoredCigar.add(element);  // add only the first N
            }
        } else {
            refactoredCigar.add(element);  // add any non-N element
        }
    }
    return refactoredCigar;
}
 
Example 3
Source File: CadabraProcessor.java    From abra2 with MIT License 5 votes vote down vote up
private IndelInfo checkForIndelAtLocus(int alignmentStart, Cigar cigar, int refPos) {
	
	IndelInfo ret = null;
	
	int readIdx = 0;
	int currRefPos = alignmentStart;
	for (CigarElement element : cigar.getCigarElements()) {
		if (element.getOperator() == CigarOperator.M) {
			readIdx += element.getLength();
			currRefPos += element.getLength();
		} else if (element.getOperator() == CigarOperator.I) {
			if (currRefPos == refPos+1) {
				ret = new IndelInfo(element, readIdx);
				break;
			}
			readIdx += element.getLength();
		} else if (element.getOperator() == CigarOperator.D) {
			if (currRefPos == refPos+1) {
				ret = new IndelInfo(element, readIdx);
				break;
			}				
			currRefPos += element.getLength();
		} else if (element.getOperator() == CigarOperator.S) {
			readIdx += element.getLength();
		} else if (element.getOperator() == CigarOperator.N) {
			currRefPos += element.getLength();
		}
		
		if (currRefPos > refPos+1) {
			break;
		}
	}
	
	return ret;
}
 
Example 4
Source File: SimpleAlleleCounter.java    From abra2 with MIT License 5 votes vote down vote up
private IndelInfo checkForIndelAtLocus(int alignmentStart, Cigar cigar, int refPos) {
	
	IndelInfo ret = null;
	
	int readIdx = 0;
	int currRefPos = alignmentStart;
	for (CigarElement element : cigar.getCigarElements()) {
		if (element.getOperator() == CigarOperator.M) {
			readIdx += element.getLength();
			currRefPos += element.getLength();
		} else if (element.getOperator() == CigarOperator.I) {
			if (currRefPos == refPos+1) {
				ret = new IndelInfo(element, readIdx);
				break;
			}
			readIdx += element.getLength();
		} else if (element.getOperator() == CigarOperator.D) {
			if (currRefPos == refPos+1) {
				ret = new IndelInfo(element, readIdx);
				break;
			}				
			currRefPos += element.getLength();
		} else if (element.getOperator() == CigarOperator.S) {
			readIdx += element.getLength();
		} else if (element.getOperator() == CigarOperator.N) {
			currRefPos += element.getLength();
		}
		
		if (currRefPos > refPos+1) {
			break;
		}
	}
	
	return ret;
}
 
Example 5
Source File: NativeAssembler.java    From abra2 with MIT License 5 votes vote down vote up
private int countGaps(Cigar cigar) {
	int count = 0;
	for (CigarElement elem : cigar.getCigarElements()) {
		if (elem.getOperator() == CigarOperator.D || elem.getOperator() == CigarOperator.I || elem.getOperator() == CigarOperator.N) {
			count += 1;
		}
	}
	
	return count;
}
 
Example 6
Source File: SAMRecordUtils.java    From abra2 with MIT License 5 votes vote down vote up
public static int getNumSplices(SAMRecord read) {
	int splices = 0;
	
	for (CigarElement element : read.getCigar().getCigarElements()) {
		if (element.getOperator() == CigarOperator.N) {
			splices += 1;
		}
	}
	
	return splices;
}
 
Example 7
Source File: SAMRecordUtils.java    From abra2 with MIT License 5 votes vote down vote up
/**
 * Return the number of insertions, deletions and introns in a SAMRecord 
 */
public static int getNumGaps(SAMRecord read) {
	int numGaps = 0;
	
	for (CigarElement element : read.getCigar().getCigarElements()) {
		if ((element.getOperator() == CigarOperator.D) || (element.getOperator() == CigarOperator.I) || 
			(element.getOperator() == CigarOperator.N)) {
			numGaps += 1;
		}
	}
	
	return numGaps;
}
 
Example 8
Source File: CigarModifierTest.java    From VarDictJava with MIT License 5 votes vote down vote up
@Test(dataProvider = "cigar")
public void getCigarOperatorTest(Object cigarObject) {
    Cigar cigar = (Cigar) cigarObject;
    CigarOperator[] operators = new CigarOperator[] {
            CigarOperator.M,
            CigarOperator.S,
            CigarOperator.I,
            CigarOperator.D,
            CigarOperator.N,
            CigarOperator.H,
    };
    for (int i = 0; i < cigar.numCigarElements(); i++) {
        Assert.assertEquals(CigarParser.getCigarOperator(cigar, i), operators[i]);
    }
}
 
Example 9
Source File: LocusIteratorByStateBaseTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private LIBSTest makePermutationTest(final List<CigarElement> elements) {
    CigarElement last = null;
    boolean hasMatch = false;

    // starts with D => bad
    if ( startsWithDeletion(elements) )
        return null;

    // ends with D => bad
    if ( elements.get(elements.size()-1).getOperator() == CigarOperator.D )
        return null;

    // make sure it's valid
    String cigar = "";
    for ( final CigarElement ce : elements ) {
        if ( ce.getOperator() == CigarOperator.N )
            return null; // TODO -- don't support N

        // abort on a bad cigar
        if ( last != null ) {
            if ( ce.getOperator() == last.getOperator() )
                return null;
            if ( isIndel(ce) && isIndel(last) )
                return null;
        }

        cigar += ce.getLength() + ce.getOperator().toString();
        last = ce;
        hasMatch = hasMatch || ce.getOperator() == CigarOperator.M;
    }

    if ( ! hasMatch && elements.size() == 1 &&
            ! (last.getOperator() == CigarOperator.I || last.getOperator() == CigarOperator.S))
        return null;

    return new LIBSTest(cigar);
}
 
Example 10
Source File: ReadRecord.java    From hmftools with GNU General Public License v3.0 4 votes vote down vote up
public static final List<int[]> generateMappedCoords(final Cigar cigar, int posStart)
{
    final List<int[]> mappedCoords = Lists.newArrayList();

    // first establish whether the read is split across 2 distant regions, and if so which it maps to
    int posOffset = 0;
    boolean continueRegion = false;

    for(CigarElement element : cigar.getCigarElements())
    {
        if(element.getOperator() == CigarOperator.S)
        {
            // nothing to skip
        }
        else if(element.getOperator() == D)
        {
            posOffset += element.getLength();
            continueRegion = true;
        }
        else if(element.getOperator() == CigarOperator.I)
        {
            // nothing to skip
            continueRegion = true;
        }
        else if(element.getOperator() == CigarOperator.N)
        {
            posOffset += element.getLength();
            continueRegion = false;
        }
        else if(element.getOperator() == CigarOperator.M)
        {
            int readStartPos = posStart + posOffset;
            int readEndPos = readStartPos + element.getLength() - 1;

            if(continueRegion && !mappedCoords.isEmpty())
            {
                int[] lastRegion = mappedCoords.get(mappedCoords.size() - 1);
                lastRegion[SE_END] = readEndPos;
            }
            else
            {
                mappedCoords.add(new int[] { readStartPos, readEndPos });
            }

            posOffset += element.getLength();
            continueRegion = false;
        }
    }

    return mappedCoords;
}
 
Example 11
Source File: ReAligner.java    From abra2 with MIT License 4 votes vote down vote up
private List<Feature> getExtraJunctions(ContigAlignerResult result, List<Feature> junctions, List<Feature> junctions2) {
	// Look for junctions with adjacent deletions.
	// Treat these as putative novel junctions.
	Set<Feature> junctionSet = new HashSet<Feature>(junctions);
	junctionSet.addAll(junctions2);
	List<Feature> extraJunctions = new ArrayList<Feature>();
	
	Cigar cigar = TextCigarCodec.decode(result.getCigar());
	
	boolean isInGap = false;
	int gapStart = -1;
	int gapLength = -1;
	int numElems = -1;
	int refOffset = 0;
	
	for (CigarElement elem : cigar.getCigarElements()) {
		if (elem.getOperator() == CigarOperator.D || elem.getOperator() == CigarOperator.N) {
			if (!isInGap) {
				isInGap = true;
				gapStart = refOffset;
				gapLength = elem.getLength();
				numElems = 1;
			} else {
				gapLength += elem.getLength();
				numElems += 1;
			}
		} else {
			if (isInGap) {
				
				if (numElems > 1) {
					long start = result.getGenomicPos() + gapStart;
					long end = start + gapLength;
					Feature junc = new Feature(result.getChromosome(), start, end-1);
					if (!junctionSet.contains(junc)) {
						Logger.info("Extra junction idenfified: %s", junc);
						extraJunctions.add(junc);
					}
				}
				
				isInGap = false;
				gapStart = -1;
				gapLength = -1;
				numElems = -1;
			}
		}
		
		if (elem.getOperator() == CigarOperator.M || 
			elem.getOperator() == CigarOperator.D || 
			elem.getOperator() == CigarOperator.N ||
			elem.getOperator() == CigarOperator.X ||
			elem.getOperator() == CigarOperator.EQ) {
			
			refOffset += elem.getLength();
		}
	}
	
	return extraJunctions;
}
 
Example 12
Source File: ReAligner.java    From abra2 with MIT License 4 votes vote down vote up
private List<Feature> getExonSkippingJunctions(ContigAlignerResult result, List<Feature> junctions) {

		// Handles special case where an exon skipping junction causes large gaps with a tiny (~1)
		// number of bases mapped somewhere in the gap
		Set<Feature> junctionSet = new HashSet<Feature>(junctions);
		List<Feature> extraJunctions = new ArrayList<Feature>();
		
		Cigar cigar = TextCigarCodec.decode(result.getCigar());
		
		boolean isInGap = false;
		int gapStart = -1;
		int gapLength = -1;
		int numElems = -1;
		int refOffset = 0;
		
		int maxBasesInMiddle = 5;
		int middleBases = -1;
		
		CigarOperator prev = CigarOperator.M;
		
		for (CigarElement elem : cigar.getCigarElements()) {
			if (elem.getOperator() == CigarOperator.D || elem.getOperator() == CigarOperator.N) {
				if (!isInGap) {
					isInGap = true;
					gapStart = refOffset;
					gapLength = elem.getLength();
					numElems = 1;
					middleBases = 0;
				} else {
					gapLength += elem.getLength();
					numElems += 1;
				}
			} else {
				if (isInGap) {
					
					if (elem.getLength() + middleBases < maxBasesInMiddle) {
						middleBases += elem.getLength();
					} else {
						
						if (numElems > 1 && middleBases > 0 && (prev == CigarOperator.D || prev == CigarOperator.N)) {
							long start = result.getGenomicPos() + gapStart;
							long end = start + gapLength;
							
							// Find junction start / end points closest to gap start / end point
							long closestStart = junctions.get(0).getStart();
							long closestEnd = junctions.get(0).getEnd();
							for (Feature junction : junctions) {
								if (Math.abs(junction.getStart()-start) < Math.abs(closestStart-start)) {
									closestStart = junction.getStart();
								}

								if (Math.abs(junction.getEnd()-end) < Math.abs(closestEnd-end)) {
									closestEnd = junction.getEnd();
								}
							}
							
							if (closestEnd > closestStart+1) {
								Feature junc = new Feature(result.getChromosome(), closestStart, closestEnd);
								if (!junctionSet.contains(junc)) {
									Logger.info("Potential exon skipping junction idenfified: %s", junc);
									extraJunctions.add(junc);
								}
							}
						}
						
						isInGap = false;
						gapStart = -1;
						gapLength = -1;
						numElems = -1;
						middleBases = -1;
					}
				}
			}
			
			if (elem.getOperator() == CigarOperator.M || 
				elem.getOperator() == CigarOperator.D || 
				elem.getOperator() == CigarOperator.N ||
				elem.getOperator() == CigarOperator.X ||
				elem.getOperator() == CigarOperator.EQ) {
				
				refOffset += elem.getLength();
			}
			
			prev = elem.getOperator();
		}
		
		return extraJunctions;
	}
 
Example 13
Source File: LocusIteratorByState.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 * Creates the next alignment context from the given state.  Note that this is implemented as a
 * lazy load method. nextAlignmentContext MUST BE null in order for this method to advance to the
 * next entry.
 */
private void lazyLoadNextAlignmentContext() {
    while (nextAlignmentContext == null && readStates.hasNext()) {
        readStates.collectPendingReads();

        final Locatable location = getLocation();

        // We don't need to keep the pileup elements separated by sample within this method,
        // since they are just going to get combined into one monolithic pileup anyway
        // when we construct the final ReadPileup below. This optimization speeds up the
        // HaplotypeCaller by quite a bit!
        final List<PileupElement> allPileupElements = new ArrayList<>(100);

        for (final Map.Entry<String, PerSampleReadStateManager> sampleStatePair : readStates) {
            final PerSampleReadStateManager readState = sampleStatePair.getValue();
            final Iterator<AlignmentStateMachine> iterator = readState.iterator();

            while (iterator.hasNext()) {
                // state object with the read/offset information
                final AlignmentStateMachine state = iterator.next();
                final GATKRead read = state.getRead();
                final CigarOperator op = state.getCigarOperator();

                if (!includeReadsWithNsAtLoci && op == CigarOperator.N) {
                    continue;
                }

                if (!dontIncludeReadInPileup(read, location.getStart())) {
                    if (!includeReadsWithDeletionAtLoci && op == CigarOperator.D) {
                        continue;
                    }

                    allPileupElements.add(state.makePileupElement());
                }
            }
        }

        readStates.updateReadStates(); // critical - must be called after we get the current state offsets and location
        if (!allPileupElements.isEmpty()) { // if we got reads with non-D/N over the current position, we are done
            nextAlignmentContext = new AlignmentContext(location, new ReadPileup(location, allPileupElements));
        }
    }
}