Java Code Examples for htsjdk.samtools.Cigar#add()

The following examples show how to use htsjdk.samtools.Cigar#add() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: IndelShifterTest.java    From abra2 with MIT License 6 votes vote down vote up
@Test (groups = "unit" )
public void testShiftCigarLeft_insertAtTail() {
	Cigar cigar = new Cigar();
	
	cigar.add(new CigarElement(40, CigarOperator.M));
	cigar.add(new CigarElement(10, CigarOperator.I));
	
	Cigar newCigar;

	newCigar = indelShifter.shiftCigarLeft(cigar, 40);
	Assert.assertEquals(newCigar.toString(), "10I40M");
	
	newCigar = indelShifter.shiftCigarLeft(cigar, 39);
	Assert.assertEquals(newCigar.toString(), "1M10I39M");

	newCigar = indelShifter.shiftCigarLeft(cigar, 30);
	Assert.assertEquals(newCigar.toString(), "10M10I30M");
	
	newCigar = indelShifter.shiftCigarLeft(cigar, 1);
	Assert.assertEquals(newCigar.toString(), "39M10I1M");
}
 
Example 2
Source File: SamUtils.java    From rtg-tools with BSD 2-Clause "Simplified" License 6 votes vote down vote up
/**
 * Converts a CIGAR that may contain new style operators for match and
 * mismatch into the legacy style CIGAR.
 * @param cigar the CIGAR to convert
 * @return the legacy CIGAR
 */
public static Cigar convertToLegacyCigar(Cigar cigar) {
  final Cigar cg = new Cigar();
  int count = 0;
  for (int i = 0; i < cigar.numCigarElements(); ++i) {
    final CigarElement ce = cigar.getCigarElement(i);
    if (ce.getOperator() == CigarOperator.EQ || ce.getOperator() == CigarOperator.X) {
      count += ce.getLength();
    } else {
      if (count > 0) {
        cg.add(new CigarElement(count, CigarOperator.M));
      }
      cg.add(ce);
      count = 0;
    }
  }
  if (count > 0) {
    cg.add(new CigarElement(count, CigarOperator.M));
  }
  return cg;
}
 
Example 3
Source File: NDNCigarReadTransformer.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Refactor cigar strings that contain N-D-N elements to one N element (with total length of the three refactored elements).
 */
@VisibleForTesting
protected static Cigar refactorNDNtoN(final Cigar originalCigar) {
    final Cigar refactoredCigar = new Cigar();
    final int cigarLength = originalCigar.numCigarElements();
    for(int i = 0; i < cigarLength; i++){
        final CigarElement element = originalCigar.getCigarElement(i);
        if(element.getOperator() == CigarOperator.N && thereAreAtLeast2MoreElements(i,cigarLength)){
            final CigarElement nextElement = originalCigar.getCigarElement(i+1);
            final CigarElement nextNextElement = originalCigar.getCigarElement(i+2);

            // if it is N-D-N replace with N (with the total length) otherwise just add the first N.
            if(nextElement.getOperator() == CigarOperator.D && nextNextElement.getOperator() == CigarOperator.N){
                final int threeElementsLength = element.getLength() + nextElement.getLength() + nextNextElement.getLength();
                final CigarElement refactoredElement = new CigarElement(threeElementsLength,CigarOperator.N);
                refactoredCigar.add(refactoredElement);
                i += 2; //skip the elements that were refactored
            } else {
                refactoredCigar.add(element);  // add only the first N
            }
        } else {
            refactoredCigar.add(element);  // add any non-N element
        }
    }
    return refactoredCigar;
}
 
Example 4
Source File: ReferenceConfidenceModel.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Create a reference haplotype for an active region
 *
 * @param activeRegion the active region
 * @param refBases the ref bases
 * @param paddedReferenceLoc the location spanning of the refBases -- can be longer than activeRegion.getLocation()
 * @return a reference haplotype
 */
public static Haplotype createReferenceHaplotype(final AssemblyRegion activeRegion, final byte[] refBases, final SimpleInterval paddedReferenceLoc) {
    Utils.nonNull(activeRegion, "null region");
    Utils.nonNull(refBases, "null refBases");
    Utils.nonNull(paddedReferenceLoc, "null paddedReferenceLoc");

    final int alignmentStart = activeRegion.getExtendedSpan().getStart() - paddedReferenceLoc.getStart();
    if ( alignmentStart < 0 ) {
        throw new IllegalStateException("Bad alignment start in createReferenceHaplotype " + alignmentStart);
    }
    final Haplotype refHaplotype = new Haplotype(refBases, true);
    refHaplotype.setAlignmentStartHapwrtRef(alignmentStart);
    final Cigar c = new Cigar();
    c.add(new CigarElement(refHaplotype.getBases().length, CigarOperator.M));
    refHaplotype.setCigar(c);
    return refHaplotype;
}
 
Example 5
Source File: IndelShifterTest.java    From abra2 with MIT License 6 votes vote down vote up
@Test (groups = "unit" )
public void testShiftCigarLeft_complex() {
	//3S69M1I18M1D9M
	Cigar cigar = new Cigar();
	
	cigar.add(new CigarElement(3, CigarOperator.S));
	cigar.add(new CigarElement(69, CigarOperator.M));
	cigar.add(new CigarElement(1, CigarOperator.I));
	cigar.add(new CigarElement(18, CigarOperator.M));
	cigar.add(new CigarElement(1, CigarOperator.D));
	cigar.add(new CigarElement(9, CigarOperator.M));
	
	Cigar newCigar;
	
	newCigar = indelShifter.shiftCigarLeft(cigar, 1);
	assertEquals(newCigar.toString(), "3S68M1I18M1D10M");
}
 
Example 6
Source File: IndelShifterTest.java    From abra2 with MIT License 6 votes vote down vote up
@Test (groups = "unit" )
public void testShiftCigarLeft_multipleIndels() {
	Cigar cigar = new Cigar();
	
	cigar.add(new CigarElement(20, CigarOperator.M));
	cigar.add(new CigarElement(1, CigarOperator.I));
	cigar.add(new CigarElement(5, CigarOperator.M));
	cigar.add(new CigarElement(3, CigarOperator.D));
	cigar.add(new CigarElement(24, CigarOperator.M));
	
	Cigar newCigar;
	
	newCigar = indelShifter.shiftCigarLeft(cigar, 20);
	Assert.assertEquals(newCigar.toString(), "1I5M3D44M");
	
	newCigar = indelShifter.shiftCigarLeft(cigar, 10);
	Assert.assertEquals(newCigar.toString(), "10M1I5M3D34M");

	newCigar = indelShifter.shiftCigarLeft(cigar, 1);
	Assert.assertEquals(newCigar.toString(), "19M1I5M3D25M");
}
 
Example 7
Source File: TestUtils.java    From hmftools with GNU General Public License v3.0 6 votes vote down vote up
public static Cigar createCigar(int preSc, int preMatch, int nonRef, int postMatch, int postSc)
{
    if (preSc == 0 && preMatch == 0 && nonRef == 0 && postMatch == 0 && postSc == 0)
        return null;

    Cigar cigar = new Cigar();

    if (preSc > 0)
        cigar.add(new CigarElement(preSc, CigarOperator.SOFT_CLIP));

    if (preMatch > 0)
        cigar.add(new CigarElement(preMatch, CigarOperator.MATCH_OR_MISMATCH));

    if (nonRef > 0)
        cigar.add(new CigarElement(nonRef, CigarOperator.SKIPPED_REGION));

    if (postMatch > 0)
        cigar.add(new CigarElement(postMatch, CigarOperator.MATCH_OR_MISMATCH));

    if (postSc > 0)
        cigar.add(new CigarElement(postSc, CigarOperator.SOFT_CLIP));

    return cigar;
}
 
Example 8
Source File: IndelShifterTest.java    From abra2 with MIT License 5 votes vote down vote up
@Test (groups = "unit" )
public void testShiftCigarLeft_basic() {
	Cigar cigar = new Cigar();
	
	cigar.add(new CigarElement(10, CigarOperator.M));
	cigar.add(new CigarElement(3, CigarOperator.D));
	cigar.add(new CigarElement(40, CigarOperator.M));
	
	Cigar newCigar;

	newCigar = indelShifter.shiftCigarLeft(cigar, 10);
	Assert.assertEquals(newCigar.toString(), "3D50M");
	
	newCigar = indelShifter.shiftCigarLeft(cigar, 9);
	Assert.assertEquals(newCigar.toString(), "1M3D49M");
	
	newCigar = indelShifter.shiftCigarLeft(cigar, 8);
	Assert.assertEquals(newCigar.toString(), "2M3D48M");
	
	newCigar = indelShifter.shiftCigarLeft(cigar, 4);
	Assert.assertEquals(newCigar.toString(), "6M3D44M");

	newCigar = indelShifter.shiftCigarLeft(cigar, 2);
	Assert.assertEquals(newCigar.toString(), "8M3D42M");
	
	newCigar = indelShifter.shiftCigarLeft(cigar, 1);
	Assert.assertEquals(newCigar.toString(), "9M3D41M");
}
 
Example 9
Source File: IndelShifterTest.java    From abra2 with MIT License 5 votes vote down vote up
@Test (groups = "unit" )
public void testShiftCigarLeft_softClipping() {
	Cigar cigar = new Cigar();
	
	cigar.add(new CigarElement(2, CigarOperator.S));
	cigar.add(new CigarElement(6, CigarOperator.M));
	cigar.add(new CigarElement(2, CigarOperator.I));
	cigar.add(new CigarElement(30, CigarOperator.M));
	cigar.add(new CigarElement(10, CigarOperator.S));
	
	Cigar newCigar;

	newCigar = indelShifter.shiftCigarLeft(cigar, 6);
	Assert.assertEquals(newCigar.toString(), "2S2I36M10S");
	
	newCigar = indelShifter.shiftCigarLeft(cigar, 5);
	Assert.assertEquals(newCigar.toString(), "2S1M2I35M10S");
	
	newCigar = indelShifter.shiftCigarLeft(cigar, 4);
	Assert.assertEquals(newCigar.toString(), "2S2M2I34M10S");
	
	newCigar = indelShifter.shiftCigarLeft(cigar, 3);
	Assert.assertEquals(newCigar.toString(), "2S3M2I33M10S");
	
	newCigar = indelShifter.shiftCigarLeft(cigar, 2);
	Assert.assertEquals(newCigar.toString(), "2S4M2I32M10S");
	
	newCigar = indelShifter.shiftCigarLeft(cigar, 1);
	Assert.assertEquals(newCigar.toString(), "2S5M2I31M10S");
}
 
Example 10
Source File: SAMRecordUtils.java    From abra2 with MIT License 5 votes vote down vote up
/**
 * Remove leading or trailing soft clips from the input read.
 * Does not modify a read entirely comprised of soft clips.
 */
public static void removeSoftClips(SAMRecord read) {
	
	Cigar cigar = read.getCigar();
	
	CigarElement firstElement = cigar.getCigarElement(0);
	CigarElement lastElement  = cigar.getCigarElement(cigar.numCigarElements()-1);
	
	if ((firstElement.getOperator() == CigarOperator.S) ||
		(lastElement.getOperator() == CigarOperator.S)) {
	
		Cigar newCigar = new Cigar();
		
		String bases = read.getReadString();
		//String qualities = read.getBaseQualityString();
				
		if (firstElement.getOperator() == CigarOperator.S) {
			bases = bases.substring(firstElement.getLength(), bases.length());
			//qualities = qualities.substring(firstElement.getLength(), qualities.length()-1);
		} else {
			newCigar.add(firstElement);
		}
		
		for (int i=1; i<cigar.numCigarElements()-1; i++) {
			newCigar.add(cigar.getCigarElement(i));
		}
		
		if (lastElement.getOperator() == CigarOperator.S) {
			bases = bases.substring(0, bases.length() - lastElement.getLength());
			//qualities = qualities.substring(0, qualities.length() - lastElement.getLength() - 1);
		} else {
			newCigar.add(lastElement);
		}
		
		read.setCigar(newCigar);
		read.setReadString(bases);
		//read.setBaseQualityString(qualities);
	}
}
 
Example 11
Source File: SAMRecordUtils.java    From abra2 with MIT License 5 votes vote down vote up
/**
 * Replace hard clips with soft clips.
 */
public static void replaceHardClips(SAMRecord read) {
	Cigar cigar = read.getCigar();
	
	if (cigar.getCigarElements().size() > 0) {
		CigarElement firstElement = cigar.getCigarElement(0);
		CigarElement lastElement  = cigar.getCigarElement(cigar.numCigarElements()-1);
		
		if ((firstElement.getOperator() == CigarOperator.H) ||
			(lastElement.getOperator() == CigarOperator.H)) {
			
			Cigar newCigar = new Cigar();
			
			boolean isFirst = true;
			
			for (CigarElement element : cigar.getCigarElements()) {
				if (element.getOperator() != CigarOperator.H) {
					newCigar.add(element);
				} else {
					CigarElement softClip = new CigarElement(element.getLength(), CigarOperator.S);
					newCigar.add(softClip);
					
					if (isFirst) {
						read.setReadString(padBases(element.getLength()) + read.getReadString());
					} else {
						read.setReadString(read.getReadString() + padBases(element.getLength()));							
					}
				}
				
				isFirst = false;
			}
			
			read.setCigar(newCigar);
		}
	}
}
 
Example 12
Source File: BaseQualityClipReadTransformerTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test(dataProvider = "sequenceStrings")
public void testApply(final String quals_in, final String basesOut, final String qualsOut, final String cigarOut) throws Exception {
    final String basesIn = "CTCAAGTACAAGCTGATCCAGACCTACAGGGTGATGTCATTAGAGGCACTGATAACACACACACTATGGGGTGGGGGTGGACAGTTCCCCACTGCAATCC";
    final BaseQualityClipReadTransformer trans = new BaseQualityClipReadTransformer(15);
    final Cigar cigar = new Cigar();
    cigar.add(new CigarElement(basesIn.length(), CigarOperator.MATCH_OR_MISMATCH));
    final GATKRead readIn = ArtificialReadUtils.createArtificialRead(basesIn.getBytes(),SAMUtils.fastqToPhred(quals_in), cigar.toString());
    final GATKRead readOut = trans.apply(readIn);
    Assert.assertEquals(readOut.getBases(),basesOut.getBytes());
    Assert.assertEquals(readOut.getBaseQualities(),SAMUtils.fastqToPhred(qualsOut));
    Assert.assertEquals(readOut.getCigar().toString(), cigarOut);
}
 
Example 13
Source File: ReadThreadingAssemblerUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private List<Haplotype> assemble(final ReadThreadingAssembler assembler, final byte[] refBases, final SimpleInterval loc, final List<GATKRead> reads) {
        final Haplotype refHaplotype = new Haplotype(refBases, true);
        final Cigar c = new Cigar();
        c.add(new CigarElement(refHaplotype.getBases().length, CigarOperator.M));
        refHaplotype.setCigar(c);

        final AssemblyRegion activeRegion = new AssemblyRegion(loc, true, 0, header);
        activeRegion.addAll(reads);
//        logger.warn("Assembling " + activeRegion + " with " + engine);
        final AssemblyResultSet assemblyResultSet =  assembler.runLocalAssembly(activeRegion, refHaplotype, refBases, loc, null, header,
                                                                                SmithWatermanJavaAligner.getInstance());
        return assemblyResultSet.getHaplotypeList();
    }
 
Example 14
Source File: MergeBamAlignmentTest.java    From picard with MIT License 5 votes vote down vote up
private SAMRecord makeRead(final SAMFileHeader alignedHeader, final SAMRecord unmappedRec, final HitSpec hitSpec, final int hitIndex) {
    final SAMRecord rec = new SAMRecord(alignedHeader);
    rec.setReadName(unmappedRec.getReadName());
    rec.setReadBases(unmappedRec.getReadBases());
    rec.setBaseQualities(unmappedRec.getBaseQualities());
    rec.setMappingQuality(hitSpec.mapq);
    if (!hitSpec.primary) rec.setNotPrimaryAlignmentFlag(true);
    final Cigar cigar = new Cigar();
    final int readLength = rec.getReadLength();
    if (hitSpec.filtered) {
        // Add two insertions so alignment is filtered.
        cigar.add(new CigarElement(readLength-4, CigarOperator.M));
        cigar.add(new CigarElement(1, CigarOperator.I));
        cigar.add(new CigarElement(1, CigarOperator.M));
        cigar.add(new CigarElement(1, CigarOperator.I));
        cigar.add(new CigarElement(1, CigarOperator.M));
    } else {
        cigar.add(new CigarElement(readLength, CigarOperator.M));
    }
    rec.setCigar(cigar);

    rec.setReferenceName(bigSequenceName);
    rec.setAttribute(SAMTag.HI.name(), hitIndex);
    rec.setAlignmentStart(hitIndex + 1);

    return rec;
}
 
Example 15
Source File: ReadThreadingAssemblerUnitTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private List<Haplotype> assemble(final ReadThreadingAssembler assembler, final byte[] refBases, final SimpleInterval loc, final List<GATKRead> reads) {
        final Haplotype refHaplotype = new Haplotype(refBases, true);
        final Cigar c = new Cigar();
        c.add(new CigarElement(refHaplotype.getBases().length, CigarOperator.M));
        refHaplotype.setCigar(c);

        final AssemblyRegion activeRegion = new AssemblyRegion(loc, null, true, 0, header);
        activeRegion.addAll(reads);
//        logger.warn("Assembling " + activeRegion + " with " + engine);
        final AssemblyResultSet assemblyResultSet =  assembler.runLocalAssembly(activeRegion, refHaplotype, refBases, loc, Collections.<VariantContext>emptyList(), null, header);
        return assemblyResultSet.getHaplotypeList();
    }
 
Example 16
Source File: AnnotationUtilsTest.java    From Drop-seq with MIT License 5 votes vote down vote up
@Test
public void testGetConsistentExons () {
	// need to build a new read that falls into 2 genes to check if it's inconsistent.
	AnnotationUtils u = AnnotationUtils.getInstance();
	OverlapDetector<Gene> geneOverlapDetector = getGeneOverlapDetector(BAM, GTF);
	Map<String, Gene> map = getGeneMap(geneOverlapDetector);
	// expected UTR and coding.
	SAMRecord rec = getReadByName("000000000-AMY9M:1:2104:20987:12124", BAM);
	// change the cigar so the read maps to two genes exons.
	// rec.setCigar(cigar);
	rec.setAlignmentStart(6269430);
	Cigar c = new Cigar ();
	c.add(new CigarElement(20, CigarOperator.M));
	c.add(new CigarElement(9741377, CigarOperator.N));
	c.add(new CigarElement(30, CigarOperator.M));
	rec.setCigar(c);
	Set<Gene> genes = new HashSet<>();
	genes.add(map.get("RPL22"));
	genes.add(map.get("PLEKHM2"));

	// This read spans 2 genes, so it's not consistent under the strict test.
	Set<Gene> actual = u.getConsistentExons(rec, genes, false);
	Set<Gene> expected = new HashSet<>();
	Assert.assertEquals(actual, expected);

	// under the less strict option, we allow the read to span 2 different genes.
	actual = u.getConsistentExons(rec, genes, true);
	expected = genes;
	Assert.assertEquals(actual, expected);

}
 
Example 17
Source File: CigarUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 * Calculate the cigar elements for this path against the reference sequence.
 *
 * This assumes that the reference and alt sequence are haplotypes derived from a de Bruijn graph or SeqGraph and have the same
 * ref source and ref sink vertices.  That is, the alt sequence start and end are assumed anchored to the reference start and end, which
 * occur at the ends of the padded assembly region.  Hence, unlike read alignment, there is no concept of a start or end coordinate here.
 * Furthermore, it is important to note that in the rare case that the alt cigar begins or ends with a deletion, we must keep the leading
 * or trailing deletion in order to maintain the original reference span of the alt haplotype.  This can occur, for example, when the ref
 * haplotype starts with N repeats of a long sequence and the alt haplotype starts with N-1 repeats.
 *
 * @param aligner
 * @param refSeq the reference sequence that all of the bases in this path should align to
 * @return a Cigar mapping this path to refSeq, or null if no reasonable alignment could be found
 */
public static Cigar calculateCigar(final byte[] refSeq, final byte[] altSeq, final SmithWatermanAligner aligner, final SWOverhangStrategy strategy) {
    Utils.nonNull(refSeq, "refSeq");
    Utils.nonNull(altSeq, "altSeq");
    if ( altSeq.length == 0 ) {
        // horrible edge case from the unit tests, where this path has no bases
        return new Cigar(Collections.singletonList(new CigarElement(refSeq.length, CigarOperator.D)));
    }

    //Note: this is a performance optimization.
    // If two strings are equal (a O(n) check) then it's trivial to get CIGAR for them.
    // Furthermore, if their lengths are equal and their element-by-element comparison yields two or fewer mismatches
    // it's also a trivial M-only CIGAR, because in order to have equal length one would need at least one insertion and
    // one deletion, in which case two substitutions is a better alignment.
    if (altSeq.length == refSeq.length){
        int mismatchCount = 0;
        for (int n = 0; n < refSeq.length && mismatchCount <= 2; n++) {
            mismatchCount += (altSeq[n] == refSeq[n] ? 0 : 1);
        }
        if (mismatchCount <= 2) {
            final Cigar matching = new Cigar();
            matching.add(new CigarElement(refSeq.length, CigarOperator.MATCH_OR_MISMATCH));
            return matching;
        }
    }

    final Cigar nonStandard;

    final String paddedRef = SW_PAD + new String(refSeq) + SW_PAD;
    final String paddedPath = SW_PAD + new String(altSeq) + SW_PAD;
    final SmithWatermanAlignment alignment = aligner.align(paddedRef.getBytes(), paddedPath.getBytes(), NEW_SW_PARAMETERS, strategy);

    if ( isSWFailure(alignment) ) {
        return null;
    }

    // cut off the padding bases
    final int baseStart = SW_PAD.length();
    final int baseEnd = paddedPath.length() - SW_PAD.length() - 1; // -1 because it's inclusive
    final CigarBuilder.Result trimmedCigarAndDeletionsRemoved = AlignmentUtils.trimCigarByBases(alignment.getCigar(), baseStart, baseEnd);

    nonStandard = trimmedCigarAndDeletionsRemoved.getCigar();

    // leading deletion removed by cigar trimming shift the alignment start to the right
    final int trimmedLeadingDeletions = trimmedCigarAndDeletionsRemoved.getLeadingDeletionBasesRemoved();

    // trailing deletions should be kept in order to left-align
    final int trimmedTrailingDeletions = trimmedCigarAndDeletionsRemoved.getTrailingDeletionBasesRemoved();

    if ( trimmedTrailingDeletions > 0  ) {
        nonStandard.add(new CigarElement(trimmedTrailingDeletions, CigarOperator.D));
    }

    final CigarBuilder.Result leftAlignmentResult = AlignmentUtils.leftAlignIndels(nonStandard, refSeq, altSeq, trimmedLeadingDeletions);

    // we must account for possible leading deletions removed when trimming the padding and when left-aligning
    // trailing deletions removed when trimming have already been restored for left-alignment, but left-alingment may have removed them again.
    final int totalLeadingDeletionsRemoved = trimmedLeadingDeletions + leftAlignmentResult.getLeadingDeletionBasesRemoved();
    final int totalTrailingDeletionsRemoved = leftAlignmentResult.getTrailingDeletionBasesRemoved();

    if (totalLeadingDeletionsRemoved == 0 && totalTrailingDeletionsRemoved == 0) {
        return leftAlignmentResult.getCigar();
    } else {
        final List<CigarElement> resultElements = new ArrayList<>();
        if (totalLeadingDeletionsRemoved > 0) {
            resultElements.add(new CigarElement(totalLeadingDeletionsRemoved, CigarOperator.D));
        }
        resultElements.addAll(leftAlignmentResult.getCigar().getCigarElements());
        if (totalTrailingDeletionsRemoved > 0) {
            resultElements.add(new CigarElement(totalTrailingDeletionsRemoved, CigarOperator.D));
        }
        return new Cigar(resultElements);
    }
}
 
Example 18
Source File: KBestHaplotypeFinderUnitTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
@Test
public void testLeftAlignCigarSequentially() {
    String preRefString = "GATCGATCGATC";
    String postRefString = "TTT";
    String refString = "ATCGAGGAGAGCGCCCCG";
    String indelString1 = "X";
    String indelString2 = "YZ";
    int refIndel1 = 10;
    int refIndel2 = 12;

    for ( final int indelSize1 : Arrays.asList(1, 2, 3, 4) ) {
        for ( final int indelOp1 : Arrays.asList(1, -1) ) {
            for ( final int indelSize2 : Arrays.asList(1, 2, 3, 4) ) {
                for ( final int indelOp2 : Arrays.asList(1, -1) ) {

                    Cigar expectedCigar = new Cigar();
                    expectedCigar.add(new CigarElement(refString.length(), CigarOperator.M));
                    expectedCigar.add(new CigarElement(indelSize1, (indelOp1 > 0 ? CigarOperator.I : CigarOperator.D)));
                    expectedCigar.add(new CigarElement((indelOp1 < 0 ? refIndel1 - indelSize1 : refIndel1), CigarOperator.M));
                    expectedCigar.add(new CigarElement(refString.length(), CigarOperator.M));
                    expectedCigar.add(new CigarElement(indelSize2 * 2, (indelOp2 > 0 ? CigarOperator.I : CigarOperator.D)));
                    expectedCigar.add(new CigarElement((indelOp2 < 0 ? (refIndel2 - indelSize2) * 2 : refIndel2 * 2), CigarOperator.M));
                    expectedCigar.add(new CigarElement(refString.length(), CigarOperator.M));

                    Cigar givenCigar = new Cigar();
                    givenCigar.add(new CigarElement(refString.length() + refIndel1/2, CigarOperator.M));
                    givenCigar.add(new CigarElement(indelSize1, (indelOp1 > 0 ? CigarOperator.I : CigarOperator.D)));
                    givenCigar.add(new CigarElement((indelOp1 < 0 ? (refIndel1/2 - indelSize1) : refIndel1/2) + refString.length() + refIndel2/2 * 2, CigarOperator.M));
                    givenCigar.add(new CigarElement(indelSize2 * 2, (indelOp2 > 0 ? CigarOperator.I : CigarOperator.D)));
                    givenCigar.add(new CigarElement((indelOp2 < 0 ? (refIndel2/2 - indelSize2) * 2 : refIndel2/2 * 2) + refString.length(), CigarOperator.M));

                    String theRef = preRefString + refString + Strings.repeat(indelString1, refIndel1) + refString + Strings.repeat(indelString2, refIndel2) + refString + postRefString;
                    String theRead = refString + Strings.repeat(indelString1, refIndel1 + indelOp1 * indelSize1) + refString + Strings.repeat(indelString2, refIndel2 + indelOp2 * indelSize2) + refString;

                    Cigar calculatedCigar = CigarUtils.leftAlignCigarSequentially(AlignmentUtils.consolidateCigar(givenCigar), theRef.getBytes(), theRead.getBytes(), preRefString.length(), 0);
                    Assert.assertEquals(AlignmentUtils.consolidateCigar(calculatedCigar).toString(), AlignmentUtils.consolidateCigar(expectedCigar).toString(), "Cigar strings do not match!");
                }
            }
        }
    }
}
 
Example 19
Source File: KBestHaplotypeFinderUnitTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
@Test(dataProvider = "BasicBubbleDataProvider")
public void testBasicBubbleData(final int refBubbleLength, final int altBubbleLength) {
    // Construct the assembly graph
    SeqGraph graph = new SeqGraph(3);
    final String preRef = "ATGG";
    final String postRef = "GGGGC";

    SeqVertex v = new SeqVertex(preRef);
    SeqVertex v2Ref = new SeqVertex(Strings.repeat("A", refBubbleLength));
    SeqVertex v2Alt = new SeqVertex(Strings.repeat("A", altBubbleLength-1) + "T");
    SeqVertex v3 = new SeqVertex(postRef);

    graph.addVertex(v);
    graph.addVertex(v2Ref);
    graph.addVertex(v2Alt);
    graph.addVertex(v3);
    graph.addEdge(v, v2Ref, new BaseEdge(true, 10));
    graph.addEdge(v2Ref, v3, new BaseEdge(true, 10));
    graph.addEdge(v, v2Alt, new BaseEdge(false, 5));
    graph.addEdge(v2Alt, v3, new BaseEdge(false, 5));

    // Construct the test path
    Path<SeqVertex,BaseEdge> path = new Path<>(v, graph);
    path = new Path<>(path, graph.getEdge(v, v2Alt));
    path = new Path<>(path, graph.getEdge(v2Alt, v3));

    // Construct the actual cigar string implied by the test path
    Cigar expectedCigar = new Cigar();
    expectedCigar.add(new CigarElement(preRef.length(), CigarOperator.M));
    if( refBubbleLength > altBubbleLength ) {
        expectedCigar.add(new CigarElement(refBubbleLength - altBubbleLength, CigarOperator.D));
        expectedCigar.add(new CigarElement(altBubbleLength, CigarOperator.M));
    } else if ( refBubbleLength < altBubbleLength ) {
        expectedCigar.add(new CigarElement(refBubbleLength, CigarOperator.M));
        expectedCigar.add(new CigarElement(altBubbleLength - refBubbleLength, CigarOperator.I));
    } else {
        expectedCigar.add(new CigarElement(refBubbleLength, CigarOperator.M));
    }
    expectedCigar.add(new CigarElement(postRef.length(), CigarOperator.M));

    final String ref = preRef + v2Ref.getSequenceString() + postRef;
    Assert.assertEquals(path.calculateCigar(ref.getBytes()).toString(), AlignmentUtils.consolidateCigar(expectedCigar).toString(), "Cigar string mismatch");
}
 
Example 20
Source File: ReadCountsTest.java    From hmftools with GNU General Public License v3.0 4 votes vote down vote up
@Test
public void testReadRegionTypes()
{
    String REF_BASE_STR_1 = "ABCDEFGHIJKLMNOPQRST"; // currently unused

    // single read and exon
    RegionReadData region = new RegionReadData("1", 100, 200);

    // read covers part of the exon
    ReadRecord read = createReadRecord(1, "1", 110, 130, REF_BASE_STR_1, createCigar(0, 21, 0));

    assertEquals(1, read.getMappedRegionCoords().size());
    assertEquals(110, read.getMappedRegionCoords().get(0)[SE_START]);
    assertEquals(130, read.getMappedRegionCoords().get(0)[SE_END]);

    // test classification of reads
    assertEquals(WITHIN_EXON, read.getRegionMatchType(region));

    read = createReadRecord(1, "1", 90, 110, REF_BASE_STR_1, createCigar(0, 21, 0));
    assertEquals(EXON_INTRON, read.getRegionMatchType(region));

    read = createReadRecord(1, "1", 100, 200, REF_BASE_STR_1, createCigar(0, 101, 0));
    assertEquals(EXON_MATCH, read.getRegionMatchType(region));

    read = createReadRecord(1, "1", 100, 150, REF_BASE_STR_1, createCigar(0, 51, 0));
    assertEquals(EXON_BOUNDARY, read.getRegionMatchType(region));

    // read covering multiple exons
    Cigar cigar = new Cigar();
    cigar.add(new CigarElement(11, CigarOperator.M)); // matches half of first exon
    cigar.add(new CigarElement(19, CigarOperator.N));
    cigar.add(new CigarElement(21, CigarOperator.M)); // matches all of second exon
    cigar.add(new CigarElement(19, CigarOperator.N));
    cigar.add(new CigarElement(25, CigarOperator.M)); // matches past last exon into intron

    read = createReadRecord(1, "1", 110, 204, REF_BASE_STR_1, cigar);

    assertEquals(3, read.getMappedRegionCoords().size());
    assertEquals(110, read.getMappedRegionCoords().get(0)[SE_START]);
    assertEquals(120, read.getMappedRegionCoords().get(0)[SE_END]);
    assertEquals(140, read.getMappedRegionCoords().get(1)[SE_START]);
    assertEquals(160, read.getMappedRegionCoords().get(1)[SE_END]);
    assertEquals(180, read.getMappedRegionCoords().get(2)[SE_START]);
    assertEquals(204, read.getMappedRegionCoords().get(2)[SE_END]);

    RegionReadData region1 = new RegionReadData("1", 100, 120);
    RegionReadData region2 = new RegionReadData("1", 140, 160);
    RegionReadData region3 = new RegionReadData("1", 180, 200);

    assertEquals(EXON_BOUNDARY, read.getRegionMatchType(region1));
    assertEquals(EXON_MATCH, read.getRegionMatchType(region2));
    assertEquals(EXON_INTRON, read.getRegionMatchType(region3));
}