htsjdk.samtools.Cigar Java Examples

The following examples show how to use htsjdk.samtools.Cigar. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: IndelShifterTest.java    From abra2 with MIT License 6 votes vote down vote up
@Test (groups = "unit" )
public void testShiftCigarLeft_complex() {
	//3S69M1I18M1D9M
	Cigar cigar = new Cigar();
	
	cigar.add(new CigarElement(3, CigarOperator.S));
	cigar.add(new CigarElement(69, CigarOperator.M));
	cigar.add(new CigarElement(1, CigarOperator.I));
	cigar.add(new CigarElement(18, CigarOperator.M));
	cigar.add(new CigarElement(1, CigarOperator.D));
	cigar.add(new CigarElement(9, CigarOperator.M));
	
	Cigar newCigar;
	
	newCigar = indelShifter.shiftCigarLeft(cigar, 1);
	assertEquals(newCigar.toString(), "3S68M1I18M1D10M");
}
 
Example #2
Source File: CigarUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test
public void testClipCountsByHand() {
    // int[] is leftHard, leftSoft, rightHard, rightSoft
    final List<Pair<Cigar, int[]>> tests = Arrays.asList(
            Pair.of("13H3M35D13M2I45S30H", new int[] {13, 0, 30, 45}),
            Pair.of("1H3S67M13S", new int[] {1, 3, 0, 13}),
            Pair.of("13M30S10S1H", new int[] {0, 0, 1, 40}),
            Pair.of("113S4M", new int[] {0, 113, 0, 0}),
            Pair.of("5H3H10M2S1S", new int[] {8, 0, 0, 3}),
            Pair.of("10M", new int[] {0, 0, 0, 0}),
            Pair.of("1H2H3S4S10M5S6S7H8H", new int[] {3, 7, 15, 11})
    )
            .stream().map(pair -> Pair.of(TextCigarCodec.decode(pair.getLeft()), pair.getRight())).collect(Collectors.toList());

    for (final Pair<Cigar, int[]> test : tests) {
        Assert.assertEquals(CigarUtils.countClippedBases(test.getLeft(), ClippingTail.LEFT_TAIL, CigarOperator.HARD_CLIP), test.getRight()[0]);
        Assert.assertEquals(CigarUtils.countClippedBases(test.getLeft(), ClippingTail.LEFT_TAIL, CigarOperator.SOFT_CLIP), test.getRight()[1]);
        Assert.assertEquals(CigarUtils.countClippedBases(test.getLeft(), ClippingTail.RIGHT_TAIL, CigarOperator.HARD_CLIP), test.getRight()[2]);
        Assert.assertEquals(CigarUtils.countClippedBases(test.getLeft(), ClippingTail.RIGHT_TAIL, CigarOperator.SOFT_CLIP), test.getRight()[3]);
    }
}
 
Example #3
Source File: ContigAlignmentsModifierUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test(groups = "sv")
public void testGappedAlignmentBreaker_OneDeletion() {
    final Cigar cigar = TextCigarCodec.decode("2S205M2D269M77S");
    final AlignmentInterval alignmentInterval = new AlignmentInterval(new SimpleInterval("1", 100, 575),
            3, 476, cigar, true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE);

    final List<AlignmentInterval> generatedARList = Utils.stream(ContigAlignmentsModifier.splitGappedAlignment(alignmentInterval,
            1, cigar.getReadLength())).collect(Collectors.toList());
    Assert.assertEquals(generatedARList.size(), 2);
    Assert.assertEquals(generatedARList.get(0), new AlignmentInterval(new SimpleInterval("1", 100, 304),
            3, 207, TextCigarCodec.decode("2S205M346S"), true,
            60, NO_NM, NO_AS, ContigAlignmentsModifier.AlnModType.FROM_SPLIT_GAPPED_ALIGNMENT));
    Assert.assertEquals(generatedARList.get(1), new AlignmentInterval(new SimpleInterval("1", 307, 575),
            208, 476, TextCigarCodec.decode("207S269M77S"), true,
            60, NO_NM, NO_AS, ContigAlignmentsModifier.AlnModType.FROM_SPLIT_GAPPED_ALIGNMENT));
}
 
Example #4
Source File: TestUtils.java    From hmftools with GNU General Public License v3.0 6 votes vote down vote up
public static ReadRecord[] createSupplementaryReadPair(final int id, final GeneCollection gc1, final GeneCollection gc2,
        int posStart1, int posEnd1, int posStart2, int posEnd2, final Cigar cigar1, final Cigar cigar2, boolean firstInPair)
{
    int readBaseLength = cigar1.getCigarElements().stream()
            .filter(x -> x.getOperator() != N && x.getOperator() != D)
            .mapToInt(x -> x.getLength()).sum();

    String readBases = generateRandomBases(readBaseLength);

    ReadRecord read1 = createMappedRead(id, gc1, posStart1, posEnd1, cigar1, readBases);
    ReadRecord read2 = createMappedRead(id, gc2, posStart2, posEnd2, cigar2, readBases);
    read1.setFlag(FIRST_OF_PAIR, firstInPair);
    read2.setFlag(SECOND_OF_PAIR, !firstInPair);
    read1.setSuppAlignment(String.format("%s;%d;%s", read2.Chromosome, read2.PosStart, read2.Cigar.toString()));
    read2.setSuppAlignment(String.format("%s;%d;%s", read1.Chromosome, read1.PosStart, read1.Cigar.toString()));

    return new ReadRecord[] { read1, read2 };
}
 
Example #5
Source File: ReferenceConfidenceModel.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Create a reference haplotype for an active region
 *
 * @param activeRegion the active region
 * @param refBases the ref bases
 * @param paddedReferenceLoc the location spanning of the refBases -- can be longer than activeRegion.getLocation()
 * @return a reference haplotype
 */
public static Haplotype createReferenceHaplotype(final AssemblyRegion activeRegion, final byte[] refBases, final SimpleInterval paddedReferenceLoc) {
    Utils.nonNull(activeRegion, "null region");
    Utils.nonNull(refBases, "null refBases");
    Utils.nonNull(paddedReferenceLoc, "null paddedReferenceLoc");

    final int alignmentStart = activeRegion.getExtendedSpan().getStart() - paddedReferenceLoc.getStart();
    if ( alignmentStart < 0 ) {
        throw new IllegalStateException("Bad alignment start in createReferenceHaplotype " + alignmentStart);
    }
    final Haplotype refHaplotype = new Haplotype(refBases, true);
    refHaplotype.setAlignmentStartHapwrtRef(alignmentStart);
    final Cigar c = new Cigar();
    c.add(new CigarElement(refHaplotype.getBases().length, CigarOperator.M));
    refHaplotype.setCigar(c);
    return refHaplotype;
}
 
Example #6
Source File: IndelShifterTest.java    From abra2 with MIT License 6 votes vote down vote up
@Test (groups = "unit" )
public void testShiftIndelsLeft() throws Exception {
	
	CompareToReference2 c2r = new CompareToReference2();
	c2r.init("test-data/test.fa");
	/*
	TCGAATCGATATATTTCCGGAACAGACTCAG
	------CGATAT--TTCCGGAA--------- <-- orig
	------CG--ATATTTCCGGAA--------- <-- new
	1234567890123456789012
	*/
	
	int refStart = 7;
	int refEnd = 22;
	Cigar cigar = TextCigarCodec.decode("6M2D8M");
	String seq = "CGATATTTCCGGAA";
	
	// 1 based input
	Cigar newCigar = indelShifter.shiftIndelsLeft(refStart, refEnd, "seq1", cigar, seq, c2r);
	assertEquals(TextCigarCodec.encode(newCigar), "2M2D12M");
}
 
Example #7
Source File: Read.java    From cramtools with Apache License 2.0 6 votes vote down vote up
SAMRecord firstSAMRecord(SAMFileHeader header) {
	SAMRecord r = new SAMRecord(header);
	r.setReadName(evidenceRecord.getReadName());
	r.setReferenceName(evidenceRecord.Chromosome);
	r.setAlignmentStart(Integer.valueOf(evidenceRecord.OffsetInReference) + 1);
	r.setMappingQuality(Integer.valueOf(evidenceRecord.ScoreAllele0));
	r.setReadPairedFlag(true);
	r.setReadUnmappedFlag(false);
	r.setReadNegativeStrandFlag(negative);
	r.setFirstOfPairFlag(evidenceRecord.side == 0);
	r.setSecondOfPairFlag(!r.getFirstOfPairFlag());

	r.setCigar(new Cigar(Utils.toCigarOperatorList(firstHalf.samCigarElements)));

	r.setReadBases(Utils.toByteArray(firstHalf.readBasesBuf));
	r.setBaseQualities(Utils.toByteArray(firstHalf.readScoresBuf));

	r.setAttribute("GC", Utils.toString(firstHalf.gcList));
	r.setAttribute("GS", Utils.toString(firstHalf.gsBuf));
	r.setAttribute("GQ", SAMUtils.phredToFastq(Utils.toByteArray(firstHalf.gqBuf)));

	return r;
}
 
Example #8
Source File: IndelShifter.java    From abra2 with MIT License 6 votes vote down vote up
public static void main(String[] args) throws IOException {
	CompareToReference2 c2r = new CompareToReference2();
	c2r.init("/home/lmose/dev/reference/hg19/chr7.fa");
	
	String seq = "CGCTGGCGGCCTTCCTGCCCGACCTGTCCCTGGCCAAGAGGAAGACCTGGCGGAACCCTTGGCGGCGTTCCTACCACAAGCGCAAGAAGGCCGAGTGGGAGGTGGACCCTGACAACTGCGAGGAGGTGAAGCAGACACCGCCCTACGACAGCAGCCACCGCATCCTGGACGTCATGGACATGACGATCTTCGACTTCCTCATGGGAAACATGGACCGTCACCACTACGAGACTTGTGAGAAGTTTGGGAATGAAACGTTCATCATCCACTTAGACAATGGAAGAGGGATCCGGA";
	IndelShifter is = new IndelShifter();
	Cigar orig = TextCigarCodec.decode("7M88I1M10D109M241D82M60D7M");
	
	Cigar cigar = is.shiftAllIndelsLeft(296604, 297121, "chr7", orig, 
			seq, 
			c2r);
	
	System.out.println("old: " + orig + ", " + orig.getReadLength());
	System.out.println("new: " + cigar + ", " + cigar.getReadLength());
	//296604, 297121,chr7, 7M88I1M10D109M241D82M60D7M, CGCTGGCGGCCTTCCTGCCCGACCTGTCCCTGGCCAAGAGGAAGACCTGGCGGAACCCTTGGCGGCGTTCCTACCACAAGCGCAAGAAGGCCGAGTGGGAGGTGGACCCTGACAACTGCGAGGAGGTGAAGCAGACACCGCCCTACGACAGCAGCCACCGCATCCTGGACGTCATGGACATGACGATCTTCGACTTCCTCATGGGAAACATGGACCGTCACCACTACGAGACTTGTGAGAAGTTTGGGAATGAAACGTTCATCATCCACTTAGACAATGGAAGAGGGATCCGGA
}
 
Example #9
Source File: ContigAlignmentsModifierUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test(groups = "sv")
public void testGappedAlignmentBreaker_HardAndSoftClip() {

    final Cigar cigar = TextCigarCodec.decode("1H2S3M5I10M20D6M7S8H");
    final AlignmentInterval alignmentInterval = new AlignmentInterval(new SimpleInterval("1", 100, 138),
            4, 27, cigar, true, 60, 0,
            100, ContigAlignmentsModifier.AlnModType.NONE);

    final List<AlignmentInterval> generatedARList = Utils.stream(ContigAlignmentsModifier.splitGappedAlignment(alignmentInterval,
            1, cigar.getReadLength()+1+8)).collect(Collectors.toList());

    Assert.assertEquals(generatedARList.get(0), new AlignmentInterval(new SimpleInterval("1", 100, 102),
            4, 6, TextCigarCodec.decode("1H2S3M28S8H"), true,
            60, NO_NM, NO_AS, ContigAlignmentsModifier.AlnModType.FROM_SPLIT_GAPPED_ALIGNMENT));
    Assert.assertEquals(generatedARList.get(1), new AlignmentInterval(new SimpleInterval("1", 103, 112),
            12, 21, TextCigarCodec.decode("1H10S10M13S8H"), true,
            60, NO_NM, NO_AS, ContigAlignmentsModifier.AlnModType.FROM_SPLIT_GAPPED_ALIGNMENT));
    Assert.assertEquals(generatedARList.get(2), new AlignmentInterval(new SimpleInterval("1", 133, 138),
            22, 27, TextCigarCodec.decode("1H20S6M7S8H"), true,
            60, NO_NM, NO_AS, ContigAlignmentsModifier.AlnModType.FROM_SPLIT_GAPPED_ALIGNMENT));
}
 
Example #10
Source File: SAMRecordUtils.java    From abra2 with MIT License 6 votes vote down vote up
public static String getLeadingClips(SAMRecord read) {
	List<CigarElement> elems = read.getCigar().getCigarElements();
	
	List<CigarElement> leading = new ArrayList<CigarElement>();
	
	for (CigarElement elem : elems) {
		if (isClip(elem)) {
			leading.add(elem);
		} else {
			break;
		}
	}
	
	String ret = "";
	if (leading.size() > 0) {
		Cigar cigar = new Cigar(leading);
		ret = TextCigarCodec.encode(cigar);
	}
	
	return ret;
}
 
Example #11
Source File: SAMRecordUtils.java    From abra2 with MIT License 6 votes vote down vote up
public static String getTrailingClips(SAMRecord read) {
	List<CigarElement> elems = read.getCigar().getCigarElements();
	
	List<CigarElement> trailing = new ArrayList<CigarElement>();
	boolean isNonClippedReached = false;
	
	for (CigarElement elem : elems) {
		if (isClip(elem)) {
			if (isNonClippedReached) {
				trailing.add(elem);
			}
		} else {
			isNonClippedReached = true;
		}
	}

	String ret = "";
	if (trailing.size() > 0) {
		Cigar cigar = new Cigar(trailing);
		ret =  TextCigarCodec.encode(cigar);
	}
	
	return ret;
}
 
Example #12
Source File: CigarUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Convert the 'I' CigarElement, if it is at either end (terminal) of the input cigar, to a corresponding 'S' operator.
 * Note that we allow CIGAR of the format '10H10S10I10M', but disallows the format if after the conversion the cigar turns into a giant clip,
 * e.g. '10H10S10I10S10H' is not allowed (if allowed, it becomes a giant clip of '10H30S10H' which is non-sense).
 *
 * @return a pair of number of clipped (hard and soft, including the ones from the converted terminal 'I') bases at the front and back of the
 *         input {@code cigarAlongInput5to3Direction}.
 *
 * @throws IllegalArgumentException when the checks as described above fail.
 */
public static Cigar convertTerminalInsertionToSoftClip(final Cigar cigar) {

    if (cigar.numCigarElements() < 2 ) {
        return cigar;
    }

    final CigarBuilder builder = new CigarBuilder();
    for (int n = 0; n < cigar.numCigarElements(); n++) {
        final CigarElement element = cigar.getCigarElement(n);
        if (element.getOperator() != CigarOperator.INSERTION) { // not an insertion
            builder.add(element);
        } else if (n == 0 || n == cigar.numCigarElements() - 1) {   // terminal insertion with no clipping -- convert to soft clip
            builder.add(new CigarElement(element.getLength(), CigarOperator.SOFT_CLIP));
        } else if (cigar.getCigarElement(n-1).getOperator().isClipping() || cigar.getCigarElement(n+1).getOperator().isClipping()) {    // insertion preceding or following clip
            builder.add(new CigarElement(element.getLength(), CigarOperator.SOFT_CLIP));
        } else {    // interior insertion
            builder.add(element);
        }
    }

    return builder.make();
}
 
Example #13
Source File: NDNCigarReadTransformer.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Refactor cigar strings that contain N-D-N elements to one N element (with total length of the three refactored elements).
 */
@VisibleForTesting
protected static Cigar refactorNDNtoN(final Cigar originalCigar) {
    final Cigar refactoredCigar = new Cigar();
    final int cigarLength = originalCigar.numCigarElements();
    for(int i = 0; i < cigarLength; i++){
        final CigarElement element = originalCigar.getCigarElement(i);
        if(element.getOperator() == CigarOperator.N && thereAreAtLeast2MoreElements(i,cigarLength)){
            final CigarElement nextElement = originalCigar.getCigarElement(i+1);
            final CigarElement nextNextElement = originalCigar.getCigarElement(i+2);

            // if it is N-D-N replace with N (with the total length) otherwise just add the first N.
            if(nextElement.getOperator() == CigarOperator.D && nextNextElement.getOperator() == CigarOperator.N){
                final int threeElementsLength = element.getLength() + nextElement.getLength() + nextNextElement.getLength();
                final CigarElement refactoredElement = new CigarElement(threeElementsLength,CigarOperator.N);
                refactoredCigar.add(refactoredElement);
                i += 2; //skip the elements that were refactored
            } else {
                refactoredCigar.add(element);  // add only the first N
            }
        } else {
            refactoredCigar.add(element);  // add any non-N element
        }
    }
    return refactoredCigar;
}
 
Example #14
Source File: ReadClipperUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test
public void testHardClipByReferenceCoordinates() {
    for (final Cigar cigar : cigarList) {
        final GATKRead read = ReadClipperTestUtils.makeReadFromCigar(cigar);
        final int start = getSoftStart(read);
        final int stop = getSoftEnd(read);

        for (int i = start; i <= stop; i++) {
            final GATKRead clipLeft = (new ReadClipper(read)).hardClipByReferenceCoordinates(-1, i);
            if (!clipLeft.isEmpty()) {
                Assert.assertTrue(clipLeft.getStart() >= Math.min(read.getEnd(), i + 1), String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getStart(), i + 1, read.getCigar().toString(), clipLeft.getCigar().toString()));
                assertRefAlignmentConsistent(clipLeft);
                assertReadLengthConsistent(clipLeft);
            }

            final GATKRead clipRight = (new ReadClipper(read)).hardClipByReferenceCoordinates(i, -1);
            if (!clipRight.isEmpty() && clipRight.getStart() <= clipRight.getEnd()) {             // alnStart > alnEnd if the entire read is a soft clip now. We can't test those.
                Assert.assertTrue(clipRight.getEnd() <= Math.max(read.getStart(), i - 1), String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getEnd(), i - 1, read.getCigar().toString(), clipRight.getCigar().toString()));
                assertRefAlignmentConsistent(clipRight);
                assertReadLengthConsistent(clipRight);
            }
        }
    }
}
 
Example #15
Source File: IndelShifterTest.java    From abra2 with MIT License 6 votes vote down vote up
@Test (groups = "unit" )
public void testShiftCigarLeft_insertAtTail() {
	Cigar cigar = new Cigar();
	
	cigar.add(new CigarElement(40, CigarOperator.M));
	cigar.add(new CigarElement(10, CigarOperator.I));
	
	Cigar newCigar;

	newCigar = indelShifter.shiftCigarLeft(cigar, 40);
	Assert.assertEquals(newCigar.toString(), "10I40M");
	
	newCigar = indelShifter.shiftCigarLeft(cigar, 39);
	Assert.assertEquals(newCigar.toString(), "1M10I39M");

	newCigar = indelShifter.shiftCigarLeft(cigar, 30);
	Assert.assertEquals(newCigar.toString(), "10M10I30M");
	
	newCigar = indelShifter.shiftCigarLeft(cigar, 1);
	Assert.assertEquals(newCigar.toString(), "39M10I1M");
}
 
Example #16
Source File: SAMRecordUtils.java    From abra2 with MIT License 5 votes vote down vote up
/**
 * Replace hard clips with soft clips.
 */
public static void replaceHardClips(SAMRecord read) {
	Cigar cigar = read.getCigar();
	
	if (cigar.getCigarElements().size() > 0) {
		CigarElement firstElement = cigar.getCigarElement(0);
		CigarElement lastElement  = cigar.getCigarElement(cigar.numCigarElements()-1);
		
		if ((firstElement.getOperator() == CigarOperator.H) ||
			(lastElement.getOperator() == CigarOperator.H)) {
			
			Cigar newCigar = new Cigar();
			
			boolean isFirst = true;
			
			for (CigarElement element : cigar.getCigarElements()) {
				if (element.getOperator() != CigarOperator.H) {
					newCigar.add(element);
				} else {
					CigarElement softClip = new CigarElement(element.getLength(), CigarOperator.S);
					newCigar.add(softClip);
					
					if (isFirst) {
						read.setReadString(padBases(element.getLength()) + read.getReadString());
					} else {
						read.setReadString(read.getReadString() + padBases(element.getLength()));							
					}
				}
				
				isFirst = false;
			}
			
			read.setCigar(newCigar);
		}
	}
}
 
Example #17
Source File: GraphBasedKBestHaplotypeFinderUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test(enabled = true)
public void testLeftAlignCigarSequentiallyAdjacentID() {
    final String ref = "GTCTCTCTCTCTCTCTCTATATATATATATATATTT";
    final String hap = "GTCTCTCTCTCTCTCTCTCTCTATATATATATATTT";
    final Cigar originalCigar = TextCigarCodec.decode("18M4I12M4D2M");

    final Cigar result = AlignmentUtils.leftAlignIndels(originalCigar, ref.getBytes(), hap.getBytes(), 0).getCigar();
    logger.warn("Result is " + result);
    Assert.assertEquals(originalCigar.getReferenceLength(), result.getReferenceLength(), "Reference lengths are different");
}
 
Example #18
Source File: BaseQualityClipReadTransformerTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test(dataProvider = "sequenceStrings")
public void testApply(final String quals_in, final String basesOut, final String qualsOut, final String cigarOut) throws Exception {
    final String basesIn = "CTCAAGTACAAGCTGATCCAGACCTACAGGGTGATGTCATTAGAGGCACTGATAACACACACACTATGGGGTGGGGGTGGACAGTTCCCCACTGCAATCC";
    final BaseQualityClipReadTransformer trans = new BaseQualityClipReadTransformer(15);
    final Cigar cigar = new Cigar();
    cigar.add(new CigarElement(basesIn.length(), CigarOperator.MATCH_OR_MISMATCH));
    final GATKRead readIn = ArtificialReadUtils.createArtificialRead(basesIn.getBytes(),SAMUtils.fastqToPhred(quals_in), cigar.toString());
    final GATKRead readOut = trans.apply(readIn);
    Assert.assertEquals(readOut.getBases(),basesOut.getBytes());
    Assert.assertEquals(readOut.getBaseQualities(),SAMUtils.fastqToPhred(qualsOut));
    Assert.assertEquals(readOut.getCigar().toString(), cigarOut);
}
 
Example #19
Source File: ReadThreadingAssemblerUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private List<Haplotype> assemble(final ReadThreadingAssembler assembler, final byte[] refBases, final SimpleInterval loc, final List<GATKRead> reads) {
        final Haplotype refHaplotype = new Haplotype(refBases, true);
        final Cigar c = new Cigar();
        c.add(new CigarElement(refHaplotype.getBases().length, CigarOperator.M));
        refHaplotype.setCigar(c);

        final AssemblyRegion activeRegion = new AssemblyRegion(loc, true, 0, header);
        activeRegion.addAll(reads);
//        logger.warn("Assembling " + activeRegion + " with " + engine);
        final AssemblyResultSet assemblyResultSet =  assembler.runLocalAssembly(activeRegion, refHaplotype, refBases, loc, null, header,
                                                                                SmithWatermanJavaAligner.getInstance());
        return assemblyResultSet.getHaplotypeList();
    }
 
Example #20
Source File: CigarUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test(dataProvider = "randomValidCigars")
public void testReadLength(final Cigar cigar) {
    final int actual = CigarUtils.countUnclippedReadBases(cigar);
    final int expected = cigar.getCigarElements().stream()
            .filter(ce -> ce.getOperator().consumesReadBases() || ce.getOperator().isClipping())
            .mapToInt(CigarElement::getLength).sum();
    Assert.assertEquals(actual, expected);
}
 
Example #21
Source File: AlignmentUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Workhorse for trimCigarByBases and trimCigarByReference
 *
 * @param cigar a non-null Cigar to trim down
 * @param start Where should we start keeping bases in the cigar (inclusive)?  The first position is 0
 * @param end Where should we stop keeping bases in the cigar (inclusive)?  The maximum value is cigar.getLength() - 1
 * @param byReference should start and end be interpreted as position in the reference or the read to trim to/from?
 * @return a non-null cigar
 */
@SuppressWarnings("fallthrough")
private static CigarBuilder.Result trimCigar(final Cigar cigar, final int start, final int end, final boolean byReference) {
    ParamUtils.isPositiveOrZero(start, "start position can't be negative");
    Utils.validateArg(end >= start, () -> "end " + end + " is before start " + start);
    final CigarBuilder newElements = new CigarBuilder();

    // these variables track the inclusive start and exclusive end of the current cigar element in reference (if byReference) or read (otherwise) coordinates
    int elementStart;           // inclusive
    int elementEnd = 0;         // exclusive -- start of next element
    for ( final CigarElement elt : cigar.getCigarElements() ) {
        elementStart = elementEnd;
        elementEnd = elementStart + (byReference ? lengthOnReference(elt) : lengthOnRead(elt));

        // we are careful to include zero-length elements at both ends, that is, elements with elementStart == elementEnd == start and elementStart == elementEnd == end + 1
        if (elementEnd < start || (elementEnd == start && elementStart < start)) {
            continue;
        } else if (elementStart > end && elementEnd > end + 1) {
            break;
        }

        final int overlapLength = elementEnd == elementStart ? elt.getLength() : Math.min(end + 1, elementEnd) - Math.max(start, elementStart);
        newElements.add(new CigarElement(overlapLength, elt.getOperator()));
    }
    Utils.validateArg(elementEnd > end, () -> "cigar elements don't reach end position (inclusive) " + end);

    return newElements.makeAndRecordDeletionsRemovedResult();
}
 
Example #22
Source File: SWNativeAlignerWrapper.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Override
public SmithWatermanAlignment align(final byte[] reference, final byte[] alternate, final SWParameters parameters, final SWOverhangStrategy overhangStrategy){
    long startTime = System.nanoTime();

    Utils.nonNull(parameters);
    Utils.nonNull(overhangStrategy);

    // avoid running full Smith-Waterman if there is an exact match of alternate in reference
    int matchIndex = -1;
    if (overhangStrategy == SWOverhangStrategy.SOFTCLIP || overhangStrategy == SWOverhangStrategy.IGNORE) {
        // Use a substring search to find an exact match of the alternate in the reference
        // NOTE: This approach only works for SOFTCLIP and IGNORE overhang strategies
        matchIndex = Utils.lastIndexOf(reference, alternate);
    }

    final SmithWatermanAlignment alignmentResult;

    if (matchIndex != -1) {
        // generate the alignment result when the substring search was successful
        alignmentResult = new SWNativeResultWrapper(new Cigar(Collections.singletonList(new CigarElement(alternate.length, CigarOperator.M))), matchIndex);
    }
    else {
        // run full Smith-Waterman
        final SWNativeAlignerResult alignment = aligner.align(reference, alternate,parameters,overhangStrategy);
        alignmentResult =  new SWNativeResultWrapper(alignment);
    }

    totalComputeTime += System.nanoTime() - startTime;
    return alignmentResult;
}
 
Example #23
Source File: MergeBamAlignmentTest.java    From picard with MIT License 5 votes vote down vote up
private SAMRecord makeRead(final SAMFileHeader alignedHeader, final SAMRecord unmappedRec, final HitSpec hitSpec, final int hitIndex) {
    final SAMRecord rec = new SAMRecord(alignedHeader);
    rec.setReadName(unmappedRec.getReadName());
    rec.setReadBases(unmappedRec.getReadBases());
    rec.setBaseQualities(unmappedRec.getBaseQualities());
    rec.setMappingQuality(hitSpec.mapq);
    if (!hitSpec.primary) rec.setNotPrimaryAlignmentFlag(true);
    final Cigar cigar = new Cigar();
    final int readLength = rec.getReadLength();
    if (hitSpec.filtered) {
        // Add two insertions so alignment is filtered.
        cigar.add(new CigarElement(readLength-4, CigarOperator.M));
        cigar.add(new CigarElement(1, CigarOperator.I));
        cigar.add(new CigarElement(1, CigarOperator.M));
        cigar.add(new CigarElement(1, CigarOperator.I));
        cigar.add(new CigarElement(1, CigarOperator.M));
    } else {
        cigar.add(new CigarElement(readLength, CigarOperator.M));
    }
    rec.setCigar(cigar);

    rec.setReferenceName(bigSequenceName);
    rec.setAttribute(SAMTag.HI.name(), hitIndex);
    rec.setAlignmentStart(hitIndex + 1);

    return rec;
}
 
Example #24
Source File: IndelShifter.java    From abra2 with MIT License 5 votes vote down vote up
private int firstIndelOffset(Cigar cigar) {
	int pos = 0;
	
	for (CigarElement elem : cigar.getCigarElements()) {
		if ((elem.getOperator() == CigarOperator.D) || (elem.getOperator() == CigarOperator.I)) {
			return pos;
		} else if (elem.getOperator() != CigarOperator.S) {
			pos += elem.getLength();
		}
	}
	
	throw new IllegalArgumentException("No indel for record: [" + cigar + "]");
}
 
Example #25
Source File: CigarUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Inverts the order of the operators in the cigar.
 * Eg 10M1D20M -> 20M1D10M
 */
public static Cigar invertCigar (final Cigar cigar) {
    Utils.nonNull(cigar);
    final List<CigarElement>  els = new ArrayList<>(cigar.getCigarElements());
    Collections.reverse(els);
    return new Cigar(els);
}
 
Example #26
Source File: ContigAlignmentsModifierUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private static Iterable<AlignmentInterval> willThrowOnInvalidCigar(final Cigar cigar, final int readStart) throws GATKException {
    final AlignmentInterval detailsDoesnotMatter = new AlignmentInterval(
            new SimpleInterval("1", 1, cigar.getReferenceLength()),
            readStart, readStart+cigar.getReadLength() - CigarUtils.countClippedBases(cigar, CigarOperator.SOFT_CLIP), cigar,
            true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE);
    return ContigAlignmentsModifier.splitGappedAlignment(detailsDoesnotMatter, 1, cigar.getReadLength() + CigarUtils.countClippedBases(cigar, CigarOperator.HARD_CLIP));
}
 
Example #27
Source File: GraphBasedKBestHaplotypeFinderUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test(dataProvider = "SystematicRefAltSWTestData")
public void testRefAltSW(final String prefix, final String end, final String refMid, final String altMid, final String midCigar) {
    // Construct the assembly graph
    SeqGraph graph = new SeqGraph(11);

    final int padSize = 0;
    SeqVertex top = new SeqVertex(Strings.repeat("N", padSize));
    SeqVertex ref = new SeqVertex(prefix + refMid + end);
    SeqVertex alt = new SeqVertex(prefix + altMid + end);
    SeqVertex bot = new SeqVertex(Strings.repeat("N", padSize));

    graph.addVertices(top, ref, alt, bot);
    graph.addEdges(() -> new BaseEdge(true, 1), top, ref, bot);
    graph.addEdges(() -> new BaseEdge(false, 1), top, alt, bot);

    // Construct the test path
    Path<SeqVertex,BaseEdge> path = makePath(Arrays.asList(top, alt, bot), graph);

    final CigarBuilder expected = new CigarBuilder();
    expected.add(new CigarElement(padSize, CigarOperator.M));
    if ( ! prefix.equals("") ) {
        expected.add(new CigarElement(prefix.length(), CigarOperator.M));
    }
    expected.addAll(TextCigarCodec.decode(midCigar));
    if ( ! end.equals("") ) {
        expected.add(new CigarElement(end.length(), CigarOperator.M));
    }
    expected.add(new CigarElement(padSize, CigarOperator.M));

    final String refString = top.getSequenceString() + ref.getSequenceString() + bot.getSequenceString();
    final Cigar pathCigar = path.calculateCigar(refString.getBytes(), SmithWatermanJavaAligner.getInstance());

    logger.warn("diffs: " + ref + " vs. " + alt + " cigar " + midCigar);
    logger.warn("Path " + path + " with cigar " + pathCigar);
    Assert.assertEquals(new CigarBuilder().addAll(pathCigar).make(true), expected.make(true), "Cigar mismatch: ref = " + refString + " vs alt = " + new String(path.getBases()));
}
 
Example #28
Source File: HaplotypeUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test
public void testSimpleDeletionAllele() {
    final String bases = "ACTGGTCAACTGGTCAACTGGTCAACTGGTCA";

    final ArrayList<CigarElement> h1CigarList = new ArrayList<>();
    h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M));
    final Cigar h1Cigar = new Cigar(h1CigarList);
    String h1bases = "ATCAACTGGTCAACTGGTCAACTGGTCA";
    basicInsertTest("ACTGG", "A", 0, h1Cigar, bases, h1bases);
    h1bases = "ACTGGTCAGTCAACTGGTCAACTGGTCA";
    basicInsertTest("AACTG", "A", 7, h1Cigar, bases, h1bases);
    h1bases = "ACTGGTCAACTGGTCAATCAACTGGTCA";
    basicInsertTest("ACTGG", "A", 16, h1Cigar, bases, h1bases);
}
 
Example #29
Source File: SAMRecordUtilsTest.java    From abra2 with MIT License 5 votes vote down vote up
@Test (groups = "unit")
public void testGetLeadingClips() {
	SAMRecord read = new SAMRecord(null);
	Cigar cigar = getCigar("10S15M8S");
	read.setCigar(cigar);
	String leading = SAMRecordUtils.getLeadingClips(read);
	Assert.assertEquals(leading, "10S");
}
 
Example #30
Source File: CigarUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test(dataProvider = "testData_unclipCigar")
public void testUnclipCigar(final String cigarStrIn, final String expectedCigarStrOut){
    final Cigar cigarIn = TextCigarCodec.decode(cigarStrIn);
    final Cigar cigarOut = CigarUtils.removeClipsAndPadding(cigarIn);
    final String actualCigarStrOut = TextCigarCodec.encode(cigarOut);
    Assert.assertEquals(actualCigarStrOut, expectedCigarStrOut);
}