htsjdk.samtools.Cigar Java Examples
The following examples show how to use
htsjdk.samtools.Cigar.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: IndelShifterTest.java From abra2 with MIT License | 6 votes |
@Test (groups = "unit" ) public void testShiftCigarLeft_complex() { //3S69M1I18M1D9M Cigar cigar = new Cigar(); cigar.add(new CigarElement(3, CigarOperator.S)); cigar.add(new CigarElement(69, CigarOperator.M)); cigar.add(new CigarElement(1, CigarOperator.I)); cigar.add(new CigarElement(18, CigarOperator.M)); cigar.add(new CigarElement(1, CigarOperator.D)); cigar.add(new CigarElement(9, CigarOperator.M)); Cigar newCigar; newCigar = indelShifter.shiftCigarLeft(cigar, 1); assertEquals(newCigar.toString(), "3S68M1I18M1D10M"); }
Example #2
Source File: CigarUtilsUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Test public void testClipCountsByHand() { // int[] is leftHard, leftSoft, rightHard, rightSoft final List<Pair<Cigar, int[]>> tests = Arrays.asList( Pair.of("13H3M35D13M2I45S30H", new int[] {13, 0, 30, 45}), Pair.of("1H3S67M13S", new int[] {1, 3, 0, 13}), Pair.of("13M30S10S1H", new int[] {0, 0, 1, 40}), Pair.of("113S4M", new int[] {0, 113, 0, 0}), Pair.of("5H3H10M2S1S", new int[] {8, 0, 0, 3}), Pair.of("10M", new int[] {0, 0, 0, 0}), Pair.of("1H2H3S4S10M5S6S7H8H", new int[] {3, 7, 15, 11}) ) .stream().map(pair -> Pair.of(TextCigarCodec.decode(pair.getLeft()), pair.getRight())).collect(Collectors.toList()); for (final Pair<Cigar, int[]> test : tests) { Assert.assertEquals(CigarUtils.countClippedBases(test.getLeft(), ClippingTail.LEFT_TAIL, CigarOperator.HARD_CLIP), test.getRight()[0]); Assert.assertEquals(CigarUtils.countClippedBases(test.getLeft(), ClippingTail.LEFT_TAIL, CigarOperator.SOFT_CLIP), test.getRight()[1]); Assert.assertEquals(CigarUtils.countClippedBases(test.getLeft(), ClippingTail.RIGHT_TAIL, CigarOperator.HARD_CLIP), test.getRight()[2]); Assert.assertEquals(CigarUtils.countClippedBases(test.getLeft(), ClippingTail.RIGHT_TAIL, CigarOperator.SOFT_CLIP), test.getRight()[3]); } }
Example #3
Source File: ContigAlignmentsModifierUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Test(groups = "sv") public void testGappedAlignmentBreaker_OneDeletion() { final Cigar cigar = TextCigarCodec.decode("2S205M2D269M77S"); final AlignmentInterval alignmentInterval = new AlignmentInterval(new SimpleInterval("1", 100, 575), 3, 476, cigar, true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); final List<AlignmentInterval> generatedARList = Utils.stream(ContigAlignmentsModifier.splitGappedAlignment(alignmentInterval, 1, cigar.getReadLength())).collect(Collectors.toList()); Assert.assertEquals(generatedARList.size(), 2); Assert.assertEquals(generatedARList.get(0), new AlignmentInterval(new SimpleInterval("1", 100, 304), 3, 207, TextCigarCodec.decode("2S205M346S"), true, 60, NO_NM, NO_AS, ContigAlignmentsModifier.AlnModType.FROM_SPLIT_GAPPED_ALIGNMENT)); Assert.assertEquals(generatedARList.get(1), new AlignmentInterval(new SimpleInterval("1", 307, 575), 208, 476, TextCigarCodec.decode("207S269M77S"), true, 60, NO_NM, NO_AS, ContigAlignmentsModifier.AlnModType.FROM_SPLIT_GAPPED_ALIGNMENT)); }
Example #4
Source File: TestUtils.java From hmftools with GNU General Public License v3.0 | 6 votes |
public static ReadRecord[] createSupplementaryReadPair(final int id, final GeneCollection gc1, final GeneCollection gc2, int posStart1, int posEnd1, int posStart2, int posEnd2, final Cigar cigar1, final Cigar cigar2, boolean firstInPair) { int readBaseLength = cigar1.getCigarElements().stream() .filter(x -> x.getOperator() != N && x.getOperator() != D) .mapToInt(x -> x.getLength()).sum(); String readBases = generateRandomBases(readBaseLength); ReadRecord read1 = createMappedRead(id, gc1, posStart1, posEnd1, cigar1, readBases); ReadRecord read2 = createMappedRead(id, gc2, posStart2, posEnd2, cigar2, readBases); read1.setFlag(FIRST_OF_PAIR, firstInPair); read2.setFlag(SECOND_OF_PAIR, !firstInPair); read1.setSuppAlignment(String.format("%s;%d;%s", read2.Chromosome, read2.PosStart, read2.Cigar.toString())); read2.setSuppAlignment(String.format("%s;%d;%s", read1.Chromosome, read1.PosStart, read1.Cigar.toString())); return new ReadRecord[] { read1, read2 }; }
Example #5
Source File: ReferenceConfidenceModel.java From gatk-protected with BSD 3-Clause "New" or "Revised" License | 6 votes |
/** * Create a reference haplotype for an active region * * @param activeRegion the active region * @param refBases the ref bases * @param paddedReferenceLoc the location spanning of the refBases -- can be longer than activeRegion.getLocation() * @return a reference haplotype */ public static Haplotype createReferenceHaplotype(final AssemblyRegion activeRegion, final byte[] refBases, final SimpleInterval paddedReferenceLoc) { Utils.nonNull(activeRegion, "null region"); Utils.nonNull(refBases, "null refBases"); Utils.nonNull(paddedReferenceLoc, "null paddedReferenceLoc"); final int alignmentStart = activeRegion.getExtendedSpan().getStart() - paddedReferenceLoc.getStart(); if ( alignmentStart < 0 ) { throw new IllegalStateException("Bad alignment start in createReferenceHaplotype " + alignmentStart); } final Haplotype refHaplotype = new Haplotype(refBases, true); refHaplotype.setAlignmentStartHapwrtRef(alignmentStart); final Cigar c = new Cigar(); c.add(new CigarElement(refHaplotype.getBases().length, CigarOperator.M)); refHaplotype.setCigar(c); return refHaplotype; }
Example #6
Source File: IndelShifterTest.java From abra2 with MIT License | 6 votes |
@Test (groups = "unit" ) public void testShiftIndelsLeft() throws Exception { CompareToReference2 c2r = new CompareToReference2(); c2r.init("test-data/test.fa"); /* TCGAATCGATATATTTCCGGAACAGACTCAG ------CGATAT--TTCCGGAA--------- <-- orig ------CG--ATATTTCCGGAA--------- <-- new 1234567890123456789012 */ int refStart = 7; int refEnd = 22; Cigar cigar = TextCigarCodec.decode("6M2D8M"); String seq = "CGATATTTCCGGAA"; // 1 based input Cigar newCigar = indelShifter.shiftIndelsLeft(refStart, refEnd, "seq1", cigar, seq, c2r); assertEquals(TextCigarCodec.encode(newCigar), "2M2D12M"); }
Example #7
Source File: Read.java From cramtools with Apache License 2.0 | 6 votes |
SAMRecord firstSAMRecord(SAMFileHeader header) { SAMRecord r = new SAMRecord(header); r.setReadName(evidenceRecord.getReadName()); r.setReferenceName(evidenceRecord.Chromosome); r.setAlignmentStart(Integer.valueOf(evidenceRecord.OffsetInReference) + 1); r.setMappingQuality(Integer.valueOf(evidenceRecord.ScoreAllele0)); r.setReadPairedFlag(true); r.setReadUnmappedFlag(false); r.setReadNegativeStrandFlag(negative); r.setFirstOfPairFlag(evidenceRecord.side == 0); r.setSecondOfPairFlag(!r.getFirstOfPairFlag()); r.setCigar(new Cigar(Utils.toCigarOperatorList(firstHalf.samCigarElements))); r.setReadBases(Utils.toByteArray(firstHalf.readBasesBuf)); r.setBaseQualities(Utils.toByteArray(firstHalf.readScoresBuf)); r.setAttribute("GC", Utils.toString(firstHalf.gcList)); r.setAttribute("GS", Utils.toString(firstHalf.gsBuf)); r.setAttribute("GQ", SAMUtils.phredToFastq(Utils.toByteArray(firstHalf.gqBuf))); return r; }
Example #8
Source File: IndelShifter.java From abra2 with MIT License | 6 votes |
public static void main(String[] args) throws IOException { CompareToReference2 c2r = new CompareToReference2(); c2r.init("/home/lmose/dev/reference/hg19/chr7.fa"); String seq = "CGCTGGCGGCCTTCCTGCCCGACCTGTCCCTGGCCAAGAGGAAGACCTGGCGGAACCCTTGGCGGCGTTCCTACCACAAGCGCAAGAAGGCCGAGTGGGAGGTGGACCCTGACAACTGCGAGGAGGTGAAGCAGACACCGCCCTACGACAGCAGCCACCGCATCCTGGACGTCATGGACATGACGATCTTCGACTTCCTCATGGGAAACATGGACCGTCACCACTACGAGACTTGTGAGAAGTTTGGGAATGAAACGTTCATCATCCACTTAGACAATGGAAGAGGGATCCGGA"; IndelShifter is = new IndelShifter(); Cigar orig = TextCigarCodec.decode("7M88I1M10D109M241D82M60D7M"); Cigar cigar = is.shiftAllIndelsLeft(296604, 297121, "chr7", orig, seq, c2r); System.out.println("old: " + orig + ", " + orig.getReadLength()); System.out.println("new: " + cigar + ", " + cigar.getReadLength()); //296604, 297121,chr7, 7M88I1M10D109M241D82M60D7M, CGCTGGCGGCCTTCCTGCCCGACCTGTCCCTGGCCAAGAGGAAGACCTGGCGGAACCCTTGGCGGCGTTCCTACCACAAGCGCAAGAAGGCCGAGTGGGAGGTGGACCCTGACAACTGCGAGGAGGTGAAGCAGACACCGCCCTACGACAGCAGCCACCGCATCCTGGACGTCATGGACATGACGATCTTCGACTTCCTCATGGGAAACATGGACCGTCACCACTACGAGACTTGTGAGAAGTTTGGGAATGAAACGTTCATCATCCACTTAGACAATGGAAGAGGGATCCGGA }
Example #9
Source File: ContigAlignmentsModifierUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Test(groups = "sv") public void testGappedAlignmentBreaker_HardAndSoftClip() { final Cigar cigar = TextCigarCodec.decode("1H2S3M5I10M20D6M7S8H"); final AlignmentInterval alignmentInterval = new AlignmentInterval(new SimpleInterval("1", 100, 138), 4, 27, cigar, true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); final List<AlignmentInterval> generatedARList = Utils.stream(ContigAlignmentsModifier.splitGappedAlignment(alignmentInterval, 1, cigar.getReadLength()+1+8)).collect(Collectors.toList()); Assert.assertEquals(generatedARList.get(0), new AlignmentInterval(new SimpleInterval("1", 100, 102), 4, 6, TextCigarCodec.decode("1H2S3M28S8H"), true, 60, NO_NM, NO_AS, ContigAlignmentsModifier.AlnModType.FROM_SPLIT_GAPPED_ALIGNMENT)); Assert.assertEquals(generatedARList.get(1), new AlignmentInterval(new SimpleInterval("1", 103, 112), 12, 21, TextCigarCodec.decode("1H10S10M13S8H"), true, 60, NO_NM, NO_AS, ContigAlignmentsModifier.AlnModType.FROM_SPLIT_GAPPED_ALIGNMENT)); Assert.assertEquals(generatedARList.get(2), new AlignmentInterval(new SimpleInterval("1", 133, 138), 22, 27, TextCigarCodec.decode("1H20S6M7S8H"), true, 60, NO_NM, NO_AS, ContigAlignmentsModifier.AlnModType.FROM_SPLIT_GAPPED_ALIGNMENT)); }
Example #10
Source File: SAMRecordUtils.java From abra2 with MIT License | 6 votes |
public static String getLeadingClips(SAMRecord read) { List<CigarElement> elems = read.getCigar().getCigarElements(); List<CigarElement> leading = new ArrayList<CigarElement>(); for (CigarElement elem : elems) { if (isClip(elem)) { leading.add(elem); } else { break; } } String ret = ""; if (leading.size() > 0) { Cigar cigar = new Cigar(leading); ret = TextCigarCodec.encode(cigar); } return ret; }
Example #11
Source File: SAMRecordUtils.java From abra2 with MIT License | 6 votes |
public static String getTrailingClips(SAMRecord read) { List<CigarElement> elems = read.getCigar().getCigarElements(); List<CigarElement> trailing = new ArrayList<CigarElement>(); boolean isNonClippedReached = false; for (CigarElement elem : elems) { if (isClip(elem)) { if (isNonClippedReached) { trailing.add(elem); } } else { isNonClippedReached = true; } } String ret = ""; if (trailing.size() > 0) { Cigar cigar = new Cigar(trailing); ret = TextCigarCodec.encode(cigar); } return ret; }
Example #12
Source File: CigarUtils.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
/** * Convert the 'I' CigarElement, if it is at either end (terminal) of the input cigar, to a corresponding 'S' operator. * Note that we allow CIGAR of the format '10H10S10I10M', but disallows the format if after the conversion the cigar turns into a giant clip, * e.g. '10H10S10I10S10H' is not allowed (if allowed, it becomes a giant clip of '10H30S10H' which is non-sense). * * @return a pair of number of clipped (hard and soft, including the ones from the converted terminal 'I') bases at the front and back of the * input {@code cigarAlongInput5to3Direction}. * * @throws IllegalArgumentException when the checks as described above fail. */ public static Cigar convertTerminalInsertionToSoftClip(final Cigar cigar) { if (cigar.numCigarElements() < 2 ) { return cigar; } final CigarBuilder builder = new CigarBuilder(); for (int n = 0; n < cigar.numCigarElements(); n++) { final CigarElement element = cigar.getCigarElement(n); if (element.getOperator() != CigarOperator.INSERTION) { // not an insertion builder.add(element); } else if (n == 0 || n == cigar.numCigarElements() - 1) { // terminal insertion with no clipping -- convert to soft clip builder.add(new CigarElement(element.getLength(), CigarOperator.SOFT_CLIP)); } else if (cigar.getCigarElement(n-1).getOperator().isClipping() || cigar.getCigarElement(n+1).getOperator().isClipping()) { // insertion preceding or following clip builder.add(new CigarElement(element.getLength(), CigarOperator.SOFT_CLIP)); } else { // interior insertion builder.add(element); } } return builder.make(); }
Example #13
Source File: NDNCigarReadTransformer.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
/** * Refactor cigar strings that contain N-D-N elements to one N element (with total length of the three refactored elements). */ @VisibleForTesting protected static Cigar refactorNDNtoN(final Cigar originalCigar) { final Cigar refactoredCigar = new Cigar(); final int cigarLength = originalCigar.numCigarElements(); for(int i = 0; i < cigarLength; i++){ final CigarElement element = originalCigar.getCigarElement(i); if(element.getOperator() == CigarOperator.N && thereAreAtLeast2MoreElements(i,cigarLength)){ final CigarElement nextElement = originalCigar.getCigarElement(i+1); final CigarElement nextNextElement = originalCigar.getCigarElement(i+2); // if it is N-D-N replace with N (with the total length) otherwise just add the first N. if(nextElement.getOperator() == CigarOperator.D && nextNextElement.getOperator() == CigarOperator.N){ final int threeElementsLength = element.getLength() + nextElement.getLength() + nextNextElement.getLength(); final CigarElement refactoredElement = new CigarElement(threeElementsLength,CigarOperator.N); refactoredCigar.add(refactoredElement); i += 2; //skip the elements that were refactored } else { refactoredCigar.add(element); // add only the first N } } else { refactoredCigar.add(element); // add any non-N element } } return refactoredCigar; }
Example #14
Source File: ReadClipperUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Test public void testHardClipByReferenceCoordinates() { for (final Cigar cigar : cigarList) { final GATKRead read = ReadClipperTestUtils.makeReadFromCigar(cigar); final int start = getSoftStart(read); final int stop = getSoftEnd(read); for (int i = start; i <= stop; i++) { final GATKRead clipLeft = (new ReadClipper(read)).hardClipByReferenceCoordinates(-1, i); if (!clipLeft.isEmpty()) { Assert.assertTrue(clipLeft.getStart() >= Math.min(read.getEnd(), i + 1), String.format("Clipped alignment start (%d) is less the expected (%d): %s -> %s", clipLeft.getStart(), i + 1, read.getCigar().toString(), clipLeft.getCigar().toString())); assertRefAlignmentConsistent(clipLeft); assertReadLengthConsistent(clipLeft); } final GATKRead clipRight = (new ReadClipper(read)).hardClipByReferenceCoordinates(i, -1); if (!clipRight.isEmpty() && clipRight.getStart() <= clipRight.getEnd()) { // alnStart > alnEnd if the entire read is a soft clip now. We can't test those. Assert.assertTrue(clipRight.getEnd() <= Math.max(read.getStart(), i - 1), String.format("Clipped alignment end (%d) is greater than expected (%d): %s -> %s", clipRight.getEnd(), i - 1, read.getCigar().toString(), clipRight.getCigar().toString())); assertRefAlignmentConsistent(clipRight); assertReadLengthConsistent(clipRight); } } } }
Example #15
Source File: IndelShifterTest.java From abra2 with MIT License | 6 votes |
@Test (groups = "unit" ) public void testShiftCigarLeft_insertAtTail() { Cigar cigar = new Cigar(); cigar.add(new CigarElement(40, CigarOperator.M)); cigar.add(new CigarElement(10, CigarOperator.I)); Cigar newCigar; newCigar = indelShifter.shiftCigarLeft(cigar, 40); Assert.assertEquals(newCigar.toString(), "10I40M"); newCigar = indelShifter.shiftCigarLeft(cigar, 39); Assert.assertEquals(newCigar.toString(), "1M10I39M"); newCigar = indelShifter.shiftCigarLeft(cigar, 30); Assert.assertEquals(newCigar.toString(), "10M10I30M"); newCigar = indelShifter.shiftCigarLeft(cigar, 1); Assert.assertEquals(newCigar.toString(), "39M10I1M"); }
Example #16
Source File: SAMRecordUtils.java From abra2 with MIT License | 5 votes |
/** * Replace hard clips with soft clips. */ public static void replaceHardClips(SAMRecord read) { Cigar cigar = read.getCigar(); if (cigar.getCigarElements().size() > 0) { CigarElement firstElement = cigar.getCigarElement(0); CigarElement lastElement = cigar.getCigarElement(cigar.numCigarElements()-1); if ((firstElement.getOperator() == CigarOperator.H) || (lastElement.getOperator() == CigarOperator.H)) { Cigar newCigar = new Cigar(); boolean isFirst = true; for (CigarElement element : cigar.getCigarElements()) { if (element.getOperator() != CigarOperator.H) { newCigar.add(element); } else { CigarElement softClip = new CigarElement(element.getLength(), CigarOperator.S); newCigar.add(softClip); if (isFirst) { read.setReadString(padBases(element.getLength()) + read.getReadString()); } else { read.setReadString(read.getReadString() + padBases(element.getLength())); } } isFirst = false; } read.setCigar(newCigar); } } }
Example #17
Source File: GraphBasedKBestHaplotypeFinderUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
@Test(enabled = true) public void testLeftAlignCigarSequentiallyAdjacentID() { final String ref = "GTCTCTCTCTCTCTCTCTATATATATATATATATTT"; final String hap = "GTCTCTCTCTCTCTCTCTCTCTATATATATATATTT"; final Cigar originalCigar = TextCigarCodec.decode("18M4I12M4D2M"); final Cigar result = AlignmentUtils.leftAlignIndels(originalCigar, ref.getBytes(), hap.getBytes(), 0).getCigar(); logger.warn("Result is " + result); Assert.assertEquals(originalCigar.getReferenceLength(), result.getReferenceLength(), "Reference lengths are different"); }
Example #18
Source File: BaseQualityClipReadTransformerTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
@Test(dataProvider = "sequenceStrings") public void testApply(final String quals_in, final String basesOut, final String qualsOut, final String cigarOut) throws Exception { final String basesIn = "CTCAAGTACAAGCTGATCCAGACCTACAGGGTGATGTCATTAGAGGCACTGATAACACACACACTATGGGGTGGGGGTGGACAGTTCCCCACTGCAATCC"; final BaseQualityClipReadTransformer trans = new BaseQualityClipReadTransformer(15); final Cigar cigar = new Cigar(); cigar.add(new CigarElement(basesIn.length(), CigarOperator.MATCH_OR_MISMATCH)); final GATKRead readIn = ArtificialReadUtils.createArtificialRead(basesIn.getBytes(),SAMUtils.fastqToPhred(quals_in), cigar.toString()); final GATKRead readOut = trans.apply(readIn); Assert.assertEquals(readOut.getBases(),basesOut.getBytes()); Assert.assertEquals(readOut.getBaseQualities(),SAMUtils.fastqToPhred(qualsOut)); Assert.assertEquals(readOut.getCigar().toString(), cigarOut); }
Example #19
Source File: ReadThreadingAssemblerUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
private List<Haplotype> assemble(final ReadThreadingAssembler assembler, final byte[] refBases, final SimpleInterval loc, final List<GATKRead> reads) { final Haplotype refHaplotype = new Haplotype(refBases, true); final Cigar c = new Cigar(); c.add(new CigarElement(refHaplotype.getBases().length, CigarOperator.M)); refHaplotype.setCigar(c); final AssemblyRegion activeRegion = new AssemblyRegion(loc, true, 0, header); activeRegion.addAll(reads); // logger.warn("Assembling " + activeRegion + " with " + engine); final AssemblyResultSet assemblyResultSet = assembler.runLocalAssembly(activeRegion, refHaplotype, refBases, loc, null, header, SmithWatermanJavaAligner.getInstance()); return assemblyResultSet.getHaplotypeList(); }
Example #20
Source File: CigarUtilsUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
@Test(dataProvider = "randomValidCigars") public void testReadLength(final Cigar cigar) { final int actual = CigarUtils.countUnclippedReadBases(cigar); final int expected = cigar.getCigarElements().stream() .filter(ce -> ce.getOperator().consumesReadBases() || ce.getOperator().isClipping()) .mapToInt(CigarElement::getLength).sum(); Assert.assertEquals(actual, expected); }
Example #21
Source File: AlignmentUtils.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/** * Workhorse for trimCigarByBases and trimCigarByReference * * @param cigar a non-null Cigar to trim down * @param start Where should we start keeping bases in the cigar (inclusive)? The first position is 0 * @param end Where should we stop keeping bases in the cigar (inclusive)? The maximum value is cigar.getLength() - 1 * @param byReference should start and end be interpreted as position in the reference or the read to trim to/from? * @return a non-null cigar */ @SuppressWarnings("fallthrough") private static CigarBuilder.Result trimCigar(final Cigar cigar, final int start, final int end, final boolean byReference) { ParamUtils.isPositiveOrZero(start, "start position can't be negative"); Utils.validateArg(end >= start, () -> "end " + end + " is before start " + start); final CigarBuilder newElements = new CigarBuilder(); // these variables track the inclusive start and exclusive end of the current cigar element in reference (if byReference) or read (otherwise) coordinates int elementStart; // inclusive int elementEnd = 0; // exclusive -- start of next element for ( final CigarElement elt : cigar.getCigarElements() ) { elementStart = elementEnd; elementEnd = elementStart + (byReference ? lengthOnReference(elt) : lengthOnRead(elt)); // we are careful to include zero-length elements at both ends, that is, elements with elementStart == elementEnd == start and elementStart == elementEnd == end + 1 if (elementEnd < start || (elementEnd == start && elementStart < start)) { continue; } else if (elementStart > end && elementEnd > end + 1) { break; } final int overlapLength = elementEnd == elementStart ? elt.getLength() : Math.min(end + 1, elementEnd) - Math.max(start, elementStart); newElements.add(new CigarElement(overlapLength, elt.getOperator())); } Utils.validateArg(elementEnd > end, () -> "cigar elements don't reach end position (inclusive) " + end); return newElements.makeAndRecordDeletionsRemovedResult(); }
Example #22
Source File: SWNativeAlignerWrapper.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
@Override public SmithWatermanAlignment align(final byte[] reference, final byte[] alternate, final SWParameters parameters, final SWOverhangStrategy overhangStrategy){ long startTime = System.nanoTime(); Utils.nonNull(parameters); Utils.nonNull(overhangStrategy); // avoid running full Smith-Waterman if there is an exact match of alternate in reference int matchIndex = -1; if (overhangStrategy == SWOverhangStrategy.SOFTCLIP || overhangStrategy == SWOverhangStrategy.IGNORE) { // Use a substring search to find an exact match of the alternate in the reference // NOTE: This approach only works for SOFTCLIP and IGNORE overhang strategies matchIndex = Utils.lastIndexOf(reference, alternate); } final SmithWatermanAlignment alignmentResult; if (matchIndex != -1) { // generate the alignment result when the substring search was successful alignmentResult = new SWNativeResultWrapper(new Cigar(Collections.singletonList(new CigarElement(alternate.length, CigarOperator.M))), matchIndex); } else { // run full Smith-Waterman final SWNativeAlignerResult alignment = aligner.align(reference, alternate,parameters,overhangStrategy); alignmentResult = new SWNativeResultWrapper(alignment); } totalComputeTime += System.nanoTime() - startTime; return alignmentResult; }
Example #23
Source File: MergeBamAlignmentTest.java From picard with MIT License | 5 votes |
private SAMRecord makeRead(final SAMFileHeader alignedHeader, final SAMRecord unmappedRec, final HitSpec hitSpec, final int hitIndex) { final SAMRecord rec = new SAMRecord(alignedHeader); rec.setReadName(unmappedRec.getReadName()); rec.setReadBases(unmappedRec.getReadBases()); rec.setBaseQualities(unmappedRec.getBaseQualities()); rec.setMappingQuality(hitSpec.mapq); if (!hitSpec.primary) rec.setNotPrimaryAlignmentFlag(true); final Cigar cigar = new Cigar(); final int readLength = rec.getReadLength(); if (hitSpec.filtered) { // Add two insertions so alignment is filtered. cigar.add(new CigarElement(readLength-4, CigarOperator.M)); cigar.add(new CigarElement(1, CigarOperator.I)); cigar.add(new CigarElement(1, CigarOperator.M)); cigar.add(new CigarElement(1, CigarOperator.I)); cigar.add(new CigarElement(1, CigarOperator.M)); } else { cigar.add(new CigarElement(readLength, CigarOperator.M)); } rec.setCigar(cigar); rec.setReferenceName(bigSequenceName); rec.setAttribute(SAMTag.HI.name(), hitIndex); rec.setAlignmentStart(hitIndex + 1); return rec; }
Example #24
Source File: IndelShifter.java From abra2 with MIT License | 5 votes |
private int firstIndelOffset(Cigar cigar) { int pos = 0; for (CigarElement elem : cigar.getCigarElements()) { if ((elem.getOperator() == CigarOperator.D) || (elem.getOperator() == CigarOperator.I)) { return pos; } else if (elem.getOperator() != CigarOperator.S) { pos += elem.getLength(); } } throw new IllegalArgumentException("No indel for record: [" + cigar + "]"); }
Example #25
Source File: CigarUtils.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/** * Inverts the order of the operators in the cigar. * Eg 10M1D20M -> 20M1D10M */ public static Cigar invertCigar (final Cigar cigar) { Utils.nonNull(cigar); final List<CigarElement> els = new ArrayList<>(cigar.getCigarElements()); Collections.reverse(els); return new Cigar(els); }
Example #26
Source File: ContigAlignmentsModifierUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
private static Iterable<AlignmentInterval> willThrowOnInvalidCigar(final Cigar cigar, final int readStart) throws GATKException { final AlignmentInterval detailsDoesnotMatter = new AlignmentInterval( new SimpleInterval("1", 1, cigar.getReferenceLength()), readStart, readStart+cigar.getReadLength() - CigarUtils.countClippedBases(cigar, CigarOperator.SOFT_CLIP), cigar, true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE); return ContigAlignmentsModifier.splitGappedAlignment(detailsDoesnotMatter, 1, cigar.getReadLength() + CigarUtils.countClippedBases(cigar, CigarOperator.HARD_CLIP)); }
Example #27
Source File: GraphBasedKBestHaplotypeFinderUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
@Test(dataProvider = "SystematicRefAltSWTestData") public void testRefAltSW(final String prefix, final String end, final String refMid, final String altMid, final String midCigar) { // Construct the assembly graph SeqGraph graph = new SeqGraph(11); final int padSize = 0; SeqVertex top = new SeqVertex(Strings.repeat("N", padSize)); SeqVertex ref = new SeqVertex(prefix + refMid + end); SeqVertex alt = new SeqVertex(prefix + altMid + end); SeqVertex bot = new SeqVertex(Strings.repeat("N", padSize)); graph.addVertices(top, ref, alt, bot); graph.addEdges(() -> new BaseEdge(true, 1), top, ref, bot); graph.addEdges(() -> new BaseEdge(false, 1), top, alt, bot); // Construct the test path Path<SeqVertex,BaseEdge> path = makePath(Arrays.asList(top, alt, bot), graph); final CigarBuilder expected = new CigarBuilder(); expected.add(new CigarElement(padSize, CigarOperator.M)); if ( ! prefix.equals("") ) { expected.add(new CigarElement(prefix.length(), CigarOperator.M)); } expected.addAll(TextCigarCodec.decode(midCigar)); if ( ! end.equals("") ) { expected.add(new CigarElement(end.length(), CigarOperator.M)); } expected.add(new CigarElement(padSize, CigarOperator.M)); final String refString = top.getSequenceString() + ref.getSequenceString() + bot.getSequenceString(); final Cigar pathCigar = path.calculateCigar(refString.getBytes(), SmithWatermanJavaAligner.getInstance()); logger.warn("diffs: " + ref + " vs. " + alt + " cigar " + midCigar); logger.warn("Path " + path + " with cigar " + pathCigar); Assert.assertEquals(new CigarBuilder().addAll(pathCigar).make(true), expected.make(true), "Cigar mismatch: ref = " + refString + " vs alt = " + new String(path.getBases())); }
Example #28
Source File: HaplotypeUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
@Test public void testSimpleDeletionAllele() { final String bases = "ACTGGTCAACTGGTCAACTGGTCAACTGGTCA"; final ArrayList<CigarElement> h1CigarList = new ArrayList<>(); h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M)); final Cigar h1Cigar = new Cigar(h1CigarList); String h1bases = "ATCAACTGGTCAACTGGTCAACTGGTCA"; basicInsertTest("ACTGG", "A", 0, h1Cigar, bases, h1bases); h1bases = "ACTGGTCAGTCAACTGGTCAACTGGTCA"; basicInsertTest("AACTG", "A", 7, h1Cigar, bases, h1bases); h1bases = "ACTGGTCAACTGGTCAATCAACTGGTCA"; basicInsertTest("ACTGG", "A", 16, h1Cigar, bases, h1bases); }
Example #29
Source File: SAMRecordUtilsTest.java From abra2 with MIT License | 5 votes |
@Test (groups = "unit") public void testGetLeadingClips() { SAMRecord read = new SAMRecord(null); Cigar cigar = getCigar("10S15M8S"); read.setCigar(cigar); String leading = SAMRecordUtils.getLeadingClips(read); Assert.assertEquals(leading, "10S"); }
Example #30
Source File: CigarUtilsUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
@Test(dataProvider = "testData_unclipCigar") public void testUnclipCigar(final String cigarStrIn, final String expectedCigarStrOut){ final Cigar cigarIn = TextCigarCodec.decode(cigarStrIn); final Cigar cigarOut = CigarUtils.removeClipsAndPadding(cigarIn); final String actualCigarStrOut = TextCigarCodec.encode(cigarOut); Assert.assertEquals(actualCigarStrOut, expectedCigarStrOut); }