htsjdk.samtools.CigarOperator Java Examples
The following examples show how to use
htsjdk.samtools.CigarOperator.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ClippingOp.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
private GATKRead applySoftClipBases(final GATKRead readCopied) { Utils.validateArg(!readCopied.isUnmapped(), "Read Clipper cannot soft clip unmapped reads"); if (readCopied.getLength() <= 2) { // see GATK issue #2022 -- we can't soft-clip all bases return readCopied; } // see GATK issue #2022 -- we can't soft-clip all bases final int myStop = Math.min(stop, start + readCopied.getLength() - 2); Utils.validate(start <= 0 || myStop == readCopied.getLength() - 1, () -> String.format("Cannot apply soft clipping operator to the middle of a read: %s to be clipped at %d-%d", readCopied.getName(), start, myStop)); final Cigar oldCigar = readCopied.getCigar(); final Cigar newCigar = CigarUtils.clipCigar(oldCigar, start, myStop + 1, CigarOperator.SOFT_CLIP); readCopied.setCigar(newCigar); final int alignmentStartShift = start == 0 ? CigarUtils.alignmentStartShift(oldCigar, stop + 1) : 0; final int newStart = readCopied.getStart() + alignmentStartShift; readCopied.setPosition(readCopied.getContig(), newStart); return readCopied; }
Example #2
Source File: SamRecordComparision.java From cramtools with Apache License 2.0 | 6 votes |
private static boolean testBaseCategory(BaseCategory c, ScoreDiff diff) { switch (c.type) { case FLANKING_DELETION: return diff.nextBaseCoP == CigarOperator.enumToCharacter(CigarOperator.DELETION) || diff.prevBaseCoP == CigarOperator.enumToCharacter(CigarOperator.DELETION); case INSERTION: return diff.cigarOp == CigarOperator.enumToCharacter(CigarOperator.INSERTION); case LOWER_COVERAGE: return diff.coverage < c.param; case MATCH: return diff.base == diff.refBase; case MISMATCH: return diff.base != diff.refBase; case PILEUP: return diff.pileup <= c.param; default: throw new RuntimeException("Unknown read category: " + c.type.name()); } }
Example #3
Source File: SAMRecordUtils.java From abra2 with MIT License | 6 votes |
public static String getMappedReadPortion(SAMRecord read) { int start = 0; List<CigarElement> elems = read.getCigar().getCigarElements(); int i = 0; while (i < elems.size() && isClip(elems.get(i))) { if (elems.get(i).getOperator() == CigarOperator.S) { start += elems.get(i).getLength(); } i += 1; } int stop = read.getReadLength(); i = elems.size()-1; while (i >= 0 && isClip(elems.get(i))) { if (elems.get(i).getOperator() == CigarOperator.S) { stop -= elems.get(i).getLength(); } i -= 1; } return read.getReadString().substring(start, stop); }
Example #4
Source File: ReadClassifier.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
private void checkBigIndel( final List<CigarElement> cigarElements, final GATKRead read, final List<BreakpointEvidence> evidenceList ) { int locus = read.getStart(); for ( final CigarElement ele : cigarElements ) { final CigarOperator op = ele.getOperator(); if ( ele.getLength() >= MIN_INDEL_LEN ) { if ( op == CigarOperator.INSERTION ) { evidenceList.add(new BreakpointEvidence.LargeIndel(read, readMetadata, locus)); } else if ( op == CigarOperator.DELETION ) { evidenceList.add(new BreakpointEvidence.LargeIndel(read, readMetadata, locus+ele.getLength()/2)); } } if ( op.consumesReferenceBases() ) { locus += ele.getLength(); } } }
Example #5
Source File: ReadClipperUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Test public void testRevertSoftClippedBasesWithThreshold() { for (final Cigar cigar : cigarList) { final int leadingSoftClips = leadingCigarElementLength(cigar, CigarOperator.SOFT_CLIP); final int tailSoftClips = leadingCigarElementLength(CigarUtils.invertCigar(cigar), CigarOperator.SOFT_CLIP); final GATKRead read = ReadClipperTestUtils.makeReadFromCigar(cigar); final GATKRead unclipped = ReadClipper.revertSoftClippedBases(read); assertUnclippedLimits(read, unclipped); // Make sure limits haven't changed Assert.assertNull(read.getCigar().isValid(null, -1)); Assert.assertNull(unclipped.getCigar().isValid(null, -1)); if (!(leadingSoftClips > 0 || tailSoftClips > 0)) Assert.assertEquals(read.getCigar().toString(), unclipped.getCigar().toString()); } }
Example #6
Source File: IndelShifterTest.java From abra2 with MIT License | 6 votes |
@Test (groups = "unit" ) public void testShiftCigarLeft_multipleIndels() { Cigar cigar = new Cigar(); cigar.add(new CigarElement(20, CigarOperator.M)); cigar.add(new CigarElement(1, CigarOperator.I)); cigar.add(new CigarElement(5, CigarOperator.M)); cigar.add(new CigarElement(3, CigarOperator.D)); cigar.add(new CigarElement(24, CigarOperator.M)); Cigar newCigar; newCigar = indelShifter.shiftCigarLeft(cigar, 20); Assert.assertEquals(newCigar.toString(), "1I5M3D44M"); newCigar = indelShifter.shiftCigarLeft(cigar, 10); Assert.assertEquals(newCigar.toString(), "10M1I5M3D34M"); newCigar = indelShifter.shiftCigarLeft(cigar, 1); Assert.assertEquals(newCigar.toString(), "19M1I5M3D25M"); }
Example #7
Source File: IndelShifterTest.java From abra2 with MIT License | 6 votes |
@Test (groups = "unit" ) public void testShiftCigarLeft_complex() { //3S69M1I18M1D9M Cigar cigar = new Cigar(); cigar.add(new CigarElement(3, CigarOperator.S)); cigar.add(new CigarElement(69, CigarOperator.M)); cigar.add(new CigarElement(1, CigarOperator.I)); cigar.add(new CigarElement(18, CigarOperator.M)); cigar.add(new CigarElement(1, CigarOperator.D)); cigar.add(new CigarElement(9, CigarOperator.M)); Cigar newCigar; newCigar = indelShifter.shiftCigarLeft(cigar, 1); assertEquals(newCigar.toString(), "3S68M1I18M1D10M"); }
Example #8
Source File: ReferenceConfidenceModel.java From gatk-protected with BSD 3-Clause "New" or "Revised" License | 6 votes |
/** * Create a reference haplotype for an active region * * @param activeRegion the active region * @param refBases the ref bases * @param paddedReferenceLoc the location spanning of the refBases -- can be longer than activeRegion.getLocation() * @return a reference haplotype */ public static Haplotype createReferenceHaplotype(final AssemblyRegion activeRegion, final byte[] refBases, final SimpleInterval paddedReferenceLoc) { Utils.nonNull(activeRegion, "null region"); Utils.nonNull(refBases, "null refBases"); Utils.nonNull(paddedReferenceLoc, "null paddedReferenceLoc"); final int alignmentStart = activeRegion.getExtendedSpan().getStart() - paddedReferenceLoc.getStart(); if ( alignmentStart < 0 ) { throw new IllegalStateException("Bad alignment start in createReferenceHaplotype " + alignmentStart); } final Haplotype refHaplotype = new Haplotype(refBases, true); refHaplotype.setAlignmentStartHapwrtRef(alignmentStart); final Cigar c = new Cigar(); c.add(new CigarElement(refHaplotype.getBases().length, CigarOperator.M)); refHaplotype.setCigar(c); return refHaplotype; }
Example #9
Source File: CigarBuilder.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
private void advanceSectionAndValidateCigarOrder(CigarOperator operator) { if (operator == CigarOperator.HARD_CLIP) { if (section == Section.LEFT_SOFT_CLIP || section == Section.MIDDLE || section == Section.RIGHT_SOFT_CLIP) { section = Section.RIGHT_HARD_CLIP; } } else if (operator == CigarOperator.SOFT_CLIP) { Utils.validate(section != Section.RIGHT_HARD_CLIP, "cigar has already reached its right hard clip"); if (section == Section.LEFT_HARD_CLIP) { section = Section.LEFT_SOFT_CLIP; } else if(section == Section.MIDDLE) { section = Section.RIGHT_SOFT_CLIP; } } else { Utils.validate(section != Section.RIGHT_SOFT_CLIP && section != Section.RIGHT_HARD_CLIP, "cigar has already reached right clip"); if (section == Section.LEFT_HARD_CLIP || section == Section.LEFT_SOFT_CLIP) { section = Section.MIDDLE; } } }
Example #10
Source File: CigarUtilsUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Test public void testClipCountsByHand() { // int[] is leftHard, leftSoft, rightHard, rightSoft final List<Pair<Cigar, int[]>> tests = Arrays.asList( Pair.of("13H3M35D13M2I45S30H", new int[] {13, 0, 30, 45}), Pair.of("1H3S67M13S", new int[] {1, 3, 0, 13}), Pair.of("13M30S10S1H", new int[] {0, 0, 1, 40}), Pair.of("113S4M", new int[] {0, 113, 0, 0}), Pair.of("5H3H10M2S1S", new int[] {8, 0, 0, 3}), Pair.of("10M", new int[] {0, 0, 0, 0}), Pair.of("1H2H3S4S10M5S6S7H8H", new int[] {3, 7, 15, 11}) ) .stream().map(pair -> Pair.of(TextCigarCodec.decode(pair.getLeft()), pair.getRight())).collect(Collectors.toList()); for (final Pair<Cigar, int[]> test : tests) { Assert.assertEquals(CigarUtils.countClippedBases(test.getLeft(), ClippingTail.LEFT_TAIL, CigarOperator.HARD_CLIP), test.getRight()[0]); Assert.assertEquals(CigarUtils.countClippedBases(test.getLeft(), ClippingTail.LEFT_TAIL, CigarOperator.SOFT_CLIP), test.getRight()[1]); Assert.assertEquals(CigarUtils.countClippedBases(test.getLeft(), ClippingTail.RIGHT_TAIL, CigarOperator.HARD_CLIP), test.getRight()[2]); Assert.assertEquals(CigarUtils.countClippedBases(test.getLeft(), ClippingTail.RIGHT_TAIL, CigarOperator.SOFT_CLIP), test.getRight()[3]); } }
Example #11
Source File: CigarUtils.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
/** * Count the number of soft- or hard- clipped bases from either the left or right end of a cigar */ public static int countClippedBases(final Cigar cigar, final ClippingTail tail, final CigarOperator typeOfClip) { Utils.validateArg(typeOfClip.isClipping(), "typeOfClip must be a clipping operator"); final int size = cigar.numCigarElements(); if (size < 2) { Utils.validateArg(size == 1 && !cigar.getFirstCigarElement().getOperator().isClipping(), "cigar is empty or completely clipped."); return 0; } int result = 0; for (int n = 0; n < size; n++) { final int index = (tail == ClippingTail.LEFT_TAIL ? n : size - n - 1); final CigarElement element = cigar.getCigarElement(index); if (!element.getOperator().isClipping()) { return result; } else if (element.getOperator() == typeOfClip) { result += element.getLength(); } } throw new IllegalArgumentException("Input cigar " + cigar + " is completely clipped."); }
Example #12
Source File: BamFragmentAllocator.java From hmftools with GNU General Public License v3.0 | 6 votes |
private void processIntronicReads(final List<GeneReadData> genes, final ReadRecord read1, final ReadRecord read2) { if(read1.Cigar.containsOperator(CigarOperator.N) || read2.Cigar.containsOperator(CigarOperator.N)) { mCurrentGenes.addCount(ALT, 1); if(mAltSpliceJunctionFinder != null) mAltSpliceJunctionFinder.evaluateFragmentReads(genes, read1, read2, Lists.newArrayList()); return; } List<String> unsplicedGeneIds = genes.stream().map(x -> x.GeneData.GeneId).collect(Collectors.toList()); CategoryCountsData catCounts = getCategoryCountsData(Lists.newArrayList(), unsplicedGeneIds); List<int[]> readRegions = deriveCommonRegions(read1.getMappedRegionCoords(), read2.getMappedRegionCoords()); addGcCounts(catCounts, readRegions); mCurrentGenes.addCount(UNSPLICED, 1); }
Example #13
Source File: AbstractReadThreadingGraph.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
/** * Determine whether the provided cigar is okay to merge into the reference path * * @param cigar the cigar to analyze * @param requireFirstElementM if true, require that the first cigar element be an M operator in order for it to be okay * @param requireLastElementM if true, require that the last cigar element be an M operator in order for it to be okay * @return true if it's okay to merge, false otherwise */ @VisibleForTesting static boolean cigarIsOkayToMerge(final Cigar cigar, final boolean requireFirstElementM, final boolean requireLastElementM) { final List<CigarElement> elements = cigar.getCigarElements(); final int numElements = elements.size(); // don't allow more than a couple of different ops if (numElements == 0 || numElements > MAX_CIGAR_COMPLEXITY) { return false; } // the first element must be an M if (requireFirstElementM && elements.get(0).getOperator() != CigarOperator.M) { return false; } // the last element must be an M if (requireLastElementM && elements.get(numElements - 1).getOperator() != CigarOperator.M) { return false; } // note that there are checks for too many mismatches in the dangling branch later in the process return true; }
Example #14
Source File: SamUtils.java From rtg-tools with BSD 2-Clause "Simplified" License | 6 votes |
/** * Converts a CIGAR that may contain new style operators for match and * mismatch into the legacy style CIGAR. * @param cigar the CIGAR to convert * @return the legacy CIGAR */ public static Cigar convertToLegacyCigar(Cigar cigar) { final Cigar cg = new Cigar(); int count = 0; for (int i = 0; i < cigar.numCigarElements(); ++i) { final CigarElement ce = cigar.getCigarElement(i); if (ce.getOperator() == CigarOperator.EQ || ce.getOperator() == CigarOperator.X) { count += ce.getLength(); } else { if (count > 0) { cg.add(new CigarElement(count, CigarOperator.M)); } cg.add(ce); count = 0; } } if (count > 0) { cg.add(new CigarElement(count, CigarOperator.M)); } return cg; }
Example #15
Source File: SAMRecordWrapper.java From abra2 with MIT License | 6 votes |
public int getAdjustedAlignmentStart() { int start = 0; if (adjustedAlignmentStart > -1) { start = adjustedAlignmentStart; } else { start = samRecord.getAlignmentStart(); if (samRecord.getCigar().numCigarElements() > 0) { CigarElement elem = samRecord.getCigar().getCigarElement(0); if (elem.getOperator() == CigarOperator.S) { start -= elem.getLength(); if (start < 1) { start = 1; } } } } return start; }
Example #16
Source File: PileupElementUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@DataProvider(name = "PrevAndNextTest_simple") public Object[][] makePrevAndNextTest_simple() { final List<Object[]> tests = new LinkedList<>(); for (final CigarOperator firstOp : Arrays.asList(M)) { for (final CigarOperator lastOp : Arrays.asList(M)) { for (final CigarOperator middleOp : Arrays.asList(CigarOperator.I, CigarOperator.P, CigarOperator.S)) { final int readLength = 3; final GATKRead read = ArtificialReadUtils.createArtificialRead(header, "read", 0, 1, readLength); read.setBases(Utils.dupBytes((byte) 'A', readLength)); read.setBaseQualities(Utils.dupBytes((byte) 30, readLength)); String cigar = "1" + firstOp; cigar += "1" + middleOp; cigar += "1" + lastOp; read.setCigar(cigar); tests.add(new Object[]{read, middleOp}); } } } return tests.toArray(new Object[][]{}); }
Example #17
Source File: XGBoostEvidenceFilter.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
CigarQualityInfo(final BreakpointEvidence evidence) { if(evidence instanceof ReadEvidence) { int numMatched = 0; int refLength = 0; final String cigarString = ((ReadEvidence) evidence).getCigarString(); for (final CigarElement element : TextCigarCodec.decode(cigarString).getCigarElements()) { final CigarOperator op = element.getOperator(); if (op.consumesReferenceBases()) { refLength += element.getLength(); if (op.consumesReadBases()) { numMatched += element.getLength(); } } } basesMatched = numMatched; referenceLength = refLength; } else { basesMatched = NON_READ_CIGAR_LENGTHS; referenceLength = NON_READ_CIGAR_LENGTHS; } }
Example #18
Source File: HaplotypeUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Test public void testComplexSNPAllele() { final String bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC"; final ArrayList<CigarElement> h1CigarList = new ArrayList<>(); h1CigarList.add(new CigarElement(4, CigarOperator.M)); h1CigarList.add(new CigarElement(10, CigarOperator.I)); h1CigarList.add(new CigarElement(8, CigarOperator.M)); h1CigarList.add(new CigarElement(3, CigarOperator.D)); h1CigarList.add(new CigarElement(7 + 4, CigarOperator.M)); final Cigar h1Cigar = new Cigar(h1CigarList); String h1bases = "AGCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC"; basicInsertTest("T", "G", 1, h1Cigar, bases, h1bases); h1bases = "ATCG" + "CCGGCCGGCC" + "ATCTATCG" + "AGGGGGA" + "AGGC"; basicInsertTest("G", "T", 7, h1Cigar, bases, h1bases); h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGCGGGA" + "AGGC"; basicInsertTest("G", "C", 17, h1Cigar, bases, h1bases); }
Example #19
Source File: FusionUtils.java From hmftools with GNU General Public License v3.0 | 6 votes |
public static int[] findSplitReadJunction(final ReadRecord read) { if(!read.containsSplit()) return null; final int maxSplitLength = read.Cigar.getCigarElements().stream() .filter(x -> x.getOperator() == CigarOperator.N) .mapToInt(x -> x.getLength()).max().orElse(0); final List<int[]> mappedCoords = read.getMappedRegionCoords(); for(int i = 0; i < mappedCoords.size() - 1; ++i) { final int[] lowerCoords = mappedCoords.get(i); final int[] upperCoords = mappedCoords.get(i + 1); if(upperCoords[SE_START] - lowerCoords[SE_END] - 1 == maxSplitLength) { return new int[] { lowerCoords[SE_END], upperCoords[SE_START] }; } } return null; }
Example #20
Source File: CigarUtils.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
/** * How many bases to the right does a read's alignment start shift given its cigar and the number of left soft clips */ public static int alignmentStartShift(final Cigar cigar, final int numClipped) { int refBasesClipped = 0; int elementStart = 0; // this and elementEnd are indices in the read's bases for (final CigarElement element : cigar.getCigarElements()) { final CigarOperator operator = element.getOperator(); // hard clips consume neither read bases nor reference bases and are irrelevant if (operator == CigarOperator.HARD_CLIP) { continue; } final int elementEnd = elementStart + (operator.consumesReadBases() ? element.getLength() : 0); if (elementEnd <= numClipped) { // totally within clipped span -- this includes deletions immediately following clipping refBasesClipped += operator.consumesReferenceBases() ? element.getLength() : 0; } else if (elementStart < numClipped) { // clip in middle of element, which means the element necessarily consumes read bases final int clippedLength = numClipped - elementStart; refBasesClipped += operator.consumesReferenceBases() ? clippedLength : 0; break; } elementStart = elementEnd; } return refBasesClipped; }
Example #21
Source File: LocusIteratorByStateBaseTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@DataProvider(name = "LIBSTest") public Object[][] createLIBSTests(final List<Integer> cigarLengths, final List<Integer> combinations) { final List<Object[]> tests = new LinkedList<>(); final List<CigarOperator> allOps = Arrays.asList(CigarOperator.values()); final List<CigarElement> singleCigars = new LinkedList<>(); for ( final int len : cigarLengths ) singleCigars.addAll(allOps.stream().map(op -> new CigarElement(len, op)).collect(Collectors.toList())); for ( final int complexity : combinations ) { for ( final List<CigarElement> elements : Utils.makePermutations(singleCigars, complexity, true) ) { final LIBSTest test = makePermutationTest(elements); if ( test != null ) tests.add(new Object[]{test}); } } return tests.toArray(new Object[][]{}); }
Example #22
Source File: ReadRecord.java From hmftools with GNU General Public License v3.0 | 6 votes |
public static int calcFragmentLength(final ReadRecord read1, final ReadRecord read2) { int insertSize = abs(read1.fragmentInsertSize()); if(!read1.containsSplit() && !read2.containsSplit()) return insertSize; // find unique split lengths and remove them List<Integer> splitLengths = read1.Cigar.getCigarElements().stream() .filter(x -> x.getOperator() == CigarOperator.N).map(x -> x.getLength()).collect(Collectors.toList()); for(final CigarElement element : read2.Cigar.getCigarElements()) { if(element.getOperator() == CigarOperator.N && !splitLengths.contains(element.getLength())) splitLengths.add(element.getLength()); } int totalSplitLength = splitLengths.stream().mapToInt(x -> x).sum(); return insertSize - totalSplitLength; }
Example #23
Source File: SAMRecordUtils.java From abra2 with MIT License | 5 votes |
/** * Remove leading or trailing soft clips from the input read. * Does not modify a read entirely comprised of soft clips. */ public static void removeSoftClips(SAMRecord read) { Cigar cigar = read.getCigar(); CigarElement firstElement = cigar.getCigarElement(0); CigarElement lastElement = cigar.getCigarElement(cigar.numCigarElements()-1); if ((firstElement.getOperator() == CigarOperator.S) || (lastElement.getOperator() == CigarOperator.S)) { Cigar newCigar = new Cigar(); String bases = read.getReadString(); //String qualities = read.getBaseQualityString(); if (firstElement.getOperator() == CigarOperator.S) { bases = bases.substring(firstElement.getLength(), bases.length()); //qualities = qualities.substring(firstElement.getLength(), qualities.length()-1); } else { newCigar.add(firstElement); } for (int i=1; i<cigar.numCigarElements()-1; i++) { newCigar.add(cigar.getCigarElement(i)); } if (lastElement.getOperator() == CigarOperator.S) { bases = bases.substring(0, bases.length() - lastElement.getLength()); //qualities = qualities.substring(0, qualities.length() - lastElement.getLength() - 1); } else { newCigar.add(lastElement); } read.setCigar(newCigar); read.setReadString(bases); //read.setBaseQualityString(qualities); } }
Example #24
Source File: CigarUtils.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
private static int countRefBasesAndMaybeAlsoClips(final List<CigarElement> elems, final int cigarStartIndex, final int cigarEndIndex, final boolean includeSoftClips, final boolean includeHardClips) { Utils.nonNull(elems); Utils.validateArg(cigarStartIndex >= 0 && cigarEndIndex <= elems.size() && cigarStartIndex <= cigarEndIndex, () -> "invalid index:" + 0 + " -" + elems.size()); int result = 0; for (final CigarElement elem : elems.subList(cigarStartIndex, cigarEndIndex)) { final CigarOperator op = elem.getOperator(); if (op.consumesReferenceBases() || (includeSoftClips && op == CigarOperator.SOFT_CLIP) || (includeHardClips && op == CigarOperator.HARD_CLIP)) { result += elem.getLength(); } } return result; }
Example #25
Source File: HaplotypeUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
@Test public void testSimpleSNPAllele() { final String bases = "ACTGGTCAACTGGTCAACTGGTCAACTGGTCA"; final ArrayList<CigarElement> h1CigarList = new ArrayList<>(); h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M)); final Cigar h1Cigar = new Cigar(h1CigarList); String h1bases = "AGTGGTCAACTGGTCAACTGGTCAACTGGTCA"; basicInsertTest("C", "G", 1, h1Cigar, bases, h1bases); h1bases = "ACTGGTCTACTGGTCAACTGGTCAACTGGTCA"; basicInsertTest("A", "T", 7, h1Cigar, bases, h1bases); h1bases = "ACTGGTCAACTGGTCAAATGGTCAACTGGTCA"; basicInsertTest("C", "A", 17, h1Cigar, bases, h1bases); }
Example #26
Source File: ContigAlignmentsModifierUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
@DataProvider(name = "forCigarExtraction") private Object[][] createTestDataForCigarExtraction() { final List<Object[]> data = new ArrayList<>(20); SimpleInterval refSpan = new SimpleInterval("chr1", 82666357, 82666765); AlignmentInterval alignment = new AlignmentInterval(refSpan, 69, 472, TextCigarCodec.decode("68S122M5D282M"), true, 60, 11, 353, ContigAlignmentsModifier.AlnModType.NONE); List<CigarElement> left = Arrays.asList(new CigarElement(68, CigarOperator.S)); List<CigarElement> middle = Arrays.asList(new CigarElement(122, CigarOperator.M), new CigarElement(5, CigarOperator.D), new CigarElement(282, CigarOperator.M)); List<CigarElement> right = Collections.emptyList(); data.add(new Object[]{alignment, left, middle, right}); refSpan = new SimpleInterval("chr3", 61792401, 61792448); alignment = new AlignmentInterval(refSpan, 43, 90, TextCigarCodec.decode("42H48M382H"), true, 46, 1, 43, ContigAlignmentsModifier.AlnModType.NONE); left = Arrays.asList(new CigarElement(42, CigarOperator.H)); middle = Arrays.asList(new CigarElement(48, CigarOperator.M)); right = Arrays.asList(new CigarElement(382, CigarOperator.H)); data.add(new Object[]{alignment, left, middle, right}); refSpan = new SimpleInterval("chrY", 26303624, 26303671); alignment = new AlignmentInterval(refSpan, 1, 48, TextCigarCodec.decode("48M424H"), true, 0, 2, 38, ContigAlignmentsModifier.AlnModType.NONE); left = Collections.emptyList(); middle = Arrays.asList(new CigarElement(48, CigarOperator.M)); right = Arrays.asList(new CigarElement(424, CigarOperator.H)); data.add(new Object[]{alignment, left, middle, right}); return data.toArray(new Object[data.size()][]); }
Example #27
Source File: ReadClipper.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
/** * Will hard clip every soft clipped bases in the read. * * @return a new read without the soft clipped bases (Could be an empty, unmapped read if it was all soft and hard clips). */ private GATKRead hardClipSoftClippedBases () { if (read.isEmpty()) { return read; } int readIndex = 0; int cutLeft = -1; // first position to hard clip (inclusive) int cutRight = -1; // first position to hard clip (inclusive) boolean rightTail = false; // trigger to stop clipping the left tail and start cutting the right tail for (final CigarElement cigarElement : read.getCigarElements()) { if (cigarElement.getOperator() == CigarOperator.SOFT_CLIP) { if (rightTail) { cutRight = readIndex; } else { cutLeft = readIndex + cigarElement.getLength() - 1; } } else if (cigarElement.getOperator() != CigarOperator.HARD_CLIP) { rightTail = true; } if (cigarElement.getOperator().consumesReadBases()) { readIndex += cigarElement.getLength(); } } // It is extremely important that we cut the end first otherwise the read coordinates change. if (cutRight >= 0) { this.addOp(new ClippingOp(cutRight, read.getLength() - 1)); } if (cutLeft >= 0) { this.addOp(new ClippingOp(0, cutLeft)); } return clipRead(ClippingRepresentation.HARDCLIP_BASES); }
Example #28
Source File: PairedEndAndSplitReadEvidenceCollection.java From gatk with BSD 3-Clause "New" or "Revised" License | 5 votes |
private SplitPos getSplitPosition(GATKRead read) { if (read.getCigar().getFirstCigarElement().getOperator() == CigarOperator.M) { final int matchLength = read.getCigar().getCigarElements().stream().filter(e -> e.getOperator().consumesReferenceBases()).mapToInt(CigarElement::getLength).sum(); return new SplitPos(read.getStart() + matchLength, POSITION.RIGHT); } else if (read.getCigar().getLastCigarElement().getOperator() == CigarOperator.M) { return new SplitPos(read.getStart(), POSITION.LEFT); } return new SplitPos(-1, POSITION.MIDDLE); }
Example #29
Source File: CigarModifierTest.java From VarDictJava with MIT License | 5 votes |
@Test(dataProvider = "cigar") public void getCigarOperatorTest(Object cigarObject) { Cigar cigar = (Cigar) cigarObject; CigarOperator[] operators = new CigarOperator[] { CigarOperator.M, CigarOperator.S, CigarOperator.I, CigarOperator.D, CigarOperator.N, CigarOperator.H, }; for (int i = 0; i < cigar.numCigarElements(); i++) { Assert.assertEquals(CigarParser.getCigarOperator(cigar, i), operators[i]); } }
Example #30
Source File: ReadThreadingAssemblerUnitTest.java From gatk-protected with BSD 3-Clause "New" or "Revised" License | 5 votes |
private List<Haplotype> assemble(final ReadThreadingAssembler assembler, final byte[] refBases, final SimpleInterval loc, final List<GATKRead> reads) { final Haplotype refHaplotype = new Haplotype(refBases, true); final Cigar c = new Cigar(); c.add(new CigarElement(refHaplotype.getBases().length, CigarOperator.M)); refHaplotype.setCigar(c); final AssemblyRegion activeRegion = new AssemblyRegion(loc, null, true, 0, header); activeRegion.addAll(reads); // logger.warn("Assembling " + activeRegion + " with " + engine); final AssemblyResultSet assemblyResultSet = assembler.runLocalAssembly(activeRegion, refHaplotype, refBases, loc, Collections.<VariantContext>emptyList(), null, header); return assemblyResultSet.getHaplotypeList(); }