htsjdk.samtools.CigarOperator Java Examples

The following examples show how to use htsjdk.samtools.CigarOperator. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ClippingOp.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
private GATKRead applySoftClipBases(final GATKRead readCopied) {
    Utils.validateArg(!readCopied.isUnmapped(), "Read Clipper cannot soft clip unmapped reads");

    if (readCopied.getLength() <= 2) {  // see GATK issue #2022 -- we can't soft-clip all bases
        return readCopied;
    }

    // see GATK issue #2022 -- we can't soft-clip all bases
    final int myStop = Math.min(stop, start + readCopied.getLength() - 2);
    Utils.validate(start <= 0 || myStop == readCopied.getLength() - 1, () -> String.format("Cannot apply soft clipping operator to the middle of a read: %s to be clipped at %d-%d", readCopied.getName(), start, myStop));

    final Cigar oldCigar = readCopied.getCigar();
    final Cigar newCigar = CigarUtils.clipCigar(oldCigar, start, myStop + 1, CigarOperator.SOFT_CLIP);
    readCopied.setCigar(newCigar);

    final int alignmentStartShift = start == 0 ? CigarUtils.alignmentStartShift(oldCigar, stop + 1) : 0;
    final int newStart = readCopied.getStart() + alignmentStartShift;
    readCopied.setPosition(readCopied.getContig(), newStart);
    return readCopied;
}
 
Example #2
Source File: SamRecordComparision.java    From cramtools with Apache License 2.0 6 votes vote down vote up
private static boolean testBaseCategory(BaseCategory c, ScoreDiff diff) {
	switch (c.type) {
	case FLANKING_DELETION:
		return diff.nextBaseCoP == CigarOperator.enumToCharacter(CigarOperator.DELETION)
				|| diff.prevBaseCoP == CigarOperator.enumToCharacter(CigarOperator.DELETION);
	case INSERTION:
		return diff.cigarOp == CigarOperator.enumToCharacter(CigarOperator.INSERTION);
	case LOWER_COVERAGE:
		return diff.coverage < c.param;
	case MATCH:
		return diff.base == diff.refBase;
	case MISMATCH:
		return diff.base != diff.refBase;
	case PILEUP:
		return diff.pileup <= c.param;

	default:
		throw new RuntimeException("Unknown read category: " + c.type.name());
	}
}
 
Example #3
Source File: SAMRecordUtils.java    From abra2 with MIT License 6 votes vote down vote up
public static String getMappedReadPortion(SAMRecord read) {
	int start = 0; 
	
	List<CigarElement> elems = read.getCigar().getCigarElements();
	int i = 0;
	while (i < elems.size() && isClip(elems.get(i))) {
		if (elems.get(i).getOperator() == CigarOperator.S) {
			start += elems.get(i).getLength();
		}
		
		i += 1;
	}
	
	int stop = read.getReadLength();
	i = elems.size()-1;
	while (i >= 0 && isClip(elems.get(i))) {
		if (elems.get(i).getOperator() == CigarOperator.S) {
			stop -= elems.get(i).getLength();
		}
		
		i -= 1;
	}
	
	return read.getReadString().substring(start, stop);
}
 
Example #4
Source File: ReadClassifier.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
private void checkBigIndel( final List<CigarElement> cigarElements,
                           final GATKRead read,
                           final List<BreakpointEvidence> evidenceList ) {
    int locus = read.getStart();
    for ( final CigarElement ele : cigarElements ) {
        final CigarOperator op = ele.getOperator();
        if ( ele.getLength() >= MIN_INDEL_LEN ) {
            if ( op == CigarOperator.INSERTION ) {
                evidenceList.add(new BreakpointEvidence.LargeIndel(read, readMetadata, locus));
            } else if ( op == CigarOperator.DELETION ) {
                evidenceList.add(new BreakpointEvidence.LargeIndel(read, readMetadata, locus+ele.getLength()/2));
            }
        }
        if ( op.consumesReferenceBases() ) {
            locus += ele.getLength();
        }
    }
}
 
Example #5
Source File: ReadClipperUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test
public void testRevertSoftClippedBasesWithThreshold() {
    for (final Cigar cigar : cigarList) {
        final int leadingSoftClips = leadingCigarElementLength(cigar, CigarOperator.SOFT_CLIP);
        final int tailSoftClips = leadingCigarElementLength(CigarUtils.invertCigar(cigar), CigarOperator.SOFT_CLIP);

        final GATKRead read = ReadClipperTestUtils.makeReadFromCigar(cigar);
        final GATKRead unclipped = ReadClipper.revertSoftClippedBases(read);

        assertUnclippedLimits(read, unclipped);                                                                     // Make sure limits haven't changed
        Assert.assertNull(read.getCigar().isValid(null, -1));
        Assert.assertNull(unclipped.getCigar().isValid(null, -1));

        if (!(leadingSoftClips > 0 || tailSoftClips > 0))
            Assert.assertEquals(read.getCigar().toString(), unclipped.getCigar().toString());

    }
}
 
Example #6
Source File: IndelShifterTest.java    From abra2 with MIT License 6 votes vote down vote up
@Test (groups = "unit" )
public void testShiftCigarLeft_multipleIndels() {
	Cigar cigar = new Cigar();
	
	cigar.add(new CigarElement(20, CigarOperator.M));
	cigar.add(new CigarElement(1, CigarOperator.I));
	cigar.add(new CigarElement(5, CigarOperator.M));
	cigar.add(new CigarElement(3, CigarOperator.D));
	cigar.add(new CigarElement(24, CigarOperator.M));
	
	Cigar newCigar;
	
	newCigar = indelShifter.shiftCigarLeft(cigar, 20);
	Assert.assertEquals(newCigar.toString(), "1I5M3D44M");
	
	newCigar = indelShifter.shiftCigarLeft(cigar, 10);
	Assert.assertEquals(newCigar.toString(), "10M1I5M3D34M");

	newCigar = indelShifter.shiftCigarLeft(cigar, 1);
	Assert.assertEquals(newCigar.toString(), "19M1I5M3D25M");
}
 
Example #7
Source File: IndelShifterTest.java    From abra2 with MIT License 6 votes vote down vote up
@Test (groups = "unit" )
public void testShiftCigarLeft_complex() {
	//3S69M1I18M1D9M
	Cigar cigar = new Cigar();
	
	cigar.add(new CigarElement(3, CigarOperator.S));
	cigar.add(new CigarElement(69, CigarOperator.M));
	cigar.add(new CigarElement(1, CigarOperator.I));
	cigar.add(new CigarElement(18, CigarOperator.M));
	cigar.add(new CigarElement(1, CigarOperator.D));
	cigar.add(new CigarElement(9, CigarOperator.M));
	
	Cigar newCigar;
	
	newCigar = indelShifter.shiftCigarLeft(cigar, 1);
	assertEquals(newCigar.toString(), "3S68M1I18M1D10M");
}
 
Example #8
Source File: ReferenceConfidenceModel.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Create a reference haplotype for an active region
 *
 * @param activeRegion the active region
 * @param refBases the ref bases
 * @param paddedReferenceLoc the location spanning of the refBases -- can be longer than activeRegion.getLocation()
 * @return a reference haplotype
 */
public static Haplotype createReferenceHaplotype(final AssemblyRegion activeRegion, final byte[] refBases, final SimpleInterval paddedReferenceLoc) {
    Utils.nonNull(activeRegion, "null region");
    Utils.nonNull(refBases, "null refBases");
    Utils.nonNull(paddedReferenceLoc, "null paddedReferenceLoc");

    final int alignmentStart = activeRegion.getExtendedSpan().getStart() - paddedReferenceLoc.getStart();
    if ( alignmentStart < 0 ) {
        throw new IllegalStateException("Bad alignment start in createReferenceHaplotype " + alignmentStart);
    }
    final Haplotype refHaplotype = new Haplotype(refBases, true);
    refHaplotype.setAlignmentStartHapwrtRef(alignmentStart);
    final Cigar c = new Cigar();
    c.add(new CigarElement(refHaplotype.getBases().length, CigarOperator.M));
    refHaplotype.setCigar(c);
    return refHaplotype;
}
 
Example #9
Source File: CigarBuilder.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
private void advanceSectionAndValidateCigarOrder(CigarOperator operator) {
    if (operator == CigarOperator.HARD_CLIP) {
        if (section == Section.LEFT_SOFT_CLIP || section == Section.MIDDLE || section == Section.RIGHT_SOFT_CLIP) {
            section = Section.RIGHT_HARD_CLIP;
        }
    } else if (operator == CigarOperator.SOFT_CLIP) {
        Utils.validate(section != Section.RIGHT_HARD_CLIP, "cigar has already reached its right hard clip");
        if (section == Section.LEFT_HARD_CLIP) {
            section = Section.LEFT_SOFT_CLIP;
        } else if(section == Section.MIDDLE) {
            section = Section.RIGHT_SOFT_CLIP;
        }
    } else {
        Utils.validate(section != Section.RIGHT_SOFT_CLIP && section != Section.RIGHT_HARD_CLIP, "cigar has already reached right clip");
        if (section == Section.LEFT_HARD_CLIP || section == Section.LEFT_SOFT_CLIP) {
            section = Section.MIDDLE;
        }
    }
}
 
Example #10
Source File: CigarUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test
public void testClipCountsByHand() {
    // int[] is leftHard, leftSoft, rightHard, rightSoft
    final List<Pair<Cigar, int[]>> tests = Arrays.asList(
            Pair.of("13H3M35D13M2I45S30H", new int[] {13, 0, 30, 45}),
            Pair.of("1H3S67M13S", new int[] {1, 3, 0, 13}),
            Pair.of("13M30S10S1H", new int[] {0, 0, 1, 40}),
            Pair.of("113S4M", new int[] {0, 113, 0, 0}),
            Pair.of("5H3H10M2S1S", new int[] {8, 0, 0, 3}),
            Pair.of("10M", new int[] {0, 0, 0, 0}),
            Pair.of("1H2H3S4S10M5S6S7H8H", new int[] {3, 7, 15, 11})
    )
            .stream().map(pair -> Pair.of(TextCigarCodec.decode(pair.getLeft()), pair.getRight())).collect(Collectors.toList());

    for (final Pair<Cigar, int[]> test : tests) {
        Assert.assertEquals(CigarUtils.countClippedBases(test.getLeft(), ClippingTail.LEFT_TAIL, CigarOperator.HARD_CLIP), test.getRight()[0]);
        Assert.assertEquals(CigarUtils.countClippedBases(test.getLeft(), ClippingTail.LEFT_TAIL, CigarOperator.SOFT_CLIP), test.getRight()[1]);
        Assert.assertEquals(CigarUtils.countClippedBases(test.getLeft(), ClippingTail.RIGHT_TAIL, CigarOperator.HARD_CLIP), test.getRight()[2]);
        Assert.assertEquals(CigarUtils.countClippedBases(test.getLeft(), ClippingTail.RIGHT_TAIL, CigarOperator.SOFT_CLIP), test.getRight()[3]);
    }
}
 
Example #11
Source File: CigarUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Count the number of soft- or hard- clipped bases from either the left or right end of a cigar
 */
public static int countClippedBases(final Cigar cigar, final ClippingTail tail, final CigarOperator typeOfClip) {
    Utils.validateArg(typeOfClip.isClipping(), "typeOfClip must be a clipping operator");

    final int size = cigar.numCigarElements();
    if (size < 2) {
        Utils.validateArg(size == 1 && !cigar.getFirstCigarElement().getOperator().isClipping(), "cigar is empty or completely clipped.");
        return 0;
    }

    int result = 0;

    for (int n = 0; n < size; n++) {
        final int index = (tail == ClippingTail.LEFT_TAIL ? n : size - n - 1);
        final CigarElement element = cigar.getCigarElement(index);
        if (!element.getOperator().isClipping()) {
            return result;
        } else if (element.getOperator() == typeOfClip) {
            result += element.getLength();
        }
    }

    throw new IllegalArgumentException("Input cigar " + cigar + " is completely clipped.");
}
 
Example #12
Source File: BamFragmentAllocator.java    From hmftools with GNU General Public License v3.0 6 votes vote down vote up
private void processIntronicReads(final List<GeneReadData> genes, final ReadRecord read1, final ReadRecord read2)
{
    if(read1.Cigar.containsOperator(CigarOperator.N) || read2.Cigar.containsOperator(CigarOperator.N))
    {
        mCurrentGenes.addCount(ALT, 1);

        if(mAltSpliceJunctionFinder != null)
            mAltSpliceJunctionFinder.evaluateFragmentReads(genes, read1, read2, Lists.newArrayList());

        return;
    }

    List<String> unsplicedGeneIds = genes.stream().map(x -> x.GeneData.GeneId).collect(Collectors.toList());

    CategoryCountsData catCounts = getCategoryCountsData(Lists.newArrayList(), unsplicedGeneIds);

    List<int[]> readRegions = deriveCommonRegions(read1.getMappedRegionCoords(), read2.getMappedRegionCoords());
    addGcCounts(catCounts, readRegions);

    mCurrentGenes.addCount(UNSPLICED, 1);
}
 
Example #13
Source File: AbstractReadThreadingGraph.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Determine whether the provided cigar is okay to merge into the reference path
 *
 * @param cigar                the cigar to analyze
 * @param requireFirstElementM if true, require that the first cigar element be an M operator in order for it to be okay
 * @param requireLastElementM  if true, require that the last cigar element be an M operator in order for it to be okay
 * @return true if it's okay to merge, false otherwise
 */
@VisibleForTesting
static boolean cigarIsOkayToMerge(final Cigar cigar, final boolean requireFirstElementM, final boolean requireLastElementM) {
    final List<CigarElement> elements = cigar.getCigarElements();
    final int numElements = elements.size();

    // don't allow more than a couple of different ops
    if (numElements == 0 || numElements > MAX_CIGAR_COMPLEXITY) {
        return false;
    }

    // the first element must be an M
    if (requireFirstElementM && elements.get(0).getOperator() != CigarOperator.M) {
        return false;
    }

    // the last element must be an M
    if (requireLastElementM && elements.get(numElements - 1).getOperator() != CigarOperator.M) {
        return false;
    }

    // note that there are checks for too many mismatches in the dangling branch later in the process

    return true;
}
 
Example #14
Source File: SamUtils.java    From rtg-tools with BSD 2-Clause "Simplified" License 6 votes vote down vote up
/**
 * Converts a CIGAR that may contain new style operators for match and
 * mismatch into the legacy style CIGAR.
 * @param cigar the CIGAR to convert
 * @return the legacy CIGAR
 */
public static Cigar convertToLegacyCigar(Cigar cigar) {
  final Cigar cg = new Cigar();
  int count = 0;
  for (int i = 0; i < cigar.numCigarElements(); ++i) {
    final CigarElement ce = cigar.getCigarElement(i);
    if (ce.getOperator() == CigarOperator.EQ || ce.getOperator() == CigarOperator.X) {
      count += ce.getLength();
    } else {
      if (count > 0) {
        cg.add(new CigarElement(count, CigarOperator.M));
      }
      cg.add(ce);
      count = 0;
    }
  }
  if (count > 0) {
    cg.add(new CigarElement(count, CigarOperator.M));
  }
  return cg;
}
 
Example #15
Source File: SAMRecordWrapper.java    From abra2 with MIT License 6 votes vote down vote up
public int getAdjustedAlignmentStart() {
	
	int start = 0;
	
	if (adjustedAlignmentStart > -1) {
		start = adjustedAlignmentStart;
	} else {
	
		start = samRecord.getAlignmentStart();
		
		if (samRecord.getCigar().numCigarElements() > 0) {
			CigarElement elem = samRecord.getCigar().getCigarElement(0);
			if (elem.getOperator() == CigarOperator.S) {
				start -= elem.getLength();
				if (start < 1) {
					start = 1;
				}
			}
		}
	}
	
	return start;
}
 
Example #16
Source File: PileupElementUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@DataProvider(name = "PrevAndNextTest_simple")
public Object[][] makePrevAndNextTest_simple() {
    final List<Object[]> tests = new LinkedList<>();

    for (final CigarOperator firstOp : Arrays.asList(M)) {
        for (final CigarOperator lastOp : Arrays.asList(M)) {
            for (final CigarOperator middleOp : Arrays.asList(CigarOperator.I, CigarOperator.P, CigarOperator.S)) {
                final int readLength = 3;
                final GATKRead read = ArtificialReadUtils.createArtificialRead(header, "read", 0, 1, readLength);
                read.setBases(Utils.dupBytes((byte) 'A', readLength));
                read.setBaseQualities(Utils.dupBytes((byte) 30, readLength));

                String cigar = "1" + firstOp;
                cigar += "1" + middleOp;
                cigar += "1" + lastOp;
                read.setCigar(cigar);

                tests.add(new Object[]{read, middleOp});
            }
        }
    }

    return tests.toArray(new Object[][]{});
}
 
Example #17
Source File: XGBoostEvidenceFilter.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
CigarQualityInfo(final BreakpointEvidence evidence) {
    if(evidence instanceof ReadEvidence) {
        int numMatched = 0;
        int refLength = 0;
        final String cigarString = ((ReadEvidence) evidence).getCigarString();
        for (final CigarElement element : TextCigarCodec.decode(cigarString).getCigarElements()) {
            final CigarOperator op = element.getOperator();
            if (op.consumesReferenceBases()) {
                refLength += element.getLength();
                if (op.consumesReadBases()) {
                    numMatched += element.getLength();
                }
            }
        }
        basesMatched = numMatched;
        referenceLength = refLength;
    }
    else {
        basesMatched = NON_READ_CIGAR_LENGTHS;
        referenceLength = NON_READ_CIGAR_LENGTHS;
    }
}
 
Example #18
Source File: HaplotypeUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test
public void testComplexSNPAllele() {
    final String bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";

    final ArrayList<CigarElement> h1CigarList = new ArrayList<>();
    h1CigarList.add(new CigarElement(4, CigarOperator.M));
    h1CigarList.add(new CigarElement(10, CigarOperator.I));
    h1CigarList.add(new CigarElement(8, CigarOperator.M));
    h1CigarList.add(new CigarElement(3, CigarOperator.D));
    h1CigarList.add(new CigarElement(7 + 4, CigarOperator.M));
    final Cigar h1Cigar = new Cigar(h1CigarList);
    String h1bases = "AGCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGGGGGA" + "AGGC";
    basicInsertTest("T", "G", 1, h1Cigar, bases, h1bases);
    h1bases = "ATCG" + "CCGGCCGGCC" + "ATCTATCG" + "AGGGGGA" + "AGGC";
    basicInsertTest("G", "T", 7, h1Cigar, bases, h1bases);
    h1bases = "ATCG" + "CCGGCCGGCC" + "ATCGATCG" + "AGCGGGA" + "AGGC";
    basicInsertTest("G", "C", 17, h1Cigar, bases, h1bases);
}
 
Example #19
Source File: FusionUtils.java    From hmftools with GNU General Public License v3.0 6 votes vote down vote up
public static int[] findSplitReadJunction(final ReadRecord read)
{
    if(!read.containsSplit())
        return null;

    final int maxSplitLength = read.Cigar.getCigarElements().stream()
            .filter(x -> x.getOperator() == CigarOperator.N)
            .mapToInt(x -> x.getLength()).max().orElse(0);

    final List<int[]> mappedCoords = read.getMappedRegionCoords();
    for(int i = 0; i < mappedCoords.size() - 1; ++i)
    {
        final int[] lowerCoords = mappedCoords.get(i);
        final int[] upperCoords = mappedCoords.get(i + 1);

        if(upperCoords[SE_START] - lowerCoords[SE_END] - 1 == maxSplitLength)
        {
            return new int[] { lowerCoords[SE_END], upperCoords[SE_START] };
        }
    }

    return null;
}
 
Example #20
Source File: CigarUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * How many bases to the right does a read's alignment start shift given its cigar and the number of left soft clips
 */
public static int alignmentStartShift(final Cigar cigar, final int numClipped) {
    int refBasesClipped = 0;

    int elementStart = 0;   // this and elementEnd are indices in the read's bases
    for (final CigarElement element : cigar.getCigarElements()) {
        final CigarOperator operator = element.getOperator();
        // hard clips consume neither read bases nor reference bases and are irrelevant
        if (operator == CigarOperator.HARD_CLIP) {
            continue;
        }
        final int elementEnd = elementStart + (operator.consumesReadBases() ? element.getLength() : 0);

        if (elementEnd <= numClipped) {  // totally within clipped span -- this includes deletions immediately following clipping
            refBasesClipped += operator.consumesReferenceBases() ? element.getLength() : 0;
        } else if (elementStart < numClipped) { // clip in middle of element, which means the element necessarily consumes read bases
            final int clippedLength = numClipped - elementStart;
            refBasesClipped += operator.consumesReferenceBases() ? clippedLength : 0;
            break;
        }
        elementStart = elementEnd;
    }
    return refBasesClipped;
}
 
Example #21
Source File: LocusIteratorByStateBaseTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@DataProvider(name = "LIBSTest")
public Object[][] createLIBSTests(final List<Integer> cigarLengths, final List<Integer> combinations) {
    final List<Object[]> tests = new LinkedList<>();

    final List<CigarOperator> allOps = Arrays.asList(CigarOperator.values());

    final List<CigarElement> singleCigars = new LinkedList<>();
    for ( final int len : cigarLengths )
        singleCigars.addAll(allOps.stream().map(op -> new CigarElement(len, op)).collect(Collectors.toList()));

    for ( final int complexity : combinations ) {
        for ( final List<CigarElement> elements : Utils.makePermutations(singleCigars, complexity, true) ) {
            final LIBSTest test = makePermutationTest(elements);
            if ( test != null ) tests.add(new Object[]{test});
        }
    }

    return tests.toArray(new Object[][]{});
}
 
Example #22
Source File: ReadRecord.java    From hmftools with GNU General Public License v3.0 6 votes vote down vote up
public static int calcFragmentLength(final ReadRecord read1, final ReadRecord read2)
{
    int insertSize = abs(read1.fragmentInsertSize());

    if(!read1.containsSplit() && !read2.containsSplit())
        return insertSize;

    // find unique split lengths and remove them

    List<Integer> splitLengths = read1.Cigar.getCigarElements().stream()
            .filter(x -> x.getOperator() == CigarOperator.N).map(x -> x.getLength()).collect(Collectors.toList());

    for(final CigarElement element : read2.Cigar.getCigarElements())
    {
        if(element.getOperator() == CigarOperator.N && !splitLengths.contains(element.getLength()))
            splitLengths.add(element.getLength());
    }

    int totalSplitLength = splitLengths.stream().mapToInt(x -> x).sum();

    return insertSize - totalSplitLength;
}
 
Example #23
Source File: SAMRecordUtils.java    From abra2 with MIT License 5 votes vote down vote up
/**
 * Remove leading or trailing soft clips from the input read.
 * Does not modify a read entirely comprised of soft clips.
 */
public static void removeSoftClips(SAMRecord read) {
	
	Cigar cigar = read.getCigar();
	
	CigarElement firstElement = cigar.getCigarElement(0);
	CigarElement lastElement  = cigar.getCigarElement(cigar.numCigarElements()-1);
	
	if ((firstElement.getOperator() == CigarOperator.S) ||
		(lastElement.getOperator() == CigarOperator.S)) {
	
		Cigar newCigar = new Cigar();
		
		String bases = read.getReadString();
		//String qualities = read.getBaseQualityString();
				
		if (firstElement.getOperator() == CigarOperator.S) {
			bases = bases.substring(firstElement.getLength(), bases.length());
			//qualities = qualities.substring(firstElement.getLength(), qualities.length()-1);
		} else {
			newCigar.add(firstElement);
		}
		
		for (int i=1; i<cigar.numCigarElements()-1; i++) {
			newCigar.add(cigar.getCigarElement(i));
		}
		
		if (lastElement.getOperator() == CigarOperator.S) {
			bases = bases.substring(0, bases.length() - lastElement.getLength());
			//qualities = qualities.substring(0, qualities.length() - lastElement.getLength() - 1);
		} else {
			newCigar.add(lastElement);
		}
		
		read.setCigar(newCigar);
		read.setReadString(bases);
		//read.setBaseQualityString(qualities);
	}
}
 
Example #24
Source File: CigarUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private static int countRefBasesAndMaybeAlsoClips(final List<CigarElement> elems, final int cigarStartIndex, final int cigarEndIndex, final boolean includeSoftClips, final boolean includeHardClips) {
    Utils.nonNull(elems);
    Utils.validateArg(cigarStartIndex >= 0 && cigarEndIndex <= elems.size() && cigarStartIndex <= cigarEndIndex, () -> "invalid index:" + 0 + " -" + elems.size());

    int result = 0;
    for (final CigarElement elem : elems.subList(cigarStartIndex, cigarEndIndex)) {
        final CigarOperator op = elem.getOperator();
        if (op.consumesReferenceBases() || (includeSoftClips && op == CigarOperator.SOFT_CLIP) || (includeHardClips && op == CigarOperator.HARD_CLIP)) {
            result += elem.getLength();
        }
    }

    return result;
}
 
Example #25
Source File: HaplotypeUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test
public void testSimpleSNPAllele() {
    final String bases = "ACTGGTCAACTGGTCAACTGGTCAACTGGTCA";

    final ArrayList<CigarElement> h1CigarList = new ArrayList<>();
    h1CigarList.add(new CigarElement(bases.length(), CigarOperator.M));
    final Cigar h1Cigar = new Cigar(h1CigarList);
    String h1bases = "AGTGGTCAACTGGTCAACTGGTCAACTGGTCA";
    basicInsertTest("C", "G", 1, h1Cigar, bases, h1bases);
    h1bases = "ACTGGTCTACTGGTCAACTGGTCAACTGGTCA";
    basicInsertTest("A", "T", 7, h1Cigar, bases, h1bases);
    h1bases = "ACTGGTCAACTGGTCAAATGGTCAACTGGTCA";
    basicInsertTest("C", "A", 17, h1Cigar, bases, h1bases);
}
 
Example #26
Source File: ContigAlignmentsModifierUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@DataProvider(name = "forCigarExtraction")
private Object[][] createTestDataForCigarExtraction() {

    final List<Object[]> data = new ArrayList<>(20);

    SimpleInterval refSpan = new SimpleInterval("chr1", 82666357, 82666765);
    AlignmentInterval alignment = new AlignmentInterval(refSpan, 69, 472,
            TextCigarCodec.decode("68S122M5D282M"), true, 60, 11,
            353, ContigAlignmentsModifier.AlnModType.NONE);
    List<CigarElement> left = Arrays.asList(new CigarElement(68, CigarOperator.S));
    List<CigarElement> middle = Arrays.asList(new CigarElement(122, CigarOperator.M), new CigarElement(5, CigarOperator.D), new CigarElement(282, CigarOperator.M));
    List<CigarElement> right = Collections.emptyList();
    data.add(new Object[]{alignment, left, middle, right});

    refSpan = new SimpleInterval("chr3", 61792401, 61792448);
    alignment = new AlignmentInterval(refSpan, 43, 90,
            TextCigarCodec.decode("42H48M382H"), true, 46, 1,
            43, ContigAlignmentsModifier.AlnModType.NONE);
    left = Arrays.asList(new CigarElement(42, CigarOperator.H));
    middle = Arrays.asList(new CigarElement(48, CigarOperator.M));
    right = Arrays.asList(new CigarElement(382, CigarOperator.H));
    data.add(new Object[]{alignment, left, middle, right});

    refSpan = new SimpleInterval("chrY", 26303624, 26303671);
    alignment = new AlignmentInterval(refSpan, 1, 48,
            TextCigarCodec.decode("48M424H"), true, 0, 2,
            38, ContigAlignmentsModifier.AlnModType.NONE);
    left = Collections.emptyList();
    middle = Arrays.asList(new CigarElement(48, CigarOperator.M));
    right = Arrays.asList(new CigarElement(424, CigarOperator.H));
    data.add(new Object[]{alignment, left, middle, right});

    return data.toArray(new Object[data.size()][]);
}
 
Example #27
Source File: ReadClipper.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Will hard clip every soft clipped bases in the read.
 *
 * @return a new read without the soft clipped bases (Could be an empty, unmapped read if it was all soft and hard clips).
 */
private GATKRead hardClipSoftClippedBases () {
    if (read.isEmpty()) {
        return read;
    }

    int readIndex = 0;
    int cutLeft = -1;            // first position to hard clip (inclusive)
    int cutRight = -1;           // first position to hard clip (inclusive)
    boolean rightTail = false;   // trigger to stop clipping the left tail and start cutting the right tail

    for (final CigarElement cigarElement : read.getCigarElements()) {
        if (cigarElement.getOperator() == CigarOperator.SOFT_CLIP) {
            if (rightTail) {
                cutRight = readIndex;
            }
            else {
                cutLeft = readIndex + cigarElement.getLength() - 1;
            }
        }
        else if (cigarElement.getOperator() != CigarOperator.HARD_CLIP) {
            rightTail = true;
        }

        if (cigarElement.getOperator().consumesReadBases()) {
            readIndex += cigarElement.getLength();
        }
    }

    // It is extremely important that we cut the end first otherwise the read coordinates change.
    if (cutRight >= 0) {
        this.addOp(new ClippingOp(cutRight, read.getLength() - 1));
    }
    if (cutLeft >= 0) {
        this.addOp(new ClippingOp(0, cutLeft));
    }

    return clipRead(ClippingRepresentation.HARDCLIP_BASES);
}
 
Example #28
Source File: PairedEndAndSplitReadEvidenceCollection.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private SplitPos getSplitPosition(GATKRead read) {
    if (read.getCigar().getFirstCigarElement().getOperator() == CigarOperator.M) {
        final int matchLength = read.getCigar().getCigarElements().stream().filter(e -> e.getOperator().consumesReferenceBases()).mapToInt(CigarElement::getLength).sum();
        return new SplitPos(read.getStart() + matchLength, POSITION.RIGHT);
    } else if (read.getCigar().getLastCigarElement().getOperator() == CigarOperator.M) {
        return new SplitPos(read.getStart(), POSITION.LEFT);
    }

    return new SplitPos(-1, POSITION.MIDDLE);
}
 
Example #29
Source File: CigarModifierTest.java    From VarDictJava with MIT License 5 votes vote down vote up
@Test(dataProvider = "cigar")
public void getCigarOperatorTest(Object cigarObject) {
    Cigar cigar = (Cigar) cigarObject;
    CigarOperator[] operators = new CigarOperator[] {
            CigarOperator.M,
            CigarOperator.S,
            CigarOperator.I,
            CigarOperator.D,
            CigarOperator.N,
            CigarOperator.H,
    };
    for (int i = 0; i < cigar.numCigarElements(); i++) {
        Assert.assertEquals(CigarParser.getCigarOperator(cigar, i), operators[i]);
    }
}
 
Example #30
Source File: ReadThreadingAssemblerUnitTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private List<Haplotype> assemble(final ReadThreadingAssembler assembler, final byte[] refBases, final SimpleInterval loc, final List<GATKRead> reads) {
        final Haplotype refHaplotype = new Haplotype(refBases, true);
        final Cigar c = new Cigar();
        c.add(new CigarElement(refHaplotype.getBases().length, CigarOperator.M));
        refHaplotype.setCigar(c);

        final AssemblyRegion activeRegion = new AssemblyRegion(loc, null, true, 0, header);
        activeRegion.addAll(reads);
//        logger.warn("Assembling " + activeRegion + " with " + engine);
        final AssemblyResultSet assemblyResultSet =  assembler.runLocalAssembly(activeRegion, refHaplotype, refBases, loc, Collections.<VariantContext>emptyList(), null, header);
        return assemblyResultSet.getHaplotypeList();
    }