Java Code Examples for htsjdk.samtools.TextCigarCodec#decode()

The following examples show how to use htsjdk.samtools.TextCigarCodec#decode(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: IndelShifterTest.java    From abra2 with MIT License 6 votes vote down vote up
@Test(groups = "unit")
public void testShiftIndelsLeft() throws Exception {
	// Reference is initialised from the test-data/test.fa fixture.
	final CompareToReference2 reference = new CompareToReference2();
	reference.init("test-data/test.fa");

	/*
	 * Expected shift (the bottom row is a 1-based position ruler):
	 *
	 * TCGAATCGATATATTTCCGGAACAGACTCAG
	 * ------CGATAT--TTCCGGAA--------- <-- original placement (6M2D8M)
	 * ------CG--ATATTTCCGGAA--------- <-- left-shifted placement (2M2D12M)
	 * 1234567890123456789012
	 */
	final String readBases = "CGATATTTCCGGAA";
	final Cigar originalCigar = TextCigarCodec.decode("6M2D8M");

	// The shifter takes 1-based reference start/end coordinates (7 and 22 here).
	final Cigar shiftedCigar =
			indelShifter.shiftIndelsLeft(7, 22, "seq1", originalCigar, readBases, reference);

	assertEquals(TextCigarCodec.encode(shiftedCigar), "2M2D12M");
}
 
Example 2
Source File: IndelShifterTest.java    From abra2 with MIT License 6 votes vote down vote up
@Test(groups = "unit")
public void testShiftIndelsLeft_LocalRef() throws Exception {
	// Reference is supplied in memory instead of being read from a FASTA file.
	final CompareToReference2 reference = new CompareToReference2();
	reference.initLocal("seq1", "TCGAATCGATATATTTCCGGAACAGACTCAG");

	/*
	 * Expected shift (the bottom row is a 1-based position ruler):
	 *
	 * TCGAATCGATATATTTCCGGAACAGACTCAG
	 * ------CGATAT--TTCCGGAA--------- <-- original placement (6M2D8M)
	 * ------CG--ATATTTCCGGAA--------- <-- left-shifted placement (2M2D12M)
	 * 1234567890123456789012
	 */
	final String readBases = "CGATATTTCCGGAA";
	final Cigar originalCigar = TextCigarCodec.decode("6M2D8M");

	// The shifter takes 1-based reference start/end coordinates (7 and 22 here).
	final Cigar shiftedCigar =
			indelShifter.shiftIndelsLeft(7, 22, "seq1", originalCigar, readBases, reference);

	assertEquals(TextCigarCodec.encode(shiftedCigar), "2M2D12M");
}
 
Example 3
Source File: ContigAlignmentsModifierUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test(groups = "sv")
public void testGappedAlignmentBreaker_GapSizeSensitivity() {

    // A single alignment whose CIGAR carries four gaps: 10D, 60I, 10I and 50D.
    final Cigar gappedCigar = TextCigarCodec.decode("10M10D10M60I10M10I10M50D10M");
    final AlignmentInterval gappedAlignment = new AlignmentInterval(
            new SimpleInterval("1", 100, 209), 1, 120, gappedCigar,
            true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE);

    // Split it using the default gap-size sensitivity.
    final List<AlignmentInterval> pieces = Utils.stream(
            ContigAlignmentsModifier.splitGappedAlignment(
                    gappedAlignment,
                    StructuralVariationDiscoveryArgumentCollection.DiscoverVariantsFromContigAlignmentsSparkArgumentCollection.GAPPED_ALIGNMENT_BREAK_DEFAULT_SENSITIVITY,
                    gappedCigar.getReadLength()))
            .collect(Collectors.toList());

    // Three pieces are expected: per the expected CIGARs below, the 10D and 10I gaps stay
    // inside a piece, while the 60I and 50D gaps are where the alignment is broken.
    Assert.assertEquals(pieces.size(), 3);

    final ContigAlignmentsModifier.AlnModType splitTag =
            ContigAlignmentsModifier.AlnModType.FROM_SPLIT_GAPPED_ALIGNMENT;

    final AlignmentInterval expectedHead = new AlignmentInterval(
            new SimpleInterval("1", 100, 129), 1, 20, TextCigarCodec.decode("10M10D10M100S"),
            true, 60, NO_NM, NO_AS, splitTag);
    Assert.assertEquals(pieces.get(0), expectedHead);

    final AlignmentInterval expectedMiddle = new AlignmentInterval(
            new SimpleInterval("1", 130, 149), 81, 110, TextCigarCodec.decode("80S10M10I10M10S"),
            true, 60, NO_NM, NO_AS, splitTag);
    Assert.assertEquals(pieces.get(1), expectedMiddle);

    final AlignmentInterval expectedTail = new AlignmentInterval(
            new SimpleInterval("1", 200, 209), 111, 120, TextCigarCodec.decode("110S10M"),
            true, 60, NO_NM, NO_AS, splitTag);
    Assert.assertEquals(pieces.get(2), expectedTail);
}
 
Example 4
Source File: AssemblyBasedSVDiscoveryTestDataProviderForSimpleSV.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Builds test data for a simple deletion: 40-'A' + 10-'C' + 10-'T' + 40-'G' where the segment
 * 10-'C' + 10-'T' is deleted (description given in forward strand representation).
 *
 * Both entries share the same expected novel adjacency, SV types and variants; they differ in the
 * contig name, contig sequence, alignment regions/strand and the expected chimera.
 *
 * @return a list of two entries: first the '+' strand representation, then the '-' strand one.
 */
private static List<TestDataForSimpleSV>
forSimpleDeletion() {

    final List<TestDataForSimpleSV> result = new ArrayList<>();
    // simple deletion '+' strand representation
    final String leftRefFlank = TestUtilsForAssemblyBasedSVDiscovery.makeDummySequence('A', 40);
    final String rightRefFlank = TestUtilsForAssemblyBasedSVDiscovery.makeDummySequence('G', 40);
    byte[] contigSeq = (leftRefFlank + rightRefFlank).getBytes();
    String contigName = "simple_del_+";

    final SimpleInterval expectedLeftBreakpoint = new SimpleInterval("21:17000040-17000040");
    final SimpleInterval expectedRightBreakpoint = new SimpleInterval("21:17000060-17000060");
    final BreakpointComplications expectedBreakpointComplications = new BreakpointComplications.SimpleInsDelOrReplacementBreakpointComplications("", "");
    final byte[] expectedAltSeq = EMPTY_BYTE_ARRAY;
    final NovelAdjacencyAndAltHaplotype expectedNovelAdjacencyAndAltHaplotype = new NovelAdjacencyAndAltHaplotype(expectedLeftBreakpoint, expectedRightBreakpoint, NO_SWITCH, expectedBreakpointComplications, SIMPLE_DEL, expectedAltSeq);
    AlignmentInterval region1 = new AlignmentInterval(new SimpleInterval("21", 17000001, 17000040), 1 ,40, TextCigarCodec.decode("40M40S"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE);
    AlignmentInterval region2 = new AlignmentInterval(new SimpleInterval("21", 17000061, 17000100), 41 ,80, TextCigarCodec.decode("40S40M"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE);
    SimpleChimera expectedSimpleChimera = new SimpleChimera(contigName, region1, region2, NO_SWITCH, true, Collections.emptyList(), NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME);
    DistancesBetweenAlignmentsOnRefAndOnRead expectedDistances = new DistancesBetweenAlignmentsOnRefAndOnRead(20, 0, 17000040, 17000061, 40, 41);
    final List<SvType> expectedSVTypes = Collections.singletonList(makeDeletionType(new SimpleInterval("21:17000040-17000060"), Allele.create("G", true),false));
    final List<VariantContext> expectedVariants = Collections.singletonList(
            addStandardAttributes(makeDeletion(new SimpleInterval("21:17000040-17000060"), Allele.create("G", true), false),
                    40, contigName, SimpleSVType.SupportedType.DEL.name(), 17000060, -20, "", "", "").make());
    result.add(new TestDataForSimpleSV(region1, region2, contigName, contigSeq, false, expectedSimpleChimera, expectedNovelAdjacencyAndAltHaplotype, expectedSVTypes, expectedVariants, expectedDistances, BreakpointsInference.SimpleInsertionDeletionBreakpointsInference.class));

    // simple deletion '-' strand representation
    // SequenceUtil.reverseComplement(String) returns a new String and leaves its argument untouched,
    // so the results must be captured; previously they were discarded and the '-' strand contig was
    // assembled from the original (un-complemented) flanks.
    final String leftRefFlankRevComp = SequenceUtil.reverseComplement(leftRefFlank);
    final String rightRefFlankRevComp = SequenceUtil.reverseComplement(rightRefFlank);
    contigSeq = (rightRefFlankRevComp + leftRefFlankRevComp).getBytes();
    contigName = "simple_del_-";
    region1 = new AlignmentInterval(new SimpleInterval("21", 17000061, 17000100), 1 ,40, TextCigarCodec.decode("40M40S"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE);
    region2 = new AlignmentInterval(new SimpleInterval("21", 17000001, 17000040), 41 ,80, TextCigarCodec.decode("40S40M"), false, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE);
    expectedSimpleChimera = new SimpleChimera(contigName, region1, region2, NO_SWITCH, false, Collections.emptyList(), NO_GOOD_MAPPING_TO_NON_CANONICAL_CHROMOSOME);
    expectedDistances = new DistancesBetweenAlignmentsOnRefAndOnRead(20, 0, 17000040, 17000061, 40, 41);
    result.add(new TestDataForSimpleSV(region1, region2, contigName, contigSeq, true, expectedSimpleChimera, expectedNovelAdjacencyAndAltHaplotype, expectedSVTypes, expectedVariants, expectedDistances, BreakpointsInference.SimpleInsertionDeletionBreakpointsInference.class));

    return result;
}
 
Example 5
Source File: KBestHaplotypeFinderUnitTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test(enabled = true)
public void testLeftAlignCigarSequentiallyAdjacentID() {
    // Reference and haplotype bases for a CIGAR with an insertion followed closely by a deletion.
    final byte[] refBases = "GTCTCTCTCTCTCTCTCTATATATATATATATATTT".getBytes();
    final byte[] hapBases = "GTCTCTCTCTCTCTCTCTCTCTATATATATATATTT".getBytes();
    final Cigar before = TextCigarCodec.decode("18M4I12M4D2M");

    final Cigar after = CigarUtils.leftAlignCigarSequentially(before, refBases, hapBases, 0, 0);
    logger.warn("Result is " + after);

    // Left alignment must not change how many reference bases the CIGAR spans.
    final int lengthBefore = before.getReferenceLength();
    final int lengthAfter = after.getReferenceLength();
    Assert.assertEquals(lengthBefore, lengthAfter, "Reference lengths are different");
}
 
Example 6
Source File: CigarUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test(dataProvider = "testData_invertCigar")
public void testInvertCigar(final String cigarStrIn, final String expectedCigarStrOut){
    // decode -> invert -> encode, then compare the textual form with the expectation
    final Cigar inverted = CigarUtils.invertCigar(TextCigarCodec.decode(cigarStrIn));
    Assert.assertEquals(TextCigarCodec.encode(inverted), expectedCigarStrOut);
}
 
Example 7
Source File: ContigChimericAlignmentIterativeInterpreterUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test(groups = "sv")
public void testFilterByRegionTooSmall() {
    final int contigLength = AssemblyBasedSVDiscoveryTestDataProviderForInversionBreakpoints.LONG_CONTIG1.getBytes().length;

    // Two alignments of the same long contig: one on the reverse strand, one on the forward strand.
    final SimpleInterval reverseSpan = new SimpleInterval(
            AssemblyBasedSVDiscoveryTestDataProviderForInversionBreakpoints.chrForLongContig1, 20138007, 20142231);
    final AlignmentInterval region1 = new AlignmentInterval(
            reverseSpan, 1, contigLength - 1986,
            TextCigarCodec.decode("1986S236M2D1572M1I798M5D730M1I347M4I535M"),
            false, 60, 36, 100, ContigAlignmentsModifier.AlnModType.NONE);

    final SimpleInterval forwardSpan = new SimpleInterval(
            AssemblyBasedSVDiscoveryTestDataProviderForInversionBreakpoints.chrForLongContig1, 20152030, 20154634);
    final AlignmentInterval region2 = new AlignmentInterval(
            forwardSpan, 3604, contigLength,
            TextCigarCodec.decode("3603H24M1I611M1I1970M"),
            true, 60, 36, 100, ContigAlignmentsModifier.AlnModType.NONE);

    // With the default minimum length neither ordering is flagged as too short.
    Assert.assertFalse( firstAlignmentIsTooShort(region1, region2, StructuralVariationDiscoveryArgumentCollection.DiscoverVariantsFromContigAlignmentsSparkArgumentCollection.DEFAULT_MIN_ALIGNMENT_LENGTH) );
    Assert.assertFalse( firstAlignmentIsTooShort(region2, region1, StructuralVariationDiscoveryArgumentCollection.DiscoverVariantsFromContigAlignmentsSparkArgumentCollection.DEFAULT_MIN_ALIGNMENT_LENGTH) );

    // With a 3000 threshold only the ordering that puts region2 first is flagged.
    final int strictMinLength = 3000;
    Assert.assertFalse( firstAlignmentIsTooShort(region1, region2, strictMinLength) );
    Assert.assertTrue( firstAlignmentIsTooShort(region2, region1, strictMinLength) );
}
 
Example 8
Source File: ReadPosRankSumTestUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test
public void testSNPBetweenTwoDeletions(){
    final int readStart = 100;
    final GATKRead read = ArtificialReadUtils.createArtificialRead(TextCigarCodec.decode("10M10D1M10D10M"));
    read.setPosition("CONTIG", readStart);

    // Variants at offsets 20 and 19 from the read start are both expected to yield read position 10.0.
    final int[] variantOffsets = {20, 19};
    for (final int offset : variantOffsets) {
        final double readPosition = ReadPosRankSumTest.getReadPosition(read, makeVC(readStart + offset)).getAsDouble();
        Assert.assertEquals(readPosition, 10.0);
    }
}
 
Example 9
Source File: CigarUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test(dataProvider = "testData_unclipCigar")
public void testUnclipCigar(final String cigarStrIn, final String expectedCigarStrOut){
    // decode -> strip clips and padding -> encode, then compare the textual form with the expectation
    final Cigar unclipped = CigarUtils.removeClipsAndPadding(TextCigarCodec.decode(cigarStrIn));
    Assert.assertEquals(TextCigarCodec.encode(unclipped), expectedCigarStrOut);
}
 
Example 10
Source File: EventMapUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test(dataProvider = "MNPTest")
public void testMNPs(final String refBases, final String haplotypeBases, final String cigar, final List<Integer> maxMnpDistance, final List<List<String>> expectedAlleles) {
    final Cigar hapCigar = TextCigarCodec.decode(cigar);
    final Haplotype hap = new Haplotype(haplotypeBases.getBytes(), false, 0, hapCigar);
    final GenomeLoc loc = new UnvalidatingGenomeLoc(CHR, 0, 1, refBases.length());

    // The same expected alleles must be produced for every supplied max-MNP distance.
    for (final int maxDist : maxMnpDistance) {
        final EventMap events = new EventMap(hap, refBases.getBytes(), loc, NAME, maxDist);
        final int eventCount = events.getNumberOfEvents();
        Assert.assertEquals(eventCount, expectedAlleles.size());

        final List<VariantContext> foundAlleles = new ArrayList<>(events.getVariantContexts());
        for (int i = 0; i < eventCount; i++) {
            // Each expected entry is a (reference allele, first alternate allele) pair.
            final List<String> expectedPair = expectedAlleles.get(i);
            final VariantContext actual = foundAlleles.get(i);
            Assert.assertEquals(actual.getReference().getDisplayString(), expectedPair.get(0));
            Assert.assertEquals(actual.getAlternateAllele(0).getDisplayString(), expectedPair.get(1));
        }
    }
}
 
Example 11
Source File: AssemblyContigAlignmentsConfigPickerUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
@DataProvider
private Object[][] forRemoveNonUniqueMappings() {
    final List<Object[]> data = new ArrayList<>(20);

    // case zero: only one or two alignments
    AlignmentInterval singleAlignment = fromSAMRecordString("asm000146:tig00004\t2048\tchrUn_JTFH01001925v1_decoy\t600\t60\t646M4D370M\t*\t0\t0\tTGACTGAGCAAGTGTGGGAGTGTGAGTGAATGAGTGAGTGAGTGAATGAGTGACTGAGTGTGAGTGAACGAGTGACTGAGCAAGTGTGTGAGTGACTGAGCGAGTGTGTGTGAGTGAATAAGTGAGTGAGTGAATGAATCAGTGACTGTGTGTGATGAGCAAGTGTATGTGAATGAGTGACTGAGCAAGTGAATGAGTGTGAGTGTGTGAGTGACTGTGAGTGAGTGAATGAGCAAGTGTGAGTGAATGAGTGTGTGTGTGAATGAGCGAGTGAGTGAATAAGTGAGTGAATGAGTGTGCGAGTGAGTGAATGATTGTGAGTGAATGAGTGAGTGAGTAAGTGTGAGCGTGTGAGTGAATGAGTGTGTGTGAGTGAATGAGTGGATGAGTGTGAGTGAATGAGTGACTGAGCGAGTATGTGAGTGAATGAGTGACTGTGAGTGAGTGAGCAAGTGTGAGTGAATGAGTGTATGAGTGAATGAATGAGTGAGCGAGTGTGTGTGACTGAATGAGTGTGAGTGTGTGAGTAAATGAGTGTGTGAATGAGTGACTGTGACTGAGTGTGAATGAGTGACAGCAAGTGTGTGAGTGAATGAGTGTGAATATGAGTGAGTGAATGAGTGAGCAAGTGTGTGAGTGAATGAGTGTATGAGTGAATGAATGAGCGAGTGTGTGTGACTGAATGAGTGTGTGTGAGTAAATGAGTGTGAGTGAATGAGTGAGTGACTGTGAGTGTGAATGAGTGACTGAACAACTGTGTGTGAATGAGTGTATGAGTGAATGAATGAATGAGCGTGTGTGTGAGTGACTGAATGAGAGTGTGAGTAAATGAGTGTGAGTGAATGAGTGAGTGACTGAGTGACTGAATGAGTGACTGAGCAAGTGTGTGAGTGAATGAGTGTGAGTATGAGTGAATGAGTGAGCATGTGTGTGAGTGAGTGAGTGGGTGTGAGTGAGTGAATGAGTGACTGAATGTGTGAGTGTGAGTGAATGAGTGACTGAATGTGAGTGTGACT\t*SA:Z:chr1,4066380,-,441S22M2D18M2I138M2I10M4D28M1I24M10D13M4D49M4D51M4D65M2D50M10I34M2I56M,60,59;chr1,4064439,-,33S55M4I58M4D34M832S,20,15;chr1,4064651,-,255S32M2D99M630S,35,15;chr1,4064590,-,368S73M2D23M552S,60,8;\tMD:Z:314G2A0T0T4G3A317^GTGA370\tRG:Z:GATKSVContigAlignments\tNM:i:10\tAS:i:966\tXS:i:259", true);
    data.add(new Object[]{new GoodAndBadMappings(Collections.singletonList(singleAlignment)), new GoodAndBadMappings(Collections.singletonList(singleAlignment)),
            AssemblyContigAlignmentsConfigPicker.ALIGNMENT_MQ_THRESHOLD, AssemblyContigAlignmentsConfigPicker.ALIGNMENT_LOW_READ_UNIQUENESS_THRESHOLD});

    AlignmentInterval head = new AlignmentInterval(new SimpleInterval("chr1:1-100"), 1, 100, TextCigarCodec.decode("100M94S"), true, 60, 0, 100, ContigAlignmentsModifier.AlnModType.NONE);
    AlignmentInterval tail = new AlignmentInterval(new SimpleInterval("chr1:137-200"), 137, 200, TextCigarCodec.decode("130S64M"), true, 60, 0, 64, ContigAlignmentsModifier.AlnModType.NONE);
    data.add(new Object[]{new GoodAndBadMappings(Arrays.asList(head, tail)),
                          new GoodAndBadMappings(Arrays.asList(head, tail)),
                          AssemblyContigAlignmentsConfigPicker.ALIGNMENT_MQ_THRESHOLD, AssemblyContigAlignmentsConfigPicker.ALIGNMENT_LOW_READ_UNIQUENESS_THRESHOLD
    });

    // case one: some alignments are bad MQ
    int middleAlnMQ = 19;
    AlignmentInterval middle = new AlignmentInterval(new SimpleInterval("chr2:1-29"), 102, 130, TextCigarCodec.decode("101S29M64S"), false, middleAlnMQ, 2, 20, ContigAlignmentsModifier.AlnModType.NONE);
    data.add(new Object[]{new GoodAndBadMappings(Arrays.asList(head, middle, tail)),
                          new GoodAndBadMappings(Arrays.asList(head, tail), Collections.singletonList(middle)),
                          AssemblyContigAlignmentsConfigPicker.ALIGNMENT_MQ_THRESHOLD, AssemblyContigAlignmentsConfigPicker.ALIGNMENT_LOW_READ_UNIQUENESS_THRESHOLD
    });
    middleAlnMQ = 20;
    middle = new AlignmentInterval(new SimpleInterval("chr2:1-29"), 102, 130, TextCigarCodec.decode("101S29M64S"), false, middleAlnMQ, 2, 20, ContigAlignmentsModifier.AlnModType.NONE);
    data.add(new Object[]{new GoodAndBadMappings(Arrays.asList(head, middle, tail)),
                          new GoodAndBadMappings(Arrays.asList(head, middle, tail)),
                          AssemblyContigAlignmentsConfigPicker.ALIGNMENT_MQ_THRESHOLD, AssemblyContigAlignmentsConfigPicker.ALIGNMENT_LOW_READ_UNIQUENESS_THRESHOLD
    });

    // case two: some alignments would be too short after overlap removal

    // first, no overlap but still too short
    middleAlnMQ = 40;
    middle = new AlignmentInterval(new SimpleInterval("chr2:1-9"), 122, 130, TextCigarCodec.decode("121S9M64S"), false, middleAlnMQ, 0, 9, ContigAlignmentsModifier.AlnModType.NONE);
    data.add(new Object[]{new GoodAndBadMappings(Arrays.asList(head, middle, tail)),
                          new GoodAndBadMappings(Arrays.asList(head, tail), Collections.singletonList(middle)),
                          AssemblyContigAlignmentsConfigPicker.ALIGNMENT_MQ_THRESHOLD, AssemblyContigAlignmentsConfigPicker.ALIGNMENT_LOW_READ_UNIQUENESS_THRESHOLD
    });
    // or just long enough
    middle = new AlignmentInterval(new SimpleInterval("chr2:1-10"), 121, 130, TextCigarCodec.decode("120S10M64S"), false, middleAlnMQ, 1, 9, ContigAlignmentsModifier.AlnModType.NONE);
    data.add(new Object[]{new GoodAndBadMappings(Arrays.asList(head, middle, tail)),
                          new GoodAndBadMappings(Arrays.asList(head, middle, tail)),
                          AssemblyContigAlignmentsConfigPicker.ALIGNMENT_MQ_THRESHOLD, AssemblyContigAlignmentsConfigPicker.ALIGNMENT_LOW_READ_UNIQUENESS_THRESHOLD
    });

    // now real data
    String samString = "asm000266:tig00003\t0\tchr1\t10817996\t60\t1017S43M2I870M\t*\t0\t0\tCTTCTGCCGCCCAGGCTCCCCTGGGATTCTGCAGCCTCCTCCTTGATGGCTGCTGGCCCTGCCCACCTGCCGTTCTTGCAGTGGCAAACCTGAGCCCACAGTCCCCTGCTCAAAGCCCATCGGAGGCTCCTGGGGCCTGCAGGGCCTGGTCCAGGTCCCTTCACATGACTCGCAAGGTCCCACCACCCTCTCTGGCCTCACCCTCTCCTCTCTTCGCTGGGGCTCCCCCTCTCCAATGCACTGGCCTGCACTCACTTCCCCAGGCCCAGGTGGTCTAGCCCCCACCTTTGCCCCTGCTGTGGCTTCCCAGGGAATGCTCTTCCTACCTGCTCCCTGCCCCCACCCCTCTGTTGTAAGATCTCAAATGAGACAGCACCTTCCTGGCTCCTGCCTCCCTAGCCTTGACCCCCCTGCAAGTTCCCAGAAACTCTGGCTTTTCCTGCGTGTAGGACATCACCTGGTCCCTGTCTTCAGAGAAGGACATGAAGCAAGCCCACTGGTACTGGCACCTTCATTCAGCTCATTCTTCAACCAGCAAGGATTTATTGAGCACATACTATGAACAGCTGCCAGGGCTGAGCCTGGGGTGCTTGCGCCCCTGAGGACTGGGGCCCTCAGACCCAGGGGGTATGGGTGGAAGAAGAACTTGGCTATTTAGAAAGGGACTCTAGGAAGGCACATGTCATCTCCTCTCCTCCAGGCCTGAGAGCATATACAAGGCCAGTACCATGAGCTAATAATATTTTACTTTTCCCCGTAGAGCACAGCATTGGGCTTGGCATACAGTAGGGGCTCAACCAATGCAGGCAGAAGAGAACTGACAGATGATAAGGTTTTCTTTCTTTCTTTCTTTCTCTCTTTCTCCCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCCTTTCCTTTCCTTTCCTTTCCTTTCCTTCCTTTCCTTTCCTTTCCTTTCCTTTCCCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTTTTGAGACAGAGTCTTGCTCTGCCGCCCAGGCTGGAGTGCAGTGGTGTGATCTCGGTTCACTGCAACCTCCGCCTCCCGGGTTCAAGTGATTCTTGCACCTTAGCCTCCCAAGTAGCTGGGATTACAGTTGCCCCCAACCATGCCTGGCTAATTTTTGTATTTTTAGTAGAGGCAGGGTTTCACCATGTTGGCCAGGCTGGTCTCAAACTCCTGACCTCAGATGATCCACCCGCCTCGGCCTCCAAAAGTGCTGGGATTATAGGCATGAGCCACCGTGCCTGGCCAAAGATAACCCTACACCAGGAACTTCATGAGTTCCAGGAGGGAAAGGCAGACTAGTGTGTGTTGCAGCAGGCAGGGAGGGCTTCCTGAGGGAGGTGCTGAGAACAGGGCCTTGAAGCCCGTGGAGGCTCAAAGTAGTTGGGAGGGAGGAGGGCGGATGCTTTCTAGGGATTGTGGAGACCAGGATACAGGCAACAGGAGCAAGAGGCGTGAGGTTGAAAGCAGGTGGGAGGGGTGGGGCATCTGTACAAACATCGTGGGTGATGTTTAGGAGAGTGCCAGGCTGTGCCTCTGGCCACCACCATACCTAAGACCCCTAAGTCTTGCTCTGGCTGGGGGTGACTGCGGGCCACAGTTCTTGTCTGCAGGGAAACCAATGGCTGCAGTTAAAGACAAGGCTGCCCTCCCCCCAAGCTCCAGAGACTGGGGAGTGCCCCGGGCAGGGCTTGCCCAGACCTGGCACTCCAGCTGCACCCTCCGCCCTGGGACATCTTGTACCCAGGAGGACCTATTAAAGGGACAAAGGTCCCCATGGGGTGCAGGCACCCCAGGCTCA
GCCCTGGCAGCTGGCCCGGGCTTG\t*\tSA:Z:chr1,10817090,+,854M1078S,60,3,843;chr22,22585882,+,945S125M862S,20,0,125;chr8,6150682,-,1005S91M836S,0,3,76;chr11,48640941,+,906S55M971S,60,0,55;\tMD:Z:913\tRG:Z:GATKSVContigAlignments\tNM:i:2\tAS:i:895\tXS:i:134";
    AlignedContig alignedContig = fromPrimarySAMRecordString(samString, true);
    GoodAndBadMappings expected = new GoodAndBadMappings(
            Arrays.asList(fromSAMRecordString("asm000266:tig00003\t2048\tchr1\t10817090\t60\t854M1078S\t*\t0\t0\tCTTCTGCCGCCCAGGCTCCCCTGGGATTCTGCAGCCTCCTCCTTGATGGCTGCTGGCCCTGCCCACCTGCCGTTCTTGCAGTGGCAAACCTGAGCCCACAGTCCCCTGCTCAAAGCCCATCGGAGGCTCCTGGGGCCTGCAGGGCCTGGTCCAGGTCCCTTCACATGACTCGCAAGGTCCCACCACCCTCTCTGGCCTCACCCTCTCCTCTCTTCGCTGGGGCTCCCCCTCTCCAATGCACTGGCCTGCACTCACTTCCCCAGGCCCAGGTGGTCTAGCCCCCACCTTTGCCCCTGCTGTGGCTTCCCAGGGAATGCTCTTCCTACCTGCTCCCTGCCCCCACCCCTCTGTTGTAAGATCTCAAATGAGACAGCACCTTCCTGGCTCCTGCCTCCCTAGCCTTGACCCCCCTGCAAGTTCCCAGAAACTCTGGCTTTTCCTGCGTGTAGGACATCACCTGGTCCCTGTCTTCAGAGAAGGACATGAAGCAAGCCCACTGGTACTGGCACCTTCATTCAGCTCATTCTTCAACCAGCAAGGATTTATTGAGCACATACTATGAACAGCTGCCAGGGCTGAGCCTGGGGTGCTTGCGCCCCTGAGGACTGGGGCCCTCAGACCCAGGGGGTATGGGTGGAAGAAGAACTTGGCTATTTAGAAAGGGACTCTAGGAAGGCACATGTCATCTCCTCTCCTCCAGGCCTGAGAGCATATACAAGGCCAGTACCATGAGCTAATAATATTTTACTTTTCCCCGTAGAGCACAGCATTGGGCTTGGCATACAGTAGGGGCTCAACCAATGCAGGCAGAAGAGAACTGACAGATGATAAGGTTTTCTTTCTTTCTTTCTTTC\t*\tSA:Z:chr1,10817996,+,1017S43M2I870M,60,2;chr22,22585882,+,945S125M862S,20,0;chr8,6150682,-,1005S91M836S,0,3;chr11,48640941,+,906S55M971S,60,0;\tMD:Z:0A356G251A244\tRG:Z:GATKSVContigAlignments\tNM:i:3\tAS:i:843\tXS:i:0", true),
                          fromSAMRecordString("asm000266:tig00003\t2048\tchr22\t22585882\t20\t945S125M862S\t*\t0\t0\tCTTTCCTTTCCTTTCCCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTT\t*\tSA:Z:chr1,10817996,+,1017S43M2I870M,60,2;chr1,10817090,+,854M1078S,60,3;chr8,6150682,-,1005S91M836S,0,3;chr11,48640941,+,906S55M971S,60,0;\tMD:Z:125\tRG:Z:GATKSVContigAlignments\tNM:i:0\tAS:i:125\tXS:i:111", true),
                          fromSAMRecordString("asm000266:tig00003\t0\tchr1\t10817996\t60\t1017S43M2I870M\t*\t0\t0\tCTTCTGCCGCCCAGGCTCCCCTGGGATTCTGCAGCCTCCTCCTTGATGGCTGCTGGCCCTGCCCACCTGCCGTTCTTGCAGTGGCAAACCTGAGCCCACAGTCCCCTGCTCAAAGCCCATCGGAGGCTCCTGGGGCCTGCAGGGCCTGGTCCAGGTCCCTTCACATGACTCGCAAGGTCCCACCACCCTCTCTGGCCTCACCCTCTCCTCTCTTCGCTGGGGCTCCCCCTCTCCAATGCACTGGCCTGCACTCACTTCCCCAGGCCCAGGTGGTCTAGCCCCCACCTTTGCCCCTGCTGTGGCTTCCCAGGGAATGCTCTTCCTACCTGCTCCCTGCCCCCACCCCTCTGTTGTAAGATCTCAAATGAGACAGCACCTTCCTGGCTCCTGCCTCCCTAGCCTTGACCCCCCTGCAAGTTCCCAGAAACTCTGGCTTTTCCTGCGTGTAGGACATCACCTGGTCCCTGTCTTCAGAGAAGGACATGAAGCAAGCCCACTGGTACTGGCACCTTCATTCAGCTCATTCTTCAACCAGCAAGGATTTATTGAGCACATACTATGAACAGCTGCCAGGGCTGAGCCTGGGGTGCTTGCGCCCCTGAGGACTGGGGCCCTCAGACCCAGGGGGTATGGGTGGAAGAAGAACTTGGCTATTTAGAAAGGGACTCTAGGAAGGCACATGTCATCTCCTCTCCTCCAGGCCTGAGAGCATATACAAGGCCAGTACCATGAGCTAATAATATTTTACTTTTCCCCGTAGAGCACAGCATTGGGCTTGGCATACAGTAGGGGCTCAACCAATGCAGGCAGAAGAGAACTGACAGATGATAAGGTTTTCTTTCTTTCTTTCTTTCTCTCTTTCTCCCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCCTTTCCTTTCCTTTCCTTTCCTTTCCTTCCTTTCCTTTCCTTTCCTTTCCTTTCCCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTCTTTTTTGAGACAGAGTCTTGCTCTGCCGCCCAGGCTGGAGTGCAGTGGTGTGATCTCGGTTCACTGCAACCTCCGCCTCCCGGGTTCAAGTGATTCTTGCACCTTAGCCTCCCAAGTAGCTGGGATTACAGTTGCCCCCAACCATGCCTGGCTAATTTTTGTATTTTTAGTAGAGGCAGGGTTTCACCATGTTGGCCAGGCTGGTCTCAAACTCCTGACCTCAGATGATCCACCCGCCTCGGCCTCCAAAAGTGCTGGGATTATAGGCATGAGCCACCGTGCCTGGCCAAAGATAACCCTACACCAGGAACTTCATGAGTTCCAGGAGGGAAAGGCAGACTAGTGTGTGTTGCAGCAGGCAGGGAGGGCTTCCTGAGGGAGGTGCTGAGAACAGGGCCTTGAAGCCCGTGGAGGCTCAAAGTAGTTGGGAGGGAGGAGGGCGGATGCTTTCTAGGGATTGTGGAGACCAGGATACAGGCAACAGGAGCAAGAGGCGTGAGGTTGAAAGCAGGTGGGAGGGGTGGGGCATCTGTACAAACATCGTGGGTGATGTTTAGGAGAGTGCCAGGCTGTGCCTCTGGCCACCACCATACCTAAGACCCCTAAGTCTTGCTCTGGCTGGGGGTGACTGCGGGCCACAGTTCTTGTCTGCAGGGAAACCAATGGCTGCAGTTAAAGACAAGGCTGCCCTCCCCCCAAGCTCCAGAGACTGGGGAGTGCCCCGGGCAGGGCTTGCCCAGACCTGGCACTCCAGCTGCACCCTCCGCCCTGGGACATCTTGTACCCAGGAGGACCTATTAAAGGGACAAAGGTCCCCAT
GGGGTGCAGGCACCCCAGGCTCAGCCCTGGCAGCTGGCCCGGGCTTG\t*\tSA:Z:chr1,10817090,+,854M1078S,60,3;chr22,22585882,+,945S125M862S,20,0;chr8,6150682,-,1005S91M836S,0,3;chr11,48640941,+,906S55M971S,60,0;\tMD:Z:913\tRG:Z:GATKSVContigAlignments\tNM:i:2\tAS:i:895\tXS:i:134", true)),
            Arrays.asList(fromSAMRecordString("asm000266:tig00003\t2064\tchr8\t6150682\t0\t1005S91M836S\t*\t0\t0\tGGAAAGGAAAGGAAAGGAAAGGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGAAAGGGAGAAAGAGAGAAAGAAAGAAAGAAAGA\t*\tSA:Z:chr1,10817996,+,1017S43M2I870M,60,2;chr1,10817090,+,854M1078S,60,3;chr22,22585882,+,945S125M862S,20,0;chr11,48640941,+,906S55M971S,60,0;\tMD:Z:62A0A7A19\tRG:Z:GATKSVContigAlignments\tNM:i:3\tAS:i:76\tXS:i:72", true),
                          fromSAMRecordString("asm000266:tig00003\t2048\tchr11\t48640941\t60\t906S55M971S\t*\t0\t0\tCTTTCCTTTCCTTTCCTTTCCTTTCCTTCCTTTCCTTTCCTTTCCTTTCCTTTCC\t*\tSA:Z:chr1,10817996,+,1017S43M2I870M,60,2;chr1,10817090,+,854M1078S,60,3;chr22,22585882,+,945S125M862S,20,0;chr8,6150682,-,1005S91M836S,0,3;\tMD:Z:55\tRG:Z:GATKSVContigAlignments\tNM:i:0\tAS:i:55\tXS:i:0", true))
    );
    data.add(new Object[]{new GoodAndBadMappings(alignedContig.getAlignments(), Collections.emptyList()),
                          expected,
                          20, 40
    });

    return data.toArray(new Object[data.size()][]);
}
 
Example 12
Source File: SimpleAlleleCounter.java    From abra2 with MIT License 4 votes vote down vote up
private IndelInfo checkForIndelAtLocus(SAMRecord read, int refPos) {
		// Without the "YA" attribute there is no assembled contig info, hence no indel to report.
		final String contigInfo = read.getStringAttribute("YA");
		if (contigInfo == null) {
			return null;
		}

		// The attribute is colon-delimited: field 1 is parsed as the contig position, field 2 as its CIGAR.
		final String[] fields = contigInfo.split(":");
		final int contigPos = Integer.parseInt(fields[1]);
		final Cigar contigCigar = TextCigarCodec.decode(fields[2]);

		// Does the contig contain an indel at the current locus?
		final IndelInfo contigIndel = checkForIndelAtLocus(contigPos, contigCigar, refPos);
		if (contigIndel == null) {
			return null;
		}

		// Does the read itself carry an indel of the same kind at the locus?
		// Partially overlapping indels are allowed to support the contig (should only matter for inserts).
		final IndelInfo readIndel = checkForIndelAtLocus(read.getAlignmentStart(), read.getCigar(), refPos);
		final boolean sameOperator = readIndel != null
				&& readIndel.getCigarElement().getOperator() == contigIndel.getCigarElement().getOperator();
		if (!sameOperator) {
			// Read element doesn't match the contig indel.
			return null;
		}

		contigIndel.setReadIndex(readIndel.getReadIndex());

		// Only insertions need further handling.
		if (contigIndel.getCigarElement().getOperator() != CigarOperator.I) {
			return contigIndel;
		}

		final int readIndex = readIndel.getReadIndex();
		final int readIndelLength = readIndel.getCigarElement().getLength();
		final int contigIndelLength = contigIndel.getCigarElement().getLength();

		if (contigIndelLength == readIndelLength) {
			// The read overlaps the entire insert: capture the inserted bases.
			contigIndel.setInsertBases(read.getReadString().substring(readIndex, readIndex + readIndelLength));
		} else if (readIndelLength < contigIndelLength) {
			// The read carries only part of the insert: adjust the read index by the missing
			// length when the partial insert sits at index 0 or at the last read index.
			final int lengthDiff = contigIndelLength - readIndelLength;
			if (readIndex == 0) {
				contigIndel.setReadIndex(readIndex - lengthDiff);
			} else if (readIndex == read.getReadLength() - 1) {
				contigIndel.setReadIndex(readIndex + lengthDiff);
			}
		}

		return contigIndel;
	}
 
Example 13
Source File: FisherStrandUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
private GATKRead makeRead(final boolean forward) {
    // Unique artificial read with a 10M CIGAR, placed on the requested strand.
    final GATKRead read = ArtificialReadUtils.createUniqueArtificialRead(TextCigarCodec.decode("10M"));
    final boolean reverseStrand = !forward;
    read.setIsReverseStrand(reverseStrand);
    return read;
}
 
Example 14
Source File: SingleContigReferenceAlignerUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 * Aligns NUM_ALIGNS sequences generated from the given reference and compares the aligner's output
 * with the intervals the generation implies. Each sequence is a READ_LENGTH window of the reference
 * that may carry one random insertion or one deletion and may be reverse-complemented.
 *
 * All random choices come from a fixed-seed {@link Random}, so the order of the calls on it below
 * determines the generated data.
 *
 * @param pairAlignment whether to call {@code alignPairs()} on the underlying aligner first.
 * @param reference     reference bases the test sequences are drawn from.
 * @param referenceName name used for the reference in the aligner and in the expected intervals.
 * @throws IOException declared for the try-with-resources over the aligner.
 */
@Test(dataProvider = "testAlignmentData")
public void testAlignment(final boolean pairAlignment, final byte[] reference, final String referenceName)
    throws IOException
{
    try (final SingleSequenceReferenceAligner<byte[], AlignedContig> aligner =  SingleSequenceReferenceAligner.contigsAligner(referenceName, reference,
            a -> "ctg", a -> a);) {
        Assert.assertNotNull(aligner.getAligner());
        if (pairAlignment) {
            aligner.getAligner().alignPairs();
        }
        // fixed seed keeps the generated test sequences reproducible
        final Random rdn = new Random(13111);
        final RandomDNA rdnDNA = new RandomDNA(rdn);
        final List<List<AlignmentInterval>> expected = new ArrayList<>(NUM_ALIGNS << 1);
        final List<byte[]> seqs = new ArrayList<>(NUM_ALIGNS << 1);
        for (int i = 0; i < NUM_ALIGNS; i++) {
            // start/end are 1-based inclusive positions on the reference (see the copyOfRange call below)
            final int start = rdn.nextInt(reference.length - READ_LENGTH) + 1;
            final boolean forward = rdn.nextBoolean();
            // a sequence gets an insertion, a deletion, or neither -- never both
            final boolean insert = rdn.nextDouble() < 0.1;
            final boolean deletion = !insert && rdn.nextDouble() < 0.1;
            // indel length is in [10, 19]; indelStart is a 1-based offset within the sequence
            final int indelLength = insert || deletion ? rdn.nextInt(10) + 10 : 0;
            final int indelStart = insert || deletion ? rdn.nextInt((int) (READ_LENGTH  * .50)) + 25 : -1;
            final int end = start + READ_LENGTH - 1;
            final byte[] templateSeq = Arrays.copyOfRange(reference, start - 1, end);
            final byte[] actualSeq;
            if (insert) {
                // grow the array, shift the tail right by indelLength, then fill the gap with random bases
                actualSeq = Arrays.copyOf(templateSeq, templateSeq.length + indelLength);
                System.arraycopy(actualSeq, indelStart - 1, actualSeq, indelStart - 1 + indelLength, templateSeq.length - indelStart + 1);
                rdnDNA.nextBases(actualSeq, indelStart - 1, indelLength);
            } else if (deletion) {
                // shrink the array and copy the tail that follows the deleted segment over it
                actualSeq = Arrays.copyOf(templateSeq, templateSeq.length - indelLength);
                System.arraycopy(templateSeq, indelStart - 1 + indelLength, actualSeq, indelStart - 1, templateSeq.length - indelStart + 1 - indelLength);
            } else {
                actualSeq = templateSeq.clone();
            }

            // for insertions, re-draw the base right after the inserted segment until it differs from the
            // first inserted base (presumably to limit ambiguity in where the insertion can be placed)
            while (insert && actualSeq[indelStart - 1] == actualSeq[indelStart + indelLength - 1]) {
                actualSeq[indelStart + indelLength - 1] = rdnDNA.nextBase();
            }
            if (!forward) {
                SequenceUtil.reverseComplement(actualSeq);
            }
            seqs.add(actualSeq);
            // expected CIGAR in reference orientation: all-match, or match/indel/match around indelStart
            final Cigar cigar = (!insert && !deletion) ? TextCigarCodec.decode(READ_LENGTH + "M"):
                    (insert ? TextCigarCodec.decode( "" + (indelStart - 1) + "M" + indelLength + "I" + (READ_LENGTH - indelStart + 1) + "M")
                            : TextCigarCodec.decode( "" + (indelStart - 1) + "M" + indelLength + "D" + (READ_LENGTH - indelStart + 1 - indelLength) + "M"));

            // the CIGAR is inverted for reverse-strand sequences before being stored in the expected interval
            expected.add(Collections.singletonList(new AlignmentInterval(new SimpleInterval(referenceName, start, end), 1, actualSeq.length, !forward ? CigarUtils.invertCigar(cigar) : cigar
                    , forward, 0, 0, 0, null)));
        }
        // both align overloads must agree: same contigs, and the map keys must be the contig sequences
        final List<AlignedContig> results = aligner.align(seqs);
        final Map<byte[], AlignedContig> mapResult = aligner.align(seqs, (b, a) -> b);
        Assert.assertEquals(results, new ArrayList<>(mapResult.values()));
        Assert.assertEquals(new ArrayList<>(mapResult.keySet()), mapResult.values().stream().map(AlignedContig::getContigSequence).collect(Collectors.toList()));
        for (int i = 0; i < NUM_ALIGNS; i++) {
            final List<AlignmentInterval> actualValue = results.get(i).getAlignments();
            final List<AlignmentInterval> expectedValue = expected.get(i);
            Assert.assertEquals(actualValue.size(), 1);
            Assert.assertEquals(actualValue.get(0).forwardStrand, expectedValue.get(0).forwardStrand);
            // the expected CIGAR string is passed as the failure message to help diagnose span mismatches
            Assert.assertEquals(actualValue.get(0).referenceSpan, expectedValue.get(0).referenceSpan, expectedValue.get(0).cigarAlong5to3DirectionOfContig.toString());
            Assert.assertEquals(actualValue.get(0).startInAssembledContig, expectedValue.get(0).startInAssembledContig);
            Assert.assertEquals(actualValue.get(0).endInAssembledContig, expectedValue.get(0).endInAssembledContig);
            final Cigar expectedCigar = expectedValue.get(0).cigarAlong5to3DirectionOfContig;
            final Cigar actualCigar = actualValue.get(0).cigarAlong5to3DirectionOfContig;
            if (!expectedCigar.equals(actualCigar)) { // small differences may occur due to ambiguous indel location. So we check that they are small differences indeed:
                Assert.assertEquals(expectedCigar.numCigarElements(), actualCigar.numCigarElements()); // same number of elements
                Assert.assertEquals(expectedCigar.getCigarElements().stream().map(CigarElement::getOperator).collect(Collectors.toList()),
                        actualCigar.getCigarElements().stream().map(CigarElement::getOperator).collect(Collectors.toList())); // same operators sequence.
                // then we check the total lengths per operator (must be the same):
                final Map<CigarOperator, Integer> expectedLengthByOperator = expectedCigar.getCigarElements().stream()
                        .collect(Collectors.groupingBy(CigarElement::getOperator,
                                Collectors.reducing(0, CigarElement::getLength, (a, b) -> a + b)));
                final Map<CigarOperator, Integer> actualLengthByOperator = actualCigar.getCigarElements().stream()
                        .collect(Collectors.groupingBy(CigarElement::getOperator,
                                Collectors.reducing(0, CigarElement::getLength, (a, b) -> a + b)));
                Assert.assertEquals(actualLengthByOperator, expectedLengthByOperator);
                // finally we require the length difference of every element to be strictly less than 10 bases.
                for (int j = 0; j < expectedCigar.numCigarElements(); j++) {
                    Assert.assertTrue(Math.abs(expectedCigar.getCigarElement(j).getLength() - actualCigar.getCigarElement(j).getLength()) < 10, "actual: " + actualCigar + " != expected: " + expectedCigar);
                }
            }
        }
    }
}
 
Example 15
Source File: CpxVariantInterpreterUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 * Builds the fixture list of {@link ValidLocalData} entries used to exercise the overlap-yielding strategy.
 *
 * <p>Every entry added below is constructed from, in order: two alignments parsed from SAM-record text via
 * {@code fromSAMRecordString(..., true)}, an {@code int}, a {@code boolean}, a {@code null}, and a
 * {@link Tuple2} holding the two expected {@link AlignmentInterval}s. In each entry exactly one interval of
 * the expected pair is tagged {@code UNDERGONE_OVERLAP_REMOVAL} (with {@code NO_NM}/{@code NO_AS}) and the
 * other is tagged {@code NONE}.</p>
 *
 * <p>NOTE(review): in all fixtures here the {@code boolean} is {@code true} exactly when the first interval of
 * the expected pair is the one tagged {@code UNDERGONE_OVERLAP_REMOVAL}; presumably the {@code int} is the
 * overlap length and the {@code boolean} selects which alignment yields -- confirm against the
 * {@code ValidLocalData} constructor.</p>
 *
 * <p>NOTE(review): this method is annotated {@code @BeforeTest} although it is {@code private static} and
 * returns a value -- confirm that the annotation is intended.</p>
 *
 * @return a new mutable list containing the nine fixtures built below
 */
@BeforeTest
private static List<ValidLocalData> validInputsToOverlapYieldingStrategy() {
    // Initial capacity is 20; nine entries are added below.
    final List<ValidLocalData> data = new ArrayList<>(20);

    // NOTE(review): this AlignmentInterval is constructed and immediately discarded (never assigned or added
    // to data); it looks like a leftover -- confirm the constructor has no needed side effect before removing.
    new AlignmentInterval(new SimpleInterval("chr1", 202317371, 202317402), 1104, 1135, TextCigarCodec.decode("1085H18S32M1393H"), false, 60, AlignmentInterval.NO_NM, AlignmentInterval.NO_AS, ContigAlignmentsModifier.AlnModType.UNDERGONE_OVERLAP_REMOVAL);

    // Taken from a real 3-alignment event; only the first two alignments are used here. Although the second
    // alignment should yield the overlap to the first when the whole contig is considered, the pair is reused
    // for the case where the higher chromosome should yield to the lower chromosome.
    data.add(new ValidLocalData(fromSAMRecordString("asm004677:tig00000\t2064\tchr3\t15737523\t60\t1425H377M1D726M\t*\t0\t0\tAGAGCAATCATAGCTCACCGTAACTTCAAATTCCTTGGCTTAAGTGATCCTCCCACCTTTGCTTCCTGAGTAGCTAGGACTACAGGTGCATGCCACCACCCCTGGCTAATTTTTTAGTTATTTTGTAGAGACAAGGTCTTGCTATGTTGCCCAGGCTGGTCTCAAACTCCTGGCCTCAAGTGATTCTCCTTCCTTGGCCTCCTGAAGTGTTGGGGTTACAAACATGAGCCAGTCATAGAGCCACACCTGGCTCTTTGAACAATTTTAAGCAATAGATTTTTTACACTGCATATATAAAAATTAAAAACATACTATTTCTCAATGTATTATACATGCTATATTTGACAAAATGAAAAATCACTAGGATCAATTTCATTAAAAAAAAAAAAAGACACAACATAGACAAGGTTCTTTTTGCAGTGCTGCTTAGAGAGAAAGTTAAGGCAATTTAAGAGTCTACTGTCCAAGGGATTTATCTCCCAGAAGCGCATGGAAAACATCACAACACTACCTCTATAAATTGGACTCATGGCAGGATTCCAAGGAATTCAGTATGCCATCAACATTAAGCAATATAATCAGAACTAAACTCAACAAAATTTTTCTTTCAGTGATAATCCTAATTTTTGGTAAATTATCGGGGAACCTGCCCCCAATAATTCAAAGTGAGTCCTTTTCTATTTTCCCTAAGTGTCGGCTGGTCTGAGAAATAAAGGGAAAGAGTACAAAAGAGAGAAATTTTAAAGCTGGGTGTCTGGGGAGACATCACATGCCCAAGCCGCAAAACCAGCAAGTTTTTATTAGTGATTTTCAAAAGGGGAGGGAGAGTACGAATAGGGTATGAGTCACAGAGATCACATGCTTCACAAGGTAATAAAATATTACAAGGCAAATGGAGGCAGGGCGAGATCACAGGACCAGGTGAAATTAACATTGCTAATGAAGTTTTGGGCACACATTGTCATTGATAACATCTTATCAGGAGACAGGGGCCGGGCACAGTGGCGCGTGCCTGTAGTCCCAGCTACTCGGGAGGCTGAGGTGGGAGGATCGCTTGAGCCCAGGAGTTCTGGGCTGTAGTGCGCTATGCCGATCGGGTGTCC\t*\tSA:Z:chr3,15736242,-,1282M1246S,60,0;chr1,202317371,-,1393S50M1085S,60,1;\tMD:Z:164C19C192^A449T276\tRG:Z:GATKSVContigAlignments\tNM:i:4\tAS:i:1071\tXS:i:161", true),
                                fromSAMRecordString("asm004677:tig00000\t2064\tchr1\t202317371\t60\t1393H50M1085H\t*\t0\t0\tGTCTTGCTCTGTTGCCCAGGCTGGAGTGCAGTAGAGCAATCATAGCTCAC\t*\tSA:Z:chr3,15736242,-,1282M1246S,60,0;chr3,15737523,-,1425S377M1D726M,60,4;\tMD:Z:41T8\tRG:Z:GATKSVContigAlignments\tNM:i:1\tAS:i:45\tXS:i:0", true),
                                18, true,  null,
                                new Tuple2<>(new AlignmentInterval(new SimpleInterval("chr3", 15737541, 15738626), 1, 1085, TextCigarCodec.decode("726M1D359M18S1425H"), false, 60, AlignmentInterval.NO_NM, AlignmentInterval.NO_AS, ContigAlignmentsModifier.AlnModType.UNDERGONE_OVERLAP_REMOVAL),
                                             new AlignmentInterval(new SimpleInterval("chr1", 202317371, 202317420), 1086, 1135, TextCigarCodec.decode("1085H50M1393H"), false, 60, 1, 45, ContigAlignmentsModifier.AlnModType.NONE))
    ));

    // DEL '+'-rep
    data.add(new ValidLocalData(fromSAMRecordString("asm016186:tig00003\t2048\tchr10\t6055203\t60\t221M1064H\t*\t0\t0\tAAATGCATTAAAGGTTGAGAAGTACTGCTTGAGCATACCTGTTCCTAGTGAATCAGACCAACCATGGAAATCATTATTCCATCTCTCTAATTTGGTGAGTTTCAATCCTAAGTGCGATTTGGAAGTATTCAAGGAACTGGAAGGGAAGGTATTGTGATGTTCGGTTCTGTTCCCCAGAGAGTCCGCTTAATTCATCTTATGAGTCTTGGCCATCACTTTTT\t*\tSA:Z:chr10,6055906,+,205S1080M,60,0;\tMD:Z:221\tRG:Z:GATKSVContigAlignments\tNM:i:0\tAS:i:221\tXS:i:0", true),
                                fromSAMRecordString("asm016186:tig00003\t0\tchr10\t6055906\t60\t205S1080M\t*\t0\t0\tAAATGCATTAAAGGTTGAGAAGTACTGCTTGAGCATACCTGTTCCTAGTGAATCAGACCAACCATGGAAATCATTATTCCATCTCTCTAATTTGGTGAGTTTCAATCCTAAGTGCGATTTGGAAGTATTCAAGGAACTGGAAGGGAAGGTATTGTGATGTTCGGTTCTGTTCCCCAGAGAGTCCGCTTAATTCATCTTATGAGTCTTGGCCATCACTTTTTAAAGCATCCTGGATATACACTAATGTACAGCTGGCGTTTTGAACCATTACTCTCCACACATTTCCCAGTGGGAACTGTGTTTTCTTTTTGACAGAAGCAAACCACGGAGTCGCCTTGAGATCAGTCAAAACACTGAGTAATCTTAGAGTGGAGAATACCTAAAATTTCTAGAGACAATTATATTATTGTTTTCCCCTAAATGAGTGAGTTACTTGAGAATATGGTGGGGTATTCAGTGGGGGCTATTGGCAAACACTGTTGGCTGACTTCAGAATGGGAGCCATTCAGCAGCAGTTTCATCTCCCGTGTTAAACTGTGAGCACAAGAGGATAAGAGCTGGAACTTTCTTCATCTCTGTAATCCTCATGCATGAATGGATGGATGAAGGAATATCACTGAGAATTCATGTAAAGATGTGCAGATGTGCAGGGAATATTGAACTGAAATTTAAGAGGGCTGGATTCAAGTTCAGCTTCTATGTGAACTCTTTCAGTGAAGAAGGTCTTATAACTTTTAGAAGTAATCAACTGAATTAAGATTCTCTTTTCAGCTGGGCACAGTGGCTCATGCCTGTAATCCCAGCACTTTGGAAGGCCCATGCAGATGGATGGCTTGAGGCTCGGAGTTCAAGACCAGCCTACGCAACATAGCAAAAACCCCCTCTCTACTAAAATTACAAAAAAGGAGCCAGGCATGGTGGTGCACACCTGTAATCCCAGCTACTCAGGAGACAGAGGCAGGAGAATCACTTGAACCCAGGAGCTGAAGGTTGCAGTGAGTTGAGATTGCACCACTGTGCTCTAGCCTGGGCGACAGAGCGAAACTGTCCAAAAAAAAAAAAGATTCCCCTTTCTCTTCTCTCAGGGAATGGCCTGGGATAGAGAAGAGAAACAGGTTCCCACTCATGTAAAACTTCTCCCATGGGAGACAAGGACATTGTTTGAGGGGAAAATTCCTACACAAGCAAACAAACAGCAGAGGACCCCGCCCTCCCTGTCCAGGGCAGGAGCACAGTGGACCACCTGCTGCCCCTGTGTCTTGGGGCCTCTCTCCCTGGAATGTCA\t*\tSA:Z:chr10,6055203,+,221M1064S,60,0;\tMD:Z:1080\tRG:Z:GATKSVContigAlignments\tNM:i:0\tAS:i:1080\tXS:i:39", true),
                          16, true, null,
                                new Tuple2<>(new AlignmentInterval(new SimpleInterval("chr10", 6055203, 6055407), 1, 205, TextCigarCodec.decode("205M16S1064H"), true, 60, AlignmentInterval.NO_NM, AlignmentInterval.NO_AS, ContigAlignmentsModifier.AlnModType.UNDERGONE_OVERLAP_REMOVAL),
                                             new AlignmentInterval(new SimpleInterval("chr10", 6055906, 6056985), 206, 1285, TextCigarCodec.decode("205S1080M"), true, 60, 0, 1080, ContigAlignmentsModifier.AlnModType.NONE))
    ));

    // DEL '-'-rep
    data.add(new ValidLocalData(fromSAMRecordString("asm011675:tig00000\t2064\tchr7\t5803446\t60\t1046H558M\t*\t0\t0\tATATGTTTCCATGATTTCCATGAGGAAGGCATGTGCTCTGTCCCTTTCTTCCCAATGCTCACTGCGTCTTTTTTTTTTTAATAGACTTGACTTTTTTAGGCCAGTTTTAGATTCGTAGCAACATTGAGCGAAAGGTACAGAGAGTTCTCACATGTTCCCTGCCCGTGCTAATAGCCTCTCCCATTATCATCTTCCACCAAAGTGGTACATTTGTTACAATCAGGGAACCTATATTGACATCATTTTCACCCAAAGTCCACAGTTTGCATTAGGGTCCCTCTTGGTGTGGTGCATGTCATGGGCTTGGATAAATGTGTCAGAACATGCATCCACTGTTAGAGCTCTCTCAGAGGCGTTTCCCTGCCCGAAAAATCCTCCGTGCTCTGCCTCTTCATGCATCCCTCCCTTCATGCATCCGTCCCAGGGACAAAAGGTTGTCCCTGGCAACCTTTTGTTTCCAGACCCTAGCAACGGATCCTTTTACTGTCTCCATAGTTGGGCCTTTTCTGAATGTAATATAGTTGGAATCATACAGCATTAATTTTTTTTTTCCACTGA\t*\tSA:Z:chr7,5802154,-,1009M1D120M475S,60,4;\tMD:Z:8G325C223\tRG:Z:GATKSVContigAlignments\tNM:i:2\tAS:i:548\tXS:i:0", true),
                                fromSAMRecordString("asm011675:tig00000\t16\tchr7\t5802154\t60\t1009M1D120M475S\t*\t0\t0\tATATTTTCTGAGATTTATAAGCTTTTTTAAAAAATTAAGGGCTGGGCACAGTGGTTCACACTTGTAAGTGCAGCACTTTGGGAGGCTAAGGCAGGAGGATTGATTGAGGTCAGGAGTTGGAGGCTACAGTGAGCTATGATTGCACCACTGCACTCTAGCCTGGGAGACAGAGTGAGACCCTGACTCAAACAGTAATTAAATCAATAAAATTTAGAAGTTAAGATTCTTCAGCCTCTTTTGGGCTGGGCATGGTCGCTCAAGCTTGTAATCCCAGCACTCTGGGAGGCTGTGGCAGGCTGATCACTTGAGGCCAGGAGTTTGAGACCAACCTGGCCAACATTGTGAAACCCCATCTCTACTAAAAATACAGAAATTAGCCGGTGTGGTGATGCATGCCTGTAATCCCAGCTGTTCAGGAGGCTGAGACAGGAGAATTGCTTGAACTCGGGAGGTGGAGGTTGCAGTGAGCTGAGAGCATGTCACTCTATTCCAGCCCGGGCGACAGAACTAGACTCCGTCTCAAAAAAAAAAAAAAAAAGATTCTTCAGTCTCTTTTGATCTTCCTGTGCCCACTTTATGGTGCCCGGAGCTGCTGATGTTCAGATTTGCCATGGGCGGTGCTCCCCTACATCTGAAGATGCAAAGATCTCTCTTCTTCCTTGTCACCTAATCCTGCTGGCCTTCTCAGGCTCATCTGCAGAAGACCCCACTCAAAAGTAGGGTCTGGCCAGCTGCGGTGGCTCACGCTTGTAATCCCAGCACTTTGGGAGGCTGAGGCAGGTGGGATCACCTGAGGTCAGGAGTTCAAGAACAGCCTGACCAATGTGGCGAAACCCTGTCTCTACTAAAAAATACCAAAATTAGCCAGGCGTAGTGGTGGGCGCCTATAACCCCATCTACTCGGGAGGCTGAGGCTGGAGAATAGCTTGAACCTGGGGGTTGAAGGTTGCAGTGAGTCAAGATGATGCCACTGCACTCCAGTCTGGGTGAAAGAGCAAAACTCCATCTCAAAAAAAAAAAAAAAAAGGGGGGGTCTTCTTCAGAAGATATGTTTCCATGATTTCCATGAGGAAGGCATGTGCTCTGTCCCTTTCTTCCCAATGCTCACTGCGTCTTTTTTTTTTTAATAGACTTGACTTTTTTAGGCCAGTTTTAGATTCGTAGCAACATTGAGCGAAAGGTACAGAGAGTTCTCACATGTTCCCTGCCCGTGCTAATAGCCTCTCCCATTATCATCTTCCACCAAAGTGGTACATTTGTTACAATCAGGGAACCTATATTGACATCATTTTCACCCAAAGTCCACAGTTTGCATTAGGGTCCCTCTTGGTGTGGTGCATGTCATGGGCTTGGATAAATGTGTCAGAACATGCATCCACTGTTAGAGCTCTCTCAGAGGCGTTTCCCTGCCCGAAAAATCCTCCGTGCTCTGCCTCTTCATGCATCCCTCCCTTCATGCATCCGTCCCAGGGACAAAAGGTTGTCCCTGGCAACCTTTTGTTTCCAGACCCTAGCAACGGATCCTTTTACTGTCTCCATAGTTGGGCCTTTTCTGAATGTAATATAGTTGGAATCATACAGCATTAATTTTTTTTTTCCACTGA\t*\tSA:Z:chr7,5803446,-,1046S558M,60,2;\tMD:Z:516A337T154^A45G74\tRG:Z:GATKSVContigAlignments\tNM:i:4\tAS:i:1097\tXS:i:1029", true),
                          123, false, null,
                                new Tuple2<>(new AlignmentInterval(new SimpleInterval("chr7", 5803446, 5804003), 1, 558, TextCigarCodec.decode("558M1046H"), false, 60, 2, 548, ContigAlignmentsModifier.AlnModType.NONE),
                                             new AlignmentInterval(new SimpleInterval("chr7", 5802154, 5803159), 599, 1604, TextCigarCodec.decode("598S1006M"), false, 60, AlignmentInterval.NO_NM, AlignmentInterval.NO_AS, ContigAlignmentsModifier.AlnModType.UNDERGONE_OVERLAP_REMOVAL))
    ));

    // INV55 '+'-rep
    data.add(new ValidLocalData(fromSAMRecordString("asm000295:tig00009\t0\tchr1\t13235227\t60\t570M151S\t*\t0\t0\tGGATTACAGGCGTGAGCCACCGCGCCCGGCCGGGGGACTCTATCTCAAAAAAAAAAAAAAAAAATTCAGTAGTAAAACTTTTGGTTAGCAGGGCACGGCTGCTCACGCCTGTAATCCCAGCACTTTGGGAGGCCGAGGCGGGCAGATCATGAGGTCAGGAGATCGACACCATCCTGGCTAACATGGTGAAACCGCATCTCTACTAAAAATAGAAAAAAAATTAGCCAGGCGTGGTGGCAGGTGCCTGTAGTCCCAGCTACTCAGGAGGCTGAGGCGGGAGAATGGCATGAACCCAGGAGGCAGAGCTTGCAGTGAGCCAAGATCATGCCACTGCACTCCAGCCTCGGTGACAGAGCAAGACTCCGTCTCAAAAATAAAAAACAAAAAAAAACTTTCGGTTAGTGTAATCTAGTCTTCCCTGTAGATGTAGCTAATTTTATTTTATTTTTATTATTATTTTTATTGAGACAGAGTCTTCCTCTGTCTGCCAGACCGGAGTACAATGGTGCGATCTCGGCTCACTGCAACCTCCATCTCCCGAGTTCAAGCGATTCTCCTGCCTCAGCCTCCTGAGTGGCTGGGATTACAAATGTGCACCACCACGCTTTGCTAAGTTTTGTATTTTTACTAGAGACAGCGTTTCCCATGTTGCCCAGGCTGGTCTTGAACTTGTGATCTCTGGTGATCTGCCCACCTCGGCCTCCCAAAGTTGTTGGGAGTG\t*\tSA:Z:chr1,13384970,-,218M503S,60,1;\tMD:Z:531T0G0C4T1G7T15A5\tRG:Z:GATKSVContigAlignments\tNM:i:7\tAS:i:535\tXS:i:0", true),
                                fromSAMRecordString("asm000295:tig00009\t2064\tchr1\t13384970\t60\t218M503H\t*\t0\t0\tCACTCCCAACAACTTTGGGAGGCCGAGGTGGGCAGATCACCAGAGATCACAAGTTCAAGACCAGCCTGGGCAACATGGGAAACGCTGTCTCTAGTAAAAATACAAAACTTAGCAAAGCGTGGTGGTGCACATTTGTAATCCCAGCCACTCAGGAGGCTGAGGCAGGAGAATCGCTTGAACTCGGGAGATGGAGGTTGCAGTGAGCCGAGATCGCACCA\t*\tSA:Z:chr1,13235227,+,570M151S,60,7;\tMD:Z:212A5\tRG:Z:GATKSVContigAlignments\tNM:i:1\tAS:i:213\tXS:i:0", true),
                          67, true, null,
                                new Tuple2<>(new AlignmentInterval(new SimpleInterval("chr1", 13235227, 13235729), 1, 503, TextCigarCodec.decode("503M218S"), true, 60, AlignmentInterval.NO_NM, AlignmentInterval.NO_AS, ContigAlignmentsModifier.AlnModType.UNDERGONE_OVERLAP_REMOVAL),
                                             new AlignmentInterval(new SimpleInterval("chr1", 13384970, 13385187), 504, 721, TextCigarCodec.decode("503H218M"), false, 60, 1, 213, ContigAlignmentsModifier.AlnModType.NONE))
    ));

    // INV55 '-'-rep
    data.add(new ValidLocalData(fromSAMRecordString("asm001039:tig00012\t2048\tchr1\t81195731\t60\t143M145H\t*\t0\t0\tTCACTAAATTCAGTACATACTCAAGGAATGAGGAAGCAAGCTCCCCCCTCCTGGAGGAAGGAGTATCAAATATCTTGTAATAATTAATAAATATTTGAAGGTAGACATTTTGAGGCTATGCCGATATCCTGTTTTTCTTTAAA\t*\tSA:Z:chr1,81194519,-,148M140S,60,0;\tMD:Z:122A20\tRG:Z:GATKSVContigAlignments\tNM:i:1\tAS:i:138\tXS:i:22", true),
                                fromSAMRecordString("asm001039:tig00012\t16\tchr1\t81194519\t60\t148M140S\t*\t0\t0\tTCTAAAAACTGCTAAGAAACTCAAGTTTTCTGAAGTGGTCTGTTCGGAGGTTTGTGCATTTTTCAGTGATCTGTAACAATGTATTTTTCTTCCCACCATCTACTGCAGGTGGGTTCACAGGTGGGACCTGAGGAGCATTTGGGGATTTAAAGAAAAACAGGATATCGGCATAGCCTCAAAATGTCTACCTTCAAATATTTATTAATTATTACAAGATATTTGATACTCCTTCCTCCAGGAGGGGGGAGCTTGCTTCCTCATTCCTTGAGTATGTACTGAATTTAGTGA\t*\tSA:Z:chr1,81195731,+,143M145S,60,1;\tMD:Z:148\tRG:Z:GATKSVContigAlignments\tNM:i:0\tAS:i:148\tXS:i:19", true),
                          3, false, null,
                                new Tuple2<>(new AlignmentInterval(new SimpleInterval("chr1", 81195731, 81195873), 1, 143, TextCigarCodec.decode("143M145H"), true, 60, 1, 138, ContigAlignmentsModifier.AlnModType.NONE),
                                             new AlignmentInterval(new SimpleInterval("chr1", 81194519, 81194663), 144, 288, TextCigarCodec.decode("143S145M"), false, 60, AlignmentInterval.NO_NM, AlignmentInterval.NO_AS, ContigAlignmentsModifier.AlnModType.UNDERGONE_OVERLAP_REMOVAL))
    ));

    // INV33 '+'-rep
    data.add(new ValidLocalData(fromSAMRecordString("asm000619:tig00014\t2064\tchr1\t34802227\t60\t149H126M\t*\t0\t0\tGGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTTGGTTT\t*\tSA:Z:chr1,54510617,+,75S200M,60,0;\tMD:Z:126\tRG:Z:GATKSVContigAlignments\tNM:i:0\tAS:i:126\tXS:i:0", true),
                                fromSAMRecordString("asm000619:tig00014\t0\tchr1\t54510617\t60\t75S200M\t*\t0\t0\tAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCAAACCCGAAAAACAAACTAAAAAACCCCACAAAAACAAACAAAACAAAATGCAGTGTATTTGGAGAGAACGGGTGTCTGGTTATTTTGTGCTTTGTATCAAGTTAGCCCAAAACTTAGTGGCATAAAACAATCAAGCATTGTGCTCGCAGATTAT\t*\tSA:Z:chr1,34802227,-,149S126M,60,0;\tMD:Z:200\tRG:Z:GATKSVContigAlignments\tNM:i:0\tAS:i:200\tXS:i:22", true),
                          51, false, null,
                                new Tuple2<>(new AlignmentInterval(new SimpleInterval("chr1", 34802227, 34802352), 1, 126, TextCigarCodec.decode("126M149H"), false, 60, 0, 126, ContigAlignmentsModifier.AlnModType.NONE),
                                             new AlignmentInterval(new SimpleInterval("chr1", 54510668, 54510816), 127, 275, TextCigarCodec.decode("126S149M"), true, 60, AlignmentInterval.NO_NM, AlignmentInterval.NO_AS, ContigAlignmentsModifier.AlnModType.UNDERGONE_OVERLAP_REMOVAL))
    ));

    // INV33 '-'-rep
    data.add(new ValidLocalData(fromSAMRecordString("asm016915:tig00033\t16\tchr10\t73660151\t60\t140S145M\t*\t0\t0\tCCTTTCTCTGGATCTCATCTGGAATTGGCCTCACAGTTTGTGAACAGCCCAGAAATAGCCAATGTCTTAGCTTTGGAACGTTTGCCACTTTCCAGCTACAGCTGGACCTTGTATCCTGGTTTGGTTCCTGATGACTTTCAAAGAACAAATGGCTTCCAGCAAGAGAAAAAGGGGATGCAACATTTTTACAAATTATTTCTTTTTTTATTAAAAAAATTTTAAGTTAAATGCTAAAGATATGTTTAACCTCTCTGATACTGACTTGCTCATGAGAAGAAAGAGAGA\t*\tSA:Z:chr10,73659956,+,143S142M,60,0;\tMD:Z:145\tRG:Z:GATKSVContigAlignments\tNM:i:0\tAS:i:145\tXS:i:74", true),
                                fromSAMRecordString("asm016915:tig00033\t2048\tchr10\t73659956\t60\t143H142M\t*\t0\t0\tTTTGAAAGTCATCAGGAACCAAACCAGGATACAAGGTCCAGCTGTAGCTGGAAAGTGGCAAACGTTCCAAAGCTAAGACATTGGCTATTTCTGGGCTGTTCACAAACTGTGAGGCCAATTCCAGATGAGATCCAGAGAAAGG\t*\tSA:Z:chr10,73660151,-,140S145M,60,0;\tMD:Z:142\tRG:Z:GATKSVContigAlignments\tNM:i:0\tAS:i:142\tXS:i:112", true),
                          2, true, null,
                                new Tuple2<>(new AlignmentInterval(new SimpleInterval("chr10", 73660153, 73660295), 1, 143, TextCigarCodec.decode("143M142S"), false, 60, AlignmentInterval.NO_NM, AlignmentInterval.NO_AS, ContigAlignmentsModifier.AlnModType.UNDERGONE_OVERLAP_REMOVAL),
                                             new AlignmentInterval(new SimpleInterval("chr10", 73659956, 73660097), 144, 285, TextCigarCodec.decode("143H142M"), true, 60, 0, 142, ContigAlignmentsModifier.AlnModType.NONE))
    ));

    // invdup one yield to two
    data.add(new ValidLocalData(fromSAMRecordString("asm031213:tig00068\t16\tchrUn_GL000195v1\t49574\t48\t125S270M\t*\t0\t0\tCCAACATGAAGAAACCCCGTCTCTACTAAAAATACAAAATTATCCAGGTGTGGTGTTGTATGCCTGTAATCCTAGCTACTCGGGAGGCCGAGGCAGGAGAATCGCTTGAACCCAGGAGGTGGAGGTTACAGGCATACAACACCACACCTGGATAATTTTGTATTTTTAGTAGAGACGGGGTTTCTTAATGTTGGTCAGGCTGGTCTCATACTCCTGATCTCAGATCATCTGCCCACCTAGGCCTCCCAAAATGCAGGGATTACAGGCATGAGTCACAATGCCCGGCTGTAATTCCCTCTCTTTTATACCTTAGATTTGAATAATTTTTGCTGGATTCTTCAAACATGAAGTATTTTTTGAATTGGAAACTAACTGAATGACTAACTGGTAAGTAG\t*\tSA:Z:chrUn_GL000195v1,49512,+,264S131M,48,1;\tMD:Z:61C21A14G69T70A30\tRG:Z:GATKSVContigAlignments\tNM:i:5\tAS:i:245\tXS:i:216", true),
                                fromSAMRecordString("asm031213:tig00068\t2048\tchrUn_GL000195v1\t49512\t48\t264H131M\t*\t0\t0\tCTGTAACCTCCACCTCCTGGGTTCAAGCGATTCTCCTGCCTCGGCCTCCCGAGTAGCTAGGATTACAGGCATACAACACCACACCTGGATAATTTTGTATTTTTAGTAGAGACGGGGTTTCTTCATGTTGG\t*\tSA:Z:chrUn_GL000195v1,49574,-,125S270M,48,5;\tMD:Z:20T110\tRG:Z:GATKSVContigAlignments\tNM:i:1\tAS:i:126\tXS:i:62", true),
                          6, true, null,
                                new Tuple2<>(new AlignmentInterval(new SimpleInterval("chrUn_GL000195v1", 49580, 49843), 1, 264, TextCigarCodec.decode("264M131S"), false, 48, AlignmentInterval.NO_NM, AlignmentInterval.NO_AS, ContigAlignmentsModifier.AlnModType.UNDERGONE_OVERLAP_REMOVAL),
                                             new AlignmentInterval(new SimpleInterval("chrUn_GL000195v1", 49512, 49642), 265, 395, TextCigarCodec.decode("264H131M"), true, 48, 1, 126, ContigAlignmentsModifier.AlnModType.NONE))
    ));

    // invdup two yield to one
    data.add(new ValidLocalData(fromSAMRecordString("asm030182:tig00002\t2048\tchrX\t52729025\t46\t120M249H\t*\t0\t0\tGATAGCATTAGGAGATATACTTAATGAGAAATGACAAGTTAATGGGTGCAGCATACCAACATGGCACATGTATACATATGTAACAAACCTGCACATTGTACACATGTACCCTAAAACTTA\t*\tSA:Z:chrX,52729135,-,261M108S,60,0;\tMD:Z:53C66\tRG:Z:GATKSVContigAlignments\tNM:i:1\tAS:i:115\tXS:i:95", true),
                                fromSAMRecordString("asm030182:tig00002\t16\tchrX\t52729135\t60\t261M108S\t*\t0\t0\tCTAAAACTTAAAGTACAATAATAATAAAATTTAAAAAAATGTTTTCAAGGATCAATTCTTAACAGTAGAGGAAAATAGGAAAGCGTGTCAGTGGTCCACCAGAAATATTGAGGCATTCCTGGGAGATAGAGTAGATGGGGTCAGACTGATAGAGAAACCCAAGGAGACAAGACCACAGCTCAAATCACTGTAGGCGAGAGATGCTGTTTGTTTTTTGAGACGGAGACTTACTCTGTCGCCCAGGCTGAGTAAGTTTTAGGGTACATGTGTACAATGTGCAGGTTTGTTACATATGTATACATGTGCCATGTTGGTATGCTGCACCCATTAACTTGTCATTTCTCATTAAGTATATCTCCTAATGCTATC\t*\tSA:Z:chrX,52729025,+,120M249S,46,1;\tMD:Z:261\tRG:Z:GATKSVContigAlignments\tNM:i:0\tAS:i:261\tXS:i:187", true),
                          12, false, null,
                                new Tuple2<>(new AlignmentInterval(new SimpleInterval("chrX", 52729025, 52729144), 1, 120, TextCigarCodec.decode("120M249H"), true, 46, 1, 115, ContigAlignmentsModifier.AlnModType.NONE),
                                             new AlignmentInterval(new SimpleInterval("chrX", 52729135, 52729383), 121, 369, TextCigarCodec.decode("120S249M"), false, 60, AlignmentInterval.NO_NM, AlignmentInterval.NO_AS, ContigAlignmentsModifier.AlnModType.UNDERGONE_OVERLAP_REMOVAL))
    ));

    return data;
}
 
Example 16
Source File: SoftClippedReadFilterUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 * Creates an artificial read whose CIGAR is parsed from the given text.
 *
 * @param cigarString textual CIGAR, decoded with {@link TextCigarCodec#decode(String)}
 * @return the read produced by {@code ArtificialReadUtils.createArtificialRead} from this
 *         test's {@code header} and the decoded CIGAR
 */
private GATKRead buildSAMRead(final String cigarString) {
    return ArtificialReadUtils.createArtificialRead(header, TextCigarCodec.decode(cigarString));
}