htsjdk.tribble.annotation.Strand Java Examples

The following examples show how to use htsjdk.tribble.annotation.Strand. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: FuncotatorUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Supplies rows of { input allele, strand, expected allele }.
 * Judging by the provider name, these presumably feed the test of {@code getStrandCorrectedAllele}.
 */
@DataProvider
Object[][] provideForTestGetStrandCorrectedAllele() {
    final Strand plus  = Strand.POSITIVE;
    final Strand minus = Strand.NEGATIVE;

    return new Object[][] {
            // Positive strand: the expected allele has the same bases as the input.
            { Allele.create("A"),       plus,  Allele.create("A") },
            { Allele.create("AA"),      plus,  Allele.create("AA") },
            { Allele.create("AAT"),     plus,  Allele.create("AAT") },
            { Allele.create("AATT"),    plus,  Allele.create("AATT") },
            { Allele.create("AATTG"),   plus,  Allele.create("AATTG") },
            { Allele.create("AATTGC"),  plus,  Allele.create("AATTGC") },
            { Allele.create("AATTGCG"), plus,  Allele.create("AATTGCG") },

            // Negative strand: the expected allele is the reverse complement of the input.
            { Allele.create("A"),       minus, Allele.create("T") },
            { Allele.create("AA"),      minus, Allele.create("TT") },
            { Allele.create("AAT"),     minus, Allele.create("ATT") },
            { Allele.create("AATT"),    minus, Allele.create("AATT") },
            { Allele.create("AATTG"),   minus, Allele.create("CAATT") },
            { Allele.create("AATTGC"),  minus, Allele.create("GCAATT") },
            { Allele.create("AATTGCG"), minus, Allele.create("CGCAATT") },
    };
}
 
Example #2
Source File: AddOATag.java    From picard with MIT License 6 votes vote down vote up
/**
 * Appends one entry describing the current alignment of {@code rec} to the record's OA attribute.
 *
 * The entry is {@code "*,0,<strand>,*,255,;"} when the read is flagged unmapped; otherwise it is
 * {@code "<reference name>,<alignment start>,<strand>,<cigar>,<mapping quality>,<NM attribute or empty>;"}.
 * Any value already stored under OA is kept and the new entry is concatenated after it.
 *
 * @param rec the record to update in place.
 * @throws PicardException if the record's reference name contains a comma, which is the field separator.
 */
private void setOATag(SAMRecord rec) {
    if (rec.getReferenceName().contains(",")) {
        throw new PicardException(String.format("Reference name for record %s contains a comma character.", rec.getReadName()));
    }

    // Both entry layouts carry the strand, so work it out once.
    final Strand strand = rec.getReadNegativeStrandFlag() ? Strand.NEGATIVE : Strand.POSITIVE;

    final String entry = rec.getReadUnmappedFlag()
            ? String.format("*,0,%s,*,255,;", strand)
            : String.format("%s,%s,%s,%s,%s,%s;",
                    rec.getReferenceName(),
                    rec.getAlignmentStart(),
                    strand,
                    rec.getCigarString(),
                    rec.getMappingQuality(),
                    Optional.ofNullable(rec.getAttribute(SAMTag.NM.name())).orElse(""));

    // Preserve whatever was already recorded under OA and append the new entry to it.
    final String oaTagName = SAMTag.OA.name();
    final Object existingValue = rec.getAttribute(oaTagName);
    rec.setAttribute(oaTagName, (existingValue == null ? "" : existingValue) + entry);
}
 
Example #3
Source File: FuncotatorUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Renders the codon change string for a deletion using {@code CODON_CHANGE_FORMAT_STRING}.
 *
 * Reads {@code seqComp.getStrand()} and {@code seqComp.getAlignedCodingSequenceAlternateAllele()}.
 *
 * @param seqComp The {@link SequenceComparison} providing the strand and the aligned alternate allele.
 * @param indelIsBetweenCodons Whether the deletion falls directly between two codons.
 * @param alignedCodonStart Start position of the aligned codon(s).
 * @param alignedCodonEnd End position of the aligned codon(s).
 * @param refCodon The reference codon string.
 * @return The formatted codon change string.
 */
private static String getCodonChangeStringForDeletion(final SequenceComparison seqComp, final boolean indelIsBetweenCodons, int alignedCodonStart, int alignedCodonEnd, String refCodon) {

        // Deletion inside a codon: report the reference codon changing into the (lower-cased) alternate.
        if ( !indelIsBetweenCodons ) {
            final String alternateCodon = seqComp.getAlignedCodingSequenceAlternateAllele().toLowerCase();
            return String.format(CODON_CHANGE_FORMAT_STRING, alignedCodonStart, alignedCodonEnd, refCodon + ">" + alternateCodon);
        }

        // Deletion between codons: drop one codon's worth of bases from the reported range.
        final int reportedStart;
        final int reportedEnd;
        final String reportedRefCodon;
        if ( seqComp.getStrand() == Strand.POSITIVE ) {
            // + strand: skip the first AminoAcid.CODON_LENGTH bases.
            reportedStart = alignedCodonStart + AminoAcid.CODON_LENGTH;
            reportedEnd = alignedCodonEnd;
            reportedRefCodon = refCodon.substring(AminoAcid.CODON_LENGTH);
        }
        else {
            // Otherwise: skip the last AminoAcid.CODON_LENGTH bases.
            reportedStart = alignedCodonStart;
            reportedEnd = alignedCodonEnd - AminoAcid.CODON_LENGTH;
            reportedRefCodon = refCodon.substring(0, refCodon.length() - AminoAcid.CODON_LENGTH);
        }

        return String.format(CODON_CHANGE_FORMAT_STRING, reportedStart, reportedEnd, reportedRefCodon + "del");
    }
 
Example #4
Source File: GencodeFuncotationFactoryUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Builds a {@link GencodeGtfFeatureBaseData} of feature type GENE on the positive strand whose
 * contig, start and end come from the given interval; the IDs and names are fixed "TEST-*" placeholders.
 *
 * @param interval Supplies the contig, start and end of the created data.
 * @return The newly created base data.
 */
private GencodeGtfFeatureBaseData createGtfBaseDataForTestIs5Prime(final SimpleInterval interval) {
    final GencodeGtfFeatureBaseData geneBaseData = new GencodeGtfFeatureBaseData(
            GencodeGtfCodec.GTF_FILE_TYPE_STRING,
            1,
            // Location, taken from the caller's interval:
            interval.getContig(),
            GencodeGtfFeature.ANNOTATION_SOURCE_ENSEMBL,
            GencodeGtfFeature.FeatureType.GENE,
            interval.getStart(),
            interval.getEnd(),
            Strand.POSITIVE,
            GencodeGtfFeature.GenomicPhase.DOT,
            // Placeholder identifiers, types and statuses:
            "TEST-GENE-ID",
            "TEST-TX-ID",
            GencodeGtfFeature.GeneTranscriptType.PROTEIN_CODING,
            GencodeGtfFeature.GeneTranscriptStatus.PUTATIVE,
            "TEST-GENE",
            GencodeGtfFeature.GeneTranscriptType.PROTEIN_CODING,
            GencodeGtfFeature.GeneTranscriptStatus.PUTATIVE,
            "TEST-TX",
            1,
            "",
            GencodeGtfFeature.LocusLevel.AUTOMATICALLY_ANNOTATED,
            null,
            "");

    return geneBaseData;
}
 
Example #5
Source File: FuncotatorUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Builds the alternate sequence by splicing the alternate allele into the reference sequence
 * in place of the reference allele.
 *
 * The result is: the reference bases before the allele, then the bases of {@code altAllele}, then the
 * reference bases that follow the {@code refAllele.length()} bases starting at the allele position.
 *
 * @param referenceSequence The reference sequence on which to base the resulting alternate sequence.  Must not be {@code null}.
 * @param alleleStartPos Starting position (1-based, inclusive) for the ref and alt alleles in the given {@code referenceSequence}.  Must be > 0.
 * @param refAllele Reference Allele.  Only its length is used (content ignored).  Must not be {@code null}.
 * @param altAllele Alternate Allele.  Used for both content and length.  If on the negative strand, assumes that the bases are already reverse-complemented, and that the leading reference base is the last base in the allele.  Must not be {@code null}.
 * @param strand The {@link Strand} on which the variant occurs.  Documented as not {@code null} and not {@link Strand#NONE}; note that this method body never reads it.
 * @return The coding sequence that includes the given alternate allele in place of the given reference allele.
 */
public static String getAlternateSequence(final StrandCorrectedReferenceBases referenceSequence,
                                          final int alleleStartPos,
                                          final Allele refAllele,
                                          final Allele altAllele,
                                          final Strand strand) {

    Utils.nonNull(referenceSequence);
    Utils.nonNull(refAllele);
    Utils.nonNull(altAllele);

    ParamUtils.isPositive( alleleStartPos, "Genome positions must be > 0." );

    // Convert the 1-based allele position into a 0-based string index:
    final int alleleIndex = Math.abs(alleleStartPos - 1);

    // Everything in the reference before the allele:
    final String basesBeforeAllele = referenceSequence.getBaseString().substring(0, alleleIndex);

    // The replacement bases:
    final String alternateBases = altAllele.getBaseString();

    // Everything in the reference after the span covered by the reference allele:
    final String basesAfterAllele = referenceSequence.getBaseString().substring(alleleIndex + refAllele.length());

    return basesBeforeAllele + alternateBases + basesAfterAllele;
}
 
Example #6
Source File: ProteinChangeInfoUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Checks that {@code ProteinChangeInfo.create} called with the supplied arguments
 * yields a value equal to {@code expected}.
 */
@Test(dataProvider = "provideForTestCreateProteinChangeInfo")
void testCreateProteinChangeInfo( final Allele refAllele,
                                  final Allele altAllele,
                                  final int codingSequenceAlleleStart,
                                  final int alignedCodingSequenceAlleleStart,
                                  final String codingSequence,
                                  final Strand strand,
                                  final boolean isMitochondria,
                                  final ProteinChangeInfo expected ) {

    final ProteinChangeInfo actual = ProteinChangeInfo.create(
            refAllele,
            altAllele,
            codingSequenceAlleleStart,
            alignedCodingSequenceAlleleStart,
            codingSequence,
            strand,
            isMitochondria);

    Assert.assertEquals(actual, expected);
}
 
Example #7
Source File: GenomeInterval.java    From varsim with BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Creates an interval from an array of text fields, read by position:
 * [0] chromosome, [1] start, [2] end, [3] strand (parsed with {@code Strand.valueOf}),
 * [4] feature (parsed with {@code MapBlock.BlockType.valueOf}).
 *
 * @param fields the text fields; indices 0 through 4 are read.
 */
public GenomeInterval(final String fields[]) {
    // Location columns:
    this.chromosome = new ChrString(fields[0]);
    this.start = Integer.parseInt(fields[1]);
    this.end = Integer.parseInt(fields[2]);

    // Enum-valued columns, looked up by constant name:
    this.strand = Strand.valueOf(fields[3]);
    this.feature = MapBlock.BlockType.valueOf(fields[4]);
}
 
Example #8
Source File: FuncotatorUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Supplies seven-column rows for the splice-site codon change test:
 * four integers, a {@link Strand}, an integer offset and the expected string.
 * The second column always equals the number that follows "c.e" in the expected string.
 * NOTE(review): the meaning of the remaining integer columns is defined by the consuming test,
 * which is not visible here - confirm against its parameter list.
 */
@DataProvider
Object[][] provideDataForTestCreateSpliceSiteCodonChange() {

    final Strand plus  = Strand.POSITIVE;
    final Strand minus = Strand.NEGATIVE;

    return new Object[][] {
            // ---- Offset of zero ----
            {1000, 5, 1000, 1500, plus,  0, "c.e5-0"},
            {1000, 4, 1,    1500, plus,  0, "c.e4+500"},
            {1000, 3, 500,  1500, plus,  0, "c.e3-500"},

            {1000, 5, 1000, 1500, minus, 0, "c.e5+0"},
            {1000, 4, 1,    1500, minus, 0, "c.e4-500"},
            {1000, 3, 500,  1500, minus, 0, "c.e3+500"},

            {1000, 5, 1500, 500,  minus, 0, "c.e5+500"},

            // ---- Positive offsets ----
            {1000, 5, 1000, 1500, plus,  1, "c.e5+1"},
            {1000, 4, 1,    1500, plus,  2, "c.e4+502"},
            {1000, 3, 500,  1500, plus,  3, "c.e3-497"},

            {1000, 5, 1000, 1500, minus, 4, "c.e5+4"},
            {1000, 4, 1,    1500, minus, 5, "c.e4-495"},
            {1000, 3, 500,  1500, minus, 6, "c.e3+506"},

            {1000, 5, 1500, 500,  minus, 7, "c.e5+507"},

            // ---- Negative offsets ----
            {1000, 5, 1000, 1500, plus,  -1, "c.e5-1"},
            {1000, 4, 1,    1500, plus,  -2, "c.e4+498"},
            {1000, 3, 500,  1500, plus,  -3, "c.e3-503"},

            {1000, 5, 1000, 1500, minus, -4, "c.e5-4"},
            {1000, 4, 1,    1500, minus, -5, "c.e4-505"},
            {1000, 3, 500,  1500, minus, -6, "c.e3+494"},

            {1000, 5, 1500, 500,  minus, -7, "c.e5+493"},
    };
}
 
Example #9
Source File: FuncotatorUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Populates a fresh {@link SequenceComparison} from the supplied values and checks that
 * {@code FuncotatorUtils.getCodonChangeString} returns {@code expected} for it and the given start codon.
 */
@Test(dataProvider = "provideDataForGetCodonChangeString")
void testGetCodonChangeString( final String refAllele,
                               final String altAllele,
                               final int alleleStart,
                               final int codingSequenceAlleleStart,
                               final int alignedCodingSequenceAlleleStart,
                               final String alignedCodingSeqRefAllele,
                               final String alignedCodingSeqAltAllele,
                               final String alignedAlternateAllele,
                               final int alignedRefAlleleStop,
                               final String contig,
                               final Strand strand,
                               final ReferenceSequence codingSequence,
                               final Locatable startCodon,
                               final String expected ) {

    final SequenceComparison comparison = new SequenceComparison();

    // Alleles as given:
    comparison.setReferenceAllele(refAllele);
    comparison.setAlternateAllele(altAllele);

    // Allele start positions:
    comparison.setAlleleStart(alleleStart);
    comparison.setCodingSequenceAlleleStart(codingSequenceAlleleStart);
    comparison.setAlignedCodingSequenceAlleleStart(alignedCodingSequenceAlleleStart);

    // Aligned alleles and the aligned reference stop:
    comparison.setAlignedCodingSequenceReferenceAllele(alignedCodingSeqRefAllele);
    comparison.setAlignedCodingSequenceAlternateAllele(alignedCodingSeqAltAllele);
    comparison.setAlignedAlternateAllele(alignedAlternateAllele);
    comparison.setAlignedReferenceAlleleStop(alignedRefAlleleStop);

    // Transcript context:
    comparison.setContig(contig);
    comparison.setStrand(strand);
    comparison.setTranscriptCodingSequence(codingSequence);

    final String actual = FuncotatorUtils.getCodonChangeString(comparison, startCodon);
    Assert.assertEquals(actual, expected);
}
 
Example #10
Source File: ProteinChangeInfo.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Sets {@code aaStartPos}, {@code aaEndPos}, {@code refAaSeq} and {@code altAaSeq} for an insertion.
 *
 * An insertion directly between two codons is rendered with an empty reference sequence and a
 * start/end pair one position apart. Any other insertion is first filled in naively from the
 * codon containing the aligned allele start and then trimmed by {@code trimInsertionProteinChangeVariables()}.
 *
 * @param alignedCodingSequenceAlleleStart Aligned start of the allele in the coding sequence.
 * @param strand The {@link Strand} of the variant; only consulted for insertions between codons.
 * @param referenceProteinSequence The reference protein sequence.
 * @param alternateProteinSequence The alternate protein sequence.
 * @param proteinChangeStartIndex Index into the alternate protein sequence; only used for insertions between codons.
 * @param indelIsBetweenCodons Whether the insertion falls directly between two codons.
 * @param numAltAminoAcids Number of alternate amino acids.
 * @param numRefAminoAcids Number of reference amino acids.
 */
private void initializeForInsertion(final int alignedCodingSequenceAlleleStart, final Strand strand, final String referenceProteinSequence, final String alternateProteinSequence, int proteinChangeStartIndex, final boolean indelIsBetweenCodons, final int numAltAminoAcids, final int numRefAminoAcids) {
    // 0-based index of the codon that contains the aligned allele start; both cases build on it.
    final int containingCodonIndex = (alignedCodingSequenceAlleleStart - 1) / AminoAcid.CODON_LENGTH;

    if (!indelIsBetweenCodons) {
        // Naive fill from the potentially changed amino acid sequence:
        aaStartPos = containingCodonIndex + 1;
        aaEndPos = aaStartPos + numRefAminoAcids;

        refAaSeq = referenceProteinSequence.substring(containingCodonIndex, aaEndPos);
        altAaSeq = alternateProteinSequence.substring(containingCodonIndex, aaStartPos + numAltAminoAcids);

        // Trim our state for this insertion:
        trimInsertionProteinChangeVariables();
        return;
    }

    // Insertion directly between two codons: locate the amino acid before the insertion.
    // On the + strand 1 is added so that the amino acid position lines up correctly.
    final int positiveStrandAdjustment = (strand == Strand.POSITIVE) ? 1 : 0;
    aaStartPos = containingCodonIndex + positiveStrandAdjustment;
    aaEndPos = aaStartPos + 1;
    refAaSeq = "";
    altAaSeq = alternateProteinSequence.substring(proteinChangeStartIndex, proteinChangeStartIndex + numAltAminoAcids);
}
 
Example #11
Source File: FuncotatorUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Checks that {@code FuncotatorUtils.getBasesInWindowAroundReferenceAllele} returns bases equal to
 * a {@link StrandCorrectedReferenceBases} built from {@code expected} and {@code strand}.
 */
@Test (dataProvider = "provideDataForTestGetBasesInWindowAroundReferenceAllele")
void testGetBasesInWindowAroundReferenceAllele(final Allele refAllele,
                                               final ReferenceContext referenceContext,
                                               final Strand strand,
                                               final int referenceWindow,
                                               final String expected) {

    final StrandCorrectedReferenceBases actualBases =
            FuncotatorUtils.getBasesInWindowAroundReferenceAllele(refAllele, referenceContext, strand, referenceWindow);

    final StrandCorrectedReferenceBases expectedBases = new StrandCorrectedReferenceBases(expected, strand);

    Assert.assertEquals( actualBases, expectedBases );
}
 
Example #12
Source File: FuncotatorUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Supplies three rows of { reference context, single-element exon list, strand }.
 * The rows are identical apart from the strand, which is POSITIVE, NEGATIVE and NONE in turn.
 * Judging by the provider name, the consuming test presumably expects an exception for each row.
 */
@DataProvider
Object[][] provideReferenceAndExonListForGatkExceptions() {

    final Strand[] strands = { Strand.POSITIVE, Strand.NEGATIVE, Strand.NONE };

    final Object[][] rows = new Object[strands.length][];
    for ( int i = 0; i < strands.length; ++i ) {
        // Each row gets its own freshly constructed reference context and exon list:
        rows[i] = new Object[] {
                new ReferenceContext(new ReferenceFileSource(TEST_REFERENCE), new SimpleInterval(TEST_REFERENCE_CONTIG, TEST_REFERENCE_START, TEST_REFERENCE_END)),
                Collections.singletonList(
                        new SimpleInterval("2", TEST_REFERENCE_START + 500, TEST_REFERENCE_START + 550)
                ),
                strands[i]
        };
    }
    return rows;
}
 
Example #13
Source File: GencodeGtfFeature.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Maps a strand symbol to its {@link Strand}: {@code "+"} becomes {@link Strand#POSITIVE} and
 * {@code "-"} becomes {@link Strand#NEGATIVE}.
 * @param s {@link String} to convert into a {@link Strand}.  A {@code null} value results in a {@link NullPointerException}.
 * @return The {@link Strand} corresponding to {@code s}.
 * @throws IllegalArgumentException if {@code s} is any string other than {@code "+"} or {@code "-"}.
 */
private static Strand convertStringToStrand( final String s ) {
    switch ( s ) {
        case "+":
            return Strand.POSITIVE;
        case "-":
            return Strand.NEGATIVE;
        default:
            throw new IllegalArgumentException("Unexpected value: " + s);
    }
}
 
Example #14
Source File: FuncotatorUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Checks that {@code FuncotatorUtils.getAlignedRefAllele} returns {@code expected}.
 * The alternate allele passed in is synthetic: a run of 'A' with the same length as {@code refAllele}.
 */
@Test(dataProvider = "provideDataForTestGetAlignedRefAllele")
void testGetAlignedRefAllele( final String referenceSnippet,
                              final int referencePadding,
                              final Allele refAllele,
                              final int codingSequenceRefAlleleStart,
                              final int alignedRefAlleleStart,
                              final Strand strand,
                              final String expected) {

    // Placeholder location handed to the method under test (the original comment says it is for logging):
    final Locatable loggingLocatable = new SimpleInterval("ReferenceSnippet", 1, 100);

    // Alternate allele of all 'A's, as long as the reference allele:
    final Allele syntheticAltAllele = Allele.create(Utils.dupChar('A', refAllele.length()));

    // Reference snippet wrapped together with its strand:
    final StrandCorrectedReferenceBases referenceBases = new StrandCorrectedReferenceBases(referenceSnippet, strand);

    Assert.assertEquals(
            FuncotatorUtils.getAlignedRefAllele(referenceBases, referencePadding, refAllele, syntheticAltAllele,
                    codingSequenceRefAlleleStart, alignedRefAlleleStart, strand, loggingLocatable),
            expected
    );
}
 
Example #15
Source File: FuncotatorUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Checks that {@code FuncotatorUtils.getAlignedCodingSequenceAllele} returns {@code expected}
 * for the supplied coding sequence, aligned allele bounds, reference allele, start and strand.
 */
@Test (dataProvider = "provideDataForGetAlignedCodingSequenceAllele")
void testGetAlignedCodingSequenceAllele(  final String codingSequence,
                            final Integer alignedAlleleStart,
                            final Integer alignedAlleleStop,
                            final Allele refAllele,
                            final Integer refAlleleStart,
                            final Strand strand,
                            final String expected) {
    Assert.assertEquals(
            FuncotatorUtils.getAlignedCodingSequenceAllele(
                    codingSequence,
                    alignedAlleleStart,
                    alignedAlleleStop,
                    refAllele,
                    refAlleleStart,
                    strand),
            expected
    );
}
 
Example #16
Source File: FuncotatorUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Checks that {@code FuncotatorUtils.isIndelBetweenCodons} returns {@code expected}
 * for the supplied allele starts, reference allele and strand.
 */
@Test(dataProvider = "provideForTestIsIndelBetweenCodons")
void testIsIndelBetweenCodons(final int codingSequenceAlleleStart,
                              final int alignedCodingSequenceAlleleStart,
                              final String refAllele,
                              final Strand strand,
                              final boolean expected) {

    final boolean actual = FuncotatorUtils.isIndelBetweenCodons(
            codingSequenceAlleleStart,
            alignedCodingSequenceAlleleStart,
            refAllele,
            strand);

    Assert.assertEquals(actual, expected);
}
 
Example #17
Source File: FuncotatorUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Supplies two single-column rows: a {@code null} strand and {@link Strand#NONE}.
 * Per the provider name these are the strands treated as invalid.
 */
@DataProvider
Object[][] provideDataForTestAssertValidStrand_InvalidStrands() {
    final Object[] nullStrandRow = { null };
    final Object[] noneStrandRow = { Strand.NONE };

    return new Object[][] { nullStrandRow, noneStrandRow };
}
 
Example #18
Source File: FuncotatorUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Gets the next (+ strand) or previous (- strand) complete in-frame codon from the given {@link ReferenceSequence}
 * according to the current codon position and strand.
 *
 * An empty string is returned when the adjacent codon is not available: on the + strand when the
 * computed end index reaches or passes the end of the sequence, and on the - strand when fewer than
 * {@code AminoAcid.CODON_LENGTH} bases precede the current start.
 *
 * @param referenceSequence The {@link ReferenceSequence} containing the complete coding sequence for the transcript on which the current variant occurs.  Must not be {@code null}.
 * @param currentAlignedCodingSequenceAlleleStart The starting position (1-based, inclusive) of the current codon.  Must be > 0.
 * @param currentAlignedCodingSequenceAlleleStop The ending position (1-based, inclusive) of the current codon.  Must be > 0.
 * @param strand The {@link Strand} on which the current codon resides.  Must not be {@code null}.  Must not be {@link Strand#NONE}.
 * @return The next (+ strand) or previous (- strand) codon in frame with the current codon as specified by the given current codon positions, or {@code ""} if there is none.
 */
private static String getAdjacentReferenceCodon(final ReferenceSequence referenceSequence,
                                                final int currentAlignedCodingSequenceAlleleStart,
                                                final int currentAlignedCodingSequenceAlleleStop,
                                                final Strand strand) {

    Utils.nonNull( referenceSequence );
    ParamUtils.isPositive(currentAlignedCodingSequenceAlleleStart, "Genomic positions must be > 0.");
    ParamUtils.isPositive(currentAlignedCodingSequenceAlleleStop, "Genomic positions must be > 0.");
    assertValidStrand(strand);

    // Fetch the base string once rather than once per use:
    final String referenceBases = referenceSequence.getBaseString();

    if ( strand == Strand.POSITIVE ) {

        // The 1-based inclusive stop doubles as the 0-based index of the first base after the current codon.
        // Add AminoAcid.CODON_LENGTH to get the (exclusive) end of the "next" codon on the + strand:
        final int endex = currentAlignedCodingSequenceAlleleStop + AminoAcid.CODON_LENGTH;

        // Make sure we don't try to get bases after the end of our reference sequence.
        // NOTE(review): `>=` also yields "" when the next codon ends exactly at the end of the sequence
        // (endex == length), although substring would accept that index.  Kept as-is; confirm it is intended.
        if ( endex >= referenceBases.length() ) {
            return "";
        }
        return referenceBases.substring(currentAlignedCodingSequenceAlleleStop, endex);
    }

    // - strand: the "next" codon is the one immediately before the current start.
    // Subtract 1 because of 1-inclusive genomic positions, then AminoAcid.CODON_LENGTH to reach the codon start:
    final int previousCodonEnd = currentAlignedCodingSequenceAlleleStart - 1;
    final int previousCodonStart = previousCodonEnd - AminoAcid.CODON_LENGTH;

    // Make sure we don't try to get bases before the start of our reference sequence.
    // This covers every start that leaves less than a full codon in front of it, not only a start of 1,
    // so substring is never handed a negative index:
    if ( previousCodonStart < 0 ) {
        return "";
    }
    return referenceBases.substring(previousCodonStart, previousCodonEnd);
}
 
Example #19
Source File: FuncotatorUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@DataProvider
    Object[][] provideForTestGetCodingSequenceChangeString() {

        // Row layout:
        //     int    codingSequenceAlleleStart
        //     String referenceAllele
        //     String alternateAllele
        //     Strand strand
        //     String expected
        //
        // + Strand tests from PIK3CA
        // - Strand tests from MUC16

        // TODO: Add tests for the deletion case requiring exon start/end/allele position.

        final Strand plus  = Strand.POSITIVE;
        final Strand minus = Strand.NEGATIVE;

        return new Object[][] {
                // SNP
                { 1632,  "A",    "T",   plus,  "c.1632A>T" },
                // ONP (|ONP| > 1)
                { 1633,  "CT",   "GG",  plus,  "c.1633_1634CT>GG" },
                // Insertion (+ Strand)
                { 1634,  "T",    "TGG", plus,  "c.1634_1635insGG" },
                // Insertion (- Strand)
                { 43303, "A",    "TCA", minus, "c.43302_43303insTC" },
                // Deletion (+ Strand)
                { 1634,  "TGAG", "T",   plus,  "c.1635_1637delGAG" },
                // Deletion (- Strand)
                { 43138, "TCT",  "T",   minus, "c.43138_43139delTC" },
        };
    }
 
Example #20
Source File: FuncotatorUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Checks that {@code FuncotatorUtils.getOverlappingExonPositions} returns {@code expected}
 * for the supplied alleles, location, strand and exon position list.
 */
@Test (dataProvider = "provideDataForTestGetOverlappingExonPositions")
void testGetOverlappingExonPositions(final Allele refAllele,
                                     final Allele altAllele,
                                     final String contig,
                                     final int start,
                                     final int stop,
                                     final Strand strand,
                                     final List<? extends htsjdk.samtools.util.Locatable> exonPositionList,
                                     final SimpleInterval expected) {
    Assert.assertEquals(
            FuncotatorUtils.getOverlappingExonPositions(
                    refAllele,
                    altAllele,
                    contig,
                    start,
                    stop,
                    strand,
                    exonPositionList),
            expected
    );
}
 
Example #21
Source File: FuncotatorUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Assembles one data-provider row: { reference allele, reference context, strand, window size, expected }.
 *
 * @param refAlleleBases Bases of the allele, which is created as a reference allele.
 * @param strand Strand in encoded string form; converted with {@code Strand.decode}.
 * @param windowSizeInBases Window size, passed through unchanged.
 * @param startPos Start of the "chr3" interval used for the reference context.
 * @param endPos End of the "chr3" interval used for the reference context.
 * @param expected Expected value, passed through unchanged.
 * @return The assembled row.
 */
private static Object[] helpCreateDataForTestGetBasesInWindowAroundReferenceAllele(final String refAlleleBases,
                                                                                   final String strand,
                                                                                   final int windowSizeInBases,
                                                                                   final int startPos,
                                                                                   final int endPos,
                                                                                   final String expected) {
    final Allele referenceAllele = Allele.create(refAlleleBases, true);
    final SimpleInterval chr3Interval = new SimpleInterval("chr3", startPos, endPos);
    final ReferenceContext referenceContext = new ReferenceContext( refDataSourceHg19Ch3, chr3Interval );
    final Strand decodedStrand = Strand.decode(strand);

    return new Object[] { referenceAllele, referenceContext, decodedStrand, windowSizeInBases, expected };
}
 
Example #22
Source File: FuncotatorTestUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Creates a {@link Funcotation} filled with fixed dummy values, taking only the contig, start, end,
 * reference allele and first alternate allele from the given variant context.
 * Apart from the {@code null} checks on the two arguments, no validity checking is done.
 *
 * @param dummyTranscriptName An arbitrary string.  Never {@code null}
 * @param dummyVariantContext An arbitrary {@link VariantContext}.  Never {@code null}
 * @return Never {@code null}
 */
public static Funcotation createDummyGencodeFuncotation(final String dummyTranscriptName, final VariantContext dummyVariantContext) {
    Utils.nonNull(dummyTranscriptName);
    Utils.nonNull(dummyVariantContext);

    // Display strings of the reference allele and the first alternate allele:
    final String refDisplay = dummyVariantContext.getReference().getDisplayString();
    final String altDisplay = dummyVariantContext.getAlternateAllele(0).getDisplayString();

    // Genome change string at a fixed dummy position:
    final String genomeChange = "g.1000000" + refDisplay + ">" + altDisplay;

    return createGencodeFuncotation(
            "GENE", "b37",
            dummyVariantContext.getContig(), dummyVariantContext.getStart(), dummyVariantContext.getEnd(),
            GencodeFuncotation.VariantClassification.DE_NOVO_START_IN_FRAME, null, GencodeFuncotation.VariantType.SNP,
            refDisplay,
            altDisplay,
            genomeChange,
            dummyTranscriptName, Strand.FORWARD,
            1, 1500, 1500,
            " ", " ",
            "p.L300P", 0.5,
            "ACTGATCGATCGA", Collections.singletonList("FAKE00002.5"), "27");
}
 
Example #23
Source File: FuncotatorTestUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Creates an artificial GencodeGtfTranscriptFeature for testing. Only the contig, start, stop and
 * strand come from the caller; every other field is a fixed dummy value.
 *
 * @param contig Contig that should be assigned to the new GencodeGtfTranscriptFeature
 * @param start Start position that should be assigned to the new GencodeGtfTranscriptFeature
 * @param stop Stop position that should be assigned to the new GencodeGtfTranscriptFeature
 * @param strand Strand that should be assigned to the new GencodeGtfTranscriptFeature
 * @return A new GencodeGtfTranscriptFeature with the specified contig, start, stop, and strand, and dummy
 *         values for all other fields
 */
public static GencodeGtfTranscriptFeature createArtificialGencodeGtfTranscriptFeatureForTesting( final String contig, final int start, final int stop, final Strand strand) {
    // Base data of feature type TRANSCRIPT carrying the caller's location and strand:
    final GencodeGtfFeatureBaseData transcriptBaseData = new GencodeGtfFeatureBaseData(
            GencodeGtfCodec.GTF_FILE_TYPE_STRING,
            2,
            contig,
            GencodeGtfFeature.ANNOTATION_SOURCE_ENSEMBL,
            GencodeGtfFeature.FeatureType.TRANSCRIPT,
            start,
            stop,
            strand,
            GencodeGtfFeature.GenomicPhase.DOT,
            "FakeGeneID",
            "FakeTranscriptID",
            GencodeGtfFeature.GeneTranscriptType.PROTEIN_CODING,
            null,
            "FakeGeneName",
            GencodeGtfFeature.GeneTranscriptType.PROTEIN_CODING,
            null,
            "FakeTranscriptName",
            -1,
            null,
            GencodeGtfFeature.LocusLevel.AUTOMATICALLY_ANNOTATED,
            Collections.emptyList(),
            null);

    // The factory result is cast down to the transcript feature type:
    return (GencodeGtfTranscriptFeature) GencodeGtfTranscriptFeature.create(transcriptBaseData);
}
 
Example #24
Source File: FuncotatorUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Returns a new {@link Allele} corrected for the given {@link Strand}: a copy of the given allele on
 * {@link Strand#POSITIVE}, otherwise an allele built from the reverse complement of its bases.
 * In both cases {@code false} is passed as the second argument to {@code Allele.create}.
 * @param allele The {@link Allele} to correct for strandedness.
 * @param strand The {@link Strand} on which the given {@code allele} lies.  Must be valid as per {@link #assertValidStrand(Strand)}
 * @return The {@link Allele} with sequence corrected for strand.
 */
public static Allele getStrandCorrectedAllele(final Allele allele, final Strand strand) {
    assertValidStrand(strand);

    final boolean needsReverseComplement = strand != Strand.POSITIVE;
    if ( needsReverseComplement ) {
        final byte[] reverseComplementedBases = ReadUtils.getBasesReverseComplement(allele.getBases());
        return Allele.create(reverseComplementedBases, false);
    }

    return Allele.create(allele, false);
}
 
Example #25
Source File: GencodeFuncotationFactory.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Create a placeholder funcotation on a given {@code variant} and {@code allele} pair for a case that funcotator
 * cannot yet handle, or would currently get wrong.
 * Primarily this occurs when a variant is long and spans multiple types of {@link GencodeGtfFeature}s
 * (i.e. it starts in an intron and ends in an exon or vice versa), or is
 * long and begins in a transcript and extends beyond a given transcript's end point.
 * There are two such cases right now as manifested in the following issues:
 *     https://github.com/broadinstitute/gatk/issues/3749
 *     https://github.com/broadinstitute/gatk/issues/4307
 * As noted in the above issues, other functional annotation tools also get these kinds of cases wrong.
 *
 * The placeholder carries the trivially populated fields, the version, the reference context,
 * the GC content, the data source name and a variant classification of {@code COULD_NOT_DETERMINE}.
 *
 * @param variant The {@link VariantContext} to annotate.
 * @param altAllele The alternate {@link Allele} to annotate.
 * @param reference The {@link ReferenceContext} for the given {@code variant}.
 * @param transcript The {@link GencodeGtfTranscriptFeature} which is being used to annotate the given {@code variant}.
 * @param version A {@link String} representing the version of the {@link GencodeFuncotationFactory} being used to annotate the given {@code variant}.
 * @param dataSourceName A {@link String} containing the name of the data source instance.
 * @param ncbiBuildVersion NCBI build version
 * @return A placeholder {@link GencodeFuncotation} for the given {@code variant}.
 */
@VisibleForTesting
static final GencodeFuncotation createDefaultFuncotationsOnProblemVariant( final VariantContext variant,
                                                                           final Allele altAllele,
                                                                           final ReferenceContext reference,
                                                                           final GencodeGtfTranscriptFeature transcript,
                                                                           final String version,
                                                                           final String dataSourceName,
                                                                           final String ncbiBuildVersion) {
    // Start from a builder with the basic annotation information filled in, then record our version:
    final GencodeFuncotationBuilder builder = createGencodeFuncotationBuilderWithTrivialFieldsPopulated(variant, altAllele, transcript, ncbiBuildVersion);
    builder.setVersion(version);

    // Reference context: take the snippet around the variant on the transcript's strand and render it
    // for the + strand, because the reference context is ALWAYS reported from the + strand:
    builder.setReferenceContext(
            FuncotatorUtils.createReferenceSnippet(variant.getReference(), altAllele, reference, transcript.getGenomicStrand(), referenceWindow)
                    .getBaseString(Strand.POSITIVE));

    // GC content:
    builder.setGcContent( calculateGcContent( variant.getReference(), altAllele, reference, gcContentWindowSizeBases ) );

    // Mark the classification as undetermined and record where the annotation came from:
    builder.setVariantClassification(GencodeFuncotation.VariantClassification.COULD_NOT_DETERMINE);
    builder.setDataSourceName(dataSourceName);

    return builder.build();
}
 
Example #26
Source File: IlluminaManifestRecord.java    From picard with MIT License 5 votes vote down vote up
/**
 * Reads the reference strand for this record from the manifest column named by
 * {@link IlluminaManifest#REF_STRAND_HEADER_NAME}.
 *
 * @param columnNameToIndex lookup handed to {@code getColumnValueIfPresentInManifest} to locate the column
 *                          (presumably header name to column index -- confirm against the caller).
 * @return the {@link Strand} decoded from the first character of the column value, or {@link Strand#NONE}
 *         when no value is available (the lookup returned {@code null} or an empty string).
 */
private Strand getRefStrandFromManifest(final Map<String, Integer> columnNameToIndex) {
    final String strandValue = getColumnValueIfPresentInManifest(columnNameToIndex, IlluminaManifest.REF_STRAND_HEADER_NAME);
    // An empty cell carries no strand information either; without this guard charAt(0) would throw
    // StringIndexOutOfBoundsException instead of reporting "no strand".
    if (strandValue == null || strandValue.isEmpty()) {
        return Strand.NONE;
    }
    return Strand.decode(strandValue.charAt(0));
}
 
Example #27
Source File: GencodeFuncotationBuilder.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Records the transcript strand on the {@link GencodeFuncotation} being built.
 * {@link Strand#POSITIVE} is stored as {@code "+"} and {@link Strand#NEGATIVE} as {@code "-"};
 * any other {@link Strand} value is rejected with a {@link GATKException.ShouldNeverReachHereException}.
 * @param strand The reference {@link Strand} for the {@link GencodeFuncotation}.
 * @return {@code this} {@link GencodeFuncotationBuilder}
 */
public GencodeFuncotationBuilder setStrand( final Strand strand ) {
    // Translate the enum into its one-character symbol first, then store it in one place.
    final String strandSymbol;
    switch(strand) {
        case POSITIVE:
            strandSymbol = "+";
            break;
        case NEGATIVE:
            strandSymbol = "-";
            break;
        default:
            throw new GATKException.ShouldNeverReachHereException("An invalid Strand type was specified.  Strand must be POSITIVE or NEGATIVE.");
    }

    gencodeFuncotation.setTranscriptStrand( strandSymbol );
    return this;
}
 
Example #28
Source File: SegmentExonUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Chooses the overlap-direction label for one end of a segment.
 * @param strand The {@link Strand} being considered.
 * @param isSegmentStart {@code true} when the segment start is being described, {@code false} otherwise.
 * @return {@code AND_BELOW_STR} when exactly one of "is the segment start" and "strand is
 *         {@link Strand#POSITIVE}" holds; {@code AND_ABOVE_STR} when both or neither hold.
 */
private static String determineSegmentOverlapDirection(final Strand strand, final boolean isSegmentStart) {
    final boolean isPositiveStrand = (strand == Strand.POSITIVE);
    // The two flags disagreeing is equivalent to their exclusive-or being true.
    return (isSegmentStart != isPositiveStrand) ? AND_BELOW_STR : AND_ABOVE_STR;
}
 
Example #29
Source File: DataProviderForExampleGencodeGtfGene.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Creates a UTR feature of {@code length3Utr} bases anchored on one edge of the given {@code exon}.
 * For {@link Strand#FORWARD} the UTR ends at the exon's genomic end location; otherwise it begins at the
 * exon's genomic start location.
 * @param featureOrderNum Counter supplying (and then advancing) the feature order number of the new feature.
 * @param contig Contig on which the feature is placed.
 * @param length3Utr Length of the UTR in bases.
 * @param geneName Gene name recorded on the feature.
 * @param exon The {@link GencodeGtfExonFeature} supplying the location, exon number and exon ID.
 * @param codingDirection The {@link Strand} of the feature; selects which exon edge the UTR is anchored to.
 * @return The new {@link GencodeGtfUTRFeature}.
 */
private static GencodeGtfUTRFeature create3pUtr(final AtomicInteger featureOrderNum, final String contig,
                                                final int length3Utr, final String geneName,
                                                final GencodeGtfExonFeature exon, final Strand codingDirection) {
    final int utrStart;
    final int utrEnd;
    if (codingDirection == Strand.FORWARD) {
        // Anchor on the exon's end and extend backwards by the UTR length.
        utrEnd = exon.getGenomicEndLocation();
        utrStart = utrEnd - length3Utr + 1;
    } else {
        // Anchor on the exon's start and extend forwards by the UTR length.
        utrStart = exon.getGenomicStartLocation();
        utrEnd = utrStart + length3Utr - 1;
    }

    final GencodeGtfFeatureBaseData utrBaseData = new GencodeGtfFeatureBaseData(
            GencodeGtfCodec.GTF_FILE_TYPE_STRING,
            featureOrderNum.getAndIncrement(),
            contig,
            GencodeGtfFeature.ANNOTATION_SOURCE_ENSEMBL,
            GencodeGtfFeature.FeatureType.UTR,
            utrStart,
            utrEnd,
            codingDirection,
            GencodeGtfFeature.GenomicPhase.DOT,
            "TEST_GENE1",
            "TEST_TRANSCRIPT1",
            GencodeGtfFeature.GeneTranscriptType.PROTEIN_CODING,
            null,
            geneName,
            GencodeGtfFeature.GeneTranscriptType.PROTEIN_CODING,
            null,
            "TEST_TRANSCRIPT1",
            exon.getExonNumber(),
            exon.getExonId(),
            GencodeGtfFeature.LocusLevel.AUTOMATICALLY_ANNOTATED,
            Collections.emptyList(),
            null
    );

    return (GencodeGtfUTRFeature) GencodeGtfFeature.create(utrBaseData);
}
 
Example #30
Source File: GencodeFuncotationFactory.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Creates a {@link GencodeFuncotation} for the given alternate {@link Allele} with classification
 * {@link GencodeFuncotation.VariantClassification#IGR}.
 * The reference context is requested for the {@link Strand#POSITIVE} strand.
 * @param variant The {@link VariantContext} associated with this annotation.
 * @param altAllele The alternate {@link Allele} to use for this {@link GencodeFuncotation}.
 * @param reference The {@link ReferenceContext} in which the given {@link Allele}s appear.
 * @return An IGR funcotation for the given allele.
 */
private GencodeFuncotation createIgrFuncotation(final VariantContext variant,
                                                final Allele altAllele,
                                                final ReferenceContext reference){

    final GencodeFuncotationBuilder igrBuilder = new GencodeFuncotationBuilder();
    final Allele refAllele = variant.getReference();

    // GC content:
    igrBuilder.setGcContent( calculateGcContent( refAllele, altAllele, reference, gcContentWindowSizeBases ) );

    // Report the allele's bases when it has any; otherwise fall back to its string representation:
    final String altBases = altAllele.getBaseString();
    final String tumorSeqAllele2;
    if ( altBases.isEmpty() ) {
        tumorSeqAllele2 = altAllele.toString();
    }
    else {
        tumorSeqAllele2 = altBases;
    }

    igrBuilder.setVariantClassification( GencodeFuncotation.VariantClassification.IGR )
              .setRefAllele( refAllele )
              .setTumorSeqAllele2( tumorSeqAllele2 )
              .setStart(variant.getStart())
              .setEnd(variant.getEnd())
              .setVariantType(getVariantType(refAllele, altAllele))
              .setChromosome(variant.getContig())
              .setAnnotationTranscript(FuncotationMap.NO_TRANSCRIPT_AVAILABLE_KEY);

    // No genome change string is produced for symbolic alleles or for the spanning deletion allele:
    final boolean skipGenomeChange = altAllele.isSymbolic() || altAllele.equals(Allele.SPAN_DEL);
    if ( !skipGenomeChange ) {
        igrBuilder.setGenomeChange(getGenomeChangeString(variant, altAllele));
    }

    igrBuilder.setNcbiBuild(ncbiBuildVersion);

    // Set the reference context in the funcotation builder:
    final String referenceContext =
            FuncotatorUtils.createReferenceSnippet(refAllele, altAllele, reference, Strand.POSITIVE, referenceWindow).getBaseString();
    igrBuilder.setReferenceContext(referenceContext);

    // Version and data source name of this factory:
    igrBuilder.setVersion(version);
    igrBuilder.setDataSourceName(getName());

    return igrBuilder.build();
}