Java Code Examples for htsjdk.tribble.annotation.Strand#NEGATIVE

The following examples show how to use htsjdk.tribble.annotation.Strand#NEGATIVE . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: GencodeGtfFeature.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Converts the given {@link String} into a {@link Strand}.
 * @param s {@link String} to convert into a {@link Strand}.
 * @return The {@link Strand} corresponding to {@code s}.
 */
private static Strand convertStringToStrand( final String s ) {
    if ( s.equals("+") ) {
        return Strand.POSITIVE;
    }
    else if ( s.equals("-") ) {
        return Strand.NEGATIVE;
    }
    else {
        throw new IllegalArgumentException("Unexpected value: " + s);
    }
}
 
Example 2
Source File: FuncotatorUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@DataProvider
Object[][] provideDataForTestAssertValidStrand_ValidStrands() {
    return new Object[][] {
            { Strand.POSITIVE },
            { Strand.NEGATIVE },
    };
}
 
Example 3
Source File: FuncotatorUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@DataProvider
Object[][] provideDataForTestCreateSpliceSiteCodonChange() {

    return new Object[][] {
            {1000, 5, 1000, 1500, Strand.POSITIVE, 0, "c.e5-0"},
            {1000, 4, 1, 1500, Strand.POSITIVE,    0, "c.e4+500"},
            {1000, 3, 500, 1500, Strand.POSITIVE,  0, "c.e3-500"},

            {1000, 5, 1000, 1500, Strand.NEGATIVE, 0, "c.e5+0"},
            {1000, 4, 1, 1500, Strand.NEGATIVE,    0, "c.e4-500"},
            {1000, 3, 500, 1500, Strand.NEGATIVE,  0, "c.e3+500"},

            {1000, 5, 1500, 500, Strand.NEGATIVE,  0, "c.e5+500"},

            {1000, 5, 1000, 1500, Strand.POSITIVE, 1, "c.e5+1"},
            {1000, 4, 1, 1500, Strand.POSITIVE,    2, "c.e4+502"},
            {1000, 3, 500, 1500, Strand.POSITIVE,  3, "c.e3-497"},

            {1000, 5, 1000, 1500, Strand.NEGATIVE, 4, "c.e5+4"},
            {1000, 4, 1, 1500, Strand.NEGATIVE,    5, "c.e4-495"},
            {1000, 3, 500, 1500, Strand.NEGATIVE,  6, "c.e3+506"},

            {1000, 5, 1500, 500, Strand.NEGATIVE,  7, "c.e5+507"},

            {1000, 5, 1000, 1500, Strand.POSITIVE, -1, "c.e5-1"},
            {1000, 4, 1, 1500, Strand.POSITIVE,    -2, "c.e4+498"},
            {1000, 3, 500, 1500, Strand.POSITIVE,  -3, "c.e3-503"},

            {1000, 5, 1000, 1500, Strand.NEGATIVE, -4, "c.e5-4"},
            {1000, 4, 1, 1500, Strand.NEGATIVE,    -5, "c.e4-505"},
            {1000, 3, 500, 1500, Strand.NEGATIVE,  -6, "c.e3+494"},

            {1000, 5, 1500, 500, Strand.NEGATIVE,  -7, "c.e5+493"},
    };
}
 
Example 4
Source File: FuncotatorUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@DataProvider
Object[][] provideForTestIsIndelBetweenCodons() {

    // Arrays have the following fields:
    // codingSequenceAlleleStart, alignedCodingSequenceAlleleStart, refAllele, strand, expected

    return new Object[][] {
            // + Strand:
            { 4,  4, "A",   Strand.POSITIVE, false },
            { 5,  4, "A",   Strand.POSITIVE, false },
            { 6,  4, "A",   Strand.POSITIVE, true },
            { 4,  4, "AC",  Strand.POSITIVE, false },
            { 5,  4, "AC",  Strand.POSITIVE, true },
            { 6,  4, "AC",  Strand.POSITIVE, false },
            { 4,  4, "ACT", Strand.POSITIVE, true },
            { 5,  4, "ACT", Strand.POSITIVE, false },
            { 6,  4, "ACT", Strand.POSITIVE, false },

            // - Strand:
            { 4,  4, "A",   Strand.NEGATIVE, true },
            { 5,  4, "A",   Strand.NEGATIVE, false },
            { 6,  4, "A",   Strand.NEGATIVE, false },
            { 4,  4, "AC",  Strand.NEGATIVE, true },
            { 5,  4, "AC",  Strand.NEGATIVE, false },
            { 6,  4, "AC",  Strand.NEGATIVE, false },
            { 4,  4, "ACT", Strand.NEGATIVE, true },
            { 5,  4, "ACT", Strand.NEGATIVE, false },
            { 6,  4, "ACT", Strand.NEGATIVE, false },
    };
}
 
Example 5
Source File: FuncotatorUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@DataProvider
    Object[][] provideForTestGetCodingSequenceChangeString() {

//        final int codingSequenceAlleleStart,
//        final String referenceAllele,
//        final String alternateAllele,
//        final Strand strand,
//        final String expected

        // + Strand tests from PIK3CA
        // - Strand tests from MUC16

        // TODO: Add tests for the deletion case requiring exon start/end/allele position.

        return new Object[][] {
                // SNP
                { 1632, "A", "T", Strand.POSITIVE, "c.1632A>T" },
                // ONP (|ONP| > 1)
                { 1633, "CT", "GG", Strand.POSITIVE, "c.1633_1634CT>GG" },
                // Insertion (+ Strand)
                { 1634, "T", "TGG", Strand.POSITIVE, "c.1634_1635insGG" },
                // Insertion (- Strand)
                { 43303, "A", "TCA", Strand.NEGATIVE, "c.43302_43303insTC" },
                // Deletion (+ Strand)
                { 1634, "TGAG", "T", Strand.POSITIVE, "c.1635_1637delGAG" },
                // Deletion (- Strand)
                { 43138, "TCT", "T", Strand.NEGATIVE, "c.43138_43139delTC" },
        };
    }
 
Example 6
Source File: FuncotatorUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@DataProvider
Object[][] provideReferenceAndExonListForGatkExceptions() {

    return new Object[][] {
            {
                    new ReferenceContext(new ReferenceFileSource(TEST_REFERENCE), new SimpleInterval(TEST_REFERENCE_CONTIG, TEST_REFERENCE_START, TEST_REFERENCE_END)),
                    Collections.singletonList(
                            new SimpleInterval("2", TEST_REFERENCE_START + 500, TEST_REFERENCE_START + 550)
                    ),
                    Strand.POSITIVE
            },
            {
                    new ReferenceContext(new ReferenceFileSource(TEST_REFERENCE), new SimpleInterval(TEST_REFERENCE_CONTIG, TEST_REFERENCE_START, TEST_REFERENCE_END)),
                    Collections.singletonList(
                            new SimpleInterval("2", TEST_REFERENCE_START + 500, TEST_REFERENCE_START + 550)
                    ),
                    Strand.NEGATIVE
            },
            {
                    new ReferenceContext(new ReferenceFileSource(TEST_REFERENCE), new SimpleInterval(TEST_REFERENCE_CONTIG, TEST_REFERENCE_START, TEST_REFERENCE_END)),
                    Collections.singletonList(
                            new SimpleInterval("2", TEST_REFERENCE_START + 500, TEST_REFERENCE_START + 550)
                    ),
                    Strand.NONE
            },
    };
}
 
Example 7
Source File: SegmentExonUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * @param transcript Never {@code null}
 * @param segment Genomic region to determine which exons are covered in the transcript.  Never {@code null}
 * @return Instance of {@link SegmentExonOverlaps}.  A typical value will be a number appended to a "-" or a "+".
 *   The exon numbers are 0-based.  "-" if it covers upstream exons (considering coding direction -- i.e. previous exons
 *   are higher genomic coordinates on negative strand transcripts) and "+" for downstream exons.
 *   For example:
 *   - "6+" would be sixth exon and above.  This would indicate downstream exons (higher genomic
 *     coordinates on positive coding directions and lower genomic coordinates when negative coding) are covered by
 *     the segment.
 *   - "" indicates that the endpoints of the segment do not cover any transcript exons.  So either the entire
 *      transcript is covered or none of the transcript is covered.
 *
 *   Note that this will return the first exon covered, as long as the breakpoint is in the transcript.  Even if the
 *    breakpoint itself does not cover an exon.
 *    For example, this will yield a non-blank value when a segment breakpoint is in an intron.
 */
public static SegmentExonOverlaps determineSegmentExonPosition(final GencodeGtfTranscriptFeature transcript, final Locatable segment) {

    Utils.nonNull(transcript);
    Utils.nonNull(segment);

    final String NOT_IN_TRANSCRIPT = "";

    // Internally, this method assumes that the exons are sorted in genomic order.  Which is NOT how these are
    //  stored in the GencodeGtfTranscriptFeature
    final List<GencodeGtfExonFeature> exons = transcript.getGenomicStrand() == Strand.NEGATIVE ?
            Lists.reverse(transcript.getExons()) : transcript.getExons();

    if (exons.size() == 0) {
        return new SegmentExonOverlaps(NOT_IN_TRANSCRIPT, NOT_IN_TRANSCRIPT);
    }

    final SimpleInterval segmentStart = new SimpleInterval(segment.getContig(), segment.getStart(), segment.getStart());
    final SimpleInterval segmentEnd = new SimpleInterval(segment.getContig(), segment.getEnd(), segment.getEnd());

    // Find the proper index for the start.
    // If the start of the segment does not overlap the first exon, but the segment does, then the start index is -1 (no overlap)
    final int inclusiveIndexPositiveDirectionStart = findInclusiveExonIndex(transcript.getGenomicStrand(), exons, segmentStart, true);

    // If the end of the segment does not overlap the last exon, but the segment does, then the end index is -1 (no overlap)
    final int inclusiveIndexPositiveDirectionEnd = findInclusiveExonIndex(transcript.getGenomicStrand(), exons, segmentEnd, false);

    // Construct the final strings
    final String startResult = inclusiveIndexPositiveDirectionStart != NO_EXON_OVERLAP ?
            inclusiveIndexPositiveDirectionStart + determineSegmentOverlapDirection(transcript.getGenomicStrand(), true)
            : NOT_IN_TRANSCRIPT;
    final String endResult = inclusiveIndexPositiveDirectionEnd != NO_EXON_OVERLAP ?
            inclusiveIndexPositiveDirectionEnd + determineSegmentOverlapDirection(transcript.getGenomicStrand(), false)
            : NOT_IN_TRANSCRIPT;

    return new SegmentExonOverlaps(startResult, endResult);
}
 
Example 8
Source File: SegmentExonUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
private static int findInclusiveExonIndex(final Strand codingDirection, final List<GencodeGtfExonFeature> exons,
                                          final SimpleInterval pointLocation, final boolean isStartOfSegment) {
    int result = NO_EXON_OVERLAP;
    if (exons.size() == 0) {
        return result;
    }

    final IntervalTree<GencodeGtfExonFeature> exonFeatureIntervalTree = new IntervalTree<>();
    exons.forEach(e -> exonFeatureIntervalTree.put(e.getGenomicStartLocation(), e.getGenomicEndLocation(), e));

    final IntervalTree.Node<GencodeGtfExonFeature> gencodeGtfExonFeatureNode = exonFeatureIntervalTree.minOverlapper(pointLocation.getStart(), pointLocation.getEnd());
    final GencodeGtfExonFeature exon = gencodeGtfExonFeatureNode != null ? gencodeGtfExonFeatureNode.getValue() : null;

    // We need to figure out if the pointLocation is in an intron
    final boolean isOverlapExonExtents = IntervalUtils.overlaps(pointLocation, new SimpleInterval(pointLocation.getContig(), exons.get(0).getGenomicStartLocation(),
            exons.get(exons.size()-1).getGenomicEndLocation()));

    final boolean isIntron = (exon == null) && isOverlapExonExtents;

    if ((exon == null) && !isIntron) {
        result = NO_EXON_OVERLAP;
    } else if (isIntron) {

        // Correction depending on whether we are looking at an end point or beginning of a segment.
        int increment = (codingDirection == Strand.POSITIVE ?
                (isStartOfSegment ? 0 : -1) : (isStartOfSegment ? -1 : 0));

        // Build an intron map
        for (int i = 1; i < exons.size(); i ++) {
            final GencodeGtfExonFeature currentExon =  exons.get(i);
            final GencodeGtfExonFeature prevExon =  exons.get(i-1);

            final Locatable intron = new SimpleInterval(currentExon.getChromosomeName(), prevExon.getEnd() + 1, currentExon.getStart() - 1);
            if (IntervalUtils.overlaps(pointLocation, intron)) {
                result = (codingDirection == Strand.NEGATIVE ? exons.size() - i + increment: i + increment);
            }
        }
    } else {
        result = exon.getExonNumber() - 1;
    }
    return result;
}
 
Example 9
Source File: FuncotatorUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 * Get the overlapping exon start/stop as a {@link SimpleInterval} for the given altAllele / reference.
 * @param refAllele {@link Allele} for the given {@code altAllele}.  Must not be {@code null}.
 * @param altAllele {@link Allele} to locate on an exon.  Must not be {@code null}.
 * @param contig Contig on which the altAllele occurs.  Must not be {@code null}.
 * @param variantStart Start position (1-based, inclusive) of the given {@code altAllele}.  Must be > 0.
 * @param variantEnd End position (1-based, inclusive) of the given {@code altAllele}.  Must be > 0.
 * @param exonPositionList List of exon start / stop positions to cross-reference with the given {@code altAllele}.  Must not be {@code null}.
 * @param strand The {@link Strand} on which the {@code altAllele} is located.  Must not be {@code null}.  Must not be {@link Strand#NONE}.
 * @return A {@link SimpleInterval} containing the extents of the exon that overlaps the given altAllele.  {@code null} if no overlap occurs.
 */
public static SimpleInterval getOverlappingExonPositions(final Allele refAllele,
                                                         final Allele altAllele,
                                                         final String contig,
                                                         final int variantStart,
                                                         final int variantEnd,
                                                         final Strand strand,
                                                         final List<? extends htsjdk.samtools.util.Locatable> exonPositionList) {
    Utils.nonNull( refAllele );
    Utils.nonNull( altAllele );
    Utils.nonNull( contig );
    Utils.nonNull( exonPositionList );
    assertValidStrand( strand );

    ParamUtils.isPositive( variantStart, "Genome positions must be > 0." );
    ParamUtils.isPositive( variantEnd, "Genome positions must be > 0." );

    // Get the correct start / end positions:
    int alleleStart = variantStart;
    int alleleEnd   = variantEnd;

    if ( refAllele.length() > refAllele.length() ) {
        final int lengthAdjustment = (refAllele.length() - altAllele.length());
        if ( strand == Strand.NEGATIVE ) {
            alleleStart -= lengthAdjustment;
        }
        else {
            alleleEnd   += lengthAdjustment;
        }
    }

    // Create an interval so we can check for overlap:
    final SimpleInterval variantInterval = new SimpleInterval( contig, alleleStart, alleleEnd );

    int exonStart = -1;
    int exonStop  = -1;

    // Check for overlap:
    for ( final Locatable exon : exonPositionList ) {
        if ( variantInterval.overlaps(exon) ) {
            exonStart = exon.getStart();
            exonStop  = exon.getEnd();
        }
    }

    // Since we set the start/stop together we only need to check one of these:
    if ( exonStart == -1 ) {
        return null;
    }
    else {
        return new SimpleInterval( contig, exonStart, exonStop );
    }
}
 
Example 10
Source File: FuncotatorUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
@DataProvider
Object[][] provideDataForGetStartPositionInTranscript() {

    final List<? extends Locatable> exons_forward = Arrays.asList(
            new SimpleInterval("chr1", 10,19),
            new SimpleInterval("chr1", 30,39),
            new SimpleInterval("chr1", 50,59),
            new SimpleInterval("chr1", 70,79),
            new SimpleInterval("chr1", 90,99)
    );

    final List<? extends Locatable> exons_backward = Arrays.asList(
            new SimpleInterval("chr1", 90,99),
            new SimpleInterval("chr1", 70,79),
            new SimpleInterval("chr1", 50,59),
            new SimpleInterval("chr1", 30,39),
            new SimpleInterval("chr1", 10,19)
    );

    // And a spot-check test from real data:
    final List<? extends Locatable> spot_check_exons = Arrays.asList(
            new SimpleInterval("3", 178916614, 178916965),
            new SimpleInterval("3", 178917478, 178917687),
            new SimpleInterval("3", 178919078, 178919328),
            new SimpleInterval("3", 178921332, 178921577),
            new SimpleInterval("3", 178922291, 178922376),
            new SimpleInterval("3", 178927383, 178927488),
            new SimpleInterval("3", 178927974, 178928126),
            new SimpleInterval("3", 178928219, 178928353),
            new SimpleInterval("3", 178935998, 178936122)
    );

    return new Object[][] {
            { new SimpleInterval("chr1", 1, 1),     exons_forward, Strand.POSITIVE, -1 },
            { new SimpleInterval("chr1", 25, 67),   exons_forward, Strand.POSITIVE, -1 },
            { new SimpleInterval("chr1", 105, 392), exons_forward, Strand.POSITIVE, -1 },
            { new SimpleInterval("chr1", 10, 10),   exons_forward, Strand.POSITIVE,  1 },
            { new SimpleInterval("chr1", 99, 99),   exons_forward, Strand.POSITIVE, 50 },
            { new SimpleInterval("chr1", 50, 67),   exons_forward, Strand.POSITIVE, 21 },
            { new SimpleInterval("chr1", 67, 75),   exons_forward, Strand.POSITIVE, -1 },
            { new SimpleInterval("chr1", 91, 97),   exons_forward, Strand.POSITIVE, 42 },

            { new SimpleInterval("chr1", 1, 1),     exons_backward, Strand.NEGATIVE, -1 },
            { new SimpleInterval("chr1", 25, 67),   exons_backward, Strand.NEGATIVE, -1 },
            { new SimpleInterval("chr1", 105, 392), exons_backward, Strand.NEGATIVE, -1 },
            { new SimpleInterval("chr1", 10, 10),   exons_backward, Strand.NEGATIVE, 50 },
            { new SimpleInterval("chr1", 99, 99),   exons_backward, Strand.NEGATIVE,  1 },
            { new SimpleInterval("chr1", 50, 67),   exons_backward, Strand.NEGATIVE, -1 },
            { new SimpleInterval("chr1", 67, 75),   exons_backward, Strand.NEGATIVE, 15 },

            // Spot check:
            { new SimpleInterval("3", 178936090, 178936096), spot_check_exons, Strand.POSITIVE, 1632 }
    };
}
 
Example 11
Source File: FuncotatorUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
@DataProvider
    Object[][] provideDataForGetCodonChangeString()  {

//        final String refAllele,
//        final String altAllele,
//        final int alleleStart,
//        final int codingSequenceAlleleStart,
//        final int alignedCodingSequenceAlleleStart,
//        final String alignedCodingSeqRefAllele,
//        final String alignedCodingSeqAltAllele,
//        final alignedAlternateAllele,
//        final int alignedRefAlleleStop,
//        final String contig
//        final Strand strand
//        final Locatable startCodon,
//        final ReferenceSequence codingSequence,
//        final String expected

        // PIK3CA is on chr3 and is transcribed in the FORWARD direction:
        final ReferenceDataSource pik3caTranscriptDataSource = ReferenceDataSource.of(new File(FuncotatorTestConstants.GENCODE_DATA_SOURCE_FASTA_PATH_HG19).toPath());
        final String              pik3caFullTranscriptName   = pik3caTranscriptDataSource.getSequenceDictionary().getSequences().stream().filter(s -> s.getSequenceName().startsWith(FuncotatorTestConstants.PIK3CA_TRANSCRIPT)).map(SAMSequenceRecord::getSequenceName).collect(Collectors.joining());
        final ReferenceSequence   pik3caCodingSequence       = pik3caTranscriptDataSource.queryAndPrefetch(pik3caFullTranscriptName, 158, 3364);

        // MUC16 is on chr19 and is transcribed in the REVERSE direction:
        // (The magic numbers here are the coding region for this transscript for MUC16)
        final ReferenceDataSource muc16TranscriptDataSource = ReferenceDataSource.of(new File(FuncotatorTestConstants.GENCODE_DATA_SOURCE_FASTA_PATH_HG19).toPath());
        final String              muc16FullTranscriptName   = muc16TranscriptDataSource.getSequenceDictionary().getSequences().stream().filter(s -> s.getSequenceName().startsWith(FuncotatorTestConstants.MUC16_TRANSCRIPT)).map(SAMSequenceRecord::getSequenceName).collect(Collectors.joining());
        final ReferenceSequence   muc16CodingSequence    = muc16TranscriptDataSource.queryAndPrefetch(muc16FullTranscriptName, 205, 43728);

        final SimpleInterval pik3caStart = new SimpleInterval("chr3", 178916614, 178916616);
        final SimpleInterval muc16Start = new SimpleInterval("chr19", 9091812, 9091814);

        return new Object[][] {
                // ONP
                //     alignedCodingSequenceAlleleStart == alignedReferenceAlleleStop   *
                //     alignedCodingSequenceAlleleStart != alignedReferenceAlleleStop
                { "G", "T", 178921515, 997, 997, "GCA", "TCA", "TCA", 999, pik3caStart.getContig(), Strand.POSITIVE, pik3caCodingSequence, pik3caStart, "c.(997-999)Gca>Tca" },
                { "C", "T", 178921516, 998, 997, "GCA", "GTA", "GTA", 999, pik3caStart.getContig(), Strand.POSITIVE, pik3caCodingSequence, pik3caStart, "c.(997-999)gCa>gTa" },
                { "A", "T", 178921517, 999, 997, "GCA", "GCT", "GCT", 999, pik3caStart.getContig(), Strand.POSITIVE, pik3caCodingSequence, pik3caStart, "c.(997-999)gcA>gcT" },
                // INDEL
                //     StartCodonIndel + strand
                { "A", "AT", 178916614, 1, 1, "ATG", "ATTG", "ATTG", 3, pik3caStart.getContig(), Strand.POSITIVE, pik3caCodingSequence, pik3caStart, "" },
                //     FrameShift                                                       *
                { "G", "GC", 178921515, 997, 997, "GCA", "GCCA", "GCCA", 999, pik3caStart.getContig(), Strand.POSITIVE, pik3caCodingSequence, pik3caStart, "c.(997-999)gcafs" },
                //     In-Frame Insertion
                //          StartCodon - strand
                { "A", "TGTA", 9091814, 1, 1, "ATG", "TGTATG", "TGTATG", 3, muc16Start.getContig(), Strand.NEGATIVE, muc16CodingSequence, muc16Start, "c.(1-3)atg>TGTatg" },
                //              + Strand
                { "A", "AGCG", 178921515, 999, 997, "GCA", "GCAGCG", "GCAGCG", 999, pik3caStart.getContig(), Strand.POSITIVE, pik3caCodingSequence, pik3caStart, "c.(1000-1002)ctc>GCGctc" },
                //              - Strand
                { "T", "TTCG", 9091794, 21, 19, "CTT", "CTCGTT", "CTCGTT", 21, muc16Start.getContig(), Strand.NEGATIVE, muc16CodingSequence, muc16Start, "c.(19-21)ctt>ctCGTt" },
                //          Within a codon:
                //              + Strand
                { "G", "GCAG", 178921515, 997, 997, "GCA", "GCAGCA", "GCAGCA", 999, pik3caStart.getContig(), Strand.POSITIVE, pik3caCodingSequence, pik3caStart, "c.(997-999)gca>gCAGca" },
                //              - Strand
                { "C", "CG", 9091793, 22, 22, "CCT", "CGCT", "CGCT", 24, muc16Start.getContig(), Strand.NEGATIVE, muc16CodingSequence, muc16Start, "c.(22-24)cctfs" },
                //     In-Frame Deletion
                //          Between Codons
                //              + Strand
                { "TGCA", "T", 178921514, 996, 994, "AGTGCA", "AGT", "AGT", 996, pik3caStart.getContig(), Strand.POSITIVE, pik3caCodingSequence, pik3caStart, "c.(997-999)gcadel" },
                //              - Strand
                { "TCCT", "T", 9091794, 21, 19, "CTTCCT", "CTT", "CTT", 21, muc16Start.getContig(), Strand.NEGATIVE, muc16CodingSequence, muc16Start, "c.(19-24)cttcct>ctt" },
                //          Within a codon
                { "GCAC", "G", 178921515, 997, 997, "GCACTC", "GTC", "GTC", 999, pik3caStart.getContig(), Strand.POSITIVE, pik3caCodingSequence, pik3caStart, "c.(997-1002)gcactc>gtc" },
        };
    }
 
Example 12
Source File: FuncotatorUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 * Get the aligned coding sequence for the given reference allele.
 * NOTE: alignedRefAlleleStart must be less than or equal to codingSequenceRefAlleleStart.
 * @param referenceSnippet {@link StrandCorrectedReferenceBases} containing a short excerpt of the reference sequence around the given reference allele.  Must not be {@code null}.
 * @param referencePadding Number of bases in {@code referenceSnippet} before the reference allele starts.  This padding exists at the end as well (plus some other bases to account for the length of the alternate allele if it is longer than the reference).  Must be >= 0.
 * @param refAllele The reference {@link Allele}.  Must not be {@code null}.
 * @param altAllele The alternate {@link Allele}.  Must not be {@code null}.
 * @param codingSequenceRefAlleleStart The position (1-based, inclusive) in the coding sequence where the {@code refAllele} starts.
 * @param alignedRefAlleleStart The in-frame position (1-based, inclusive) of the first base of the codon containing the reference allele.
 * @param strand The {@link Strand} on which the variant occurs.  Must not be {@code null}.  Must not be {@link Strand#NONE}.
 * @param variantGenomicPositionForLogging The {@link Locatable} representing the position of the variant in genomic coordinates.  Used for logging purposes only.
 * @return A {@link String} of in-frame codons that contain the entire reference allele.
 */
public static String getAlignedRefAllele(final StrandCorrectedReferenceBases referenceSnippet,
                                         final int referencePadding,
                                         final Allele refAllele,
                                         final Allele altAllele,
                                         final int codingSequenceRefAlleleStart,
                                         final int alignedRefAlleleStart,
                                         final Strand strand,
                                         final Locatable variantGenomicPositionForLogging) {

    Utils.nonNull(referenceSnippet);
    Utils.nonNull(refAllele);
    Utils.nonNull(altAllele);
    ParamUtils.isPositiveOrZero( referencePadding, "Padding must be >= 0." );
    Utils.nonNull(variantGenomicPositionForLogging);
    assertValidStrand(strand);
    Utils.validate( alignedRefAlleleStart <= codingSequenceRefAlleleStart, "The alignedRefAlleleStart must be less than or equal to codingSequenceRefAlleleStart!" );

    // Get the length of the variant.
    // Since our referenceSnippet is <referencePadding> bases on either side of the variant
    // we need to know how long the variant is (so we can adjust for - strand variants)
    final int referenceLength = refAllele.length();

    // Compute the number of bases that we must prepend to this one to get to the start of a codon:
    final int extraBasesNeededForCodonAlignment = (codingSequenceRefAlleleStart - alignedRefAlleleStart);

    // We must add bases to the start to adjust for indels because of the required preceding base in VCF format:
    final int indelAdjustment = GATKVariantContextUtils.isIndel(refAllele, altAllele) ? 1 : 0;

    // We need to adjust coordinates for deletions on the - strand in the reference snippet:
    final int negativeDeletionAdjustment = (altAllele.length() < refAllele.length()) && (strand == Strand.NEGATIVE) ? 1 : 0;

    // Get the index in our reference snippet of the aligned codon start:
    int refStartAlignedIndex;
    if ( strand == Strand.NEGATIVE ) {
        // Variants on the - strand have to be handled as a special case because of how
        // the referenceSnippet is constructed:
        refStartAlignedIndex = referencePadding - extraBasesNeededForCodonAlignment;
    }
    else {
        refStartAlignedIndex = referencePadding - extraBasesNeededForCodonAlignment - indelAdjustment;
    }

    // TODO: This should probably be an error condition:
    if ( refStartAlignedIndex < 0 ) {
        refStartAlignedIndex = 0;
    }

    // Round to the nearest multiple of AminoAcid.CODON_LENGTH to get the end position.
    // Note this is the position of the first base NOT in the REF allele
    // (because it's used for substring coordinates).
    int refEndPosExclusive = refStartAlignedIndex + (int)(Math.ceil((extraBasesNeededForCodonAlignment + refAllele.length()) / ((double)AminoAcid.CODON_LENGTH)) * AminoAcid.CODON_LENGTH);

    // Create the aligned reference:
    String alignedReferenceAllele = referenceSnippet.getBaseString().substring(refStartAlignedIndex, refEndPosExclusive);

    final String computedReferenceAlleleWithSubstitution = alignedReferenceAllele.substring(extraBasesNeededForCodonAlignment, extraBasesNeededForCodonAlignment + refAllele.length());

    // Make sure our reference is what we expect it to be with the ref allele:
    if ( !computedReferenceAlleleWithSubstitution.equals(refAllele.getBaseString()) ) {
        // Oh noes!
        // Ref allele is different from reference sequence!

        // Oh well, we should use the reference we were given anyways...
        final String substitutedReferenceSnippet = getAlternateSequence(referenceSnippet, referencePadding + 1, refAllele, refAllele, strand);
        refEndPosExclusive = refStartAlignedIndex + (int)(Math.ceil((extraBasesNeededForCodonAlignment + refAllele.length()) / ((double)AminoAcid.CODON_LENGTH)) * AminoAcid.CODON_LENGTH);

        final String substitutedAlignedAlleleSeq = substitutedReferenceSnippet.substring(refStartAlignedIndex, refEndPosExclusive);

        // Warn the user!
        final String positionString = '[' + variantGenomicPositionForLogging.getContig() + ":" + variantGenomicPositionForLogging.getStart() + ']';
        logger.warn("Reference allele is different than the reference coding sequence (strand: " + strand + ", alt = " + altAllele.getBaseString() + ", ref " + refAllele.getBaseString() + " != " + computedReferenceAlleleWithSubstitution + " reference coding seq) @" + positionString + "!  Substituting given allele for sequence code (" + alignedReferenceAllele + "->" + substitutedAlignedAlleleSeq + ")");

        // Set up our return value:
        alignedReferenceAllele = substitutedAlignedAlleleSeq;
    }

    return alignedReferenceAllele;
}
 
Example 13
Source File: FuncotatorUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 * Get the coding sequence change string from the given variant information.
 * Also known as cDNA string.
 * This method is assumed to be called only when the variant occurs in a coding region of the genome.
 * @param codingSequenceAlleleStart The start position (1-based, inclusive) of the ref allele in the coding sequence.
 * @param refAllele A {@link String} containing the bases of the reference allele.  MUST BE Reverse Complemented if strand == {@link Strand#NEGATIVE}
 * @param altAllele A {@link String} containing the bases of the alternate allele.  MUST BE Reverse Complemented if strand == {@link Strand#NEGATIVE}
 * @param strand The {@link Strand} on which the gene containing the variant lies.  Must not be {@code null}.  Must not be {@link Strand#NONE}.
 * @param exonStartPosition An {@link Integer} containing the start position (1-based, inclusive) of the variant in its exon.  May be {@code null}.
 * @param exonEndPosition  An {@link Integer} containing the end position (1-based, inclusive) of the variant in its exon.  May be {@code null}.
 * @param alleleStart The start position (1-based, inclusive) of the variant in genomic coordinates (relative to the start of the variant's contig).
 * @return A {@link String} representing the coding sequence change between the given ref and alt alleles.
 */
public static String getCodingSequenceChangeString( final int codingSequenceAlleleStart,
                                                    final String refAllele,
                                                    final String altAllele,
                                                    final Strand strand,
                                                    final Integer exonStartPosition,
                                                    final Integer exonEndPosition,
                                                    final Integer alleleStart) {
    Utils.nonNull(refAllele);
    Utils.nonNull(altAllele);
    assertValidStrand(strand);

    // Check for ONP:
    if ( GATKVariantContextUtils.isXnp(refAllele, altAllele) ) {
        if (altAllele.length() > 1) {
            return String.format(CDNA_CHANGE_FORMAT_STRING,
                    codingSequenceAlleleStart + "_" + (codingSequenceAlleleStart + refAllele.length() - 1),
                    refAllele,
                    ">",
                    altAllele);
        } else {
            return String.format(CDNA_CHANGE_FORMAT_STRING, codingSequenceAlleleStart, refAllele, ">", altAllele);
        }
    }
    // Check for Insertion:
    else if ( GATKVariantContextUtils.isInsertion(refAllele, altAllele) ) {
        // Must account for strandedness when dealing with insertions:
        // NOTE: Purposefully removing last base in allele to get rid of padding base.
        if ( strand == Strand.NEGATIVE ) {
            return String.format(CDNA_CHANGE_FORMAT_STRING, (codingSequenceAlleleStart-1) + "_" + codingSequenceAlleleStart, "", "ins", altAllele.substring(0,altAllele.length()-1).toUpperCase());
        }
        else {
            return String.format(CDNA_CHANGE_FORMAT_STRING, codingSequenceAlleleStart + "_" + (codingSequenceAlleleStart + 1), "", "ins", altAllele.substring(refAllele.length()).toUpperCase());
        }
    }
    // Must be a Deletion:
    else {
        int start = codingSequenceAlleleStart + altAllele.length();
        int end = codingSequenceAlleleStart + refAllele.length() - 1;

        String deletedBases = refAllele.substring(altAllele.length()).toUpperCase();

        // Must account for strandedness when dealing with deletions:
        if ( strand == Strand.NEGATIVE ) {

            // Because we're backwards stranded we need to account for being on the other side of the reference
            // base, so we shift everything by 1:
            --start;
            --end;

            // We need to adjust our deleted bases here.
            // We know that the last base in the allele is the required preceding reference base.
            deletedBases = refAllele.substring(0, refAllele.length() - 1).toUpperCase();
        }

        // If we have exon information, and we SHOULD, we use it to trim the start/stop coordinates
        // of the cDNA string to the extants of the coding region:
        if ( (exonStartPosition != null) && (exonEndPosition != null) ) {
            final int cdsExonStart = codingSequenceAlleleStart - (alleleStart - exonStartPosition);
            final int cdsExonEnd   = cdsExonStart + (exonEndPosition - exonStartPosition);

            if ( start < cdsExonStart ) {
                start = cdsExonStart;
            }
            if ( end > cdsExonEnd ) {
                end = cdsExonEnd;
            }
        }

        // Only add end extent if we have to:
        final String endBoundString = start == end ? "" : "_" + end;

        return String.format(CDNA_CHANGE_FORMAT_STRING, start + endBoundString, "", "del", deletedBases);
    }
}
 
Example 14
Source File: FuncotatorUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
@DataProvider
    Object[][] provideDataForGetAlignedCodingSequenceAllele() {

        final String seq = "ATGAAAGGGGTGCCTATGCTAGATAGACAGATAGTGTGTGTGTGTGTGCGCGCGCGCGCGCGTTGTTAG";

        //CTA ACA ACG CGC GCG CGC GCG CAC ACA CAC ACA CAC TAT CTG TCT ATC TAG CAT AGG CAC CCC TTT CAT

//        final Allele refAllele,
//        final Integer refAlleleStart,

        return new Object[][] {
                { seq,  1, 3,  Allele.create("ATG", true), 1, Strand.POSITIVE, "ATG" },
                { seq,  4, 6,  Allele.create("AAA", true), 4, Strand.POSITIVE, "AAA" },
                { seq,  7, 9,  Allele.create("GGG", true), 7, Strand.POSITIVE, "GGG" },
                { seq, 10, 12, Allele.create("GTG", true), 10, Strand.POSITIVE, "GTG" },
                { seq, 13, 15, Allele.create("CCT", true), 13, Strand.POSITIVE, "CCT" },
                { seq, 16, 18, Allele.create("ATG", true), 16, Strand.POSITIVE, "ATG" },
                { seq, 19, 21, Allele.create("CTA", true), 19, Strand.POSITIVE, "CTA" },
                { seq,  1,  6, Allele.create("ATGAAA", true), 1, Strand.POSITIVE, "ATGAAA" },
                { seq,  4,  9, Allele.create("AAAGGG", true), 4, Strand.POSITIVE, "AAAGGG" },
                { seq,  7, 12, Allele.create("GGGGTG", true), 7, Strand.POSITIVE, "GGGGTG" },
                { seq, 10, 15, Allele.create("GTGCCT", true), 10, Strand.POSITIVE, "GTGCCT" },
                { seq, 13, 18, Allele.create("CCTATG", true), 13, Strand.POSITIVE, "CCTATG" },
                { seq, 16, 21, Allele.create("ATGCTA", true), 16, Strand.POSITIVE, "ATGCTA" },
                { seq, 19, 24, Allele.create("CTAGAT", true), 19, Strand.POSITIVE, "CTAGAT" },
                { seq, 1, seq.length(), Allele.create(seq, true), 1, Strand.POSITIVE, seq },

                { seq,  1, 3,  Allele.create("CTA", true), 1, Strand.NEGATIVE, "CTA" },
                { seq,  4, 6,  Allele.create("ACA", true), 4, Strand.NEGATIVE, "ACA" },
                { seq,  7, 9,  Allele.create("ACG", true), 7, Strand.NEGATIVE, "ACG" },
                { seq, 10, 12, Allele.create("CGC", true), 10, Strand.NEGATIVE, "CGC" },
                { seq, 13, 15, Allele.create("GCG", true), 13, Strand.NEGATIVE, "GCG" },
                { seq, 16, 18, Allele.create("CGC", true), 16, Strand.NEGATIVE, "CGC" },
                { seq, 19, 21, Allele.create("GCG", true), 19, Strand.NEGATIVE, "GCG" },
                { seq,  1,  6, Allele.create("CTAACA", true), 1, Strand.NEGATIVE, "CTAACA" },
                { seq,  4,  9, Allele.create("ACAACG", true), 4, Strand.NEGATIVE, "ACAACG" },
                { seq,  7, 12, Allele.create("ACGCGC", true), 7, Strand.NEGATIVE, "ACGCGC" },
                { seq, 10, 15, Allele.create("CGCGCG", true), 10, Strand.NEGATIVE, "CGCGCG" },
                { seq, 13, 18, Allele.create("GCGCGC", true), 13, Strand.NEGATIVE, "GCGCGC" },
                { seq, 16, 21, Allele.create("CGCGCG", true), 16, Strand.NEGATIVE, "CGCGCG" },
                { seq, 19, 24, Allele.create("GCGCAC", true), 19, Strand.NEGATIVE, "GCGCAC" },
                { seq, 1, seq.length(), Allele.create(ReadUtils.getBasesReverseComplement( seq.getBytes() ), true), 1, Strand.NEGATIVE, ReadUtils.getBasesReverseComplement( seq.getBytes() ) },
        };
    }
 
Example 15
Source File: FuncotatorUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 * Gets a codon change string for a splice site.
 * Assumes the variant and exon referenced in the params are on the same contig.
 * @param variantStart Start position (1-based, inclusive) of the variant.  Must be > 0.
 * @param exonNumber Number (1-based) of the exon in the transcript.  Must be > 0.
 * @param exonStart Start position (1-based, inclusive) of the exon.  Must be > 0.
 * @param exonEnd End position (1-based, inclusive) of the exon.  Must be > 0.
 * @param strand The {@link Strand} on which the variant and exon are read.  Must not be {@code null}.  Must not be {@link Strand#NONE}.
 * @param offsetIndelAdjustment An adjustment added to account for bases lost / gained in an Indel event.
 * @return A {@link String} representing the codon change for the splice site represented by the given parameters.
 */
public static String createSpliceSiteCodonChange(final int variantStart,
                                                 final int exonNumber,
                                                 final int exonStart,
                                                 final int exonEnd,
                                                 final Strand strand,
                                                 final int offsetIndelAdjustment) {

    ParamUtils.isPositive(variantStart, "Genomic positions must be > 0.");
    ParamUtils.isPositive(exonNumber, "Exon number must be > 0.");
    ParamUtils.isPositive(exonStart, "Genomic positions must be > 0.");
    ParamUtils.isPositive(exonEnd, "Genomic positions must be > 0.");
    assertValidStrand( strand );

    char sign = '-';
    int offset = exonStart - variantStart;
    if ( Math.abs(offset) > Math.abs(variantStart - exonEnd)) {
        offset = variantStart - exonEnd;
        sign = '+';
    }
    offset = Math.abs(offset);

    if (strand == Strand.NEGATIVE) {
        if ( sign == '+' ) {
            sign = '-';
        }
        else {
            sign = '+';
        }
    }

    // Add our indel adjustment here:
    if ( sign == '+' ) {
        offset += offsetIndelAdjustment;
    }
    else {
        offset -= offsetIndelAdjustment;
    }

    // Make sure we correctly adjust for the zero crossing:
    if ( offset < 0 ) {
        offset *= -1;
        if ( sign == '+') {
            sign = '-';
        }
        else {
            sign = '+';
        }
    }

    return "c.e" + exonNumber + sign + offset;
}
 
Example 16
Source File: BedToIntervalList.java    From picard with MIT License 4 votes vote down vote up
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsReadable(SEQUENCE_DICTIONARY);
    IOUtil.assertFileIsWritable(OUTPUT);
    try {
        // create a new header that we will assign the dictionary provided by the SAMSequenceDictionaryExtractor to.
        final SAMFileHeader header = new SAMFileHeader();
        final SAMSequenceDictionary samSequenceDictionary = SAMSequenceDictionaryExtractor.extractDictionary(SEQUENCE_DICTIONARY.toPath());
        header.setSequenceDictionary(samSequenceDictionary);
        // set the sort order to be sorted by coordinate, which is actually done below
        // by getting the .uniqued() intervals list before we write out the file
        header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
        final IntervalList intervalList = new IntervalList(header);

        final FeatureReader<BEDFeature> bedReader = AbstractFeatureReader.getFeatureReader(INPUT.getAbsolutePath(), new BEDCodec(), false);
        final CloseableTribbleIterator<BEDFeature> iterator = bedReader.iterator();
        final ProgressLogger progressLogger = new ProgressLogger(LOG, (int) 1e6);

        while (iterator.hasNext()) {
            final BEDFeature bedFeature = iterator.next();
            final String sequenceName = bedFeature.getContig();
            final int start = bedFeature.getStart();
            final int end = bedFeature.getEnd();
            // NB: do not use an empty name within an interval
            final String name;
            if (bedFeature.getName().isEmpty()) {
                name = null;
            } else {
                name = bedFeature.getName();
            }

            final SAMSequenceRecord sequenceRecord = header.getSequenceDictionary().getSequence(sequenceName);

            // Do some validation
            if (null == sequenceRecord) {
                if (DROP_MISSING_CONTIGS) {
                    LOG.info(String.format("Dropping interval with missing contig: %s:%d-%d", sequenceName, bedFeature.getStart(), bedFeature.getEnd()));
                    missingIntervals++;
                    missingRegion += bedFeature.getEnd() - bedFeature.getStart();
                    continue;
                }
                throw new PicardException(String.format("Sequence '%s' was not found in the sequence dictionary", sequenceName));
            } else if (start < 1) {
                throw new PicardException(String.format("Start on sequence '%s' was less than one: %d", sequenceName, start));
            } else if (sequenceRecord.getSequenceLength() < start) {
                throw new PicardException(String.format("Start on sequence '%s' was past the end: %d < %d", sequenceName, sequenceRecord.getSequenceLength(), start));
            } else if ((end == 0 && start != 1 ) //special case for 0-length interval at the start of a contig
                    || end < 0 ) {
                throw new PicardException(String.format("End on sequence '%s' was less than one: %d", sequenceName, end));
            } else if (sequenceRecord.getSequenceLength() < end) {
                throw new PicardException(String.format("End on sequence '%s' was past the end: %d < %d", sequenceName, sequenceRecord.getSequenceLength(), end));
            } else if (end < start - 1) {
                throw new PicardException(String.format("On sequence '%s', end < start-1: %d <= %d", sequenceName, end, start));
            }

            final boolean isNegativeStrand = bedFeature.getStrand() == Strand.NEGATIVE;
            final Interval interval = new Interval(sequenceName, start, end, isNegativeStrand, name);
            intervalList.add(interval);

            progressLogger.record(sequenceName, start);
        }
        CloserUtil.close(bedReader);

        if (DROP_MISSING_CONTIGS) {
            if (missingRegion == 0) {
                LOG.info("There were no missing regions.");
            } else {
                LOG.warn(String.format("There were %d missing regions with a total of %d bases", missingIntervals, missingRegion));
            }
        }
        // Sort and write the output
        IntervalList out = intervalList;
        if (SORT) {
            out = out.sorted();
        }
        if (UNIQUE) {
            out = out.uniqued();
        }
        out.write(OUTPUT);
        LOG.info(String.format("Wrote %d intervals spanning a total of %d bases", out.getIntervals().size(),out.getBaseCount()));

    } catch (final IOException e) {
        throw new RuntimeException(e);
    }

    return 0;
}
 
Example 17
Source File: MapBlocks.java    From varsim with BSD 2-Clause "Simplified" License 4 votes vote down vote up
public Collection<ReadMapBlock> liftOverGenomeInterval(final GenomeInterval interval, final int minIntervalLength) {
    final Collection<ReadMapBlock> readMapBlocks = new ArrayList<>();

    final ChrString chromosome = interval.chromosome;
    final int start = interval.start + 1; //convert to 1-based start
    final int end = interval.end; //1-based end

    if (!blockIntervalTree.containsKey(chromosome)) {
        return readMapBlocks;
    }

    final List<MapBlock> subset = findIntersectingBlocks(blockIntervalTree, chromosome, start, end);

    log.trace("Going to lift over " + interval);

    // since intervalOffset is only used for MapBlock, which is 1-based, we need to set the beginning offset to 1
    int intervalOffset = 1;
    for (MapBlock b : subset) {

            int srcStart = Math.max(start, b.srcLoc.location);
            int srcEnd = Math.min(end, b.srcLoc.location + b.size - 1);
            int lengthOfInterval = srcEnd - srcStart + 1;
            final int lengthOfIntervalOnRead = b.blockType != MapBlock.BlockType.DEL ? lengthOfInterval : 0;
            final int lengthOfIntervalOnRef = b.blockType != MapBlock.BlockType.INS ? lengthOfInterval : 0;

            log.trace("intervalStart = " + srcStart + " intervalEnd = " + srcEnd + " lengthOfInterval = " + lengthOfInterval);

            // lengthOfIntervalOnRef is length of the lifted over interval
            if (lengthOfIntervalOnRef < minIntervalLength) {
                log.trace("Skipping block " + b + " since the overlap is too small ( < " + minIntervalLength + ")");
            } else {
                // 0-based start, 1-based end
                final GenomeInterval liftedInterval = new GenomeInterval();
                liftedInterval.chromosome = b.dstLoc.chromosome;
                liftedInterval.feature = b.blockType;
                if (b.direction == 0) {
                    liftedInterval.start = b.dstLoc.location + srcStart - b.srcLoc.location - 1;
                    liftedInterval.end = liftedInterval.start + lengthOfIntervalOnRef;
                    liftedInterval.strand = interval.strand;
                } else {
                    liftedInterval.start = b.dstLoc.location + (b.srcLoc.location + b.size - 1 - srcEnd) - 1;
                    liftedInterval.end = liftedInterval.start + lengthOfIntervalOnRef;
                    liftedInterval.strand = interval.strand == Strand.POSITIVE ? Strand.NEGATIVE : Strand.POSITIVE;
                }

                readMapBlocks.add(new ReadMapBlock(intervalOffset, intervalOffset + lengthOfIntervalOnRead - 1, liftedInterval));
            }
            intervalOffset += lengthOfIntervalOnRead;
    }
    return readMapBlocks;
}
 
Example 18
Source File: DataProviderForExampleGencodeGtfGene.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
private static GencodeGtfExonFeature createStartCodonExon(final int exonStart, final String contig, final int lengthExons,
                                                          final AtomicInteger featureOrderNum, final String geneName,
                                                          final int exonNum, final int length5pUtr,
                                                          final Strand codingDirection) {
    final GencodeGtfFeatureBaseData tmpExon = new GencodeGtfFeatureBaseData(GencodeGtfCodec.GTF_FILE_TYPE_STRING, featureOrderNum.getAndIncrement(), contig, GencodeGtfFeature.ANNOTATION_SOURCE_ENSEMBL, GencodeGtfFeature.FeatureType.EXON,
            exonStart, exonStart + length5pUtr + lengthExons - 1, codingDirection, GencodeGtfFeature.GenomicPhase.DOT, "TEST_GENE1", "TEST_TRANSCRIPT1", GencodeGtfFeature.GeneTranscriptType.PROTEIN_CODING,
            null, geneName, GencodeGtfFeature.GeneTranscriptType.PROTEIN_CODING, null, "TEST_TRANSCRIPT1", exonNum, "TEST_EXON_" + exonNum, GencodeGtfFeature.LocusLevel.AUTOMATICALLY_ANNOTATED,
            Collections.emptyList(),
            null
    );
    final GencodeGtfExonFeature exon = (GencodeGtfExonFeature) GencodeGtfFeature.create(tmpExon);

    if (codingDirection == Strand.POSITIVE) {
        final GencodeGtfFeatureBaseData tmpStartCodon = new GencodeGtfFeatureBaseData(GencodeGtfCodec.GTF_FILE_TYPE_STRING, featureOrderNum.getAndIncrement(), contig, GencodeGtfFeature.ANNOTATION_SOURCE_ENSEMBL, GencodeGtfFeature.FeatureType.START_CODON,
                exon.getGenomicStartLocation() + length5pUtr - 1, exon.getGenomicStartLocation() + length5pUtr + 1, codingDirection, GencodeGtfFeature.GenomicPhase.DOT, "TEST_GENE1", "TEST_TRANSCRIPT1", GencodeGtfFeature.GeneTranscriptType.PROTEIN_CODING,
                null, geneName, GencodeGtfFeature.GeneTranscriptType.PROTEIN_CODING, null, "TEST_TRANSCRIPT1", 1, "TEST_EXON1", GencodeGtfFeature.LocusLevel.AUTOMATICALLY_ANNOTATED,
                Collections.emptyList(),
                null
        );
        final GencodeGtfStartCodonFeature startCodon1 = (GencodeGtfStartCodonFeature) GencodeGtfFeature.create(tmpStartCodon);

        final GencodeGtfFeatureBaseData tmpCdsMinusStart = new GencodeGtfFeatureBaseData(GencodeGtfCodec.GTF_FILE_TYPE_STRING, featureOrderNum.getAndIncrement(), contig, GencodeGtfFeature.ANNOTATION_SOURCE_ENSEMBL, GencodeGtfFeature.FeatureType.CDS,
                startCodon1.getGenomicEndLocation() + 1, exon.getGenomicEndLocation(), codingDirection, GencodeGtfFeature.GenomicPhase.DOT, "TEST_GENE1", "TEST_TRANSCRIPT1", GencodeGtfFeature.GeneTranscriptType.PROTEIN_CODING,
                null, geneName, GencodeGtfFeature.GeneTranscriptType.PROTEIN_CODING, null, "TEST_TRANSCRIPT1", exon.getExonNumber(), exon.getExonId(), GencodeGtfFeature.LocusLevel.AUTOMATICALLY_ANNOTATED,
                Collections.emptyList(),
                null
        );
        final GencodeGtfCDSFeature cds1 = (GencodeGtfCDSFeature) GencodeGtfFeature.create(tmpCdsMinusStart);
        exon.setStartCodon(startCodon1);
        exon.setCds(cds1);
    }

    // negative strand, then have CDS appear before start codon (note that UTR is handled elsewhere.
    if (codingDirection == Strand.NEGATIVE) {
        final GencodeGtfFeatureBaseData tmpStartCodon2 = new GencodeGtfFeatureBaseData(GencodeGtfCodec.GTF_FILE_TYPE_STRING, featureOrderNum.getAndIncrement(), contig, GencodeGtfFeature.ANNOTATION_SOURCE_ENSEMBL, GencodeGtfFeature.FeatureType.START_CODON,
                exon.getGenomicEndLocation() - length5pUtr - 2, exon.getGenomicStartLocation() + length5pUtr, codingDirection, GencodeGtfFeature.GenomicPhase.DOT, "TEST_GENE1", "TEST_TRANSCRIPT1", GencodeGtfFeature.GeneTranscriptType.PROTEIN_CODING,
                null, geneName, GencodeGtfFeature.GeneTranscriptType.PROTEIN_CODING, null, "TEST_TRANSCRIPT1", 1, "TEST_EXON1", GencodeGtfFeature.LocusLevel.AUTOMATICALLY_ANNOTATED,
                Collections.emptyList(),
                null
        );
        final GencodeGtfStartCodonFeature startCodon2 = (GencodeGtfStartCodonFeature) GencodeGtfFeature.create(tmpStartCodon2);

        final GencodeGtfFeatureBaseData tmpCdsMinusStart2 = new GencodeGtfFeatureBaseData(GencodeGtfCodec.GTF_FILE_TYPE_STRING, featureOrderNum.getAndIncrement(), contig, GencodeGtfFeature.ANNOTATION_SOURCE_ENSEMBL, GencodeGtfFeature.FeatureType.CDS,
                exon.getGenomicStartLocation(), startCodon2.getGenomicStartLocation() - 1, codingDirection, GencodeGtfFeature.GenomicPhase.DOT, "TEST_GENE1", "TEST_TRANSCRIPT1", GencodeGtfFeature.GeneTranscriptType.PROTEIN_CODING,
                null, geneName, GencodeGtfFeature.GeneTranscriptType.PROTEIN_CODING, null, "TEST_TRANSCRIPT1", exon.getExonNumber(), exon.getExonId(), GencodeGtfFeature.LocusLevel.AUTOMATICALLY_ANNOTATED,
                Collections.emptyList(),
                null
        );
        final GencodeGtfCDSFeature cds2 = (GencodeGtfCDSFeature) GencodeGtfFeature.create(tmpCdsMinusStart2);
        exon.setStartCodon(startCodon2);
        exon.setCds(cds2);
    }

    return exon;
}
 
Example 19
Source File: GencodeFuncotationFactory.java    From gatk with BSD 3-Clause "New" or "Revised" License 2 votes vote down vote up
/**
 * Determines whether the provided variant is in the 3' flanking region of the provided transcript. A variant
 * is in the 3' flanking region if it overlaps any of the threePrimeFlankSize bases after the 3' end of the
 * transcript, but does not overlap any part of the transcript itself.
 *
 * @param variant variant to check
 * @param transcript transcript whose flanking regions to consider
 * @param threePrimeFlankSize size in bases of the 3' flanking region
 * @return true if the variant overlaps the 3' flanking region and does not overlap the transcript itself,
 *         otherwise false
 */
@VisibleForTesting
static boolean isThreePrimeFlank(final VariantContext variant, final GencodeGtfTranscriptFeature transcript, final int threePrimeFlankSize) {
    return (transcript.getGenomicStrand() == Strand.POSITIVE && isInTranscriptRightFlank(variant, transcript, threePrimeFlankSize)) ||
           (transcript.getGenomicStrand() == Strand.NEGATIVE && isInTranscriptLeftFlank(variant, transcript, threePrimeFlankSize));
}
 
Example 20
Source File: GencodeFuncotationFactory.java    From gatk with BSD 3-Clause "New" or "Revised" License 2 votes vote down vote up
/**
 * Determines whether the provided variant is in the 5' flanking region of the provided transcript. A variant
 * is in the 5' flanking region if it overlaps any of the fivePrimeFlankSize bases before the 5' end of the
 * transcript, but does not overlap any part of the transcript itself.
 *
 * @param variant variant to check
 * @param transcript transcript whose flanking regions to consider
 * @param fivePrimeFlankSize size in bases of the 5' flanking region
 * @return true if the variant overlaps the 5' flanking region and does not overlap the transcript itself,
 *         otherwise false
 */
@VisibleForTesting
static boolean isFivePrimeFlank(final VariantContext variant, final GencodeGtfTranscriptFeature transcript, final int fivePrimeFlankSize) {
    return (transcript.getGenomicStrand() == Strand.POSITIVE && isInTranscriptLeftFlank(variant, transcript, fivePrimeFlankSize)) ||
           (transcript.getGenomicStrand() == Strand.NEGATIVE && isInTranscriptRightFlank(variant, transcript, fivePrimeFlankSize));
}