package org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.segment;

import com.google.common.collect.Lists;
import htsjdk.samtools.util.IntervalTree;
import htsjdk.samtools.util.Locatable;
import htsjdk.tribble.annotation.Strand;
import org.broadinstitute.hellbender.utils.IntervalUtils;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.codecs.gtf.GencodeGtfExonFeature;
import org.broadinstitute.hellbender.utils.codecs.gtf.GencodeGtfTranscriptFeature;

import java.util.List;

public class SegmentExonUtils {

    private static final String AND_BELOW_STR = "-";
    private static final String AND_ABOVE_STR = "+";

    private SegmentExonUtils(){}

    // This should always be -1
    private final static int NO_EXON_OVERLAP = -1;

    /**
     * @param transcript Never {@code null}
     * @param segment Genomic region to determine which exons are covered in the transcript.  Never {@code null}
     * @return Instance of {@link SegmentExonOverlaps}.  A typical value will be a number appended to a "-" or a "+".
     *   The exon numbers are 0-based.  "-" if it covers upstream exons (considering coding direction -- i.e. previous exons
     *   are higher genomic coordinates on negative strand transcripts) and "+" for downstream exons.
     *   For example:
     *   - "6+" would be sixth exon and above.  This would indicate downstream exons (higher genomic
     *     coordinates on positive coding directions and lower genomic coordinates when negative coding) are covered by
     *     the segment.
     *   - "" indicates that the endpoints of the segment do not cover any transcript exons.  So either the entire
     *      transcript is covered or none of the transcript is covered.
     *
     *   Note that this will return the first exon covered, as long as the breakpoint is in the transcript.  Even if the
     *    breakpoint itself does not cover an exon.
     *    For example, this will yield a non-blank value when a segment breakpoint is in an intron.
     */
    public static SegmentExonOverlaps determineSegmentExonPosition(final GencodeGtfTranscriptFeature transcript, final Locatable segment) {

        Utils.nonNull(transcript);
        Utils.nonNull(segment);

        final String NOT_IN_TRANSCRIPT = "";

        // Internally, this method assumes that the exons are sorted in genomic order.  Which is NOT how these are
        //  stored in the GencodeGtfTranscriptFeature
        final List<GencodeGtfExonFeature> exons = transcript.getGenomicStrand() == Strand.NEGATIVE ?
                Lists.reverse(transcript.getExons()) : transcript.getExons();

        if (exons.size() == 0) {
            return new SegmentExonOverlaps(NOT_IN_TRANSCRIPT, NOT_IN_TRANSCRIPT);
        }

        final SimpleInterval segmentStart = new SimpleInterval(segment.getContig(), segment.getStart(), segment.getStart());
        final SimpleInterval segmentEnd = new SimpleInterval(segment.getContig(), segment.getEnd(), segment.getEnd());

        // Find the proper index for the start.
        // If the start of the segment does not overlap the first exon, but the segment does, then the start index is -1 (no overlap)
        final int inclusiveIndexPositiveDirectionStart = findInclusiveExonIndex(transcript.getGenomicStrand(), exons, segmentStart, true);

        // If the end of the segment does not overlap the last exon, but the segment does, then the end index is -1 (no overlap)
        final int inclusiveIndexPositiveDirectionEnd = findInclusiveExonIndex(transcript.getGenomicStrand(), exons, segmentEnd, false);

        // Construct the final strings
        final String startResult = inclusiveIndexPositiveDirectionStart != NO_EXON_OVERLAP ?
                inclusiveIndexPositiveDirectionStart + determineSegmentOverlapDirection(transcript.getGenomicStrand(), true)
                : NOT_IN_TRANSCRIPT;
        final String endResult = inclusiveIndexPositiveDirectionEnd != NO_EXON_OVERLAP ?
                inclusiveIndexPositiveDirectionEnd + determineSegmentOverlapDirection(transcript.getGenomicStrand(), false)
                : NOT_IN_TRANSCRIPT;

        return new SegmentExonOverlaps(startResult, endResult);
    }

    // exons, transcript, and pointLocation must be on the same contig.  Exons should be input in the coding order
    private static int findInclusiveExonIndex(final Strand codingDirection, final List<GencodeGtfExonFeature> exons,
                                              final SimpleInterval pointLocation, final boolean isStartOfSegment) {
        int result = NO_EXON_OVERLAP;
        if (exons.size() == 0) {
            return result;
        }

        final IntervalTree<GencodeGtfExonFeature> exonFeatureIntervalTree = new IntervalTree<>();
        exons.forEach(e -> exonFeatureIntervalTree.put(e.getGenomicStartLocation(), e.getGenomicEndLocation(), e));

        final IntervalTree.Node<GencodeGtfExonFeature> gencodeGtfExonFeatureNode = exonFeatureIntervalTree.minOverlapper(pointLocation.getStart(), pointLocation.getEnd());
        final GencodeGtfExonFeature exon = gencodeGtfExonFeatureNode != null ? gencodeGtfExonFeatureNode.getValue() : null;

        // We need to figure out if the pointLocation is in an intron
        final boolean isOverlapExonExtents = IntervalUtils.overlaps(pointLocation, new SimpleInterval(pointLocation.getContig(), exons.get(0).getGenomicStartLocation(),
                exons.get(exons.size()-1).getGenomicEndLocation()));

        final boolean isIntron = (exon == null) && isOverlapExonExtents;

        if ((exon == null) && !isIntron) {
            result = NO_EXON_OVERLAP;
        } else if (isIntron) {

            // Correction depending on whether we are looking at an end point or beginning of a segment.
            int increment = (codingDirection == Strand.POSITIVE ?
                    (isStartOfSegment ? 0 : -1) : (isStartOfSegment ? -1 : 0));

            // Build an intron map
            for (int i = 1; i < exons.size(); i ++) {
                final GencodeGtfExonFeature currentExon =  exons.get(i);
                final GencodeGtfExonFeature prevExon =  exons.get(i-1);

                final Locatable intron = new SimpleInterval(currentExon.getChromosomeName(), prevExon.getEnd() + 1, currentExon.getStart() - 1);
                if (IntervalUtils.overlaps(pointLocation, intron)) {
                    result = (codingDirection == Strand.NEGATIVE ? exons.size() - i + increment: i + increment);
                }
            }
        } else {
            result = exon.getExonNumber() - 1;
        }
        return result;
    }

    private static String determineSegmentOverlapDirection(final Strand strand, final boolean isSegmentStart) {
        if (isSegmentStart ^ (strand == Strand.POSITIVE)) {
            return AND_BELOW_STR;
        } else {
            return AND_ABOVE_STR;
        }
    }
}