/*
 * Copyright (C) 2014 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package com.google.cloud.genomics.dataflow.utils;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;

import com.google.api.services.genomics.model.Annotation;
import com.google.api.services.genomics.model.CodingSequence;
import com.google.api.services.genomics.model.Exon;
import com.google.api.services.genomics.model.Transcript;
import com.google.cloud.genomics.dataflow.utils.AnnotationUtils.VariantEffect;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableList;

import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

import htsjdk.samtools.util.SequenceUtil;

@RunWith(JUnit4.class)
public class AnnotationUtilsTest {
  @Test
  public void testDetermineVariantTranscriptEffect_simpleShort() {
    Annotation transcript = new Annotation()
      .setReferenceName("1")
      .setStart(2L)
      .setEnd(9L)
      .setTranscript(new Transcript()
        .setCodingSequence(new CodingSequence().setStart(2L).setEnd(9L))
        .setExons(ImmutableList.of(
            new Exon().setStart(2L).setEnd(9L).setFrame(0))));

    assertEquals("GATTACA -> GCTTACA, codon is GAT -> GCT, AA is D -> A",
        VariantEffect.NONSYNONYMOUS_SNP,
        AnnotationUtils.determineVariantTranscriptEffect(3L, "C", transcript, "GATTACA"));
    assertEquals("ATGTGAA -> ATGTGGA, codon is TGA -> TGG, AA is STOP -> W",
        VariantEffect.STOP_LOSS,
        AnnotationUtils.determineVariantTranscriptEffect(7L, "G", transcript, "ATGTGAA"));
    assertEquals("CCCAAAT -> CCCTAAT, codon is AAA -> TAA, AA is K -> STOP",
        VariantEffect.STOP_GAIN,
        AnnotationUtils.determineVariantTranscriptEffect(5L, "T", transcript, "CCCAAAT"));
    assertEquals("GATTACA -> GACTACA, codon is GAT -> GAC, AA is D -> D",
        VariantEffect.SYNONYMOUS_SNP,
        AnnotationUtils.determineVariantTranscriptEffect(4L, "C", transcript, "GATTACA"));
    assertNull("variant does not intersect transcript",
        AnnotationUtils.determineVariantTranscriptEffect(123L, "C", transcript, "GATTACA"));
  }

  @Test
  public void testDetermineVariantTranscriptEffect_reverseStrand() {
    Annotation transcript = new Annotation()
      .setReferenceName("1")
      .setStart(2L)
      .setEnd(20L)
      .setReverseStrand(true)
      .setTranscript(new Transcript()
        .setCodingSequence(new CodingSequence().setStart(3L).setEnd(18L))
        .setExons(ImmutableList.of(
            new Exon().setStart(2L).setEnd(7L).setFrame(2),
            new Exon().setStart(10L).setEnd(20L).setFrame(1))
        ));

    String bases = SequenceUtil.reverseComplement(
        // First exon [10, 20)
        "AC" + // 5' UTR
        "ATG" + "ACG" + "GT" +
        // intron
        "CCC" +
        // Second exon [2, 7)
        "G" + "TAG" +
        "G"); // 3' UTR
    assertEquals("ATG -> ACG (reverse complement), AA is M -> T",
        VariantEffect.NONSYNONYMOUS_SNP,
        AnnotationUtils.determineVariantTranscriptEffect(16L, "G", transcript, bases));
    assertEquals("TAG -> CAG (reverse complement), AA is STOP -> Q",
        VariantEffect.STOP_LOSS,
        AnnotationUtils.determineVariantTranscriptEffect(5L, "G", transcript, bases));
    assertNull("mutates intron",
        AnnotationUtils.determineVariantTranscriptEffect(9L, "C", transcript, bases));
    assertNull("mutates 5' UTR",
        AnnotationUtils.determineVariantTranscriptEffect(19L, "C", transcript, bases));
  }

  @Test
  public void testDetermineVariantTranscriptEffect_noncoding() {
    Annotation transcript = new Annotation()
      .setReferenceName("1")
      .setStart(2L)
      .setEnd(9L)
      .setTranscript(new Transcript()
        .setExons(ImmutableList.of(new Exon().setStart(2L).setEnd(9L))));

    assertNull(AnnotationUtils.determineVariantTranscriptEffect(3L, "C", transcript, "GATTACA"));
    assertNull(AnnotationUtils.determineVariantTranscriptEffect(11L, "C", transcript, "GATTACA"));
  }

  @Test
  public void testDetermineVariantTranscriptEffect_frameless() {
    Annotation transcript = new Annotation()
      .setReferenceName("1")
      .setStart(2L)
      .setEnd(9L)
      .setTranscript(new Transcript()
        .setCodingSequence(new CodingSequence().setStart(2L).setEnd(9L))
        .setExons(ImmutableList.of(new Exon().setStart(2L).setEnd(9L))));

    assertNull(AnnotationUtils.determineVariantTranscriptEffect(3L, "C", transcript, "GATTACA"));
    assertNull(AnnotationUtils.determineVariantTranscriptEffect(11L, "C", transcript, "GATTACA"));
  }

  @Test
  public void testDetermineVariantTranscriptEffect_multiExon() {
    String bases = Strings.repeat("ACTTGGGTCA", 60);
    Annotation transcript = new Annotation()
      .setReferenceName("1")
      .setStart(100L)
      .setEnd(700L)
      .setTranscript(new Transcript()
        .setCodingSequence(new CodingSequence().setStart(250L).setEnd(580L))
        .setExons(ImmutableList.of(
            new Exon().setStart(100L).setEnd(180L),
            new Exon().setStart(200L).setEnd(300L).setFrame(2),
            new Exon().setStart(400L).setEnd(500L).setFrame(1),
            new Exon().setStart(550L).setEnd(600L).setFrame(0))
        ));

    assertNull("mutates noncoding exon",
        AnnotationUtils.determineVariantTranscriptEffect(150L, "C", transcript, bases));
    assertNull("mutates noncoding region of coding exon",
        AnnotationUtils.determineVariantTranscriptEffect(240L, "C", transcript, bases));
    assertNull("mutates intron",
        AnnotationUtils.determineVariantTranscriptEffect(350L, "C", transcript, bases));
    assertEquals("mutates first coding base, ACT -> TCT",
        VariantEffect.NONSYNONYMOUS_SNP,
        AnnotationUtils.determineVariantTranscriptEffect(250L, "T", transcript, bases));
    assertEquals("mutates middle exon, TGG -> TCG",
        VariantEffect.NONSYNONYMOUS_SNP,
        AnnotationUtils.determineVariantTranscriptEffect(454L, "C", transcript, bases));
  }
}