/* * Copyright (C) 2014 Google Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package com.google.cloud.genomics.dataflow.utils; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; import com.google.api.services.genomics.model.Annotation; import com.google.api.services.genomics.model.CodingSequence; import com.google.api.services.genomics.model.Exon; import com.google.api.services.genomics.model.Transcript; import com.google.cloud.genomics.dataflow.utils.AnnotationUtils.VariantEffect; import com.google.common.base.Strings; import com.google.common.collect.ImmutableList; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; import htsjdk.samtools.util.SequenceUtil; @RunWith(JUnit4.class) public class AnnotationUtilsTest { @Test public void testDetermineVariantTranscriptEffect_simpleShort() { Annotation transcript = new Annotation() .setReferenceName("1") .setStart(2L) .setEnd(9L) .setTranscript(new Transcript() .setCodingSequence(new CodingSequence().setStart(2L).setEnd(9L)) .setExons(ImmutableList.of( new Exon().setStart(2L).setEnd(9L).setFrame(0)))); assertEquals("GATTACA -> GCTTACA, codon is GAT -> GCT, AA is D -> A", VariantEffect.NONSYNONYMOUS_SNP, AnnotationUtils.determineVariantTranscriptEffect(3L, "C", transcript, "GATTACA")); assertEquals("ATGTGAA -> ATGTGGA, codon is TGA -> TGG, AA is STOP -> W", VariantEffect.STOP_LOSS, AnnotationUtils.determineVariantTranscriptEffect(7L, "G", transcript, "ATGTGAA")); assertEquals("CCCAAAT -> CCCTAAT, codon is AAA -> TAA, AA is K -> STOP", VariantEffect.STOP_GAIN, AnnotationUtils.determineVariantTranscriptEffect(5L, "T", transcript, "CCCAAAT")); assertEquals("GATTACA -> GACTACA, codon is GAT -> GAC, AA is D -> D", VariantEffect.SYNONYMOUS_SNP, AnnotationUtils.determineVariantTranscriptEffect(4L, "C", transcript, "GATTACA")); assertNull("variant does not intersect transcript", AnnotationUtils.determineVariantTranscriptEffect(123L, "C", transcript, "GATTACA")); } @Test public void testDetermineVariantTranscriptEffect_reverseStrand() { Annotation transcript = new Annotation() .setReferenceName("1") .setStart(2L) .setEnd(20L) .setReverseStrand(true) .setTranscript(new Transcript() .setCodingSequence(new CodingSequence().setStart(3L).setEnd(18L)) .setExons(ImmutableList.of( new Exon().setStart(2L).setEnd(7L).setFrame(2), new Exon().setStart(10L).setEnd(20L).setFrame(1)) )); String bases = SequenceUtil.reverseComplement( // First exon [10, 20) "AC" + // 5' UTR "ATG" + "ACG" + "GT" + // intron "CCC" + // Second exon [2, 7) "G" + "TAG" + "G"); // 3' UTR assertEquals("ATG -> ACG (reverse complement), AA is M -> T", VariantEffect.NONSYNONYMOUS_SNP, AnnotationUtils.determineVariantTranscriptEffect(16L, "G", transcript, bases)); assertEquals("TAG -> CAG (reverse complement), AA is STOP -> Q", VariantEffect.STOP_LOSS, AnnotationUtils.determineVariantTranscriptEffect(5L, "G", transcript, bases)); assertNull("mutates intron", AnnotationUtils.determineVariantTranscriptEffect(9L, "C", transcript, bases)); assertNull("mutates 5' UTR", AnnotationUtils.determineVariantTranscriptEffect(19L, "C", transcript, bases)); } @Test public void testDetermineVariantTranscriptEffect_noncoding() { Annotation transcript = new Annotation() .setReferenceName("1") .setStart(2L) .setEnd(9L) .setTranscript(new Transcript() .setExons(ImmutableList.of(new Exon().setStart(2L).setEnd(9L)))); assertNull(AnnotationUtils.determineVariantTranscriptEffect(3L, "C", transcript, "GATTACA")); assertNull(AnnotationUtils.determineVariantTranscriptEffect(11L, "C", transcript, "GATTACA")); } @Test public void testDetermineVariantTranscriptEffect_frameless() { Annotation transcript = new Annotation() .setReferenceName("1") .setStart(2L) .setEnd(9L) .setTranscript(new Transcript() .setCodingSequence(new CodingSequence().setStart(2L).setEnd(9L)) .setExons(ImmutableList.of(new Exon().setStart(2L).setEnd(9L)))); assertNull(AnnotationUtils.determineVariantTranscriptEffect(3L, "C", transcript, "GATTACA")); assertNull(AnnotationUtils.determineVariantTranscriptEffect(11L, "C", transcript, "GATTACA")); } @Test public void testDetermineVariantTranscriptEffect_multiExon() { String bases = Strings.repeat("ACTTGGGTCA", 60); Annotation transcript = new Annotation() .setReferenceName("1") .setStart(100L) .setEnd(700L) .setTranscript(new Transcript() .setCodingSequence(new CodingSequence().setStart(250L).setEnd(580L)) .setExons(ImmutableList.of( new Exon().setStart(100L).setEnd(180L), new Exon().setStart(200L).setEnd(300L).setFrame(2), new Exon().setStart(400L).setEnd(500L).setFrame(1), new Exon().setStart(550L).setEnd(600L).setFrame(0)) )); assertNull("mutates noncoding exon", AnnotationUtils.determineVariantTranscriptEffect(150L, "C", transcript, bases)); assertNull("mutates noncoding region of coding exon", AnnotationUtils.determineVariantTranscriptEffect(240L, "C", transcript, bases)); assertNull("mutates intron", AnnotationUtils.determineVariantTranscriptEffect(350L, "C", transcript, bases)); assertEquals("mutates first coding base, ACT -> TCT", VariantEffect.NONSYNONYMOUS_SNP, AnnotationUtils.determineVariantTranscriptEffect(250L, "T", transcript, bases)); assertEquals("mutates middle exon, TGG -> TCG", VariantEffect.NONSYNONYMOUS_SNP, AnnotationUtils.determineVariantTranscriptEffect(454L, "C", transcript, bases)); } }