Java Code Examples for htsjdk.samtools.SAMRecord#getContig()

The following examples show how to use htsjdk.samtools.SAMRecord#getContig(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TagReadWithGeneExonFunctionTest.java    From Drop-seq with MIT License 6 votes vote down vote up
/**
 * One intergenic read (no other gene models overlapping).
 * The fake read is built with coordinates 200-210 while the only gene spans 1-100,
 * so no gene name (GE) or gene strand (GS) tag is expected and XF must be INTERGENIC.
 */
@Test
public void testIntergeicRead () {
	SAMRecord r = getFakeRecord(testBAMFile, 200, 210, false);
	// gene with 2 exons, 1 coding from 1-10, one UTR from 91-100.
	GeneFromGTF gene = new GeneFromGTF(r.getContig(), 1, 100, true, "A", "coding", "A", "coding", 1);
	final GeneFromGTF.TranscriptFromGTF tx = gene.addTranscript("trans1", 1, 100, 1, 90, 2, "trans1", "trans1", "coding");
	tx.addExon(1, 10);
	tx.addExon(91, 100);
	OverlapDetector<Gene> geneOverlapDetector = new OverlapDetector<>(0, 0);
	geneOverlapDetector.addLhs(gene, gene);
	TagReadWithGeneExonFunction tagger = new TagReadWithGeneExonFunction();
	r=tagger.setAnnotations(r, geneOverlapDetector);

	// Absent tags are checked with assertNull rather than by comparing against a null literal.
	Assert.assertNull(r.getStringAttribute("GE"));
	Assert.assertNull(r.getStringAttribute("GS"));
	Assert.assertEquals(r.getStringAttribute("XF"), LocusFunction.INTERGENIC.name());
}
 
Example 2
Source File: TagReadWithGeneExonFunctionTest.java    From Drop-seq with MIT License 6 votes vote down vote up
/**
 * One UTR read on the wrong strand (no other gene models overlapping).
 * The assertions expect no gene name (GE) or gene strand (GS) tag and an INTERGENIC function.
 */
@Test
public void testUTRReadWrongStrand () {
	final SAMRecord read = getFakeRecord(testBAMFile, 91, 95, true);

	// Positive strand gene with 2 exons: 1 coding from 1-10, one UTR from 91-100.
	final GeneFromGTF geneA = new GeneFromGTF(read.getContig(), 1, 100, false, "A", "coding", "A", "coding", 1);
	final GeneFromGTF.TranscriptFromGTF transcript = geneA.addTranscript("trans1", 1, 100, 1, 90, 2, "trans1", "trans1", "coding");
	transcript.addExon(1, 10);
	transcript.addExon(91, 100);

	final OverlapDetector<Gene> detector = new OverlapDetector<>(0, 0);
	detector.addLhs(geneA, geneA);

	final SAMRecord tagged = new TagReadWithGeneExonFunction().setAnnotations(read, detector);

	final String geneTag = tagged.getStringAttribute("GE");
	final String strandTag = tagged.getStringAttribute("GS");
	final String functionTag = tagged.getStringAttribute("XF");

	Assert.assertNull(geneTag);
	Assert.assertNull(strandTag);
	Assert.assertEquals(functionTag, LocusFunction.INTERGENIC.name());
}
 
Example 3
Source File: TagReadWithGeneFunctionTest.java    From Drop-seq with MIT License 6 votes vote down vote up
/**
 * One intergenic read (no other gene models overlapping).
 * The fake read is built with coordinates 200-210 while the only gene spans 1-100,
 * so the gn, gs and gf tags must all be absent and XF must be INTERGENIC.
 */
@Test
public void testIntergeicRead () {
	SAMRecord r = getFakeRecord(testBAMFile, 200, 210, false);
	// gene with 2 exons, 1 coding from 1-10, one UTR from 91-100.
	GeneFromGTF gene = new GeneFromGTF(r.getContig(), 1, 100, true, "A", "coding", "A", "coding", 1);
	final GeneFromGTF.TranscriptFromGTF tx = gene.addTranscript("trans1", 1, 100, 1, 90, 2, "trans1", "trans1", "coding");
	tx.addExon(1, 10);
	tx.addExon(91, 100);
	OverlapDetector<Gene> geneOverlapDetector = new OverlapDetector<>(0, 0);
	geneOverlapDetector.addLhs(gene, gene);
	TagReadWithGeneFunction tagger = new TagReadWithGeneFunction();
	r=tagger.setAnnotations(r, geneOverlapDetector, false);

	// Absent tags are checked with assertNull rather than by comparing against a null literal.
	Assert.assertNull(r.getStringAttribute("gn"));
	Assert.assertNull(r.getStringAttribute("gs"));
	Assert.assertNull(r.getStringAttribute("gf"));
	Assert.assertEquals(r.getStringAttribute("XF"), LocusFunction.INTERGENIC.name());
}
 
Example 4
Source File: TagReadWithGeneFunctionTest.java    From Drop-seq with MIT License 6 votes vote down vote up
/**
 * 2) One intronic read (no other gene models overlapping).
 * The read (50-75) falls between the gene's two exons, so both gf and XF are expected to be INTRONIC.
 */
@Test
public void testIntronicRead () {
	final SAMRecord read = getFakeRecord(testBAMFile, 50, 75, false);

	// Positive strand gene with 2 exons: 1 coding from 1-10, one UTR from 91-100.
	final GeneFromGTF geneA = new GeneFromGTF(read.getContig(), 1, 100, false, "A", "coding", "A", "coding", 1);
	final GeneFromGTF.TranscriptFromGTF transcript = geneA.addTranscript("trans1", 1, 100, 1, 90, 2, "trans1", "trans1", "coding");
	transcript.addExon(1, 10);
	transcript.addExon(91, 100);

	final OverlapDetector<Gene> detector = new OverlapDetector<>(0, 0);
	detector.addLhs(geneA, geneA);

	final SAMRecord tagged = new TagReadWithGeneFunction().setAnnotations(read, detector, false);

	final String intronic = LocusFunction.INTRONIC.name();
	Assert.assertEquals(tagged.getStringAttribute("gn"), geneA.getName());
	Assert.assertEquals(tagged.getStringAttribute("gs"), "+");
	Assert.assertEquals(tagged.getStringAttribute("gf"), intronic);
	Assert.assertEquals(tagged.getStringAttribute("XF"), intronic);
}
 
Example 5
Source File: TagReadWithGeneFunctionTest.java    From Drop-seq with MIT License 6 votes vote down vote up
/**
 * One UTR read (correct strand, no other gene models overlapping).
 * Both the per-gene function tag (gf) and the summary tag (XF) are expected to be UTR.
 */
@Test
public void testUTRRead () {
	final SAMRecord read = getFakeRecord(testBAMFile, 91, 95, false);

	// Positive strand gene with 2 exons: 1 coding from 1-10, one UTR from 91-100.
	final GeneFromGTF geneA = new GeneFromGTF(read.getContig(), 1, 100, false, "A", "coding", "A", "coding", 1);
	final GeneFromGTF.TranscriptFromGTF transcript = geneA.addTranscript("trans1", 1, 100, 1, 90, 2, "trans1", "trans1", "coding");
	transcript.addExon(1, 10);
	transcript.addExon(91, 100);

	final OverlapDetector<Gene> detector = new OverlapDetector<>(0, 0);
	detector.addLhs(geneA, geneA);

	final SAMRecord tagged = new TagReadWithGeneFunction().setAnnotations(read, detector, false);

	final String utr = LocusFunction.UTR.name();
	Assert.assertEquals(tagged.getStringAttribute("gn"), geneA.getName());
	Assert.assertEquals(tagged.getStringAttribute("gs"), "+");
	Assert.assertEquals(tagged.getStringAttribute("gf"), utr);
	Assert.assertEquals(tagged.getStringAttribute("XF"), utr);
}
 
Example 6
Source File: TagReadWithGeneFunctionTest.java    From Drop-seq with MIT License 6 votes vote down vote up
/**
 * One UTR read (wrong strand, no other gene models overlapping).
 * The gene is still reported in gn/gs/gf (as UTR), but the summary XF tag is expected to be INTERGENIC.
 */
@Test
public void testUTRReadWrongStrand () {
	final SAMRecord read = getFakeRecord(testBAMFile, 91, 95, true);

	// Positive strand gene with 2 exons: 1 coding from 1-10, one UTR from 91-100.
	final GeneFromGTF geneA = new GeneFromGTF(read.getContig(), 1, 100, false, "A", "coding", "A", "coding", 1);
	final GeneFromGTF.TranscriptFromGTF transcript = geneA.addTranscript("trans1", 1, 100, 1, 90, 2, "trans1", "trans1", "coding");
	transcript.addExon(1, 10);
	transcript.addExon(91, 100);

	final OverlapDetector<Gene> detector = new OverlapDetector<>(0, 0);
	detector.addLhs(geneA, geneA);

	final SAMRecord tagged = new TagReadWithGeneFunction().setAnnotations(read, detector, false);

	Assert.assertEquals(tagged.getStringAttribute("gn"), geneA.getName());
	Assert.assertEquals(tagged.getStringAttribute("gs"), "+");
	Assert.assertEquals(tagged.getStringAttribute("gf"), LocusFunction.UTR.name());
	Assert.assertEquals(tagged.getStringAttribute("XF"), LocusFunction.INTERGENIC.name());
}
 
Example 7
Source File: TagReadWithGeneFunctionTest.java    From Drop-seq with MIT License 6 votes vote down vote up
/**
 * One CODING read (correct strand, no other gene models overlapping).
 * The read (2-8) sits inside the 1-10 coding exon, so gf and XF are both expected to be CODING.
 */
@Test
public void testCodingRead () {
	final SAMRecord read = getFakeRecord(testBAMFile, 2, 8, false);

	// Positive strand gene with 2 exons: 1 coding from 1-10, one UTR from 91-100.
	final GeneFromGTF geneA = new GeneFromGTF(read.getContig(), 1, 100, false, "A", "coding", "A", "coding", 1);
	final GeneFromGTF.TranscriptFromGTF transcript = geneA.addTranscript("trans1", 1, 100, 1, 90, 2, "trans1", "trans1", "coding");
	transcript.addExon(1, 10);
	transcript.addExon(91, 100);

	final OverlapDetector<Gene> detector = new OverlapDetector<>(0, 0);
	detector.addLhs(geneA, geneA);

	final SAMRecord tagged = new TagReadWithGeneFunction().setAnnotations(read, detector, false);

	final String coding = LocusFunction.CODING.name();
	Assert.assertEquals(tagged.getStringAttribute("gn"), geneA.getName());
	Assert.assertEquals(tagged.getStringAttribute("gs"), "+");
	Assert.assertEquals(tagged.getStringAttribute("gf"), coding);
	Assert.assertEquals(tagged.getStringAttribute("XF"), coding);
}
 
Example 8
Source File: TagReadWithGeneFunctionTest.java    From Drop-seq with MIT License 6 votes vote down vote up
/**
 * One CODING read (wrong strand, no other gene models overlapping).
 * The gene is still reported in gn/gs/gf (as CODING), but the summary XF tag is expected to be INTERGENIC.
 */
@Test
public void testCodingReadWrongStrand () {
	SAMRecord r = getFakeRecord(testBAMFile, 2, 8, true);
	// gene with 2 exons, 1 coding from 1-10, one UTR from 91-100.  Positive strand gene.
	GeneFromGTF gene = new GeneFromGTF(r.getContig(), 1, 100, false, "A", "coding", "A", "coding", 1);
	final GeneFromGTF.TranscriptFromGTF tx = gene.addTranscript("trans1", 1, 100, 1, 90, 2, "trans1", "trans1", "coding");
	tx.addExon(1, 10);
	tx.addExon(91, 100);
	OverlapDetector<Gene> geneOverlapDetector = new OverlapDetector<>(0, 0);
	geneOverlapDetector.addLhs(gene, gene);
	TagReadWithGeneFunction tagger = new TagReadWithGeneFunction();
	r=tagger.setAnnotations(r, geneOverlapDetector, false);
	Assert.assertEquals(r.getStringAttribute("gn"), gene.getName());
	Assert.assertEquals(r.getStringAttribute("gs"), "+");
	Assert.assertEquals(r.getStringAttribute("gf"), LocusFunction.CODING.name());
	Assert.assertEquals(r.getStringAttribute("XF"), LocusFunction.INTERGENIC.name());
}
 
Example 9
Source File: TagReadWithGeneExonFunctionTest.java    From Drop-seq with MIT License 6 votes vote down vote up
/**
 * 2) One intronic read (no other gene models overlapping).
 * The assertions expect no gene name (GE) or gene strand (GS) tag, and an XF value of INTRONIC.
 */
@Test
public void testIntronicRead () {
	final SAMRecord read = getFakeRecord(testBAMFile, 50, 75, false);

	// Positive strand gene with 2 exons: 1 coding from 1-10, one UTR from 91-100.
	final GeneFromGTF geneA = new GeneFromGTF(read.getContig(), 1, 100, false, "A", "coding", "A", "coding", 1);
	final GeneFromGTF.TranscriptFromGTF transcript = geneA.addTranscript("trans1", 1, 100, 1, 90, 2, "trans1", "trans1", "coding");
	transcript.addExon(1, 10);
	transcript.addExon(91, 100);

	final OverlapDetector<Gene> detector = new OverlapDetector<>(0, 0);
	detector.addLhs(geneA, geneA);

	final SAMRecord tagged = new TagReadWithGeneExonFunction().setAnnotations(read, detector);

	final String geTag = tagged.getStringAttribute("GE");
	final String gsTag = tagged.getStringAttribute("GS");
	final String xfTag = tagged.getStringAttribute("XF");

	Assert.assertNull(geTag);
	Assert.assertNull(gsTag);
	Assert.assertEquals(xfTag, LocusFunction.INTRONIC.name());
}
 
Example 10
Source File: TagReadWithGeneExonFunctionTest.java    From Drop-seq with MIT License 6 votes vote down vote up
/**
 * A read with 2 alignment blocks that both overlap a single gene, both in an exon.
 * The read is tagged with that gene on the "+" strand and an XF value of CODING.
 */
@Test(enabled=true)
public void testSplitReadExonExonSameGene () {
	SAMRecord r = getFakeSplitRecord(testBAMFile, 91, 100, 151,160, false);

	// gene spanning 1-200 with 3 exons: 1-10, 91-100 and 150-160.  Positive strand gene.
	GeneFromGTF gene = new GeneFromGTF(r.getContig(), 1, 200, false, "A", "coding", "A", "coding", 1);
	final GeneFromGTF.TranscriptFromGTF tx = gene.addTranscript("trans1", 1, 200, 1, 200, 3, "trans1", "trans1", "coding");
	tx.addExon(1, 10);
	tx.addExon(91, 100);
	tx.addExon(150, 160);
	OverlapDetector<Gene> geneOverlapDetector = new OverlapDetector<>(0, 0);
	geneOverlapDetector.addLhs(gene, gene);

	TagReadWithGeneExonFunction tagger = new TagReadWithGeneExonFunction();

	r=tagger.setAnnotations(r, geneOverlapDetector);
	Assert.assertEquals(r.getStringAttribute("GE"), gene.getName());
	Assert.assertEquals(r.getStringAttribute("GS"), "+");
	Assert.assertEquals(r.getStringAttribute("XF"), LocusFunction.CODING.name());
}
 
Example 11
Source File: TagReadWithGeneFunctionTest.java    From Drop-seq with MIT License 6 votes vote down vote up
/**
 * A read (50-60) lying between the exons of a gene on the opposite strand.
 * The gene is reported in gn/gs/gf as "-" / INTRONIC, while the summary XF tag is expected to be INTERGENIC.
 */
@Test
public void testIntergenicCorrectIntronicWrong () {
	final SAMRecord read = getFakeRecord(testBAMFile, 50, 60, false);

	// gene with 2 exons, 1 coding from 1-10, one UTR from 91-100.
	final GeneFromGTF geneA = new GeneFromGTF(read.getContig(), 1, 100, true, "A", "coding", "A", "coding", 1);
	final GeneFromGTF.TranscriptFromGTF transcript = geneA.addTranscript("trans1", 1, 100, 1, 90, 2, "trans1", "trans1", "coding");
	transcript.addExon(1, 10);
	transcript.addExon(91, 100);

	final OverlapDetector<Gene> detector = new OverlapDetector<>(0, 0);
	detector.addLhs(geneA, geneA);

	final SAMRecord tagged = new TagReadWithGeneFunction().setAnnotations(read, detector, false);

	final String geneNames = tagged.getStringAttribute("gn");
	final String geneStrands = tagged.getStringAttribute("gs");
	final String geneFunctions = tagged.getStringAttribute("gf");

	Assert.assertEquals(geneNames, geneA.getName());
	Assert.assertEquals(geneStrands, "-");
	Assert.assertEquals(geneFunctions, LocusFunction.INTRONIC.name());
	Assert.assertEquals(tagged.getStringAttribute("XF"), LocusFunction.INTERGENIC.name());
}
 
Example 12
Source File: TagReadWithGeneFunctionTest.java    From Drop-seq with MIT License 6 votes vote down vote up
/**
 * A read with 2 alignment blocks that both overlap a single gene, once in an intron and once an exon.
 * The gene is listed once per function in gn/gs/gf (CODING, then INTRONIC), and XF is expected to be CODING.
 */
@Test(enabled=true)
public void testSplitReadExonicIntronicSameGene () {
	SAMRecord r = getFakeSplitRecord(testBAMFile, 91, 100, 131,140, false);

	// gene spanning 1-200 with 3 exons: 1-10, 91-100 and 150-160.  Positive strand gene.
	GeneFromGTF gene = new GeneFromGTF(r.getContig(), 1, 200, false, "A", "coding", "A", "coding", 1);
	final GeneFromGTF.TranscriptFromGTF tx = gene.addTranscript("trans1", 1, 200, 1, 200, 3, "trans1", "trans1", "coding");
	tx.addExon(1, 10);
	tx.addExon(91, 100);
	tx.addExon(150, 160);
	OverlapDetector<Gene> geneOverlapDetector = new OverlapDetector<>(0, 0);
	geneOverlapDetector.addLhs(gene, gene);

	TagReadWithGeneFunction tagger = new TagReadWithGeneFunction();

	r=tagger.setAnnotations(r, geneOverlapDetector, false);
	Assert.assertEquals(r.getStringAttribute("gn"), gene.getName()+","+gene.getName());
	Assert.assertEquals(r.getStringAttribute("gs"), "+,+");
	Assert.assertEquals(r.getStringAttribute("gf"), LocusFunction.CODING.name() + "," + LocusFunction.INTRONIC.name());
	Assert.assertEquals(r.getStringAttribute("XF"), LocusFunction.CODING.name());
}
 
Example 13
Source File: TagReadWithGeneExonFunctionTest.java    From Drop-seq with MIT License 6 votes vote down vote up
/**
 * A read with 2 alignment blocks that both overlap a single gene, once in an intron and once an exon.
 * The read is tagged with that gene on the "+" strand and an XF value of CODING.
 */
@Test(enabled=true)
public void testSplitReadExonicIntronicSameGene () {
	SAMRecord r = getFakeSplitRecord(testBAMFile, 91, 100, 131,140, false);

	// gene spanning 1-200 with 3 exons: 1-10, 91-100 and 150-160.  Positive strand gene.
	GeneFromGTF gene = new GeneFromGTF(r.getContig(), 1, 200, false, "A", "coding", "A", "coding", 1);
	final GeneFromGTF.TranscriptFromGTF tx = gene.addTranscript("trans1", 1, 200, 1, 200, 3, "trans1", "trans1", "coding");
	tx.addExon(1, 10);
	tx.addExon(91, 100);
	tx.addExon(150, 160);
	OverlapDetector<Gene> geneOverlapDetector = new OverlapDetector<>(0, 0);
	geneOverlapDetector.addLhs(gene, gene);

	TagReadWithGeneExonFunction tagger = new TagReadWithGeneExonFunction();

	r=tagger.setAnnotations(r, geneOverlapDetector);
	Assert.assertEquals(r.getStringAttribute("GE"), gene.getName());
	Assert.assertEquals(r.getStringAttribute("GS"), "+");
	Assert.assertEquals(r.getStringAttribute("XF"), LocusFunction.CODING.name());
}
 
Example 14
Source File: TagReadWithGeneFunctionTest.java    From Drop-seq with MIT License 5 votes vote down vote up
/**
 * A negative strand read (91-100) that overlaps an exon of a positive strand gene and an
 * intron of a negative strand gene. Both genes are listed in gn/gs/gf, and the summary
 * XF tag is expected to be INTRONIC.
 */
@Test
public void testIntronicCorrectCodingWrong () {
	// read on the negative strand
	SAMRecord r = getFakeRecord(testBAMFile, 91, 100, true);

	// gene with 2 exons at 1-10 and 91-100; the assertions below expect the read's
	// overlap with this gene to be reported as CODING.  Positive strand gene.
	GeneFromGTF gene = new GeneFromGTF(r.getContig(), 1, 100, false, "A", "coding", "A", "coding", 1);
	final GeneFromGTF.TranscriptFromGTF tx = gene.addTranscript("trans1", 1, 100, 1, 100, 2, "trans1", "trans1", "coding");
	tx.addExon(1, 10);
	tx.addExon(91, 100);
	OverlapDetector<Gene> geneOverlapDetector = new OverlapDetector<>(0, 0);
	geneOverlapDetector.addLhs(gene, gene);

	// gene with 2 exons, 1 coding from 50-60, 1 coding from 150-160. Negative strand gene.
	GeneFromGTF gene2 = new GeneFromGTF(r.getContig(), 50, 160, true, "B", "coding", "B", "coding", 1);
	final GeneFromGTF.TranscriptFromGTF tx2 = gene2.addTranscript("trans2", 50, 160, 50, 150, 2, "trans2", "trans2", "coding");
	tx2.addExon(50, 60);
	tx2.addExon(150, 160);
	geneOverlapDetector.addLhs(gene2, gene2);

	TagReadWithGeneFunction tagger = new TagReadWithGeneFunction();

	r=tagger.setAnnotations(r, geneOverlapDetector, false);
	String gn = r.getStringAttribute("gn");
	String gs = r.getStringAttribute("gs");
	String gf = r.getStringAttribute("gf");

	// names always come out alphabetically sorted.
	Assert.assertEquals(gn, gene.getName()+","+gene2.getName());
	Assert.assertEquals(gs, "+,-");
	Assert.assertEquals(gf, LocusFunction.CODING.name() + "," + LocusFunction.INTRONIC.name());
	Assert.assertEquals(r.getStringAttribute("XF"), LocusFunction.INTRONIC.name());
}
 
Example 15
Source File: TagReadWithGeneExonFunctionTest.java    From Drop-seq with MIT License 5 votes vote down vote up
/**
 * A split read (blocks 1-10 and 91-100) whose blocks match both exons of gene A while a
 * second gene B (50-160) on the same strand also spans part of the read.
 * The read is tagged with gene A only, on the "+" strand, with an XF value of CODING.
 */
@Test
public void testSplitReadExonicIntronicDifferentGenes() {
	SAMRecord r = getFakeSplitRecord(testBAMFile, 1, 10, 91,100, false);

	// gene with 2 exons at 1-10 and 91-100.  Positive strand gene.
	GeneFromGTF gene = new GeneFromGTF(r.getContig(), 1, 100, false, "A", "coding", "A", "coding", 1);
	final GeneFromGTF.TranscriptFromGTF tx = gene.addTranscript("trans1", 1, 100, 1, 100, 2, "trans1", "trans1", "coding");
	tx.addExon(1, 10);
	tx.addExon(91, 100);
	OverlapDetector<Gene> geneOverlapDetector = new OverlapDetector<>(0, 0);
	geneOverlapDetector.addLhs(gene, gene);

	// gene with 2 exons, 1 coding from 50-60, 1 coding from 150-160. Positive strand gene.
	GeneFromGTF gene2 = new GeneFromGTF(r.getContig(), 50, 160, false, "B", "coding", "B", "coding", 1);
	final GeneFromGTF.TranscriptFromGTF tx2 = gene2.addTranscript("trans2", 50, 160, 50, 150, 2, "trans2", "trans2", "coding");
	tx2.addExon(50, 60);
	tx2.addExon(150, 160);
	geneOverlapDetector.addLhs(gene2, gene2);

	TagReadWithGeneExonFunction tagger = new TagReadWithGeneExonFunction();

	r=tagger.setAnnotations(r, geneOverlapDetector);
	Assert.assertEquals(r.getStringAttribute("GE"), gene.getName());
	Assert.assertEquals(r.getStringAttribute("GS"), "+");
	Assert.assertEquals(r.getStringAttribute("XF"), LocusFunction.CODING.name());
}
 
Example 16
Source File: TagReadWithGeneExonFunctionTest.java    From Drop-seq with MIT License 5 votes vote down vote up
/**
 * Read overlaps exon on one gene, and intron on another, all on the same strand.
 * Only the gene with the exon overlap (A) is expected in GE/GS, with an XF value of CODING.
 */
@Test
public void testCodingAndIntronic () {
	// read on the positive strand
	SAMRecord r = getFakeRecord(testBAMFile, 91, 100, false);

	// gene with 2 exons at 1-10 and 91-100.  Positive strand gene.
	GeneFromGTF gene = new GeneFromGTF(r.getContig(), 1, 100, false, "A", "coding", "A", "coding", 1);
	final GeneFromGTF.TranscriptFromGTF tx = gene.addTranscript("trans1", 1, 100, 1, 100, 2, "trans1", "trans1", "coding");
	tx.addExon(1, 10);
	tx.addExon(91, 100);
	OverlapDetector<Gene> geneOverlapDetector = new OverlapDetector<>(0, 0);
	geneOverlapDetector.addLhs(gene, gene);

	// gene with 2 exons, 1 coding from 50-60, 1 coding from 150-160. Positive strand gene
	// (same strand flag as gene A, matching the "same strand" scenario under test).
	GeneFromGTF gene2 = new GeneFromGTF(r.getContig(), 50, 160, false, "B", "coding", "B", "coding", 1);
	final GeneFromGTF.TranscriptFromGTF tx2 = gene2.addTranscript("trans2", 50, 160, 50, 150, 2, "trans2", "trans2", "coding");
	tx2.addExon(50, 60);
	tx2.addExon(150, 160);
	geneOverlapDetector.addLhs(gene2, gene2);

	TagReadWithGeneExonFunction tagger = new TagReadWithGeneExonFunction();

	r=tagger.setAnnotations(r, geneOverlapDetector);

	String geneTagged = r.getStringAttribute("GE");
	String strandTagged = r.getStringAttribute("GS");

	Assert.assertEquals(geneTagged, gene.getName());
	Assert.assertEquals(strandTagged, "+");
	Assert.assertEquals(r.getStringAttribute("XF"), LocusFunction.CODING.name());
}
 
Example 17
Source File: TagReadWithGeneFunctionTest.java    From Drop-seq with MIT License 5 votes vote down vote up
/**
 * Read overlaps exon on one gene, and intron on another, all on the same strand.
 * Both genes are listed in gn/gs/gf (A as CODING, B as INTRONIC), and XF is expected to be CODING.
 */
@Test
public void testCodingAndIntronic () {
	// read on the positive strand (the expected gs value below is "+,+" and XF is CODING)
	SAMRecord r = getFakeRecord(testBAMFile, 91, 100, false);

	// gene with 2 exons at 1-10 and 91-100.  Positive strand gene.
	GeneFromGTF gene = new GeneFromGTF(r.getContig(), 1, 100, false, "A", "coding", "A", "coding", 1);
	final GeneFromGTF.TranscriptFromGTF tx = gene.addTranscript("trans1", 1, 100, 1, 100, 2, "trans1", "trans1", "coding");
	tx.addExon(1, 10);
	tx.addExon(91, 100);
	OverlapDetector<Gene> geneOverlapDetector = new OverlapDetector<>(0, 0);
	geneOverlapDetector.addLhs(gene, gene);

	// gene with 2 exons, 1 coding from 50-60, 1 coding from 150-160. Positive strand gene.
	GeneFromGTF gene2 = new GeneFromGTF(r.getContig(), 50, 160, false, "B", "coding", "B", "coding", 1);
	final GeneFromGTF.TranscriptFromGTF tx2 = gene2.addTranscript("trans2", 50, 160, 50, 150, 2, "trans2", "trans2", "coding");
	tx2.addExon(50, 60);
	tx2.addExon(150, 160);
	geneOverlapDetector.addLhs(gene2, gene2);

	TagReadWithGeneFunction tagger = new TagReadWithGeneFunction();

	r=tagger.setAnnotations(r, geneOverlapDetector, false);

	// names always come out alphabetically sorted.
	Assert.assertEquals(r.getStringAttribute("gn"), gene.getName()+","+gene2.getName());
	Assert.assertEquals(r.getStringAttribute("gs"), "+,+");
	Assert.assertEquals(r.getStringAttribute("gf"), LocusFunction.CODING.name() + "," + LocusFunction.INTRONIC.name());
	Assert.assertEquals(r.getStringAttribute("XF"), LocusFunction.CODING.name());
}
 
Example 18
Source File: CompareDropSeqAlignments.java    From Drop-seq with MIT License 5 votes vote down vote up
/**
 * Writes one tab-separated report line for each read in {@code r2List}, comparing it with the
 * first read of {@code r1List}. A line holds the first read's name, contig, position and mapping
 * quality, then the second read's contig, position, mapping quality and a flag that is true when
 * the read is not a secondary alignment, then the value of every tag in {@code tagNames} for
 * both reads. Reads from {@code r2List} that share the first read's contig and position are skipped.
 *
 * @param r1List reads from the first data set; only the first element is used
 * @param r2List reads from the second data set; each one may produce a line
 * @param tagNames tags whose values are appended to each line for both reads
 * @param writer destination for the report lines
 */
private void writeReadReportLine(List<SAMRecord> r1List, List<SAMRecord> r2List, List<String> tagNames, PrintStream writer) {
	// only the 2nd read can be a multimapper.
	if (!validateReadSetSize(r1List, r2List)) return;

	SAMRecord r1 = r1List.get(0);

	// NOTE(review): r1.getContig() is dereferenced without a null check - confirm the first read is always mapped.
	String r1Contig=r1.getContig().replaceAll(this.TRIM_CONTIG_STRING, "");
	int r1Pos=r1.getAlignmentStart();

	for (SAMRecord r: r2List) {
		List<String> line = new ArrayList<>();
		Collections.addAll(line, r1.getReadName(), r1Contig, Integer.toString(r1Pos), Integer.toString(r1.getMappingQuality()));
		String r2Contig = r.getContig();
		// Strings are immutable: replaceAll returns the trimmed copy, so the result must be kept.
		// Without the assignment the trimmed r1 contig could never equal the untrimmed r2 contig.
		if (r2Contig==null) {
			r2Contig="NA";
		} else {
			r2Contig=r2Contig.replaceAll(this.TRIM_CONTIG_STRING, "");
		}
		int r2Pos=r.getAlignmentStart();
		// if the contig and position are the same, don't emit the read
		if (r1Contig.equals(r2Contig) && r1Pos==r2Pos)
			continue;

		Collections.addAll(line, r2Contig, Integer.toString(r2Pos), Integer.toString(r.getMappingQuality()), Boolean.toString(!r.isSecondaryAlignment()));
		for (String tagName: tagNames) {
			line.add(getTagValueAsString(r1, tagName));
			line.add(getTagValueAsString(r, tagName));
		}
		writer.println(StringUtils.join(line, "\t"));
	}
}
 
Example 19
Source File: TagReadWithGeneExonFunctionTest.java    From Drop-seq with MIT License 5 votes vote down vote up
/**
 * A negative strand read (91-100) that overlaps the UTR exon of a positive strand gene and an
 * intron of a negative strand gene. No gene name (GE) or gene strand (GS) tag is expected,
 * and XF is expected to be INTRONIC.
 */
@Test
public void testIntronicCorrectUTRWrong () {
	// read on the negative strand
	SAMRecord r = getFakeRecord(testBAMFile, 91, 100, true);

	// gene with 2 exons, 1 coding from 1-10, one UTR from 91-100.  Positive strand gene.
	GeneFromGTF gene = new GeneFromGTF(r.getContig(), 1, 100, false, "A", "coding", "A", "coding", 1);
	final GeneFromGTF.TranscriptFromGTF tx = gene.addTranscript("trans1", 1, 100, 1, 90, 2, "trans1", "trans1", "coding");
	tx.addExon(1, 10);
	tx.addExon(91, 100);
	OverlapDetector<Gene> geneOverlapDetector = new OverlapDetector<>(0, 0);
	geneOverlapDetector.addLhs(gene, gene);

	// gene with 2 exons, 1 coding from 50-60, 1 coding from 150-160. Negative strand gene.
	GeneFromGTF gene2 = new GeneFromGTF(r.getContig(), 50, 160, true, "B", "coding", "B", "coding", 1);
	final GeneFromGTF.TranscriptFromGTF tx2 = gene2.addTranscript("trans2", 50, 160, 50, 150, 2, "trans2", "trans2", "coding");
	tx2.addExon(50, 60);
	tx2.addExon(150, 160);
	geneOverlapDetector.addLhs(gene2, gene2);

	TagReadWithGeneExonFunction tagger = new TagReadWithGeneExonFunction();

	r=tagger.setAnnotations(r, geneOverlapDetector);
	String GE = r.getStringAttribute("GE");
	String GS = r.getStringAttribute("GS");
	String XF = r.getStringAttribute("XF");

	Assert.assertNull(GE);
	Assert.assertNull(GS);

	Assert.assertEquals(XF, LocusFunction.INTRONIC.name());
}
 
Example 20
Source File: BamSlicerApplication.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Copies every record from {@code iterator} to {@code writer}, logging each time the contig of
 * the current record differs from the previously logged one. Records without a contig are
 * written but never logged.
 *
 * <p>The iterator is closed before this method returns, including when writing a record fails.
 *
 * @param writer destination for the records
 * @param iterator source of the records; closed by this method
 */
private static void writeToSlice(@NotNull SAMFileWriter writer, @NotNull CloseableIterator<SAMRecord> iterator) {
    String contig = "";
    try {
        while (iterator.hasNext()) {
            SAMRecord record = iterator.next();
            // Read the contig once per record instead of up to three times.
            String recordContig = record.getContig();
            if (recordContig != null && !contig.equals(recordContig)) {
                contig = recordContig;
                LOGGER.info("Reading contig: {}", contig);
            }
            writer.addAlignment(record);
        }
    } finally {
        // Release the iterator even if hasNext/next/addAlignment throws.
        iterator.close();
    }
}