Java Code Examples for htsjdk.samtools.SAMRecord#getStringAttribute()

The following examples show how to use htsjdk.samtools.SAMRecord#getStringAttribute() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: SingleCellRnaSeqMetricsCollector.java    From Drop-seq with MIT License 7 votes vote down vote up
RnaSeqMetricsCollector getRNASeqMetricsCollector(final String cellBarcodeTag, final List<String> cellBarcodes, final File inBAM,
  		final RnaSeqMetricsCollector.StrandSpecificity strand, final double rRNAFragmentPCT, final int readMQ,
  		final File annotationsFile, final File rRNAIntervalsFile) {

  	CollectorFactory factory = new CollectorFactory(inBAM, strand, rRNAFragmentPCT, annotationsFile, rRNAIntervalsFile);
RnaSeqMetricsCollector collector=  factory.getCollector(cellBarcodes);
List<SAMReadGroupRecord> rg = factory.getReadGroups(cellBarcodes);

      // iterate by cell barcodes.  Skip all the reads without cell barcodes.
CloseableIterator<SAMRecord> iter = getReadsInTagOrder (inBAM, cellBarcodeTag, rg, cellBarcodes, readMQ);

      ProgressLogger p = new ProgressLogger(log, 1000000, "Accumulating metrics");
while (iter.hasNext()) {
	SAMRecord r = iter.next();
	String cellBarcode = r.getStringAttribute(cellBarcodeTag);
	r.setAttribute("RG", cellBarcode);
          p.record(r);
   	collector.acceptRecord(r, null);
}

collector.finish();
return (collector);
  }
 
Example 2
Source File: TagReadWithGeneExonFunctionTest.java    From Drop-seq with MIT License 6 votes vote down vote up
@Test
// One CODING read (wrong strand, no other gene models overlapping)
public void testCodingReadWrongStrand () {
	SAMRecord r = getFakeRecord(testBAMFile, 2, 8, true);
	boolean negStrandFlag = r.getReadNegativeStrandFlag();
	// gene with 2 exons, 1 coding from 1-10, one UTR from 91-100.  Positive strand gene.
	GeneFromGTF gene = new GeneFromGTF(r.getContig(), 1, 100, false, "A", "coding", "A", "coding", 1);
	final GeneFromGTF.TranscriptFromGTF tx = gene.addTranscript("trans1", 1, 100, 1, 90, 2, "trans1", "trans1", "coding");
	tx.addExon(1, 10);
	tx.addExon(91, 100);
	OverlapDetector<Gene> geneOverlapDetector = new OverlapDetector<>(0, 0);
	geneOverlapDetector.addLhs(gene, gene);
	TagReadWithGeneExonFunction tagger = new TagReadWithGeneExonFunction();
	r=tagger.setAnnotations(r, geneOverlapDetector);

	String geneTagged=r.getStringAttribute("GE");
	String strandTagged=r.getStringAttribute("GS");

	Assert.assertNull(geneTagged);
	Assert.assertNull(strandTagged);
	Assert.assertEquals(r.getStringAttribute("XF"), LocusFunction.INTERGENIC.name());
}
 
Example 3
Source File: TagOrderIteratorTest.java    From Drop-seq with MIT License 6 votes vote down vote up
@Test(enabled=true)
public void testCellSorting() {
      final Iterator<SAMRecord> toi = getTagOrderIterator(IN_FILE, "ZC");
 int counter=0;
 
 String [] cellOrder={"ATCAGGGACAGA","ATCAGGGACAGA","ATCAGGGACAGA","ATCAGGGACAGA","ATCAGGGACAGA","ATCAGGGACAGA","TGGCGAAGAGAT", "TGGCGAAGAGAT","TGGCGAAGAGAT","TGGCGAAGAGAT","TGGCGAAGAGAT","TGGCGAAGAGAT"};
 
 
 while (toi.hasNext()) {
  SAMRecord r = toi.next();
  r.getReadName();
  String cellName = r.getStringAttribute("ZC");
  
  String expectedName = cellOrder[counter];
  Assert.assertEquals(cellName, expectedName);
  counter++;
 }
}
 
Example 4
Source File: TagReadWithGeneExonFunctionTest.java    From Drop-seq with MIT License 6 votes vote down vote up
@Test
// 	One UTR read (wrong strand, no other gene models overlapping)
public void testUTRReadWrongStrand () {
	SAMRecord r = getFakeRecord(testBAMFile, 91, 95, true);
	// gene with 2 exons, 1 coding from 1-10, one UTR from 91-100.  Positive strand gene.
	GeneFromGTF gene = new GeneFromGTF(r.getContig(), 1, 100, false, "A", "coding", "A", "coding", 1);
	final GeneFromGTF.TranscriptFromGTF tx = gene.addTranscript("trans1", 1, 100, 1, 90, 2, "trans1", "trans1", "coding");
	tx.addExon(1, 10);
	tx.addExon(91, 100);
	OverlapDetector<Gene> geneOverlapDetector = new OverlapDetector<>(0, 0);
	geneOverlapDetector.addLhs(gene, gene);
	TagReadWithGeneExonFunction tagger = new TagReadWithGeneExonFunction();
	r=tagger.setAnnotations(r, geneOverlapDetector);

	String GE=r.getStringAttribute("GE");
	String GS=r.getStringAttribute("GS");
	String XF = r.getStringAttribute("XF");

	Assert.assertNull(GE);
	Assert.assertNull(GS);
	Assert.assertEquals(XF, LocusFunction.INTERGENIC.name());
}
 
Example 5
Source File: AggregatedTagOrderIteratorTest.java    From Drop-seq with MIT License 6 votes vote down vote up
@Test (enabled=true)
public void testGetCellBatch() {
	File f = new File("testdata/org/broadinstitute/transcriptome/barnyard/5cell3gene.bam");
	String cellTag = "ZC";
       final Iterator<List<SAMRecord>> iter = filterSortAndGroupByTagsAndQuality(f, cellTag);

	List<String>sortingTags = new ArrayList<String>();
	
	sortingTags.add(cellTag);

	while (iter.hasNext()) {
		List<SAMRecord> recs=iter.next();
		SAMRecord r = recs.iterator().next();
		int setSize = recs.size();
		String cell = r.getStringAttribute(cellTag);
		int expectedSize = getCellBatchExpectedSize(cell);
		
		Assert.assertTrue(testAllRecordsSamTags(recs, sortingTags, expectedSize, setSize));
		
		Assert.assertEquals(expectedSize, setSize);
	}
       CloserUtil.close(iter);
}
 
Example 6
Source File: DetectBeadSubstitutionErrors.java    From Drop-seq with MIT License 6 votes vote down vote up
/**
 * Repairs the cell barcode, which can mean one of the following:
 * 1) If the cell barcode is ambiguous and you're filtering ambiguous, return null
 * 2) If the cell barcode is the smaller neighbor of the pair, set the tag to be the larger neighbor
 * 2a) If using a different output cell barcode, tag each read with the new tag and either the unchanged cell barcode or the new neighbor.
 * @param r The input read
 * @param result The cell Barcode collapse result so we can replace smaller barcodes with larger ones to merge them.
 * @return The modified read, or null if the read should be filtered from the data.
 */
private SAMRecord repairBarcode (final SAMRecord r, final BottomUpCollapseResult result) {
	String cellBarcode=r.getStringAttribute(this.CELL_BARCODE_TAG);
	// check filter first as it's faster.
	boolean ambiguous=result.isAmbiguousBarcode(cellBarcode);
	// if you're filtering ambiguous, return no read.
	if (this.FILTER_AMBIGUOUS && ambiguous) return null;
	// if you're not filtering ambiguous, return the read unmodified.
	if (this.FILTER_AMBIGUOUS && ambiguous) return r;
	String newCellBarcode=result.getLargerRelatedBarcode(cellBarcode);
	// if not null, we have something to repair.
	if (newCellBarcode!=null)
		r.setAttribute(this.OUT_CELL_BARCODE_TAG, newCellBarcode);
	else // add the cell barocode tag with the old value.  If you're writing out to a new tag this is important.
		r.setAttribute(this.OUT_CELL_BARCODE_TAG, cellBarcode);
	return r;
}
 
Example 7
Source File: GeneStrandFilteringIterator.java    From Drop-seq with MIT License 6 votes vote down vote up
@Override
/**
 * For a given record, if the gene strand tag does not match the read strand tag, reject the record.
 * If there are multiple gene strand tags, they must all agree with the read strand tag.
 * @param rec
 * @return
 */
public boolean filterOut(final SAMRecord rec) {
	String geneStrand = rec.getStringAttribute(strandTag);
	if (geneStrand==null) return false;

	String [] strands = geneStrand.split(",");
	String readStrandString = Utils.strandToString(!rec.getReadNegativeStrandFlag());
	for (String s: strands)
		if (!s.equals(readStrandString)) return true;
	return false;
}
 
Example 8
Source File: TagReadWithGeneExonFunctionTest.java    From Drop-seq with MIT License 6 votes vote down vote up
@Test
public void testIntergenicCorrectIntronicWrong () {
	SAMRecord r = getFakeRecord(testBAMFile, 50, 60, false);
	// gene with 2 exons, 1 coding from 1-10, one UTR from 91-100.
	GeneFromGTF gene = new GeneFromGTF(r.getContig(), 1, 100, true, "A", "coding", "A", "coding", 1);
	final GeneFromGTF.TranscriptFromGTF tx = gene.addTranscript("trans1", 1, 100, 1, 90, 2, "trans1", "trans1", "coding");
	tx.addExon(1, 10);
	tx.addExon(91, 100);
	OverlapDetector<Gene> geneOverlapDetector = new OverlapDetector<>(0, 0);
	geneOverlapDetector.addLhs(gene, gene);
	TagReadWithGeneExonFunction tagger = new TagReadWithGeneExonFunction();
	r=tagger.setAnnotations(r, geneOverlapDetector);

	String geneTag = r.getStringAttribute("GE");
	String geneStrand = r.getStringAttribute("GS");

	Assert.assertNull(geneTag);
	Assert.assertNull(geneStrand);
	// Assert.assertEquals(geneTag, gene.getName());
	// Assert.assertEquals(geneStrand, "-");
	Assert.assertEquals(r.getStringAttribute("XF"), LocusFunction.INTERGENIC.name());
}
 
Example 9
Source File: AggregatedTagOrderIteratorTest.java    From Drop-seq with MIT License 5 votes vote down vote up
@Test(enabled=true)
public void testCellSorting() {
       final Iterator<List<SAMRecord>> groupingIterator = filterSortAndGroupByTags(IN_FILE, "ZC");
	int counter=0;
	
	String [] cellOrder={"ATCAGGGACAGA", "TGGCGAAGAGAT"};
	Map<String, Integer> cellCounts = new HashMap<String, Integer>();
	cellCounts.put("ATCAGGGACAGA", 6);
	cellCounts.put("TGGCGAAGAGAT", 6);
	
	
	 while (groupingIterator.hasNext()) {
		  Collection<SAMRecord> r = groupingIterator.next();
		  int size = r.size();
		  SAMRecord rec = r.iterator().next();
		  
		  String cell = rec.getStringAttribute("ZC");
		  System.out.println("Cell [" + cell +"] size [" + size +"]");
		  
		  String expectedGene = cellOrder[counter];
		  int expectedSize = cellCounts.get(cell);
		  
		  Assert.assertEquals(cell, expectedGene);
		  Assert.assertEquals(size, expectedSize);
		  counter++;
	 }
}
 
Example 10
Source File: AggregatedTagOrderIteratorTest.java    From Drop-seq with MIT License 5 votes vote down vote up
@Test(enabled=true)
public void testGeneCellSorting() {
       final Iterator<List<SAMRecord>> groupingIterator = filterSortAndGroupByTags(IN_FILE, "GE", "ZC");
	int counter=0;
	  
	
	String [] geneOrder={"CHUK", "CHUK", "NKTR", "NKTR", "SNRPA1", "SNRPA1"};
	String [] cellOrder={"ATCAGGGACAGA", "TGGCGAAGAGAT", "ATCAGGGACAGA", "TGGCGAAGAGAT", "ATCAGGGACAGA", "TGGCGAAGAGAT"};
	int [] expectedSize = {2,2,2,2,2,2};
	
	while (groupingIterator.hasNext()) {
		Collection<SAMRecord> r = groupingIterator.next();
		int size = r.size();
		SAMRecord rec = r.iterator().next();
		
		String readName = rec.getReadName();
		String cellName = rec.getStringAttribute("ZC");
		String geneName = rec.getStringAttribute("GE");
		
		System.out.println("Cell [" + cellName +"] geneName [" + geneName +"] size [" + size +"]");

		Assert.assertEquals(cellName, cellOrder[counter]);
		Assert.assertEquals(geneName, geneOrder[counter]);
		Assert.assertEquals(size, expectedSize[counter]);
		counter++;
	}
}
 
Example 11
Source File: TagReadWithGeneExonFunctionTest.java    From Drop-seq with MIT License 5 votes vote down vote up
@Test
public void testUTRCorrectIntronicWrong () {
	// read on the negative strand
	SAMRecord r = getFakeRecord(testBAMFile, 91, 100, false);

	// gene with 2 exons, 1 coding from 1-10, one UTR from 91-100.  Positive strand gene.
	GeneFromGTF gene = new GeneFromGTF(r.getContig(), 1, 100, false, "A", "coding", "A", "coding", 1);
	final GeneFromGTF.TranscriptFromGTF tx = gene.addTranscript("trans1", 1, 100, 1, 90, 2, "trans1", "trans1", "coding");
	tx.addExon(1, 10);
	tx.addExon(91, 100);
	OverlapDetector<Gene> geneOverlapDetector = new OverlapDetector<>(0, 0);
	geneOverlapDetector.addLhs(gene, gene);

	// gene with 2 exons, 1 coding from 50-60, 1 coding from 150-160. Negative strand gene.
	GeneFromGTF gene2 = new GeneFromGTF(r.getContig(), 50, 160, true, "B", "coding", "B", "coding", 1);
	final GeneFromGTF.TranscriptFromGTF tx2 = gene2.addTranscript("trans2", 50, 160, 50, 150, 2, "trans2", "trans2", "coding");
	tx2.addExon(50, 60);
	tx2.addExon(150, 160);
	geneOverlapDetector.addLhs(gene2, gene2);

	TagReadWithGeneExonFunction tagger = new TagReadWithGeneExonFunction();
	List <Gene> genes = new ArrayList <> (geneOverlapDetector.getAll());
	Collections.sort(genes, TagReadWithGeneFunction.GENE_NAME_COMPARATOR);

	r=tagger.setAnnotations(r, geneOverlapDetector);
	String GE = r.getStringAttribute("GE");
	String GS = r.getStringAttribute("GS");
	String XF = r.getStringAttribute("XF");

	// names always come out alphabetically sorted.
	Assert.assertEquals(GE, gene.getName());
	Assert.assertEquals(GS, "+");
	Assert.assertEquals(XF, LocusFunction.UTR.name());
}
 
Example 12
Source File: SortedSAMWriter.java    From abra2 with MIT License 5 votes vote down vote up
public MateKey getOriginalReadInfo(SAMRecord read) {
		int pos = read.getAlignmentStart();
		boolean isUnmapped = read.getReadUnmappedFlag();
		boolean isRc = read.getReadNegativeStrandFlag();
		
		String yo = read.getStringAttribute("YO");
		
		if (yo != null) {
			if (yo.startsWith("N/A")) {
				// Original alignment was unmapped
				isUnmapped = true;
//				isRc = false;
				// Orientation is forced to be opposite of mate during realignment
				// regardless of the original alignment.
				isRc = !read.getMateNegativeStrandFlag();
			} else {
				String[] fields = yo.split(":");
				pos = Integer.parseInt(fields[1]);
				isUnmapped = false;
				isRc = fields[2].equals("-") ? true : false;
			}
		}
		
		int readNum = read.getFirstOfPairFlag() ? 1 : 2;
		
		return new MateKey(read.getReadName(), pos, isUnmapped, isRc, readNum, read.getAlignmentStart());
	}
 
Example 13
Source File: SNPUMICellReadIteratorWrapperTest.java    From Drop-seq with MIT License 5 votes vote down vote up
/**
 * Integration style test to see if I can throw null pointers, etc with a bit of real data.
 */
@Test(enabled=true)
public void processReads() {
	List<String> cellBarcodeList = ParseBarcodeFile.readCellBarcodeFile(cellBCFile);
	IntervalList loci = IntervalList.fromFile(snpIntervals);

       SamReader reader = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.EAGERLY_DECODE).open(bamFile);
       // Filter records before sorting, to reduce I/O
       final MissingTagFilteringIterator filteringIterator =
               new MissingTagFilteringIterator(reader.iterator(), GENE_NAME_TAG, cellBarcodeTag, molBCTag);

       MapQualityFilteredIterator filteringIterator2 = new MapQualityFilteredIterator(filteringIterator, readMQ, true);

       GeneFunctionIteratorWrapper gfteratorWrapper = new GeneFunctionIteratorWrapper(filteringIterator2, GENE_NAME_TAG, GENE_STRAND_TAG, GENE_FUNCTION_TAG, false, STRAND_STRATEGY, LOCUS_FUNCTION_LIST);

       SNPUMICellReadIteratorWrapper snpumiCellReadIterator = new SNPUMICellReadIteratorWrapper(gfteratorWrapper, loci, cellBarcodeTag, cellBarcodeList, GENE_NAME_TAG, snpTag, readMQ);

       // create comparators in the order the data should be sorted
       final MultiComparator<SAMRecord> multiComparator = new MultiComparator<>(
               new StringTagComparator(cellBarcodeTag),
               new StringTagComparator(GENE_NAME_TAG),
               new StringTagComparator(molBCTag));

       final CloseableIterator<SAMRecord> sortingIterator =
               SamRecordSortingIteratorFactory.create(reader.getFileHeader(), snpumiCellReadIterator, multiComparator, null);

       Assert.assertTrue(sortingIterator.hasNext());
       SAMRecord nextRead = sortingIterator.next();
       List<SAMTagAndValue> tagValues= nextRead.getAttributes();
       String snpTagValue = nextRead.getStringAttribute(this.snpTag);
	CloserUtil.close(snpumiCellReadIterator);
}
 
Example 14
Source File: TagReadWithGeneFunctionTest.java    From Drop-seq with MIT License 5 votes vote down vote up
@Test
public void testIntronicCorrectCodingWrong () {
	// read on the negative strand
	SAMRecord r = getFakeRecord(testBAMFile, 91, 100, true);

	// gene with 2 exons, 1 coding from 1-10, one UTR from 91-100.  Positive strand gene.
	GeneFromGTF gene = new GeneFromGTF(r.getContig(), 1, 100, false, "A", "coding", "A", "coding", 1);
	final GeneFromGTF.TranscriptFromGTF tx = gene.addTranscript("trans1", 1, 100, 1, 100, 2, "trans1", "trans1", "coding");
	tx.addExon(1, 10);
	tx.addExon(91, 100);
	OverlapDetector<Gene> geneOverlapDetector = new OverlapDetector<>(0, 0);
	geneOverlapDetector.addLhs(gene, gene);

	// gene with 2 exons, 1 coding from 50-60, 1 coding from 150-160. Negative strand gene.
	GeneFromGTF gene2 = new GeneFromGTF(r.getContig(), 50, 160, true, "B", "coding", "B", "coding", 1);
	final GeneFromGTF.TranscriptFromGTF tx2 = gene2.addTranscript("trans2", 50, 160, 50, 150, 2, "trans2", "trans2", "coding");
	tx2.addExon(50, 60);
	tx2.addExon(150, 160);
	geneOverlapDetector.addLhs(gene2, gene2);

	TagReadWithGeneFunction tagger = new TagReadWithGeneFunction();
	List <Gene> genes = new ArrayList <> (geneOverlapDetector.getAll());
	Collections.sort(genes, TagReadWithGeneFunction.GENE_NAME_COMPARATOR);

	r=tagger.setAnnotations(r, geneOverlapDetector, false);
	String gn = r.getStringAttribute("gn");
	String gs = r.getStringAttribute("gs");
	String gf = r.getStringAttribute("gf");

	// names always come out alphabetically sorted.
	Assert.assertEquals(r.getStringAttribute("gn"), gene.getName()+","+gene2.getName());
	Assert.assertEquals(r.getStringAttribute("gs"), "+,-");
	Assert.assertEquals(r.getStringAttribute("gf"), LocusFunction.CODING.name() + "," + LocusFunction.INTRONIC.name());
	Assert.assertEquals(r.getStringAttribute("XF"), LocusFunction.INTRONIC.name());
}
 
Example 15
Source File: TagReadWithGeneFunctionTest.java    From Drop-seq with MIT License 5 votes vote down vote up
@Test
public void testIntronicCorrectUTRWrong () {
	// read on the negative strand
	SAMRecord r = getFakeRecord(testBAMFile, 91, 100, true);

	// gene with 2 exons, 1 coding from 1-10, one UTR from 91-100.  Positive strand gene.
	GeneFromGTF gene = new GeneFromGTF(r.getContig(), 1, 100, false, "A", "coding", "A", "coding", 1);
	final GeneFromGTF.TranscriptFromGTF tx = gene.addTranscript("trans1", 1, 100, 1, 90, 2, "trans1", "trans1", "coding");
	tx.addExon(1, 10);
	tx.addExon(91, 100);
	OverlapDetector<Gene> geneOverlapDetector = new OverlapDetector<>(0, 0);
	geneOverlapDetector.addLhs(gene, gene);

	// gene with 2 exons, 1 coding from 50-60, 1 coding from 150-160. Negative strand gene.
	GeneFromGTF gene2 = new GeneFromGTF(r.getContig(), 50, 160, true, "B", "coding", "B", "coding", 1);
	final GeneFromGTF.TranscriptFromGTF tx2 = gene2.addTranscript("trans2", 50, 160, 50, 150, 2, "trans2", "trans2", "coding");
	tx2.addExon(50, 60);
	tx2.addExon(150, 160);
	geneOverlapDetector.addLhs(gene2, gene2);

	TagReadWithGeneFunction tagger = new TagReadWithGeneFunction();
	List <Gene> genes = new ArrayList <> (geneOverlapDetector.getAll());
	Collections.sort(genes, TagReadWithGeneFunction.GENE_NAME_COMPARATOR);

	r=tagger.setAnnotations(r, geneOverlapDetector, false);
	String gn = r.getStringAttribute("gn");
	String gs = r.getStringAttribute("gs");
	String gf = r.getStringAttribute("gf");

	// names always come out alphabetically sorted.
	Assert.assertEquals(r.getStringAttribute("gn"), gene.getName()+","+gene2.getName());
	Assert.assertEquals(r.getStringAttribute("gs"), "+,-");
	Assert.assertEquals(r.getStringAttribute("gf"), LocusFunction.UTR.name() + "," + LocusFunction.INTRONIC.name());
	Assert.assertEquals(r.getStringAttribute("XF"), LocusFunction.INTRONIC.name());
}
 
Example 16
Source File: TagReadWithGeneExonFunctionTest.java    From Drop-seq with MIT License 5 votes vote down vote up
@Test
public void testIntronicCorrectUTRWrong () {
	// read on the negative strand
	SAMRecord r = getFakeRecord(testBAMFile, 91, 100, true);

	// gene with 2 exons, 1 coding from 1-10, one UTR from 91-100.  Positive strand gene.
	GeneFromGTF gene = new GeneFromGTF(r.getContig(), 1, 100, false, "A", "coding", "A", "coding", 1);
	final GeneFromGTF.TranscriptFromGTF tx = gene.addTranscript("trans1", 1, 100, 1, 90, 2, "trans1", "trans1", "coding");
	tx.addExon(1, 10);
	tx.addExon(91, 100);
	OverlapDetector<Gene> geneOverlapDetector = new OverlapDetector<>(0, 0);
	geneOverlapDetector.addLhs(gene, gene);

	// gene with 2 exons, 1 coding from 50-60, 1 coding from 150-160. Negative strand gene.
	GeneFromGTF gene2 = new GeneFromGTF(r.getContig(), 50, 160, true, "B", "coding", "B", "coding", 1);
	final GeneFromGTF.TranscriptFromGTF tx2 = gene2.addTranscript("trans2", 50, 160, 50, 150, 2, "trans2", "trans2", "coding");
	tx2.addExon(50, 60);
	tx2.addExon(150, 160);
	geneOverlapDetector.addLhs(gene2, gene2);

	TagReadWithGeneExonFunction tagger = new TagReadWithGeneExonFunction();
	List <Gene> genes = new ArrayList <> (geneOverlapDetector.getAll());
	Collections.sort(genes, TagReadWithGeneFunction.GENE_NAME_COMPARATOR);

	r=tagger.setAnnotations(r, geneOverlapDetector);
	String GE = r.getStringAttribute("GE");
	String GS = r.getStringAttribute("GS");
	String XF = r.getStringAttribute("XF");

	// names always come out alphabetically sorted.
	Assert.assertNull(GE);
	Assert.assertNull(GS);

	Assert.assertEquals(XF, LocusFunction.INTRONIC.name());
}
 
Example 17
Source File: TagReadWithGeneExonFunctionTest.java    From Drop-seq with MIT License 5 votes vote down vote up
@Test
public void testIntronicCorrectCodingWrong () {
	// read on the negative strand
	SAMRecord r = getFakeRecord(testBAMFile, 91, 100, true);

	// gene with 2 exons, 1 coding from 1-10, one UTR from 91-100.  Positive strand gene.
	GeneFromGTF gene = new GeneFromGTF(r.getContig(), 1, 100, false, "A", "coding", "A", "coding", 1);
	final GeneFromGTF.TranscriptFromGTF tx = gene.addTranscript("trans1", 1, 100, 1, 100, 2, "trans1", "trans1", "coding");
	tx.addExon(1, 10);
	tx.addExon(91, 100);
	OverlapDetector<Gene> geneOverlapDetector = new OverlapDetector<>(0, 0);
	geneOverlapDetector.addLhs(gene, gene);

	// gene with 2 exons, 1 coding from 50-60, 1 coding from 150-160. Negative strand gene.
	GeneFromGTF gene2 = new GeneFromGTF(r.getContig(), 50, 160, true, "B", "coding", "B", "coding", 1);
	final GeneFromGTF.TranscriptFromGTF tx2 = gene2.addTranscript("trans2", 50, 160, 50, 150, 2, "trans2", "trans2", "coding");
	tx2.addExon(50, 60);
	tx2.addExon(150, 160);
	geneOverlapDetector.addLhs(gene2, gene2);

	TagReadWithGeneExonFunction tagger = new TagReadWithGeneExonFunction();
	List <Gene> genes = new ArrayList <> (geneOverlapDetector.getAll());
	Collections.sort(genes, TagReadWithGeneFunction.GENE_NAME_COMPARATOR);

	r=tagger.setAnnotations(r, geneOverlapDetector);
	String ge = r.getStringAttribute("GE");
	String gs = r.getStringAttribute("GS");
	String xf = r.getStringAttribute("XF");

	Assert.assertNull(ge);
	Assert.assertNull(gs);

	Assert.assertEquals(xf, LocusFunction.INTRONIC.name());
}
 
Example 18
Source File: Utils.java    From Drop-seq with MIT License 4 votes vote down vote up
public static String getCellBC (final SAMRecord r, final String cellBCTag) {
	String currentCell = r.getStringAttribute(cellBCTag);
	if (currentCell==null) return (DEFAULT_CELL_BARCODE);
	return (currentCell);
}
 
Example 19
Source File: SimpleAlleleCounter.java    From abra2 with MIT License 4 votes vote down vote up
private IndelInfo checkForIndelAtLocus(SAMRecord read, int refPos) {
		IndelInfo elem = null;
		
//		if (refPos == 105243047 && read.getReadName().equals("D7T4KXP1:400:C5F94ACXX:5:2302:20513:30410")) {
//			System.out.println("bar");
//		}
		
		String contigInfo = read.getStringAttribute("YA");
		if (contigInfo != null) {
			// Get assembled contig info.
			String[] fields = contigInfo.split(":");
			int contigPos = Integer.parseInt(fields[1]);
			
			Cigar contigCigar = TextCigarCodec.decode(fields[2]);
			
			// Check to see if contig contains indel at current locus
			elem = checkForIndelAtLocus(contigPos, contigCigar, refPos);
			
			if (elem != null) {
				// Now check to see if this read supports the indel
				IndelInfo readElem = checkForIndelAtLocus(read.getAlignmentStart(),
						read.getCigar(), refPos);
				
				// Allow partially overlapping indels to support contig
				// (Should only matter for inserts)
				if (readElem == null || readElem.getCigarElement().getOperator() != elem.getCigarElement().getOperator()) {
					// Read element doesn't match contig indel
					elem = null;
				} else {
					elem.setReadIndex(readElem.getReadIndex());
					
					// If this read overlaps the entire insert, capture the bases.
					if (elem.getCigarElement().getOperator() == CigarOperator.I) {

						if (elem.getCigarElement().getLength() == readElem.getCigarElement().getLength()) {
					
							String insertBases = read.getReadString().substring(readElem.getReadIndex(), readElem.getReadIndex()+readElem.getCigarElement().getLength());
							elem.setInsertBases(insertBases);
						} else if (readElem.getCigarElement().getLength() < elem.getCigarElement().getLength()) {
							
							int lengthDiff = elem.getCigarElement().getLength() - readElem.getCigarElement().getLength();
							
							if (readElem.getReadIndex() == 0) {
								elem.setReadIndex(readElem.getReadIndex() - lengthDiff);
							} else if (readElem.getReadIndex() == read.getReadLength()-1) {
								elem.setReadIndex(readElem.getReadIndex() + lengthDiff);
							}
						}
					}
				}
			}
		}
		
		return elem;
	}
 
Example 20
Source File: EstimateLibraryComplexity.java    From picard with MIT License 4 votes vote down vote up
public static int getReadBarcodeValue(final SAMRecord record, final String tag) {
    if (null == tag) return 0;
    final String attr = record.getStringAttribute(tag);
    if (null == attr) return 0;
    else return attr.hashCode();
}