Java Code Examples for htsjdk.samtools.SAMRecord#setAttribute()

The following examples show how to use htsjdk.samtools.SAMRecord#setAttribute() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: IntervalTagComparatorTest.java    From Drop-seq with MIT License 6 votes vote down vote up
private List<SAMRecord> getRecords () {
	SAMRecordSetBuilder builder  = new SAMRecordSetBuilder();

	// chromosome 2 and chromosome 10
	SAMRecord r1 = builder.addFrag("read1", 1, 1, false);
	Interval i1 = new Interval("2", 1, 10, true, null);
	r1.setAttribute(this.intervalTag, IntervalTagComparator.toString(i1));

	SAMRecord r2 = builder.addFrag("read1", 1, 1, false);
	Interval i2 = new Interval("10", 1, 10, true, null);
	r2.setAttribute(this.intervalTag, IntervalTagComparator.toString(i2));

	List<SAMRecord> result = new ArrayList<>();
	result.add(r1);
	result.add(r2);
	return result;
}
 
Example 2
Source File: TagReadWithInterval.java    From Drop-seq with MIT License 6 votes vote down vote up
private SAMRecord tagRead(final SAMRecord r, final OverlapDetector<Interval> od) {
	Set<Interval>intervals = new HashSet<>();
	List<AlignmentBlock> blocks = r.getAlignmentBlocks();
	for (AlignmentBlock b: blocks) {
		int refStart =b.getReferenceStart();
		int refEnd = refStart+b.getLength()-1;
		Interval v = new Interval(r.getReferenceName(), refStart, refEnd);
		Collection<Interval> blockResult = od.getOverlaps(v);
		intervals.addAll(blockResult);
	}
	String tagName = getIntervalName(intervals);
	if (tagName!=null)
		r.setAttribute(this.TAG, tagName);
	else
		r.setAttribute(this.TAG, null);
	return (r);

}
 
Example 3
Source File: SNPUMICellReadIteratorWrapper.java    From Drop-seq with MIT License 6 votes vote down vote up
/**
 * Check if a read overlaps any SNPs in the OverlapDetector.  Tag reads with SNPs.
 * If more than 1 SNP tags a read, make a read for each SNP.
 * Simplified since data goes through GeneFunctionIteratorWrapper to take care of how reads/genes interact.
 */
private void processSNP (final SAMRecord r) {
	List<AlignmentBlock> blocks = r.getAlignmentBlocks();

	Collection<String> snps = new HashSet<>();

	for (AlignmentBlock b: blocks) {
		int start = b.getReferenceStart();
		int end = start + b.getLength() -1;

		Interval i = new Interval(r.getReferenceName(), start, end);
		Collection<Interval> overlaps = this.snpIntervals.getOverlaps(i);
		for (Interval o: overlaps)
			snps.add(IntervalTagComparator.toString(o));
	}

	// 1 read per SNP.
	for (String snp:snps) {
		SAMRecord rr = Utils.getClone(r);
		rr.setAttribute(this.snpTag, snp);
		queueRecordForOutput(rr);
	}
}
 
Example 4
Source File: MarkDuplicatesTest.java    From picard with MIT License 6 votes vote down vote up
@Test
public void testWithBarcodeComplex() {
    final AbstractMarkDuplicatesCommandLineProgramTester tester = getTester();
    tester.getSamRecordSetBuilder().setReadLength(68);
    final String readNameOne = "RUNID:1:1:15993:13361";
    final String readNameTwo = "RUNID:2:2:15993:13362";
    final String readNameThree = "RUNID:3:3:15993:13362";

    // first two reads have the same barcode, third read has a different barcode
    tester.addMatePair(readNameOne, 2, 41212324, 41212310, false, false, false, false, "33S35M", "19S49M", true, true, false, false, false, DEFAULT_BASE_QUALITY);
    tester.addMatePair(readNameTwo, 2, 41212324, 41212310, false, false, true, true, "33S35M", "19S49M", true, true, false, false, false, DEFAULT_BASE_QUALITY); // same barcode as the first
    tester.addMatePair(readNameThree, 2, 41212324, 41212310, false, false, false, false, "33S35M", "19S49M", true, true, false, false, false, DEFAULT_BASE_QUALITY);

    final String barcodeTag = "BC";
    for (final SAMRecord record : new IterableAdapter<>(tester.getRecordIterator())) {
        if (record.getReadName().equals(readNameOne) || record.getReadName().equals(readNameTwo)) {
            record.setAttribute(barcodeTag, "AAAA");
        }
        else if (record.getReadName().equals(readNameThree)) {
            record.setAttribute(barcodeTag, "CCCC");
        }
    }
    tester.addArg("BARCODE_TAG=" + barcodeTag);
    tester.runTest();
}
 
Example 5
Source File: CollapseTagWithContext.java    From Drop-seq with MIT License 6 votes vote down vote up
private void retagBarcodedReads (Iterator<SAMRecord> informativeRecs, ObjectCounter<String> barcodeCounts, Map<String, String> collapseMap, boolean dropSmallCounts, SAMFileWriter writer,
		String collapseTag, String outTag) {
	
	Set<String> expectedBarcodes = null;
	// already validated that if dropSmallCounts is true, then the minNumObservations > 1.
	if (dropSmallCounts)
		// use all the remaining barcodes that have counts.
		expectedBarcodes = new HashSet<>(barcodeCounts.getKeys());
	while (informativeRecs.hasNext()) {
		SAMRecord r = informativeRecs.next();
		String tagValue = r.getStringAttribute(collapseTag);
		// if the tag was not set, then don't set it.
		if (tagValue!=null) {
			// tag was set,
			// if the tagValue is not in the expected barocde list and the barcode list is populated, then don't add this read and short circuit to next read.
			if (expectedBarcodes!=null && !expectedBarcodes.contains(tagValue))
				continue;
			// tag was set, set it.
			// is it in the map?  If so, update the tag value.
			if (collapseMap.containsKey(tagValue))
				tagValue = collapseMap.get(tagValue);
			r.setAttribute(outTag, tagValue);
		}
		writer.addAlignment(r);
	}		
}
 
Example 6
Source File: CellBarcodeFilteringIteratorTest.java    From Drop-seq with MIT License 6 votes vote down vote up
@Test
public void filterOut2() {
	Iterator<SAMRecord> underlyingIterator = Collections.emptyIterator();
	Collection<String> cellBarcodesToKeep = null;
	String cellBCTag ="XC";

	CellBarcodeFilteringIterator f = new CellBarcodeFilteringIterator(underlyingIterator, cellBCTag, cellBarcodesToKeep);
	SAMRecordSetBuilder b = new SAMRecordSetBuilder();
	// second argument is the contig index which is 0 based.  So contig index=0 -> chr1.  index=2 -> chr3, etc.
	b.addFrag("1", 0, 1, false);
	SAMRecord r1 = b.getRecords().iterator().next();

	r1.setAttribute(cellBCTag, "CELL1");
	Assert.assertFalse(f.filterOut(r1));

	r1.setAttribute(cellBCTag, "CELL2");
	Assert.assertFalse(f.filterOut(r1));

}
 
Example 7
Source File: Read.java    From cramtools with Apache License 2.0 6 votes vote down vote up
SAMRecord secondSAMRecord(SAMFileHeader header) {
	SAMRecord r = new SAMRecord(header);
	r.setReadName(evidenceRecord.getReadName());
	r.setReferenceName(evidenceRecord.Chromosome);
	r.setAlignmentStart(Integer.valueOf(evidenceRecord.MateOffsetInReference) + 1);
	r.setMappingQuality(Integer.valueOf(evidenceRecord.ScoreAllele0));
	r.setReadPairedFlag(true);
	r.setReadUnmappedFlag(false);
	r.setReadNegativeStrandFlag(negative);
	r.setFirstOfPairFlag(evidenceRecord.side == 1);
	r.setSecondOfPairFlag(!r.getFirstOfPairFlag());

	r.setCigar(new Cigar(Utils.toCigarOperatorList(secondHalf.samCigarElements)));

	r.setReadBases(Utils.toByteArray(secondHalf.readBasesBuf));
	r.setBaseQualities(Utils.toByteArray(secondHalf.readScoresBuf));

	r.setAttribute("GC", Utils.toString(secondHalf.gcList));
	r.setAttribute("GS", Utils.toString(secondHalf.gsBuf));
	r.setAttribute("GQ", SAMUtils.phredToFastq(Utils.toByteArray(secondHalf.gqBuf)));

	return r;
}
 
Example 8
Source File: OrientationTest.java    From rtg-tools with BSD 2-Clause "Simplified" License 5 votes vote down vote up
public static SAMRecord makeTestRecord(SAMFileHeader sfh, int flags, String sequence, String mateSequence, int pos, int matePos, String readGroup) {
  final SAMRecord sr = new SAMRecord(sfh);
  sr.setFlags(flags);
  sr.setReferenceName(sequence);
  sr.setMateReferenceName(mateSequence);
  sr.setAlignmentStart(pos);
  sr.setMateAlignmentStart(matePos);
  sr.setAttribute("RG", readGroup);
  if (pos < matePos) {
    sr.setInferredInsertSize(matePos - pos + 5);
  } else {
    sr.setInferredInsertSize(matePos - pos  - 5); //-1 * (pos - matePos + 5));
  }
  return sr;
}
 
Example 9
Source File: UmiUtil.java    From picard with MIT License 5 votes vote down vote up
/**
 * Set molecular identifier tag of record using UMI and alignment start position along with top and bottom strand information
 * @param rec SAMRecord to set molecular index of
 * @param assignedUmi Assigned or inferred UMI to use in the molecular index tag
 * @param molecularIdentifierTag SAM tag to use as molecular identifier, if null no modification to the record will be made
 * @param duplexUmis Treat UMI as duplex, if true /A and /B will be added to denote top and bottom strands respectively
 */
static void setMolecularIdentifier(final SAMRecord rec, final String assignedUmi, final String molecularIdentifierTag, final boolean duplexUmis) {

    if (molecularIdentifierTag == null) {
        return;
    }

    final StringBuilder molecularIdentifier = new StringBuilder();
    molecularIdentifier.append(rec.getContig());
    molecularIdentifier.append(CONTIG_SEPARATOR);
    molecularIdentifier.append(rec.getReadNegativeStrandFlag() ? rec.getAlignmentStart() : rec.getMateAlignmentStart());
    molecularIdentifier.append(UMI_NAME_SEPARATOR);
    molecularIdentifier.append(assignedUmi);

    if (duplexUmis) {
        // Reads whose strand position can be determined will have their
        // molecularIdentifier set to include an identifier appended that
        // indicates top or bottom strand.
        ReadStrand strand = getStrand(rec);
        switch (strand) {
            case TOP:
                molecularIdentifier.append(TOP_STRAND_DUPLEX);
                break;
            case BOTTOM:
                molecularIdentifier.append(BOTTOM_STRAND_DUPLEX);
                break;
            default:
                // If we can't determine strand position nothing
                // is appended to the molecularIdentifier.
                break;
        }
    }
    rec.setAttribute(molecularIdentifierTag, molecularIdentifier.toString());
}
 
Example 10
Source File: DetectBeadSynthesisErrors.java    From Drop-seq with MIT License 5 votes vote down vote up
/**
 * @return null if the read should not be included in the output BAM.
 */
SAMRecord padCellBarcodeFix (final SAMRecord r, final Map<String, BeadSynthesisErrorData> errorBarcodesWithPositions, final Map<String, String> intendedSequenceMap, final String cellBarcodeTag, final String molecularBarcodeTag, final double extremeBaseRatio) {
	String cellBC=r.getStringAttribute(cellBarcodeTag);

	BeadSynthesisErrorData bsed = errorBarcodesWithPositions.get(cellBC);
	if (bsed==null) return (r); // no correction data, no fix.

	// we're only going to fix cells where there's one or more synthesis errors
	BeadSynthesisErrorType bset = bsed.getErrorType(extremeBaseRatio, this.detectPrimerTool, this.EDIT_DISTANCE);
	if (bset==BeadSynthesisErrorType.NO_ERROR) return (r); // no error, return.
	// has an error, not a synthesis error...
	if (bset!=BeadSynthesisErrorType.SYNTH_MISSING_BASE)
		return (null);

	// has a synthesis error
	int polyTErrorPosition = bsed.getPolyTErrorPosition(this.EXTREME_BASE_RATIO);
	int umiLength = bsed.getBaseLength();
	int numErrors= umiLength-polyTErrorPosition+1;
	// if there are too many errors, or the errors aren't all polyT, return null.
	if (numErrors > MAX_NUM_ERRORS)
		return null;

	// apply the fix and return the fixed read.
	String umi = r.getStringAttribute(molecularBarcodeTag);
	String cellBCFixed = padCellBarcode(cellBC, polyTErrorPosition, umiLength);
	String umiFixed = fixUMI(cellBC, umi, polyTErrorPosition);

	// if there's an intended sequence, use that instead of the default padded cell barcode.
	String intendedSeq = intendedSequenceMap.get(cellBC);
	if (intendedSeq!=null)
		cellBCFixed=intendedSeq;

	r.setAttribute(cellBarcodeTag, cellBCFixed);
	r.setAttribute(molecularBarcodeTag, umiFixed);
	return r;
}
 
Example 11
Source File: DefaultTaggingIterator.java    From Drop-seq with MIT License 5 votes vote down vote up
@Override
public SAMRecord next() {
	SAMRecord r= this.underlyingIterator.next();
	String v = r.getStringAttribute(collapseTag);
	if (v!=null)
		r.setAttribute(outTag, v);
	return (r);
}
 
Example 12
Source File: UmiGraph.java    From picard with MIT License 5 votes vote down vote up
/**
 * Creates a UmiGraph object
 * @param set Set of reads that have the same start-stop positions, these will be broken up by UMI
 * @param umiTag UMI tag used in the SAM/BAM/CRAM file ie. RX
 * @param molecularIdentifierTag  Molecular identifier tag used in the SAM/BAM/CRAM file ie. MI
 * @param allowMissingUmis Allow for missing UMIs
 * @param duplexUmis Whether or not to use duplex of single strand UMIs
 */
UmiGraph(final DuplicateSet set, final String umiTag, final String molecularIdentifierTag,
                final boolean allowMissingUmis, final boolean duplexUmis) {
    this.umiTag = umiTag;
    this.molecularIdentifierTag = molecularIdentifierTag;
    this.allowMissingUmis = allowMissingUmis;
    this.duplexUmis = duplexUmis;
    records = set.getRecords();

    // First ensure that all the reads have a UMI, if any reads are missing a UMI throw an exception unless allowMissingUmis is true
    for (final SAMRecord rec : records) {
        if (rec.getStringAttribute(umiTag) == null) {
            if (!allowMissingUmis) {
                throw new PicardException("Read " + rec.getReadName() + " does not contain a UMI with the " + umiTag + " attribute.");
            }
            rec.setAttribute(umiTag, "");
        }
    }

    // Count the number of times each UMI occurs
    umiCounts = records.stream().collect(Collectors.groupingBy(p -> UmiUtil.getTopStrandNormalizedUmi(p, umiTag, duplexUmis), counting()));

    // At first we consider every UMI as if it were its own duplicate set
    numUmis = umiCounts.size();
    umi = new String[numUmis];
    duplicateSetID = IntStream.rangeClosed(0, numUmis-1).toArray();

    int i = 0;
    for (final String key : umiCounts.keySet()) {
        umi[i] = key;
        i++;
    }
}
 
Example 13
Source File: DefaultSamFilterTest.java    From rtg-tools with BSD 2-Clause "Simplified" License 5 votes vote down vote up
public void testVariantInvalids() {
  final SamFilterParams.SamFilterParamsBuilder builder = SamFilterParams.builder().excludeVariantInvalid(true);
  final DefaultSamFilter f = new DefaultSamFilter(builder.create());
  final DefaultSamFilter notf = new DefaultSamFilter(builder.invertFilters(true).create());
  final SAMRecord rec = new SAMRecord(new SAMFileHeader()); // Not unmapped but alignment position == 0
  rec.setAlignmentStart(0);
  assertFalse(f.acceptRecord(rec));
  assertFalse(f.acceptRecord(rec) == notf.acceptRecord(rec));
  rec.setAlignmentStart(1);
  assertTrue(f.acceptRecord(rec));
  assertFalse(f.acceptRecord(rec) == notf.acceptRecord(rec));
  rec.setAttribute("NH", 0);
  assertFalse(f.acceptRecord(rec));
  assertFalse(f.acceptRecord(rec) == notf.acceptRecord(rec));
}
 
Example 14
Source File: PreprocessingTools.java    From halvade with GNU General Public License v3.0 5 votes vote down vote up
public int streamElPrep(Reducer.Context context, String output, String rg, 
            int threads, SAMRecordIterator SAMit, 
            SAMFileHeader header, String dictFile, boolean updateRG, boolean keepDups, String RGID) throws InterruptedException, IOException, QualityException {
        long startTime = System.currentTimeMillis();
        String customArgs = HalvadeConf.getCustomArgs(context.getConfiguration(), "elprep", "");  
        String[] command = CommandGenerator.elPrep(bin, "/dev/stdin", output, threads, true, rg, null, !keepDups, customArgs);
//        runProcessAndWait(command);
        ProcessBuilderWrapper builder = new ProcessBuilderWrapper(command, null);
        builder.startProcess(true);        
        BufferedWriter localWriter = builder.getSTDINWriter();
        
        // write header
        final StringWriter headerTextBuffer = new StringWriter();
        new SAMTextHeaderCodec().encode(headerTextBuffer, header);
        final String headerText = headerTextBuffer.toString();
        localWriter.write(headerText, 0, headerText.length());
        
        
        SAMRecord sam;
        int reads = 0;
        while(SAMit.hasNext()) {
            sam = SAMit.next();
            if(updateRG)
                sam.setAttribute(SAMTag.RG.name(), RGID);
            String samString = sam.getSAMString();
            localWriter.write(samString, 0, samString.length());
            reads++;
        }
        localWriter.flush();
        localWriter.close();
                
        int error = builder.waitForCompletion();
        if(error != 0)
            throw new ProcessException("elPrep", error);
        long estimatedTime = System.currentTimeMillis() - startTime;
        Logger.DEBUG("estimated time: " + estimatedTime / 1000);
        if(context != null)
            context.getCounter(HalvadeCounters.TIME_ELPREP).increment(estimatedTime);
        return reads;
    }
 
Example 15
Source File: UmiUtilTest.java    From picard with MIT License 5 votes vote down vote up
@Test(dataProvider = "brokenUmiProvider", expectedExceptions = PicardException.class)
public void testBrokenUmi(final String brokenUmi) {
    SAMFileHeader header = new SAMFileHeader();
    SAMRecord rec = new SAMRecord(header);

    rec.setAttribute("RX", brokenUmi);

    // This should throw an exception due to a broken UMI in rec
    UmiUtil.getTopStrandNormalizedUmi(rec, "RX", true);
}
 
Example 16
Source File: ReorderSam.java    From picard with MIT License 4 votes vote down vote up
/**
 * Helper function that writes reads from iterator it into writer out, updating each SAMRecord along the way
 * according to the newOrder mapping from dictionary index -> index.  Name is used for printing only.
 */
private void writeReads(final SAMFileWriter out,
                        final SAMRecordIterator it,
                        final Map<Integer, Integer> newOrder,
                        final String name) {
    long counter = 0;
    log.info("  Processing " + name);

    while (it.hasNext()) {
        counter++;
        final SAMRecord read = it.next();
        final int oldRefIndex = read.getReferenceIndex();
        final int oldMateIndex = read.getMateReferenceIndex();
        final int newRefIndex = newOrderIndex(read, oldRefIndex, newOrder);

        read.setHeader(out.getFileHeader());
        read.setReferenceIndex(newRefIndex);

        // read becoming unmapped
        if (oldRefIndex != NO_ALIGNMENT_REFERENCE_INDEX &&
                newRefIndex == NO_ALIGNMENT_REFERENCE_INDEX) {
            read.setAlignmentStart(NO_ALIGNMENT_START);
            read.setReadUnmappedFlag(true);
            read.setCigarString(SAMRecord.NO_ALIGNMENT_CIGAR);
            read.setMappingQuality(SAMRecord.NO_MAPPING_QUALITY);
        }

        final int newMateIndex = newOrderIndex(read, oldMateIndex, newOrder);
        if (oldMateIndex != NO_ALIGNMENT_REFERENCE_INDEX &&
                newMateIndex == NO_ALIGNMENT_REFERENCE_INDEX) { // mate becoming unmapped
            read.setMateAlignmentStart(NO_ALIGNMENT_START);
            read.setMateUnmappedFlag(true);
            read.setAttribute(SAMTag.MC.name(), null);      // Set the Mate Cigar String to null
        }
        read.setMateReferenceIndex(newMateIndex);

        out.addAlignment(read);
    }

    it.close();
    log.info("Wrote " + counter + " reads");
}
 
Example 17
Source File: BamTagCountingIteratorTest.java    From Drop-seq with MIT License 4 votes vote down vote up
@Test
public void testFilter() {
	Iterator<SAMRecord> underlyingIterator = Collections.emptyIterator();
	String tagName = "XC";
	// filter always returns false.
	BamTagCountingIterator f = new BamTagCountingIterator(underlyingIterator, tagName);


	SAMRecordSetBuilder b = new SAMRecordSetBuilder();
	// second argument is the contig index which is 0 based.  So contig index=0 -> chr1.  index=2 -> chr3, etc.
	b.addFrag("1", 0, 1, false);
	SAMRecord r1 = b.getRecords().iterator().next();
	r1.setAttribute(tagName, "A");
	// this one counts.
	Assert.assertFalse(f.filterOut(r1));

	// wrong tag set, correct tag is not set.
	r1.setAttribute("XZ", "A");
	r1.setAttribute(tagName, null);
	Assert.assertFalse(f.filterOut(r1));

	r1.setAttribute(tagName, "A");
	// this one counts.
	Assert.assertFalse(f.filterOut(r1));

	r1.setAttribute(tagName, "B");
	// this one counts.
	Assert.assertFalse(f.filterOut(r1));

	r1.setAttribute(tagName, "C");
	// this one counts.
	Assert.assertFalse(f.filterOut(r1));

	ObjectCounter<String> expected = f.getCounts();
	expected.incrementByCount("A", 2);
	expected.incrementByCount("B", 1);
	expected.incrementByCount("C", 1);
	ObjectCounter<String> actual = f.getCounts();

	Assert.assertEquals(actual, expected);



}
 
Example 18
Source File: FilterBamByTagTest.java    From Drop-seq with MIT License 4 votes vote down vote up
@Test
public void filterReadTest() {
	SAMRecord readHasAttribute = new SAMRecord(null);
	String tag = "XT";
	readHasAttribute.setAttribute(tag, "1");

	Set<String> values = new HashSet<>();
	values.add("1");

	SAMRecord readNoAttribute = new SAMRecord(null);

	FilterBamByTag t = new FilterBamByTag();
	// read has attribute, accept any value, want to retain read.
	boolean flag1 = t.filterRead(readHasAttribute, tag, null, true, null, false);
	Assert.assertFalse(flag1);

	// read has attribute, accept any value, want to filter read.
	boolean flag2 = t.filterRead(readHasAttribute, tag, null, false, null, false);
	Assert.assertTrue(flag2);

	// read has attribute, accept certain value, want to retain read.
	boolean flag3 = t.filterRead(readHasAttribute, tag, values, true, null, false);
	Assert.assertFalse(flag3);

	// read has attribute, accept certain value, want to filter read.
	boolean flag4 = t.filterRead(readHasAttribute, tag, values, false, null, false);
	Assert.assertTrue(flag4);

	// read does not have attribute, accept any value, want to retain read.
	boolean flag5 = t.filterRead(readNoAttribute, tag, null, true, null, false);
	Assert.assertTrue(flag5);

	// read does not have attribute, accept any value, want to filter read.
	boolean flag6 = t.filterRead(readNoAttribute, tag, null, false, null, false);
	Assert.assertFalse(flag6);

	// read does not have attribute, accept certain value, want to retain read.
	boolean flag7 = t.filterRead(readNoAttribute, tag, values, true, null, false);
	Assert.assertTrue(flag7);

	// read does not have attribute, accept certain value, want to filter read.
	boolean flag8 = t.filterRead(readNoAttribute, tag, values, false, null, false);
	Assert.assertFalse(flag8);
	
	// test map quality filtering
	
	readHasAttribute.setMappingQuality(10);
	boolean flag9 = t.filterRead(readHasAttribute, tag, null, true, 10, false);
	Assert.assertFalse(flag9);
	boolean flag10 = t.filterRead(readHasAttribute, tag, null, true, 20, false);
	Assert.assertTrue(flag10);
	
	// want to test partial matching.
	SAMRecord readHasGene = new SAMRecord(null);
	String geneNameTag = "gn";
	readHasGene.setAttribute(geneNameTag, "A,B");

	Set<String> geneValues = new HashSet<> (Arrays.asList("A"));
	boolean flagExact =t.filterRead(readHasGene, geneNameTag, geneValues, true, 0, false);
	boolean flagPartial =t.filterRead(readHasGene, geneNameTag, geneValues, true, 0, true);
	// filter the exact match
	Assert.assertTrue(flagExact);
	// don't filter the partial match
	Assert.assertFalse(flagPartial);
	
}
 
Example 19
Source File: Evidence2SAM.java    From cramtools with Apache License 2.0 4 votes vote down vote up
public static void main(String[] args) throws IOException, ParseException {
	EvidenceRecordFileIterator iterator = new EvidenceRecordFileIterator(new File(args[0]));
	Read context = new Read();
	SAMFileHeader header = new SAMFileHeader();
	header.setSortOrder(SAMFileHeader.SortOrder.unsorted);
	SAMSequenceRecord samSequenceRecord = new SAMSequenceRecord("chr10", 135534747);
	samSequenceRecord.setAttribute(SAMSequenceRecord.ASSEMBLY_TAG, iterator.assembly_ID);
	String readGroup = String.format("%s-%s", iterator.assembly_ID, iterator.chromosome);
	SAMReadGroupRecord readGroupRecord = new SAMReadGroupRecord(readGroup);
	readGroupRecord.setAttribute(SAMReadGroupRecord.READ_GROUP_SAMPLE_TAG, iterator.sample);
	readGroupRecord.setAttribute(SAMReadGroupRecord.PLATFORM_UNIT_TAG, readGroup);
	readGroupRecord.setAttribute(SAMReadGroupRecord.SEQUENCING_CENTER_TAG, "\"Complete Genomics\"");
	Date date = new SimpleDateFormat("yyyy-MMM-dd hh:mm:ss.S").parse(iterator.generatedAt);
	readGroupRecord.setAttribute(SAMReadGroupRecord.DATE_RUN_PRODUCED_TAG,
			new SimpleDateFormat("yyyy-MM-dd").format(date));
	readGroupRecord.setAttribute(SAMReadGroupRecord.PLATFORM_TAG, "\"Complete Genomics\"");
	header.addReadGroup(readGroupRecord);

	header.addSequence(samSequenceRecord);

	SAMFileWriterFactory f = new SAMFileWriterFactory();
	SAMFileWriter samWriter;
	if (args.length > 1)
		samWriter = f.makeBAMWriter(header, false, new File(args[1]));
	else
		samWriter = f.makeSAMWriter(header, false, System.out);
	int i = 0;
	long time = System.currentTimeMillis();
	DedupIterator dedupIt = new DedupIterator(iterator);

	while (dedupIt.hasNext()) {
		EvidenceRecord evidenceRecord = dedupIt.next();
		if (evidenceRecord == null)
			throw new RuntimeException();
		try {
			context.reset(evidenceRecord);
			context.parse();
		} catch (Exception e) {
			System.err.println("Failed on line:");
			System.err.println(evidenceRecord.line);
			throw new RuntimeException(e);
		}

		SAMRecord[] samRecords = context.toSAMRecord(header);
		for (SAMRecord samRecord : samRecords) {
			samRecord.setAttribute(SAMTag.RG.name(), readGroup);
			samWriter.addAlignment(samRecord);
		}

		i++;
		if (i % 1000 == 0) {
			if (System.currentTimeMillis() - time > 10 * 1000) {
				time = System.currentTimeMillis();
				System.err.println(i);
			}
		}

		if (i > 10000)
			break;
	}
	samWriter.close();
}
 
Example 20
Source File: AlignmentsTags.java    From cramtools with Apache License 2.0 4 votes vote down vote up
/**
 * Same as {@link htsjdk.samtools.util.SequenceUtil#calculateMdAndNmTags}
 * but allows for reference excerpt instead of a full reference sequence.
 * 
 * @param record
 *            SAMRecord to inject NM and MD tags into
 * @param ref
 *            reference sequence bases, may be a subsequence starting at
 *            refOffest
 * @param refOffset
 *            array offset of the reference bases, 0 is the same as the
 *            whole sequence. This value will be subtracted from the
 *            reference array index in calculations.
 * @param calcMD
 *            calculate MD tag if true
 * @param calcNM
 *            calculate NM tag if true
 */
public static void calculateMdAndNmTags(final SAMRecord record, final byte[] ref, final int refOffset,
		final boolean calcMD, final boolean calcNM) {
	if (!calcMD && !calcNM)
		return;

	final Cigar cigar = record.getCigar();
	final List<CigarElement> cigarElements = cigar.getCigarElements();
	final byte[] seq = record.getReadBases();
	final int start = record.getAlignmentStart() - 1;
	int i, x, y, u = 0;
	int nm = 0;
	final StringBuilder str = new StringBuilder();

	final int size = cigarElements.size();
	for (i = y = 0, x = start; i < size; ++i) {
		final CigarElement ce = cigarElements.get(i);
		int j;
		final int length = ce.getLength();
		final CigarOperator op = ce.getOperator();
		if (op == CigarOperator.MATCH_OR_MISMATCH || op == CigarOperator.EQ || op == CigarOperator.X) {
			for (j = 0; j < length; ++j) {
				final int z = y + j;

				if (refOffset + ref.length <= x + j)
					break; // out of boundary

				int c1 = 0;
				int c2 = 0;
				// try {
				c1 = seq[z];
				c2 = ref[x + j - refOffset];

				if ((c1 == c2) || c1 == 0) {
					// a match
					++u;
				} else {
					str.append(u);
					str.appendCodePoint(ref[x + j - refOffset]);
					u = 0;
					++nm;
				}
			}
			if (j < length)
				break;
			x += length;
			y += length;
		} else if (op == CigarOperator.DELETION) {
			str.append(u);
			str.append('^');
			for (j = 0; j < length; ++j) {
				if (ref[x + j - refOffset] == 0)
					break;
				str.appendCodePoint(ref[x + j - refOffset]);
			}
			u = 0;
			if (j < length)
				break;
			x += length;
			nm += length;
		} else if (op == CigarOperator.INSERTION || op == CigarOperator.SOFT_CLIP) {
			y += length;
			if (op == CigarOperator.INSERTION)
				nm += length;
		} else if (op == CigarOperator.SKIPPED_REGION) {
			x += length;
		}
	}
	str.append(u);

	if (calcMD)
		record.setAttribute(SAMTag.MD.name(), str.toString());
	if (calcNM)
		record.setAttribute(SAMTag.NM.name(), nm);
}