htsjdk.samtools.SAMRecord#setAttribute

Source File: IntervalTagComparatorTest.java From Drop-seq with MIT License

6 votes

private List<SAMRecord> getRecords () {
	SAMRecordSetBuilder builder  = new SAMRecordSetBuilder();

	// chromosome 2 and chromosome 10
	SAMRecord r1 = builder.addFrag("read1", 1, 1, false);
	Interval i1 = new Interval("2", 1, 10, true, null);
	r1.setAttribute(this.intervalTag, IntervalTagComparator.toString(i1));

	SAMRecord r2 = builder.addFrag("read1", 1, 1, false);
	Interval i2 = new Interval("10", 1, 10, true, null);
	r2.setAttribute(this.intervalTag, IntervalTagComparator.toString(i2));

	List<SAMRecord> result = new ArrayList<>();
	result.add(r1);
	result.add(r2);
	return result;
}

Source File: TagReadWithInterval.java From Drop-seq with MIT License

6 votes

private SAMRecord tagRead(final SAMRecord r, final OverlapDetector<Interval> od) {
	Set<Interval>intervals = new HashSet<>();
	List<AlignmentBlock> blocks = r.getAlignmentBlocks();
	for (AlignmentBlock b: blocks) {
		int refStart =b.getReferenceStart();
		int refEnd = refStart+b.getLength()-1;
		Interval v = new Interval(r.getReferenceName(), refStart, refEnd);
		Collection<Interval> blockResult = od.getOverlaps(v);
		intervals.addAll(blockResult);
	}
	String tagName = getIntervalName(intervals);
	if (tagName!=null)
		r.setAttribute(this.TAG, tagName);
	else
		r.setAttribute(this.TAG, null);
	return (r);

}

Source File: SNPUMICellReadIteratorWrapper.java From Drop-seq with MIT License

6 votes

/**
 * Check if a read overlaps any SNPs in the OverlapDetector.  Tag reads with SNPs.
 * If more than 1 SNP tags a read, make a read for each SNP.
 * Simplified since data goes through GeneFunctionIteratorWrapper to take care of how reads/genes interact.
 */
private void processSNP (final SAMRecord r) {
	List<AlignmentBlock> blocks = r.getAlignmentBlocks();

	Collection<String> snps = new HashSet<>();

	for (AlignmentBlock b: blocks) {
		int start = b.getReferenceStart();
		int end = start + b.getLength() -1;

		Interval i = new Interval(r.getReferenceName(), start, end);
		Collection<Interval> overlaps = this.snpIntervals.getOverlaps(i);
		for (Interval o: overlaps)
			snps.add(IntervalTagComparator.toString(o));
	}

	// 1 read per SNP.
	for (String snp:snps) {
		SAMRecord rr = Utils.getClone(r);
		rr.setAttribute(this.snpTag, snp);
		queueRecordForOutput(rr);
	}
}

Source File: MarkDuplicatesTest.java From picard with MIT License

6 votes

@Test
public void testWithBarcodeComplex() {
    final AbstractMarkDuplicatesCommandLineProgramTester tester = getTester();
    tester.getSamRecordSetBuilder().setReadLength(68);
    final String readNameOne = "RUNID:1:1:15993:13361";
    final String readNameTwo = "RUNID:2:2:15993:13362";
    final String readNameThree = "RUNID:3:3:15993:13362";

    // first two reads have the same barcode, third read has a different barcode
    tester.addMatePair(readNameOne, 2, 41212324, 41212310, false, false, false, false, "33S35M", "19S49M", true, true, false, false, false, DEFAULT_BASE_QUALITY);
    tester.addMatePair(readNameTwo, 2, 41212324, 41212310, false, false, true, true, "33S35M", "19S49M", true, true, false, false, false, DEFAULT_BASE_QUALITY); // same barcode as the first
    tester.addMatePair(readNameThree, 2, 41212324, 41212310, false, false, false, false, "33S35M", "19S49M", true, true, false, false, false, DEFAULT_BASE_QUALITY);

    final String barcodeTag = "BC";
    for (final SAMRecord record : new IterableAdapter<>(tester.getRecordIterator())) {
        if (record.getReadName().equals(readNameOne) || record.getReadName().equals(readNameTwo)) {
            record.setAttribute(barcodeTag, "AAAA");
        }
        else if (record.getReadName().equals(readNameThree)) {
            record.setAttribute(barcodeTag, "CCCC");
        }
    }
    tester.addArg("BARCODE_TAG=" + barcodeTag);
    tester.runTest();
}

Source File: CollapseTagWithContext.java From Drop-seq with MIT License

6 votes

private void retagBarcodedReads (Iterator<SAMRecord> informativeRecs, ObjectCounter<String> barcodeCounts, Map<String, String> collapseMap, boolean dropSmallCounts, SAMFileWriter writer,
		String collapseTag, String outTag) {
	
	Set<String> expectedBarcodes = null;
	// already validated that if dropSmallCounts is true, then the minNumObservations > 1.
	if (dropSmallCounts)
		// use all the remaining barcodes that have counts.
		expectedBarcodes = new HashSet<>(barcodeCounts.getKeys());
	while (informativeRecs.hasNext()) {
		SAMRecord r = informativeRecs.next();
		String tagValue = r.getStringAttribute(collapseTag);
		// if the tag was not set, then don't set it.
		if (tagValue!=null) {
			// tag was set,
			// if the tagValue is not in the expected barocde list and the barcode list is populated, then don't add this read and short circuit to next read.
			if (expectedBarcodes!=null && !expectedBarcodes.contains(tagValue))
				continue;
			// tag was set, set it.
			// is it in the map?  If so, update the tag value.
			if (collapseMap.containsKey(tagValue))
				tagValue = collapseMap.get(tagValue);
			r.setAttribute(outTag, tagValue);
		}
		writer.addAlignment(r);
	}		
}

Source File: CellBarcodeFilteringIteratorTest.java From Drop-seq with MIT License

6 votes

@Test
public void filterOut2() {
	Iterator<SAMRecord> underlyingIterator = Collections.emptyIterator();
	Collection<String> cellBarcodesToKeep = null;
	String cellBCTag ="XC";

	CellBarcodeFilteringIterator f = new CellBarcodeFilteringIterator(underlyingIterator, cellBCTag, cellBarcodesToKeep);
	SAMRecordSetBuilder b = new SAMRecordSetBuilder();
	// second argument is the contig index which is 0 based.  So contig index=0 -> chr1.  index=2 -> chr3, etc.
	b.addFrag("1", 0, 1, false);
	SAMRecord r1 = b.getRecords().iterator().next();

	r1.setAttribute(cellBCTag, "CELL1");
	Assert.assertFalse(f.filterOut(r1));

	r1.setAttribute(cellBCTag, "CELL2");
	Assert.assertFalse(f.filterOut(r1));

}

Source File: Read.java From cramtools with Apache License 2.0

6 votes

SAMRecord secondSAMRecord(SAMFileHeader header) {
	SAMRecord r = new SAMRecord(header);
	r.setReadName(evidenceRecord.getReadName());
	r.setReferenceName(evidenceRecord.Chromosome);
	r.setAlignmentStart(Integer.valueOf(evidenceRecord.MateOffsetInReference) + 1);
	r.setMappingQuality(Integer.valueOf(evidenceRecord.ScoreAllele0));
	r.setReadPairedFlag(true);
	r.setReadUnmappedFlag(false);
	r.setReadNegativeStrandFlag(negative);
	r.setFirstOfPairFlag(evidenceRecord.side == 1);
	r.setSecondOfPairFlag(!r.getFirstOfPairFlag());

	r.setCigar(new Cigar(Utils.toCigarOperatorList(secondHalf.samCigarElements)));

	r.setReadBases(Utils.toByteArray(secondHalf.readBasesBuf));
	r.setBaseQualities(Utils.toByteArray(secondHalf.readScoresBuf));

	r.setAttribute("GC", Utils.toString(secondHalf.gcList));
	r.setAttribute("GS", Utils.toString(secondHalf.gsBuf));
	r.setAttribute("GQ", SAMUtils.phredToFastq(Utils.toByteArray(secondHalf.gqBuf)));

	return r;
}

Source File: OrientationTest.java From rtg-tools with BSD 2-Clause "Simplified" License

5 votes

public static SAMRecord makeTestRecord(SAMFileHeader sfh, int flags, String sequence, String mateSequence, int pos, int matePos, String readGroup) {
  final SAMRecord sr = new SAMRecord(sfh);
  sr.setFlags(flags);
  sr.setReferenceName(sequence);
  sr.setMateReferenceName(mateSequence);
  sr.setAlignmentStart(pos);
  sr.setMateAlignmentStart(matePos);
  sr.setAttribute("RG", readGroup);
  if (pos < matePos) {
    sr.setInferredInsertSize(matePos - pos + 5);
  } else {
    sr.setInferredInsertSize(matePos - pos  - 5); //-1 * (pos - matePos + 5));
  }
  return sr;
}

Source File: UmiUtil.java From picard with MIT License

5 votes

/**
 * Set molecular identifier tag of record using UMI and alignment start position along with top and bottom strand information
 * @param rec SAMRecord to set molecular index of
 * @param assignedUmi Assigned or inferred UMI to use in the molecular index tag
 * @param molecularIdentifierTag SAM tag to use as molecular identifier, if null no modification to the record will be made
 * @param duplexUmis Treat UMI as duplex, if true /A and /B will be added to denote top and bottom strands respectively
 */
static void setMolecularIdentifier(final SAMRecord rec, final String assignedUmi, final String molecularIdentifierTag, final boolean duplexUmis) {

    if (molecularIdentifierTag == null) {
        return;
    }

    final StringBuilder molecularIdentifier = new StringBuilder();
    molecularIdentifier.append(rec.getContig());
    molecularIdentifier.append(CONTIG_SEPARATOR);
    molecularIdentifier.append(rec.getReadNegativeStrandFlag() ? rec.getAlignmentStart() : rec.getMateAlignmentStart());
    molecularIdentifier.append(UMI_NAME_SEPARATOR);
    molecularIdentifier.append(assignedUmi);

    if (duplexUmis) {
        // Reads whose strand position can be determined will have their
        // molecularIdentifier set to include an identifier appended that
        // indicates top or bottom strand.
        ReadStrand strand = getStrand(rec);
        switch (strand) {
            case TOP:
                molecularIdentifier.append(TOP_STRAND_DUPLEX);
                break;
            case BOTTOM:
                molecularIdentifier.append(BOTTOM_STRAND_DUPLEX);
                break;
            default:
                // If we can't determine strand position nothing
                // is appended to the molecularIdentifier.
                break;
        }
    }
    rec.setAttribute(molecularIdentifierTag, molecularIdentifier.toString());
}

Source File: DetectBeadSynthesisErrors.java From Drop-seq with MIT License

5 votes

/**
 * @return null if the read should not be included in the output BAM.
 */
SAMRecord padCellBarcodeFix (final SAMRecord r, final Map<String, BeadSynthesisErrorData> errorBarcodesWithPositions, final Map<String, String> intendedSequenceMap, final String cellBarcodeTag, final String molecularBarcodeTag, final double extremeBaseRatio) {
	String cellBC=r.getStringAttribute(cellBarcodeTag);

	BeadSynthesisErrorData bsed = errorBarcodesWithPositions.get(cellBC);
	if (bsed==null) return (r); // no correction data, no fix.

	// we're only going to fix cells where there's one or more synthesis errors
	BeadSynthesisErrorType bset = bsed.getErrorType(extremeBaseRatio, this.detectPrimerTool, this.EDIT_DISTANCE);
	if (bset==BeadSynthesisErrorType.NO_ERROR) return (r); // no error, return.
	// has an error, not a synthesis error...
	if (bset!=BeadSynthesisErrorType.SYNTH_MISSING_BASE)
		return (null);

	// has a synthesis error
	int polyTErrorPosition = bsed.getPolyTErrorPosition(this.EXTREME_BASE_RATIO);
	int umiLength = bsed.getBaseLength();
	int numErrors= umiLength-polyTErrorPosition+1;
	// if there are too many errors, or the errors aren't all polyT, return null.
	if (numErrors > MAX_NUM_ERRORS)
		return null;

	// apply the fix and return the fixed read.
	String umi = r.getStringAttribute(molecularBarcodeTag);
	String cellBCFixed = padCellBarcode(cellBC, polyTErrorPosition, umiLength);
	String umiFixed = fixUMI(cellBC, umi, polyTErrorPosition);

	// if there's an intended sequence, use that instead of the default padded cell barcode.
	String intendedSeq = intendedSequenceMap.get(cellBC);
	if (intendedSeq!=null)
		cellBCFixed=intendedSeq;

	r.setAttribute(cellBarcodeTag, cellBCFixed);
	r.setAttribute(molecularBarcodeTag, umiFixed);
	return r;
}

Source File: DefaultTaggingIterator.java From Drop-seq with MIT License

5 votes

@Override
public SAMRecord next() {
	SAMRecord r= this.underlyingIterator.next();
	String v = r.getStringAttribute(collapseTag);
	if (v!=null)
		r.setAttribute(outTag, v);
	return (r);
}

Source File: UmiGraph.java From picard with MIT License

5 votes

/**
 * Creates a UmiGraph object
 * @param set Set of reads that have the same start-stop positions, these will be broken up by UMI
 * @param umiTag UMI tag used in the SAM/BAM/CRAM file ie. RX
 * @param molecularIdentifierTag  Molecular identifier tag used in the SAM/BAM/CRAM file ie. MI
 * @param allowMissingUmis Allow for missing UMIs
 * @param duplexUmis Whether or not to use duplex of single strand UMIs
 */
UmiGraph(final DuplicateSet set, final String umiTag, final String molecularIdentifierTag,
                final boolean allowMissingUmis, final boolean duplexUmis) {
    this.umiTag = umiTag;
    this.molecularIdentifierTag = molecularIdentifierTag;
    this.allowMissingUmis = allowMissingUmis;
    this.duplexUmis = duplexUmis;
    records = set.getRecords();

    // First ensure that all the reads have a UMI, if any reads are missing a UMI throw an exception unless allowMissingUmis is true
    for (final SAMRecord rec : records) {
        if (rec.getStringAttribute(umiTag) == null) {
            if (!allowMissingUmis) {
                throw new PicardException("Read " + rec.getReadName() + " does not contain a UMI with the " + umiTag + " attribute.");
            }
            rec.setAttribute(umiTag, "");
        }
    }

    // Count the number of times each UMI occurs
    umiCounts = records.stream().collect(Collectors.groupingBy(p -> UmiUtil.getTopStrandNormalizedUmi(p, umiTag, duplexUmis), counting()));

    // At first we consider every UMI as if it were its own duplicate set
    numUmis = umiCounts.size();
    umi = new String[numUmis];
    duplicateSetID = IntStream.rangeClosed(0, numUmis-1).toArray();

    int i = 0;
    for (final String key : umiCounts.keySet()) {
        umi[i] = key;
        i++;
    }
}

Source File: DefaultSamFilterTest.java From rtg-tools with BSD 2-Clause "Simplified" License

5 votes

public void testVariantInvalids() {
  final SamFilterParams.SamFilterParamsBuilder builder = SamFilterParams.builder().excludeVariantInvalid(true);
  final DefaultSamFilter f = new DefaultSamFilter(builder.create());
  final DefaultSamFilter notf = new DefaultSamFilter(builder.invertFilters(true).create());
  final SAMRecord rec = new SAMRecord(new SAMFileHeader()); // Not unmapped but alignment position == 0
  rec.setAlignmentStart(0);
  assertFalse(f.acceptRecord(rec));
  assertFalse(f.acceptRecord(rec) == notf.acceptRecord(rec));
  rec.setAlignmentStart(1);
  assertTrue(f.acceptRecord(rec));
  assertFalse(f.acceptRecord(rec) == notf.acceptRecord(rec));
  rec.setAttribute("NH", 0);
  assertFalse(f.acceptRecord(rec));
  assertFalse(f.acceptRecord(rec) == notf.acceptRecord(rec));
}

Source File: PreprocessingTools.java From halvade with GNU General Public License v3.0

5 votes

public int streamElPrep(Reducer.Context context, String output, String rg, 
            int threads, SAMRecordIterator SAMit, 
            SAMFileHeader header, String dictFile, boolean updateRG, boolean keepDups, String RGID) throws InterruptedException, IOException, QualityException {
        long startTime = System.currentTimeMillis();
        String customArgs = HalvadeConf.getCustomArgs(context.getConfiguration(), "elprep", "");  
        String[] command = CommandGenerator.elPrep(bin, "/dev/stdin", output, threads, true, rg, null, !keepDups, customArgs);
//        runProcessAndWait(command);
        ProcessBuilderWrapper builder = new ProcessBuilderWrapper(command, null);
        builder.startProcess(true);        
        BufferedWriter localWriter = builder.getSTDINWriter();
        
        // write header
        final StringWriter headerTextBuffer = new StringWriter();
        new SAMTextHeaderCodec().encode(headerTextBuffer, header);
        final String headerText = headerTextBuffer.toString();
        localWriter.write(headerText, 0, headerText.length());
        
        
        SAMRecord sam;
        int reads = 0;
        while(SAMit.hasNext()) {
            sam = SAMit.next();
            if(updateRG)
                sam.setAttribute(SAMTag.RG.name(), RGID);
            String samString = sam.getSAMString();
            localWriter.write(samString, 0, samString.length());
            reads++;
        }
        localWriter.flush();
        localWriter.close();
                
        int error = builder.waitForCompletion();
        if(error != 0)
            throw new ProcessException("elPrep", error);
        long estimatedTime = System.currentTimeMillis() - startTime;
        Logger.DEBUG("estimated time: " + estimatedTime / 1000);
        if(context != null)
            context.getCounter(HalvadeCounters.TIME_ELPREP).increment(estimatedTime);
        return reads;
    }

Source File: UmiUtilTest.java From picard with MIT License

5 votes

@Test(dataProvider = "brokenUmiProvider", expectedExceptions = PicardException.class)
public void testBrokenUmi(final String brokenUmi) {
    SAMFileHeader header = new SAMFileHeader();
    SAMRecord rec = new SAMRecord(header);

    rec.setAttribute("RX", brokenUmi);

    // This should throw an exception due to a broken UMI in rec
    UmiUtil.getTopStrandNormalizedUmi(rec, "RX", true);
}

Source File: ReorderSam.java From picard with MIT License

4 votes

/**
 * Helper function that writes reads from iterator it into writer out, updating each SAMRecord along the way
 * according to the newOrder mapping from dictionary index -> index.  Name is used for printing only.
 */
private void writeReads(final SAMFileWriter out,
                        final SAMRecordIterator it,
                        final Map<Integer, Integer> newOrder,
                        final String name) {
    long counter = 0;
    log.info("  Processing " + name);

    while (it.hasNext()) {
        counter++;
        final SAMRecord read = it.next();
        final int oldRefIndex = read.getReferenceIndex();
        final int oldMateIndex = read.getMateReferenceIndex();
        final int newRefIndex = newOrderIndex(read, oldRefIndex, newOrder);

        read.setHeader(out.getFileHeader());
        read.setReferenceIndex(newRefIndex);

        // read becoming unmapped
        if (oldRefIndex != NO_ALIGNMENT_REFERENCE_INDEX &&
                newRefIndex == NO_ALIGNMENT_REFERENCE_INDEX) {
            read.setAlignmentStart(NO_ALIGNMENT_START);
            read.setReadUnmappedFlag(true);
            read.setCigarString(SAMRecord.NO_ALIGNMENT_CIGAR);
            read.setMappingQuality(SAMRecord.NO_MAPPING_QUALITY);
        }

        final int newMateIndex = newOrderIndex(read, oldMateIndex, newOrder);
        if (oldMateIndex != NO_ALIGNMENT_REFERENCE_INDEX &&
                newMateIndex == NO_ALIGNMENT_REFERENCE_INDEX) { // mate becoming unmapped
            read.setMateAlignmentStart(NO_ALIGNMENT_START);
            read.setMateUnmappedFlag(true);
            read.setAttribute(SAMTag.MC.name(), null);      // Set the Mate Cigar String to null
        }
        read.setMateReferenceIndex(newMateIndex);

        out.addAlignment(read);
    }

    it.close();
    log.info("Wrote " + counter + " reads");
}

Source File: BamTagCountingIteratorTest.java From Drop-seq with MIT License

4 votes

@Test
public void testFilter() {
	Iterator<SAMRecord> underlyingIterator = Collections.emptyIterator();
	String tagName = "XC";
	// filter always returns false.
	BamTagCountingIterator f = new BamTagCountingIterator(underlyingIterator, tagName);


	SAMRecordSetBuilder b = new SAMRecordSetBuilder();
	// second argument is the contig index which is 0 based.  So contig index=0 -> chr1.  index=2 -> chr3, etc.
	b.addFrag("1", 0, 1, false);
	SAMRecord r1 = b.getRecords().iterator().next();
	r1.setAttribute(tagName, "A");
	// this one counts.
	Assert.assertFalse(f.filterOut(r1));

	// wrong tag set, correct tag is not set.
	r1.setAttribute("XZ", "A");
	r1.setAttribute(tagName, null);
	Assert.assertFalse(f.filterOut(r1));

	r1.setAttribute(tagName, "A");
	// this one counts.
	Assert.assertFalse(f.filterOut(r1));

	r1.setAttribute(tagName, "B");
	// this one counts.
	Assert.assertFalse(f.filterOut(r1));

	r1.setAttribute(tagName, "C");
	// this one counts.
	Assert.assertFalse(f.filterOut(r1));

	ObjectCounter<String> expected = f.getCounts();
	expected.incrementByCount("A", 2);
	expected.incrementByCount("B", 1);
	expected.incrementByCount("C", 1);
	ObjectCounter<String> actual = f.getCounts();

	Assert.assertEquals(actual, expected);



}

Source File: FilterBamByTagTest.java From Drop-seq with MIT License

4 votes

@Test
public void filterReadTest() {
	SAMRecord readHasAttribute = new SAMRecord(null);
	String tag = "XT";
	readHasAttribute.setAttribute(tag, "1");

	Set<String> values = new HashSet<>();
	values.add("1");

	SAMRecord readNoAttribute = new SAMRecord(null);

	FilterBamByTag t = new FilterBamByTag();
	// read has attribute, accept any value, want to retain read.
	boolean flag1 = t.filterRead(readHasAttribute, tag, null, true, null, false);
	Assert.assertFalse(flag1);

	// read has attribute, accept any value, want to filter read.
	boolean flag2 = t.filterRead(readHasAttribute, tag, null, false, null, false);
	Assert.assertTrue(flag2);

	// read has attribute, accept certain value, want to retain read.
	boolean flag3 = t.filterRead(readHasAttribute, tag, values, true, null, false);
	Assert.assertFalse(flag3);

	// read has attribute, accept certain value, want to filter read.
	boolean flag4 = t.filterRead(readHasAttribute, tag, values, false, null, false);
	Assert.assertTrue(flag4);

	// read does not have attribute, accept any value, want to retain read.
	boolean flag5 = t.filterRead(readNoAttribute, tag, null, true, null, false);
	Assert.assertTrue(flag5);

	// read does not have attribute, accept any value, want to filter read.
	boolean flag6 = t.filterRead(readNoAttribute, tag, null, false, null, false);
	Assert.assertFalse(flag6);

	// read does not have attribute, accept certain value, want to retain read.
	boolean flag7 = t.filterRead(readNoAttribute, tag, values, true, null, false);
	Assert.assertTrue(flag7);

	// read does not have attribute, accept certain value, want to filter read.
	boolean flag8 = t.filterRead(readNoAttribute, tag, values, false, null, false);
	Assert.assertFalse(flag8);
	
	// test map quality filtering
	
	readHasAttribute.setMappingQuality(10);
	boolean flag9 = t.filterRead(readHasAttribute, tag, null, true, 10, false);
	Assert.assertFalse(flag9);
	boolean flag10 = t.filterRead(readHasAttribute, tag, null, true, 20, false);
	Assert.assertTrue(flag10);
	
	// want to test partial matching.
	SAMRecord readHasGene = new SAMRecord(null);
	String geneNameTag = "gn";
	readHasGene.setAttribute(geneNameTag, "A,B");

	Set<String> geneValues = new HashSet<> (Arrays.asList("A"));
	boolean flagExact =t.filterRead(readHasGene, geneNameTag, geneValues, true, 0, false);
	boolean flagPartial =t.filterRead(readHasGene, geneNameTag, geneValues, true, 0, true);
	// filter the exact match
	Assert.assertTrue(flagExact);
	// don't filter the partial match
	Assert.assertFalse(flagPartial);
	
}

Source File: Evidence2SAM.java From cramtools with Apache License 2.0

4 votes

public static void main(String[] args) throws IOException, ParseException {
	EvidenceRecordFileIterator iterator = new EvidenceRecordFileIterator(new File(args[0]));
	Read context = new Read();
	SAMFileHeader header = new SAMFileHeader();
	header.setSortOrder(SAMFileHeader.SortOrder.unsorted);
	SAMSequenceRecord samSequenceRecord = new SAMSequenceRecord("chr10", 135534747);
	samSequenceRecord.setAttribute(SAMSequenceRecord.ASSEMBLY_TAG, iterator.assembly_ID);
	String readGroup = String.format("%s-%s", iterator.assembly_ID, iterator.chromosome);
	SAMReadGroupRecord readGroupRecord = new SAMReadGroupRecord(readGroup);
	readGroupRecord.setAttribute(SAMReadGroupRecord.READ_GROUP_SAMPLE_TAG, iterator.sample);
	readGroupRecord.setAttribute(SAMReadGroupRecord.PLATFORM_UNIT_TAG, readGroup);
	readGroupRecord.setAttribute(SAMReadGroupRecord.SEQUENCING_CENTER_TAG, "\"Complete Genomics\"");
	Date date = new SimpleDateFormat("yyyy-MMM-dd hh:mm:ss.S").parse(iterator.generatedAt);
	readGroupRecord.setAttribute(SAMReadGroupRecord.DATE_RUN_PRODUCED_TAG,
			new SimpleDateFormat("yyyy-MM-dd").format(date));
	readGroupRecord.setAttribute(SAMReadGroupRecord.PLATFORM_TAG, "\"Complete Genomics\"");
	header.addReadGroup(readGroupRecord);

	header.addSequence(samSequenceRecord);

	SAMFileWriterFactory f = new SAMFileWriterFactory();
	SAMFileWriter samWriter;
	if (args.length > 1)
		samWriter = f.makeBAMWriter(header, false, new File(args[1]));
	else
		samWriter = f.makeSAMWriter(header, false, System.out);
	int i = 0;
	long time = System.currentTimeMillis();
	DedupIterator dedupIt = new DedupIterator(iterator);

	while (dedupIt.hasNext()) {
		EvidenceRecord evidenceRecord = dedupIt.next();
		if (evidenceRecord == null)
			throw new RuntimeException();
		try {
			context.reset(evidenceRecord);
			context.parse();
		} catch (Exception e) {
			System.err.println("Failed on line:");
			System.err.println(evidenceRecord.line);
			throw new RuntimeException(e);
		}

		SAMRecord[] samRecords = context.toSAMRecord(header);
		for (SAMRecord samRecord : samRecords) {
			samRecord.setAttribute(SAMTag.RG.name(), readGroup);
			samWriter.addAlignment(samRecord);
		}

		i++;
		if (i % 1000 == 0) {
			if (System.currentTimeMillis() - time > 10 * 1000) {
				time = System.currentTimeMillis();
				System.err.println(i);
			}
		}

		if (i > 10000)
			break;
	}
	samWriter.close();
}

Source File: AlignmentsTags.java From cramtools with Apache License 2.0

4 votes

/**
 * Same as {@link htsjdk.samtools.util.SequenceUtil#calculateMdAndNmTags}
 * but allows for reference excerpt instead of a full reference sequence.
 * 
 * @param record
 *            SAMRecord to inject NM and MD tags into
 * @param ref
 *            reference sequence bases, may be a subsequence starting at
 *            refOffest
 * @param refOffset
 *            array offset of the reference bases, 0 is the same as the
 *            whole sequence. This value will be subtracted from the
 *            reference array index in calculations.
 * @param calcMD
 *            calculate MD tag if true
 * @param calcNM
 *            calculate NM tag if true
 */
public static void calculateMdAndNmTags(final SAMRecord record, final byte[] ref, final int refOffset,
		final boolean calcMD, final boolean calcNM) {
	if (!calcMD && !calcNM)
		return;

	final Cigar cigar = record.getCigar();
	final List<CigarElement> cigarElements = cigar.getCigarElements();
	final byte[] seq = record.getReadBases();
	final int start = record.getAlignmentStart() - 1;
	int i, x, y, u = 0;
	int nm = 0;
	final StringBuilder str = new StringBuilder();

	final int size = cigarElements.size();
	for (i = y = 0, x = start; i < size; ++i) {
		final CigarElement ce = cigarElements.get(i);
		int j;
		final int length = ce.getLength();
		final CigarOperator op = ce.getOperator();
		if (op == CigarOperator.MATCH_OR_MISMATCH || op == CigarOperator.EQ || op == CigarOperator.X) {
			for (j = 0; j < length; ++j) {
				final int z = y + j;

				if (refOffset + ref.length <= x + j)
					break; // out of boundary

				int c1 = 0;
				int c2 = 0;
				// try {
				c1 = seq[z];
				c2 = ref[x + j - refOffset];

				if ((c1 == c2) || c1 == 0) {
					// a match
					++u;
				} else {
					str.append(u);
					str.appendCodePoint(ref[x + j - refOffset]);
					u = 0;
					++nm;
				}
			}
			if (j < length)
				break;
			x += length;
			y += length;
		} else if (op == CigarOperator.DELETION) {
			str.append(u);
			str.append('^');
			for (j = 0; j < length; ++j) {
				if (ref[x + j - refOffset] == 0)
					break;
				str.appendCodePoint(ref[x + j - refOffset]);
			}
			u = 0;
			if (j < length)
				break;
			x += length;
			nm += length;
		} else if (op == CigarOperator.INSERTION || op == CigarOperator.SOFT_CLIP) {
			y += length;
			if (op == CigarOperator.INSERTION)
				nm += length;
		} else if (op == CigarOperator.SKIPPED_REGION) {
			x += length;
		}
	}
	str.append(u);

	if (calcMD)
		record.setAttribute(SAMTag.MD.name(), str.toString());
	if (calcNM)
		record.setAttribute(SAMTag.NM.name(), nm);
}

Java Code Examples for htsjdk.samtools.SAMRecord#setAttribute()