Java Code Examples for htsjdk.samtools.SAMRecord#setAttribute()
The following examples show how to use
htsjdk.samtools.SAMRecord#setAttribute() .
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: IntervalTagComparatorTest.java From Drop-seq with MIT License | 6 votes |
private List<SAMRecord> getRecords () { SAMRecordSetBuilder builder = new SAMRecordSetBuilder(); // chromosome 2 and chromosome 10 SAMRecord r1 = builder.addFrag("read1", 1, 1, false); Interval i1 = new Interval("2", 1, 10, true, null); r1.setAttribute(this.intervalTag, IntervalTagComparator.toString(i1)); SAMRecord r2 = builder.addFrag("read1", 1, 1, false); Interval i2 = new Interval("10", 1, 10, true, null); r2.setAttribute(this.intervalTag, IntervalTagComparator.toString(i2)); List<SAMRecord> result = new ArrayList<>(); result.add(r1); result.add(r2); return result; }
Example 2
Source File: TagReadWithInterval.java From Drop-seq with MIT License | 6 votes |
private SAMRecord tagRead(final SAMRecord r, final OverlapDetector<Interval> od) { Set<Interval>intervals = new HashSet<>(); List<AlignmentBlock> blocks = r.getAlignmentBlocks(); for (AlignmentBlock b: blocks) { int refStart =b.getReferenceStart(); int refEnd = refStart+b.getLength()-1; Interval v = new Interval(r.getReferenceName(), refStart, refEnd); Collection<Interval> blockResult = od.getOverlaps(v); intervals.addAll(blockResult); } String tagName = getIntervalName(intervals); if (tagName!=null) r.setAttribute(this.TAG, tagName); else r.setAttribute(this.TAG, null); return (r); }
Example 3
Source File: SNPUMICellReadIteratorWrapper.java From Drop-seq with MIT License | 6 votes |
/** * Check if a read overlaps any SNPs in the OverlapDetector. Tag reads with SNPs. * If more than 1 SNP tags a read, make a read for each SNP. * Simplified since data goes through GeneFunctionIteratorWrapper to take care of how reads/genes interact. */ private void processSNP (final SAMRecord r) { List<AlignmentBlock> blocks = r.getAlignmentBlocks(); Collection<String> snps = new HashSet<>(); for (AlignmentBlock b: blocks) { int start = b.getReferenceStart(); int end = start + b.getLength() -1; Interval i = new Interval(r.getReferenceName(), start, end); Collection<Interval> overlaps = this.snpIntervals.getOverlaps(i); for (Interval o: overlaps) snps.add(IntervalTagComparator.toString(o)); } // 1 read per SNP. for (String snp:snps) { SAMRecord rr = Utils.getClone(r); rr.setAttribute(this.snpTag, snp); queueRecordForOutput(rr); } }
Example 4
Source File: MarkDuplicatesTest.java From picard with MIT License | 6 votes |
@Test public void testWithBarcodeComplex() { final AbstractMarkDuplicatesCommandLineProgramTester tester = getTester(); tester.getSamRecordSetBuilder().setReadLength(68); final String readNameOne = "RUNID:1:1:15993:13361"; final String readNameTwo = "RUNID:2:2:15993:13362"; final String readNameThree = "RUNID:3:3:15993:13362"; // first two reads have the same barcode, third read has a different barcode tester.addMatePair(readNameOne, 2, 41212324, 41212310, false, false, false, false, "33S35M", "19S49M", true, true, false, false, false, DEFAULT_BASE_QUALITY); tester.addMatePair(readNameTwo, 2, 41212324, 41212310, false, false, true, true, "33S35M", "19S49M", true, true, false, false, false, DEFAULT_BASE_QUALITY); // same barcode as the first tester.addMatePair(readNameThree, 2, 41212324, 41212310, false, false, false, false, "33S35M", "19S49M", true, true, false, false, false, DEFAULT_BASE_QUALITY); final String barcodeTag = "BC"; for (final SAMRecord record : new IterableAdapter<>(tester.getRecordIterator())) { if (record.getReadName().equals(readNameOne) || record.getReadName().equals(readNameTwo)) { record.setAttribute(barcodeTag, "AAAA"); } else if (record.getReadName().equals(readNameThree)) { record.setAttribute(barcodeTag, "CCCC"); } } tester.addArg("BARCODE_TAG=" + barcodeTag); tester.runTest(); }
Example 5
Source File: CollapseTagWithContext.java From Drop-seq with MIT License | 6 votes |
private void retagBarcodedReads (Iterator<SAMRecord> informativeRecs, ObjectCounter<String> barcodeCounts, Map<String, String> collapseMap, boolean dropSmallCounts, SAMFileWriter writer, String collapseTag, String outTag) { Set<String> expectedBarcodes = null; // already validated that if dropSmallCounts is true, then the minNumObservations > 1. if (dropSmallCounts) // use all the remaining barcodes that have counts. expectedBarcodes = new HashSet<>(barcodeCounts.getKeys()); while (informativeRecs.hasNext()) { SAMRecord r = informativeRecs.next(); String tagValue = r.getStringAttribute(collapseTag); // if the tag was not set, then don't set it. if (tagValue!=null) { // tag was set, // if the tagValue is not in the expected barocde list and the barcode list is populated, then don't add this read and short circuit to next read. if (expectedBarcodes!=null && !expectedBarcodes.contains(tagValue)) continue; // tag was set, set it. // is it in the map? If so, update the tag value. if (collapseMap.containsKey(tagValue)) tagValue = collapseMap.get(tagValue); r.setAttribute(outTag, tagValue); } writer.addAlignment(r); } }
Example 6
Source File: CellBarcodeFilteringIteratorTest.java From Drop-seq with MIT License | 6 votes |
@Test public void filterOut2() { Iterator<SAMRecord> underlyingIterator = Collections.emptyIterator(); Collection<String> cellBarcodesToKeep = null; String cellBCTag ="XC"; CellBarcodeFilteringIterator f = new CellBarcodeFilteringIterator(underlyingIterator, cellBCTag, cellBarcodesToKeep); SAMRecordSetBuilder b = new SAMRecordSetBuilder(); // second argument is the contig index which is 0 based. So contig index=0 -> chr1. index=2 -> chr3, etc. b.addFrag("1", 0, 1, false); SAMRecord r1 = b.getRecords().iterator().next(); r1.setAttribute(cellBCTag, "CELL1"); Assert.assertFalse(f.filterOut(r1)); r1.setAttribute(cellBCTag, "CELL2"); Assert.assertFalse(f.filterOut(r1)); }
Example 7
Source File: Read.java From cramtools with Apache License 2.0 | 6 votes |
SAMRecord secondSAMRecord(SAMFileHeader header) { SAMRecord r = new SAMRecord(header); r.setReadName(evidenceRecord.getReadName()); r.setReferenceName(evidenceRecord.Chromosome); r.setAlignmentStart(Integer.valueOf(evidenceRecord.MateOffsetInReference) + 1); r.setMappingQuality(Integer.valueOf(evidenceRecord.ScoreAllele0)); r.setReadPairedFlag(true); r.setReadUnmappedFlag(false); r.setReadNegativeStrandFlag(negative); r.setFirstOfPairFlag(evidenceRecord.side == 1); r.setSecondOfPairFlag(!r.getFirstOfPairFlag()); r.setCigar(new Cigar(Utils.toCigarOperatorList(secondHalf.samCigarElements))); r.setReadBases(Utils.toByteArray(secondHalf.readBasesBuf)); r.setBaseQualities(Utils.toByteArray(secondHalf.readScoresBuf)); r.setAttribute("GC", Utils.toString(secondHalf.gcList)); r.setAttribute("GS", Utils.toString(secondHalf.gsBuf)); r.setAttribute("GQ", SAMUtils.phredToFastq(Utils.toByteArray(secondHalf.gqBuf))); return r; }
Example 8
Source File: OrientationTest.java From rtg-tools with BSD 2-Clause "Simplified" License | 5 votes |
public static SAMRecord makeTestRecord(SAMFileHeader sfh, int flags, String sequence, String mateSequence, int pos, int matePos, String readGroup) { final SAMRecord sr = new SAMRecord(sfh); sr.setFlags(flags); sr.setReferenceName(sequence); sr.setMateReferenceName(mateSequence); sr.setAlignmentStart(pos); sr.setMateAlignmentStart(matePos); sr.setAttribute("RG", readGroup); if (pos < matePos) { sr.setInferredInsertSize(matePos - pos + 5); } else { sr.setInferredInsertSize(matePos - pos - 5); //-1 * (pos - matePos + 5)); } return sr; }
Example 9
Source File: UmiUtil.java From picard with MIT License | 5 votes |
/** * Set molecular identifier tag of record using UMI and alignment start position along with top and bottom strand information * @param rec SAMRecord to set molecular index of * @param assignedUmi Assigned or inferred UMI to use in the molecular index tag * @param molecularIdentifierTag SAM tag to use as molecular identifier, if null no modification to the record will be made * @param duplexUmis Treat UMI as duplex, if true /A and /B will be added to denote top and bottom strands respectively */ static void setMolecularIdentifier(final SAMRecord rec, final String assignedUmi, final String molecularIdentifierTag, final boolean duplexUmis) { if (molecularIdentifierTag == null) { return; } final StringBuilder molecularIdentifier = new StringBuilder(); molecularIdentifier.append(rec.getContig()); molecularIdentifier.append(CONTIG_SEPARATOR); molecularIdentifier.append(rec.getReadNegativeStrandFlag() ? rec.getAlignmentStart() : rec.getMateAlignmentStart()); molecularIdentifier.append(UMI_NAME_SEPARATOR); molecularIdentifier.append(assignedUmi); if (duplexUmis) { // Reads whose strand position can be determined will have their // molecularIdentifier set to include an identifier appended that // indicates top or bottom strand. ReadStrand strand = getStrand(rec); switch (strand) { case TOP: molecularIdentifier.append(TOP_STRAND_DUPLEX); break; case BOTTOM: molecularIdentifier.append(BOTTOM_STRAND_DUPLEX); break; default: // If we can't determine strand position nothing // is appended to the molecularIdentifier. break; } } rec.setAttribute(molecularIdentifierTag, molecularIdentifier.toString()); }
Example 10
Source File: DetectBeadSynthesisErrors.java From Drop-seq with MIT License | 5 votes |
/** * @return null if the read should not be included in the output BAM. */ SAMRecord padCellBarcodeFix (final SAMRecord r, final Map<String, BeadSynthesisErrorData> errorBarcodesWithPositions, final Map<String, String> intendedSequenceMap, final String cellBarcodeTag, final String molecularBarcodeTag, final double extremeBaseRatio) { String cellBC=r.getStringAttribute(cellBarcodeTag); BeadSynthesisErrorData bsed = errorBarcodesWithPositions.get(cellBC); if (bsed==null) return (r); // no correction data, no fix. // we're only going to fix cells where there's one or more synthesis errors BeadSynthesisErrorType bset = bsed.getErrorType(extremeBaseRatio, this.detectPrimerTool, this.EDIT_DISTANCE); if (bset==BeadSynthesisErrorType.NO_ERROR) return (r); // no error, return. // has an error, not a synthesis error... if (bset!=BeadSynthesisErrorType.SYNTH_MISSING_BASE) return (null); // has a synthesis error int polyTErrorPosition = bsed.getPolyTErrorPosition(this.EXTREME_BASE_RATIO); int umiLength = bsed.getBaseLength(); int numErrors= umiLength-polyTErrorPosition+1; // if there are too many errors, or the errors aren't all polyT, return null. if (numErrors > MAX_NUM_ERRORS) return null; // apply the fix and return the fixed read. String umi = r.getStringAttribute(molecularBarcodeTag); String cellBCFixed = padCellBarcode(cellBC, polyTErrorPosition, umiLength); String umiFixed = fixUMI(cellBC, umi, polyTErrorPosition); // if there's an intended sequence, use that instead of the default padded cell barcode. String intendedSeq = intendedSequenceMap.get(cellBC); if (intendedSeq!=null) cellBCFixed=intendedSeq; r.setAttribute(cellBarcodeTag, cellBCFixed); r.setAttribute(molecularBarcodeTag, umiFixed); return r; }
Example 11
Source File: DefaultTaggingIterator.java From Drop-seq with MIT License | 5 votes |
@Override public SAMRecord next() { SAMRecord r= this.underlyingIterator.next(); String v = r.getStringAttribute(collapseTag); if (v!=null) r.setAttribute(outTag, v); return (r); }
Example 12
Source File: UmiGraph.java From picard with MIT License | 5 votes |
/** * Creates a UmiGraph object * @param set Set of reads that have the same start-stop positions, these will be broken up by UMI * @param umiTag UMI tag used in the SAM/BAM/CRAM file ie. RX * @param molecularIdentifierTag Molecular identifier tag used in the SAM/BAM/CRAM file ie. MI * @param allowMissingUmis Allow for missing UMIs * @param duplexUmis Whether or not to use duplex of single strand UMIs */ UmiGraph(final DuplicateSet set, final String umiTag, final String molecularIdentifierTag, final boolean allowMissingUmis, final boolean duplexUmis) { this.umiTag = umiTag; this.molecularIdentifierTag = molecularIdentifierTag; this.allowMissingUmis = allowMissingUmis; this.duplexUmis = duplexUmis; records = set.getRecords(); // First ensure that all the reads have a UMI, if any reads are missing a UMI throw an exception unless allowMissingUmis is true for (final SAMRecord rec : records) { if (rec.getStringAttribute(umiTag) == null) { if (!allowMissingUmis) { throw new PicardException("Read " + rec.getReadName() + " does not contain a UMI with the " + umiTag + " attribute."); } rec.setAttribute(umiTag, ""); } } // Count the number of times each UMI occurs umiCounts = records.stream().collect(Collectors.groupingBy(p -> UmiUtil.getTopStrandNormalizedUmi(p, umiTag, duplexUmis), counting())); // At first we consider every UMI as if it were its own duplicate set numUmis = umiCounts.size(); umi = new String[numUmis]; duplicateSetID = IntStream.rangeClosed(0, numUmis-1).toArray(); int i = 0; for (final String key : umiCounts.keySet()) { umi[i] = key; i++; } }
Example 13
Source File: DefaultSamFilterTest.java From rtg-tools with BSD 2-Clause "Simplified" License | 5 votes |
public void testVariantInvalids() { final SamFilterParams.SamFilterParamsBuilder builder = SamFilterParams.builder().excludeVariantInvalid(true); final DefaultSamFilter f = new DefaultSamFilter(builder.create()); final DefaultSamFilter notf = new DefaultSamFilter(builder.invertFilters(true).create()); final SAMRecord rec = new SAMRecord(new SAMFileHeader()); // Not unmapped but alignment position == 0 rec.setAlignmentStart(0); assertFalse(f.acceptRecord(rec)); assertFalse(f.acceptRecord(rec) == notf.acceptRecord(rec)); rec.setAlignmentStart(1); assertTrue(f.acceptRecord(rec)); assertFalse(f.acceptRecord(rec) == notf.acceptRecord(rec)); rec.setAttribute("NH", 0); assertFalse(f.acceptRecord(rec)); assertFalse(f.acceptRecord(rec) == notf.acceptRecord(rec)); }
Example 14
Source File: PreprocessingTools.java From halvade with GNU General Public License v3.0 | 5 votes |
public int streamElPrep(Reducer.Context context, String output, String rg, int threads, SAMRecordIterator SAMit, SAMFileHeader header, String dictFile, boolean updateRG, boolean keepDups, String RGID) throws InterruptedException, IOException, QualityException { long startTime = System.currentTimeMillis(); String customArgs = HalvadeConf.getCustomArgs(context.getConfiguration(), "elprep", ""); String[] command = CommandGenerator.elPrep(bin, "/dev/stdin", output, threads, true, rg, null, !keepDups, customArgs); // runProcessAndWait(command); ProcessBuilderWrapper builder = new ProcessBuilderWrapper(command, null); builder.startProcess(true); BufferedWriter localWriter = builder.getSTDINWriter(); // write header final StringWriter headerTextBuffer = new StringWriter(); new SAMTextHeaderCodec().encode(headerTextBuffer, header); final String headerText = headerTextBuffer.toString(); localWriter.write(headerText, 0, headerText.length()); SAMRecord sam; int reads = 0; while(SAMit.hasNext()) { sam = SAMit.next(); if(updateRG) sam.setAttribute(SAMTag.RG.name(), RGID); String samString = sam.getSAMString(); localWriter.write(samString, 0, samString.length()); reads++; } localWriter.flush(); localWriter.close(); int error = builder.waitForCompletion(); if(error != 0) throw new ProcessException("elPrep", error); long estimatedTime = System.currentTimeMillis() - startTime; Logger.DEBUG("estimated time: " + estimatedTime / 1000); if(context != null) context.getCounter(HalvadeCounters.TIME_ELPREP).increment(estimatedTime); return reads; }
Example 15
Source File: UmiUtilTest.java From picard with MIT License | 5 votes |
@Test(dataProvider = "brokenUmiProvider", expectedExceptions = PicardException.class) public void testBrokenUmi(final String brokenUmi) { SAMFileHeader header = new SAMFileHeader(); SAMRecord rec = new SAMRecord(header); rec.setAttribute("RX", brokenUmi); // This should throw an exception due to a broken UMI in rec UmiUtil.getTopStrandNormalizedUmi(rec, "RX", true); }
Example 16
Source File: ReorderSam.java From picard with MIT License | 4 votes |
/** * Helper function that writes reads from iterator it into writer out, updating each SAMRecord along the way * according to the newOrder mapping from dictionary index -> index. Name is used for printing only. */ private void writeReads(final SAMFileWriter out, final SAMRecordIterator it, final Map<Integer, Integer> newOrder, final String name) { long counter = 0; log.info(" Processing " + name); while (it.hasNext()) { counter++; final SAMRecord read = it.next(); final int oldRefIndex = read.getReferenceIndex(); final int oldMateIndex = read.getMateReferenceIndex(); final int newRefIndex = newOrderIndex(read, oldRefIndex, newOrder); read.setHeader(out.getFileHeader()); read.setReferenceIndex(newRefIndex); // read becoming unmapped if (oldRefIndex != NO_ALIGNMENT_REFERENCE_INDEX && newRefIndex == NO_ALIGNMENT_REFERENCE_INDEX) { read.setAlignmentStart(NO_ALIGNMENT_START); read.setReadUnmappedFlag(true); read.setCigarString(SAMRecord.NO_ALIGNMENT_CIGAR); read.setMappingQuality(SAMRecord.NO_MAPPING_QUALITY); } final int newMateIndex = newOrderIndex(read, oldMateIndex, newOrder); if (oldMateIndex != NO_ALIGNMENT_REFERENCE_INDEX && newMateIndex == NO_ALIGNMENT_REFERENCE_INDEX) { // mate becoming unmapped read.setMateAlignmentStart(NO_ALIGNMENT_START); read.setMateUnmappedFlag(true); read.setAttribute(SAMTag.MC.name(), null); // Set the Mate Cigar String to null } read.setMateReferenceIndex(newMateIndex); out.addAlignment(read); } it.close(); log.info("Wrote " + counter + " reads"); }
Example 17
Source File: BamTagCountingIteratorTest.java From Drop-seq with MIT License | 4 votes |
@Test public void testFilter() { Iterator<SAMRecord> underlyingIterator = Collections.emptyIterator(); String tagName = "XC"; // filter always returns false. BamTagCountingIterator f = new BamTagCountingIterator(underlyingIterator, tagName); SAMRecordSetBuilder b = new SAMRecordSetBuilder(); // second argument is the contig index which is 0 based. So contig index=0 -> chr1. index=2 -> chr3, etc. b.addFrag("1", 0, 1, false); SAMRecord r1 = b.getRecords().iterator().next(); r1.setAttribute(tagName, "A"); // this one counts. Assert.assertFalse(f.filterOut(r1)); // wrong tag set, correct tag is not set. r1.setAttribute("XZ", "A"); r1.setAttribute(tagName, null); Assert.assertFalse(f.filterOut(r1)); r1.setAttribute(tagName, "A"); // this one counts. Assert.assertFalse(f.filterOut(r1)); r1.setAttribute(tagName, "B"); // this one counts. Assert.assertFalse(f.filterOut(r1)); r1.setAttribute(tagName, "C"); // this one counts. Assert.assertFalse(f.filterOut(r1)); ObjectCounter<String> expected = f.getCounts(); expected.incrementByCount("A", 2); expected.incrementByCount("B", 1); expected.incrementByCount("C", 1); ObjectCounter<String> actual = f.getCounts(); Assert.assertEquals(actual, expected); }
Example 18
Source File: FilterBamByTagTest.java From Drop-seq with MIT License | 4 votes |
@Test public void filterReadTest() { SAMRecord readHasAttribute = new SAMRecord(null); String tag = "XT"; readHasAttribute.setAttribute(tag, "1"); Set<String> values = new HashSet<>(); values.add("1"); SAMRecord readNoAttribute = new SAMRecord(null); FilterBamByTag t = new FilterBamByTag(); // read has attribute, accept any value, want to retain read. boolean flag1 = t.filterRead(readHasAttribute, tag, null, true, null, false); Assert.assertFalse(flag1); // read has attribute, accept any value, want to filter read. boolean flag2 = t.filterRead(readHasAttribute, tag, null, false, null, false); Assert.assertTrue(flag2); // read has attribute, accept certain value, want to retain read. boolean flag3 = t.filterRead(readHasAttribute, tag, values, true, null, false); Assert.assertFalse(flag3); // read has attribute, accept certain value, want to filter read. boolean flag4 = t.filterRead(readHasAttribute, tag, values, false, null, false); Assert.assertTrue(flag4); // read does not have attribute, accept any value, want to retain read. boolean flag5 = t.filterRead(readNoAttribute, tag, null, true, null, false); Assert.assertTrue(flag5); // read does not have attribute, accept any value, want to filter read. boolean flag6 = t.filterRead(readNoAttribute, tag, null, false, null, false); Assert.assertFalse(flag6); // read does not have attribute, accept certain value, want to retain read. boolean flag7 = t.filterRead(readNoAttribute, tag, values, true, null, false); Assert.assertTrue(flag7); // read does not have attribute, accept certain value, want to filter read. boolean flag8 = t.filterRead(readNoAttribute, tag, values, false, null, false); Assert.assertFalse(flag8); // test map quality filtering readHasAttribute.setMappingQuality(10); boolean flag9 = t.filterRead(readHasAttribute, tag, null, true, 10, false); Assert.assertFalse(flag9); boolean flag10 = t.filterRead(readHasAttribute, tag, null, true, 20, false); Assert.assertTrue(flag10); // want to test partial matching. SAMRecord readHasGene = new SAMRecord(null); String geneNameTag = "gn"; readHasGene.setAttribute(geneNameTag, "A,B"); Set<String> geneValues = new HashSet<> (Arrays.asList("A")); boolean flagExact =t.filterRead(readHasGene, geneNameTag, geneValues, true, 0, false); boolean flagPartial =t.filterRead(readHasGene, geneNameTag, geneValues, true, 0, true); // filter the exact match Assert.assertTrue(flagExact); // don't filter the partial match Assert.assertFalse(flagPartial); }
Example 19
Source File: Evidence2SAM.java From cramtools with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws IOException, ParseException { EvidenceRecordFileIterator iterator = new EvidenceRecordFileIterator(new File(args[0])); Read context = new Read(); SAMFileHeader header = new SAMFileHeader(); header.setSortOrder(SAMFileHeader.SortOrder.unsorted); SAMSequenceRecord samSequenceRecord = new SAMSequenceRecord("chr10", 135534747); samSequenceRecord.setAttribute(SAMSequenceRecord.ASSEMBLY_TAG, iterator.assembly_ID); String readGroup = String.format("%s-%s", iterator.assembly_ID, iterator.chromosome); SAMReadGroupRecord readGroupRecord = new SAMReadGroupRecord(readGroup); readGroupRecord.setAttribute(SAMReadGroupRecord.READ_GROUP_SAMPLE_TAG, iterator.sample); readGroupRecord.setAttribute(SAMReadGroupRecord.PLATFORM_UNIT_TAG, readGroup); readGroupRecord.setAttribute(SAMReadGroupRecord.SEQUENCING_CENTER_TAG, "\"Complete Genomics\""); Date date = new SimpleDateFormat("yyyy-MMM-dd hh:mm:ss.S").parse(iterator.generatedAt); readGroupRecord.setAttribute(SAMReadGroupRecord.DATE_RUN_PRODUCED_TAG, new SimpleDateFormat("yyyy-MM-dd").format(date)); readGroupRecord.setAttribute(SAMReadGroupRecord.PLATFORM_TAG, "\"Complete Genomics\""); header.addReadGroup(readGroupRecord); header.addSequence(samSequenceRecord); SAMFileWriterFactory f = new SAMFileWriterFactory(); SAMFileWriter samWriter; if (args.length > 1) samWriter = f.makeBAMWriter(header, false, new File(args[1])); else samWriter = f.makeSAMWriter(header, false, System.out); int i = 0; long time = System.currentTimeMillis(); DedupIterator dedupIt = new DedupIterator(iterator); while (dedupIt.hasNext()) { EvidenceRecord evidenceRecord = dedupIt.next(); if (evidenceRecord == null) throw new RuntimeException(); try { context.reset(evidenceRecord); context.parse(); } catch (Exception e) { System.err.println("Failed on line:"); System.err.println(evidenceRecord.line); throw new RuntimeException(e); } SAMRecord[] samRecords = context.toSAMRecord(header); for (SAMRecord samRecord : samRecords) { samRecord.setAttribute(SAMTag.RG.name(), readGroup); samWriter.addAlignment(samRecord); } i++; if (i % 1000 == 0) { if (System.currentTimeMillis() - time > 10 * 1000) { time = System.currentTimeMillis(); System.err.println(i); } } if (i > 10000) break; } samWriter.close(); }
Example 20
Source File: AlignmentsTags.java From cramtools with Apache License 2.0 | 4 votes |
/** * Same as {@link htsjdk.samtools.util.SequenceUtil#calculateMdAndNmTags} * but allows for reference excerpt instead of a full reference sequence. * * @param record * SAMRecord to inject NM and MD tags into * @param ref * reference sequence bases, may be a subsequence starting at * refOffest * @param refOffset * array offset of the reference bases, 0 is the same as the * whole sequence. This value will be subtracted from the * reference array index in calculations. * @param calcMD * calculate MD tag if true * @param calcNM * calculate NM tag if true */ public static void calculateMdAndNmTags(final SAMRecord record, final byte[] ref, final int refOffset, final boolean calcMD, final boolean calcNM) { if (!calcMD && !calcNM) return; final Cigar cigar = record.getCigar(); final List<CigarElement> cigarElements = cigar.getCigarElements(); final byte[] seq = record.getReadBases(); final int start = record.getAlignmentStart() - 1; int i, x, y, u = 0; int nm = 0; final StringBuilder str = new StringBuilder(); final int size = cigarElements.size(); for (i = y = 0, x = start; i < size; ++i) { final CigarElement ce = cigarElements.get(i); int j; final int length = ce.getLength(); final CigarOperator op = ce.getOperator(); if (op == CigarOperator.MATCH_OR_MISMATCH || op == CigarOperator.EQ || op == CigarOperator.X) { for (j = 0; j < length; ++j) { final int z = y + j; if (refOffset + ref.length <= x + j) break; // out of boundary int c1 = 0; int c2 = 0; // try { c1 = seq[z]; c2 = ref[x + j - refOffset]; if ((c1 == c2) || c1 == 0) { // a match ++u; } else { str.append(u); str.appendCodePoint(ref[x + j - refOffset]); u = 0; ++nm; } } if (j < length) break; x += length; y += length; } else if (op == CigarOperator.DELETION) { str.append(u); str.append('^'); for (j = 0; j < length; ++j) { if (ref[x + j - refOffset] == 0) break; str.appendCodePoint(ref[x + j - refOffset]); } u = 0; if (j < length) break; x += length; nm += length; } else if (op == CigarOperator.INSERTION || op == CigarOperator.SOFT_CLIP) { y += length; if (op == CigarOperator.INSERTION) nm += length; } else if (op == CigarOperator.SKIPPED_REGION) { x += length; } } str.append(u); if (calcMD) record.setAttribute(SAMTag.MD.name(), str.toString()); if (calcNM) record.setAttribute(SAMTag.NM.name(), nm); }