Java Code Examples for htsjdk.samtools.SAMRecord#getReadString()
The following examples show how to use
htsjdk.samtools.SAMRecord#getReadString() .
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: BaseQualityFilter.java From Drop-seq with MIT License | 6 votes |
public int scoreBaseQuality(final SAMRecord barcodedRead) { int numBasesBelowQuality=0; byte [] qual= barcodedRead.getBaseQualities(); char [] seq = barcodedRead.getReadString().toUpperCase().toCharArray(); for (BaseRange b: baseRanges) for (int i=b.getStart()-1; i<b.getEnd(); i++) { if (i> (seq.length-1)) throw new TranscriptomeException("Base [" + Integer.toString(i+1) + "] was requested, but the read isn't long enough ["+ barcodedRead.getReadString()+"]"); byte q = qual[i]; char s = seq[i]; if (q < this.baseQualityThrehsold || s=='N') numBasesBelowQuality++; } this.metric.addFailedBase(numBasesBelowQuality); return (numBasesBelowQuality); }
Example 2
Source File: TrimStartingSequenceTest.java From Drop-seq with MIT License | 5 votes |
@Test public void testTrimStartingSequenceClp() throws IOException { final TrimStartingSequence clp = new TrimStartingSequence(); clp.INPUT = INPUT; clp.OUTPUT = File.createTempFile("TrimStartingSequenceTest.", ".sam"); clp.OUTPUT.deleteOnExit(); clp.OUTPUT_SUMMARY = File.createTempFile("TrimStartingSequenceTest.", ".summary"); clp.OUTPUT_SUMMARY.deleteOnExit(); clp.SEQUENCE = "AAGCAGTGGTATCAACGCAGAGTGAATGGG"; clp.MISMATCHES=0; clp.NUM_BASES=5; Assert.assertEquals(clp.doWork(), 0); // Confirm that the expected stuff is trimmed. final SamReader outputReader = SamReaderFactory.makeDefault().open(clp.OUTPUT); final SAMRecordIterator outputIterator = outputReader.iterator(); final SamReader inputReader = SamReaderFactory.makeDefault().open(clp.INPUT); for (final SAMRecord inputRec: inputReader) { Assert.assertTrue(outputIterator.hasNext()); final SAMRecord outputRec = outputIterator.next(); final String inputBases = inputRec.getReadString(); final String outputBases = outputRec.getReadString(); Assert.assertTrue(inputBases.endsWith(outputBases)); final String trimmedBases = inputBases.substring(0, inputBases.length() - outputBases.length()); Assert.assertTrue(clp.SEQUENCE.endsWith(trimmedBases)); } Assert.assertFalse(outputIterator.hasNext()); CloserUtil.close(Arrays.asList(outputReader, inputReader)); }
Example 3
Source File: ReadRecord.java From hmftools with GNU General Public License v3.0 | 5 votes |
public static ReadRecord from(final SAMRecord record) { ReadRecord read = new ReadRecord( record.getReadName(), record.getReferenceName(), record.getStart(), record.getEnd(), record.getReadString(), record.getCigar(), record.getInferredInsertSize(), record.getFlags(), record.getMateReferenceName(), record.getMateAlignmentStart()); read.setSuppAlignment(record.getStringAttribute(SUPPLEMENTARY_ATTRIBUTE)); return read; }
Example 4
Source File: SAMRecordUtils.java From abra2 with MIT License | 5 votes |
/** * Remove leading or trailing soft clips from the input read. * Does not modify a read entirely comprised of soft clips. */ public static void removeSoftClips(SAMRecord read) { Cigar cigar = read.getCigar(); CigarElement firstElement = cigar.getCigarElement(0); CigarElement lastElement = cigar.getCigarElement(cigar.numCigarElements()-1); if ((firstElement.getOperator() == CigarOperator.S) || (lastElement.getOperator() == CigarOperator.S)) { Cigar newCigar = new Cigar(); String bases = read.getReadString(); //String qualities = read.getBaseQualityString(); if (firstElement.getOperator() == CigarOperator.S) { bases = bases.substring(firstElement.getLength(), bases.length()); //qualities = qualities.substring(firstElement.getLength(), qualities.length()-1); } else { newCigar.add(firstElement); } for (int i=1; i<cigar.numCigarElements()-1; i++) { newCigar.add(cigar.getCigarElement(i)); } if (lastElement.getOperator() == CigarOperator.S) { bases = bases.substring(0, bases.length() - lastElement.getLength()); //qualities = qualities.substring(0, qualities.length() - lastElement.getLength() - 1); } else { newCigar.add(lastElement); } read.setCigar(newCigar); read.setReadString(bases); //read.setBaseQualityString(qualities); } }
Example 5
Source File: SamRecordComparision.java From cramtools with Apache License 2.0 | 5 votes |
public static Object getValue(SAMRecord record, FIELD_TYPE field, String tagId) { if (field == null) throw new IllegalArgumentException("Record field is null."); switch (field) { case QNAME: return record.getReadName(); case FLAG: return Integer.toString(record.getFlags()); case RNAME: return record.getReferenceName(); case POS: return Integer.toString(record.getAlignmentStart()); case MAPQ: return Integer.toString(record.getMappingQuality()); case CIGAR: return record.getCigarString(); case RNEXT: return record.getMateReferenceName(); case PNEXT: return Integer.toString(record.getMateAlignmentStart()); case TLEN: return Integer.toString(record.getInferredInsertSize()); case SEQ: return record.getReadString(); case QUAL: return record.getBaseQualityString(); case TAG: if (tagId == null) throw new IllegalArgumentException("Tag mismatch reqiues tag id. "); return record.getAttribute(tagId); default: throw new IllegalArgumentException("Unknown record field: " + field.name()); } }
Example 6
Source File: SAMRecordField.java From cramtools with Apache License 2.0 | 5 votes |
public Object getValue(SAMRecord record) { switch (type) { case QNAME: return record.getReadName(); case FLAG: return Integer.toString(record.getFlags()); case RNAME: return record.getReferenceName(); case POS: return Integer.toString(record.getAlignmentStart()); case MAPQ: return Integer.toString(record.getMappingQuality()); case CIGAR: return record.getCigarString(); case RNEXT: return record.getMateReferenceName(); case PNEXT: return Integer.toString(record.getMateAlignmentStart()); case TLEN: return Integer.toString(record.getInferredInsertSize()); case SEQ: return record.getReadString(); case QUAL: return record.getBaseQualityString(); case TAG: if (tagId == null) throw new IllegalArgumentException("Tag mismatch reqiues tag id. "); return record.getAttribute(tagId); default: throw new IllegalArgumentException("Unknown record field: " + type.name()); } }
Example 7
Source File: SamToFastq.java From picard with MIT License | 4 votes |
private void writeRecord(final SAMRecord read, final Integer mateNumber, final FastqWriter writer, final int basesToTrim, final Integer maxBasesToWrite) { final String seqHeader = mateNumber == null ? read.getReadName() : read.getReadName() + "/" + mateNumber; String readString = read.getReadString(); String baseQualities = read.getBaseQualityString(); // If we're clipping, do the right thing to the bases or qualities if (CLIPPING_ATTRIBUTE != null) { Integer clipPoint = (Integer) read.getAttribute(CLIPPING_ATTRIBUTE); if (clipPoint != null && clipPoint < CLIPPING_MIN_LENGTH) { clipPoint = Math.min(readString.length(), CLIPPING_MIN_LENGTH); } if (clipPoint != null) { if (CLIPPING_ACTION.equalsIgnoreCase(CLIP_TRIM)) { readString = clip(readString, clipPoint, null, !read.getReadNegativeStrandFlag()); baseQualities = clip(baseQualities, clipPoint, null, !read.getReadNegativeStrandFlag()); } else if (CLIPPING_ACTION.equalsIgnoreCase(CLIP_TO_N)) { readString = clip(readString, clipPoint, CLIP_TO_N.charAt(0), !read.getReadNegativeStrandFlag()); } else { final char newQual = SAMUtils.phredToFastq(new byte[]{(byte) Integer.parseInt(CLIPPING_ACTION)}).charAt(0); baseQualities = clip(baseQualities, clipPoint, newQual, !read.getReadNegativeStrandFlag()); } } } if (RE_REVERSE && read.getReadNegativeStrandFlag()) { readString = SequenceUtil.reverseComplement(readString); baseQualities = StringUtil.reverseString(baseQualities); } if (basesToTrim > 0) { readString = readString.substring(basesToTrim); baseQualities = baseQualities.substring(basesToTrim); } // Perform quality trimming if desired, making sure to leave at least one base! if (QUALITY != null) { final byte[] quals = SAMUtils.fastqToPhred(baseQualities); final int qualityTrimIndex = Math.max(1, TrimmingUtil.findQualityTrimPoint(quals, QUALITY)); if (qualityTrimIndex < quals.length) { readString = readString.substring(0, qualityTrimIndex); baseQualities = baseQualities.substring(0, qualityTrimIndex); } } if (maxBasesToWrite != null && maxBasesToWrite < readString.length()) { readString = readString.substring(0, maxBasesToWrite); baseQualities = baseQualities.substring(0, maxBasesToWrite); } writer.write(new FastqRecord(seqHeader, readString, "", baseQualities)); }
Example 8
Source File: BamToDetailsConversion.java From chipster with MIT License | 4 votes |
/** * Find reads in a given range. * * <p> * TODO add cigar to the list of returned values * <p> * TODO add pair information to the list of returned values * * @param request * @return * @throws InterruptedException */ @Override protected void processDataRequest(DataRequest request) throws InterruptedException { // Read the given region CloseableIterator<SAMRecord> iterator = dataSource.query(request.start.chr, request.start.bp.intValue(), request.end.bp.intValue()); // Produce results while (iterator.hasNext()) { List<Feature> responseList = new LinkedList<Feature>(); // Split results into chunks for (int c = 0; c < RESULT_CHUNK_SIZE && iterator.hasNext(); c++) { SAMRecord record = iterator.next(); // Region for this read Region recordRegion = new Region((long) record.getAlignmentStart(), (long) record.getAlignmentEnd(), request.start.chr); // Values for this read LinkedHashMap<DataType, Object> values = new LinkedHashMap<DataType, Object>(); Feature read = new Feature(recordRegion, values); if (request.getRequestedContents().contains(DataType.ID)) { values.put(DataType.ID, record.getReadName()); } if (request.getRequestedContents().contains(DataType.STRAND)) { values.put(DataType.STRAND, BamUtils.getStrand(record, coverageType)); } if (request.getRequestedContents().contains(DataType.QUALITY)) { values.put(DataType.QUALITY, record.getBaseQualityString()); } if (request.getRequestedContents().contains(DataType.CIGAR)) { Cigar cigar = new Cigar(read, record.getCigar()); values.put(DataType.CIGAR, cigar); } // TODO Deal with "=" and "N" in read string if (request.getRequestedContents().contains(DataType.SEQUENCE)) { String seq = record.getReadString(); values.put(DataType.SEQUENCE, seq); } if (request.getRequestedContents().contains(DataType.MATE_POSITION)) { BpCoord mate = new BpCoord((Long)(long)record.getMateAlignmentStart(), new Chromosome(record.getMateReferenceName())); values.put(DataType.MATE_POSITION, mate); } if (request.getRequestedContents().contains(DataType.BAM_TAG_NH)) { Object ng = record.getAttribute("NH"); if (ng != null) { values.put(DataType.BAM_TAG_NH, (Integer)record.getAttribute("NH")); } } /* * NOTE! RegionContents created from the same read area has to be equal in methods equals, hash and compareTo. Primary types * should be ok, but objects (including tables) has to be handled in those methods separately. Otherwise tracks keep adding * the same reads to their read sets again and again. */ responseList.add(read); } // Send result super.createDataResult(new DataResult(request.getStatus(), responseList)); } // We are done iterator.close(); }
Example 9
Source File: BamToCoverageConversion.java From chipster with MIT License | 4 votes |
private void calculateCoverage(DataRequest request, BpCoord from, BpCoord to) throws InterruptedException { //query data for full average bins, because merging them later would be difficult long start = CoverageTool.getBin(from.bp); long end = CoverageTool.getBin(to.bp) + CoverageTool.BIN_SIZE - 1; //if end coordinate is smaller than 1 Picard returns a full chromosome and we'll run out of memory if (end < 1) { end = 1; } CloseableIterator<SAMRecord> iterator = dataSource.query(from.chr, (int)start, (int)end); BaseStorage forwardBaseStorage = new BaseStorage(); BaseStorage reverseBaseStorage = new BaseStorage(); while (iterator.hasNext()) { SAMRecord record = iterator.next(); LinkedHashMap<DataType, Object> values = new LinkedHashMap<DataType, Object>(); Region recordRegion = new Region((long) record.getAlignmentStart(), (long) record.getAlignmentEnd(), request.start.chr); Feature read = new Feature(recordRegion, values); values.put(DataType.ID, record.getReadName()); values.put(DataType.STRAND, BamUtils.getStrand(record, coverageType)); Cigar cigar = new Cigar(read, record.getCigar()); values.put(DataType.CIGAR, cigar); String seq = record.getReadString(); values.put(DataType.SEQUENCE, seq); // Split read into continuous blocks (elements) by using the cigar List<ReadPart> parts = Cigar.splitElements(read); for (ReadPart part : parts) { if (read.values.get(DataType.STRAND) == Strand.FORWARD) { forwardBaseStorage.addNucleotideCounts(part); } else if (read.values.get(DataType.STRAND) == Strand.REVERSE) { reverseBaseStorage.addNucleotideCounts(part); } } } // We are done iterator.close(); /* Reads that overlap query regions create nucleotide counts outside the query region. * Remove those extra nucleotide counts, because they don't contain all reads of those regions and would show * wrong information. */ Region filterRegion = new Region(start, end, from.chr); forwardBaseStorage.filter(filterRegion); reverseBaseStorage.filter(filterRegion); // Send result LinkedList<Feature> resultList = new LinkedList<Feature>(); createResultList(from, forwardBaseStorage, resultList, Strand.FORWARD); createResultList(from, reverseBaseStorage, resultList, Strand.REVERSE); LinkedList<Feature> averageCoverage = CoverageTool.average(resultList, from.chr); if (request.getRequestedContents().contains(DataType.COVERAGE)) { super.createDataResult(new DataResult(request, resultList)); } if (request.getRequestedContents().contains(DataType.COVERAGE_AVERAGE)) { super.createDataResult(new DataResult(request, averageCoverage)); } }