Java Code Examples for htsjdk.samtools.SAMRecord#getReadString()

The following examples show how to use htsjdk.samtools.SAMRecord#getReadString() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: BaseQualityFilter.java    From Drop-seq with MIT License 6 votes vote down vote up
public int scoreBaseQuality(final SAMRecord barcodedRead) {
	int numBasesBelowQuality=0;
	byte [] qual= barcodedRead.getBaseQualities();
	char [] seq = barcodedRead.getReadString().toUpperCase().toCharArray();

	for (BaseRange b: baseRanges)
		for (int i=b.getStart()-1; i<b.getEnd(); i++) {
			if (i> (seq.length-1))
				throw new TranscriptomeException("Base [" + Integer.toString(i+1) + "] was requested, but the read isn't long enough ["+ barcodedRead.getReadString()+"]");

			byte q = qual[i];
			char s = seq[i];

			if (q < this.baseQualityThrehsold || s=='N')
				numBasesBelowQuality++;
		}

	this.metric.addFailedBase(numBasesBelowQuality);
	return (numBasesBelowQuality);
}
 
Example 2
Source File: TrimStartingSequenceTest.java    From Drop-seq with MIT License 5 votes vote down vote up
@Test
public void testTrimStartingSequenceClp() throws IOException {
    final TrimStartingSequence clp = new TrimStartingSequence();
    clp.INPUT = INPUT;
    clp.OUTPUT = File.createTempFile("TrimStartingSequenceTest.", ".sam");
    clp.OUTPUT.deleteOnExit();
    clp.OUTPUT_SUMMARY = File.createTempFile("TrimStartingSequenceTest.", ".summary");
    clp.OUTPUT_SUMMARY.deleteOnExit();
    clp.SEQUENCE = "AAGCAGTGGTATCAACGCAGAGTGAATGGG";
    clp.MISMATCHES=0;
    clp.NUM_BASES=5;
    Assert.assertEquals(clp.doWork(), 0);

    // Confirm that the expected stuff is trimmed.
    final SamReader outputReader = SamReaderFactory.makeDefault().open(clp.OUTPUT);
    final SAMRecordIterator outputIterator = outputReader.iterator();
    final SamReader inputReader = SamReaderFactory.makeDefault().open(clp.INPUT);
    for (final SAMRecord inputRec: inputReader) {
        Assert.assertTrue(outputIterator.hasNext());
        final SAMRecord outputRec = outputIterator.next();
        final String inputBases = inputRec.getReadString();
        final String outputBases = outputRec.getReadString();
        Assert.assertTrue(inputBases.endsWith(outputBases));
        final String trimmedBases = inputBases.substring(0, inputBases.length() - outputBases.length());
        Assert.assertTrue(clp.SEQUENCE.endsWith(trimmedBases));
    }
    Assert.assertFalse(outputIterator.hasNext());
    CloserUtil.close(Arrays.asList(outputReader, inputReader));
}
 
Example 3
Source File: ReadRecord.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
public static ReadRecord from(final SAMRecord record)
{
    ReadRecord read = new ReadRecord(
            record.getReadName(), record.getReferenceName(), record.getStart(), record.getEnd(),
            record.getReadString(), record.getCigar(), record.getInferredInsertSize(), record.getFlags(),
            record.getMateReferenceName(), record.getMateAlignmentStart());

    read.setSuppAlignment(record.getStringAttribute(SUPPLEMENTARY_ATTRIBUTE));
    return read;
}
 
Example 4
Source File: SAMRecordUtils.java    From abra2 with MIT License 5 votes vote down vote up
/**
 * Remove leading or trailing soft clips from the input read.
 * Does not modify a read entirely comprised of soft clips.
 */
public static void removeSoftClips(SAMRecord read) {
	
	Cigar cigar = read.getCigar();
	
	CigarElement firstElement = cigar.getCigarElement(0);
	CigarElement lastElement  = cigar.getCigarElement(cigar.numCigarElements()-1);
	
	if ((firstElement.getOperator() == CigarOperator.S) ||
		(lastElement.getOperator() == CigarOperator.S)) {
	
		Cigar newCigar = new Cigar();
		
		String bases = read.getReadString();
		//String qualities = read.getBaseQualityString();
				
		if (firstElement.getOperator() == CigarOperator.S) {
			bases = bases.substring(firstElement.getLength(), bases.length());
			//qualities = qualities.substring(firstElement.getLength(), qualities.length()-1);
		} else {
			newCigar.add(firstElement);
		}
		
		for (int i=1; i<cigar.numCigarElements()-1; i++) {
			newCigar.add(cigar.getCigarElement(i));
		}
		
		if (lastElement.getOperator() == CigarOperator.S) {
			bases = bases.substring(0, bases.length() - lastElement.getLength());
			//qualities = qualities.substring(0, qualities.length() - lastElement.getLength() - 1);
		} else {
			newCigar.add(lastElement);
		}
		
		read.setCigar(newCigar);
		read.setReadString(bases);
		//read.setBaseQualityString(qualities);
	}
}
 
Example 5
Source File: SamRecordComparision.java    From cramtools with Apache License 2.0 5 votes vote down vote up
public static Object getValue(SAMRecord record, FIELD_TYPE field, String tagId) {
	if (field == null)
		throw new IllegalArgumentException("Record field is null.");

	switch (field) {
	case QNAME:
		return record.getReadName();
	case FLAG:
		return Integer.toString(record.getFlags());
	case RNAME:
		return record.getReferenceName();
	case POS:
		return Integer.toString(record.getAlignmentStart());
	case MAPQ:
		return Integer.toString(record.getMappingQuality());
	case CIGAR:
		return record.getCigarString();
	case RNEXT:
		return record.getMateReferenceName();
	case PNEXT:
		return Integer.toString(record.getMateAlignmentStart());
	case TLEN:
		return Integer.toString(record.getInferredInsertSize());
	case SEQ:
		return record.getReadString();
	case QUAL:
		return record.getBaseQualityString();

	case TAG:
		if (tagId == null)
			throw new IllegalArgumentException("Tag mismatch reqiues tag id. ");
		return record.getAttribute(tagId);

	default:
		throw new IllegalArgumentException("Unknown record field: " + field.name());
	}
}
 
Example 6
Source File: SAMRecordField.java    From cramtools with Apache License 2.0 5 votes vote down vote up
public Object getValue(SAMRecord record) {
	switch (type) {
	case QNAME:
		return record.getReadName();
	case FLAG:
		return Integer.toString(record.getFlags());
	case RNAME:
		return record.getReferenceName();
	case POS:
		return Integer.toString(record.getAlignmentStart());
	case MAPQ:
		return Integer.toString(record.getMappingQuality());
	case CIGAR:
		return record.getCigarString();
	case RNEXT:
		return record.getMateReferenceName();
	case PNEXT:
		return Integer.toString(record.getMateAlignmentStart());
	case TLEN:
		return Integer.toString(record.getInferredInsertSize());
	case SEQ:
		return record.getReadString();
	case QUAL:
		return record.getBaseQualityString();

	case TAG:
		if (tagId == null)
			throw new IllegalArgumentException("Tag mismatch reqiues tag id. ");
		return record.getAttribute(tagId);

	default:
		throw new IllegalArgumentException("Unknown record field: " + type.name());
	}
}
 
Example 7
Source File: SamToFastq.java    From picard with MIT License 4 votes vote down vote up
private void writeRecord(final SAMRecord read, final Integer mateNumber, final FastqWriter writer,
                         final int basesToTrim, final Integer maxBasesToWrite) {
    final String seqHeader = mateNumber == null ? read.getReadName() : read.getReadName() + "/" + mateNumber;
    String readString = read.getReadString();
    String baseQualities = read.getBaseQualityString();

    // If we're clipping, do the right thing to the bases or qualities
    if (CLIPPING_ATTRIBUTE != null) {
        Integer clipPoint = (Integer) read.getAttribute(CLIPPING_ATTRIBUTE);
        if (clipPoint != null && clipPoint < CLIPPING_MIN_LENGTH) {
            clipPoint = Math.min(readString.length(), CLIPPING_MIN_LENGTH);
        }

        if (clipPoint != null) {
            if (CLIPPING_ACTION.equalsIgnoreCase(CLIP_TRIM)) {
                readString = clip(readString, clipPoint, null, !read.getReadNegativeStrandFlag());
                baseQualities = clip(baseQualities, clipPoint, null, !read.getReadNegativeStrandFlag());
            } else if (CLIPPING_ACTION.equalsIgnoreCase(CLIP_TO_N)) {
                readString = clip(readString, clipPoint, CLIP_TO_N.charAt(0), !read.getReadNegativeStrandFlag());
            } else {
                final char newQual = SAMUtils.phredToFastq(new byte[]{(byte) Integer.parseInt(CLIPPING_ACTION)}).charAt(0);
                baseQualities = clip(baseQualities, clipPoint, newQual, !read.getReadNegativeStrandFlag());
            }
        }
    }

    if (RE_REVERSE && read.getReadNegativeStrandFlag()) {
        readString = SequenceUtil.reverseComplement(readString);
        baseQualities = StringUtil.reverseString(baseQualities);
    }

    if (basesToTrim > 0) {
        readString = readString.substring(basesToTrim);
        baseQualities = baseQualities.substring(basesToTrim);
    }

    // Perform quality trimming if desired, making sure to leave at least one base!
    if (QUALITY != null) {
        final byte[] quals = SAMUtils.fastqToPhred(baseQualities);
        final int qualityTrimIndex = Math.max(1, TrimmingUtil.findQualityTrimPoint(quals, QUALITY));
        if (qualityTrimIndex < quals.length) {
            readString = readString.substring(0, qualityTrimIndex);
            baseQualities = baseQualities.substring(0, qualityTrimIndex);
        }
    }

    if (maxBasesToWrite != null && maxBasesToWrite < readString.length()) {
        readString = readString.substring(0, maxBasesToWrite);
        baseQualities = baseQualities.substring(0, maxBasesToWrite);
    }

    writer.write(new FastqRecord(seqHeader, readString, "", baseQualities));
}
 
Example 8
Source File: BamToDetailsConversion.java    From chipster with MIT License 4 votes vote down vote up
/**
 * Find reads in a given range.
 * 
 * <p>
 * TODO add cigar to the list of returned values
 * <p>
 * TODO add pair information to the list of returned values
 * 
 * @param request
 * @return
 * @throws InterruptedException 
 */
@Override
protected void processDataRequest(DataRequest request) throws InterruptedException {
	
	// Read the given region
	CloseableIterator<SAMRecord> iterator = dataSource.query(request.start.chr, request.start.bp.intValue(), request.end.bp.intValue());
	
	// Produce results
	while (iterator.hasNext()) {

		List<Feature> responseList = new LinkedList<Feature>();

		// Split results into chunks
		for (int c = 0; c < RESULT_CHUNK_SIZE && iterator.hasNext(); c++) {
			SAMRecord record = iterator.next();

			// Region for this read
			Region recordRegion = new Region((long) record.getAlignmentStart(), (long) record.getAlignmentEnd(), request.start.chr);

			// Values for this read
			LinkedHashMap<DataType, Object> values = new LinkedHashMap<DataType, Object>();

			Feature read = new Feature(recordRegion, values);

			if (request.getRequestedContents().contains(DataType.ID)) {
				values.put(DataType.ID, record.getReadName());
			}

			if (request.getRequestedContents().contains(DataType.STRAND)) {
				values.put(DataType.STRAND, BamUtils.getStrand(record, coverageType));					
			}

			if (request.getRequestedContents().contains(DataType.QUALITY)) {
				values.put(DataType.QUALITY, record.getBaseQualityString());
			}

			if (request.getRequestedContents().contains(DataType.CIGAR)) {
				Cigar cigar = new Cigar(read, record.getCigar());
				values.put(DataType.CIGAR, cigar);
			}

			// TODO Deal with "=" and "N" in read string
			if (request.getRequestedContents().contains(DataType.SEQUENCE)) {
				String seq = record.getReadString();
				values.put(DataType.SEQUENCE, seq);
			}

			if (request.getRequestedContents().contains(DataType.MATE_POSITION)) {
				
				BpCoord mate = new BpCoord((Long)(long)record.getMateAlignmentStart(),
						new Chromosome(record.getMateReferenceName()));
				
				values.put(DataType.MATE_POSITION, mate);
			}
			
			if (request.getRequestedContents().contains(DataType.BAM_TAG_NH)) {
				Object ng = record.getAttribute("NH");
				if (ng != null) {
					values.put(DataType.BAM_TAG_NH, (Integer)record.getAttribute("NH"));
				}
			}
			
			/*
			 * NOTE! RegionContents created from the same read area has to be equal in methods equals, hash and compareTo. Primary types
			 * should be ok, but objects (including tables) has to be handled in those methods separately. Otherwise tracks keep adding
			 * the same reads to their read sets again and again.
			 */
			responseList.add(read);
		}

		// Send result			
		super.createDataResult(new DataResult(request.getStatus(), responseList));			
	}

	// We are done
	iterator.close();
}
 
Example 9
Source File: BamToCoverageConversion.java    From chipster with MIT License 4 votes vote down vote up
private void calculateCoverage(DataRequest request, BpCoord from, BpCoord to) throws InterruptedException {	
			
	//query data for full average bins, because merging them later would be difficult
	long start = CoverageTool.getBin(from.bp);		
	long end = CoverageTool.getBin(to.bp) + CoverageTool.BIN_SIZE - 1;

	//if end coordinate is smaller than 1 Picard returns a full chromosome and we'll run out of memory
	if (end < 1) {
		end = 1;
	}
			
	CloseableIterator<SAMRecord> iterator = dataSource.query(from.chr, (int)start, (int)end);		
	
	BaseStorage forwardBaseStorage = new BaseStorage();
	BaseStorage reverseBaseStorage = new BaseStorage();
	
	while (iterator.hasNext()) {
		
		SAMRecord record = iterator.next();
		
		LinkedHashMap<DataType, Object> values = new LinkedHashMap<DataType, Object>();

		Region recordRegion = new Region((long) record.getAlignmentStart(), (long) record.getAlignmentEnd(), request.start.chr);
		
		Feature read = new Feature(recordRegion, values);

		values.put(DataType.ID, record.getReadName());
		
		values.put(DataType.STRAND, BamUtils.getStrand(record, coverageType));
		
		Cigar cigar = new Cigar(read, record.getCigar());
		values.put(DataType.CIGAR, cigar);
		
		String seq = record.getReadString();
		values.put(DataType.SEQUENCE, seq);
		
		
		// Split read into continuous blocks (elements) by using the cigar
		List<ReadPart> parts = Cigar.splitElements(read);
		
		for (ReadPart part : parts) {				 
			
			if (read.values.get(DataType.STRAND) == Strand.FORWARD) {
				forwardBaseStorage.addNucleotideCounts(part);
			} else if (read.values.get(DataType.STRAND) == Strand.REVERSE) {
				reverseBaseStorage.addNucleotideCounts(part);			
			}
		}						
	}
			
	// We are done
	iterator.close();
			
	/* Reads that overlap query regions create nucleotide counts outside the query region.
	 * Remove those extra nucleotide counts, because they don't contain all reads of those regions and would show
	 * wrong information. 
	 */
	Region filterRegion = new Region(start, end, from.chr);
	forwardBaseStorage.filter(filterRegion);
	reverseBaseStorage.filter(filterRegion);

	// Send result		
	LinkedList<Feature> resultList = new LinkedList<Feature>();					
	
	createResultList(from, forwardBaseStorage, resultList, Strand.FORWARD);
	createResultList(from, reverseBaseStorage, resultList, Strand.REVERSE);		
	
	LinkedList<Feature> averageCoverage = CoverageTool.average(resultList, from.chr);
	
	if (request.getRequestedContents().contains(DataType.COVERAGE)) {		
		super.createDataResult(new DataResult(request, resultList));
	}
	
	if (request.getRequestedContents().contains(DataType.COVERAGE_AVERAGE)) {
		super.createDataResult(new DataResult(request, averageCoverage));
	}
}