Java Code Examples for htsjdk.samtools.SAMRecord#getReadBases()

The following examples show how to use htsjdk.samtools.SAMRecord#getReadBases() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: RefContextConsumer.java    From hmftools with GNU General Public License v3.0 6 votes vote down vote up
@Nullable
private AltRead processInsert(@NotNull final CigarElement e, @NotNull final SAMRecord record, int readIndex, int refPosition,
        final IndexedBases refBases, int numberOfEvents) {
    int refIndex = refBases.index(refPosition);

    if (refPosition <= bounds.end() && refPosition >= bounds.start()) {
        final String ref = new String(refBases.bases(), refIndex, 1);
        final String alt = new String(record.getReadBases(), readIndex, e.getLength() + 1);
        boolean findReadContext = findReadContext(readIndex, record);

        final RefContext refContext = candidates.refContext(record.getContig(), refPosition);
        if (!refContext.reachedLimit()) {
            final int baseQuality = baseQuality(readIndex, record, alt.length());
            final ReadContext readContext =
                    findReadContext ? readContextFactory.createInsertContext(alt, refPosition, readIndex, record, refBases) : null;
            return new AltRead(refContext, ref, alt, baseQuality, numberOfEvents, readContext);
        }
    }

    return null;
}
 
Example 2
Source File: RefContextConsumer.java    From hmftools with GNU General Public License v3.0 6 votes vote down vote up
@Nullable
private AltRead processDel(@NotNull final CigarElement e, @NotNull final SAMRecord record, int readIndex, int refPosition,
        final IndexedBases refBases, int numberOfEvents) {
    int refIndex = refBases.index(refPosition);

    if (refPosition <= bounds.end() && refPosition >= bounds.start()) {
        final String ref = new String(refBases.bases(), refIndex, e.getLength() + 1);
        final String alt = new String(record.getReadBases(), readIndex, 1);
        boolean findReadContext = findReadContext(readIndex, record);

        final RefContext refContext = candidates.refContext(record.getContig(), refPosition);
        if (!refContext.reachedLimit()) {
            final int baseQuality = baseQuality(readIndex, record, 2);
            final ReadContext readContext =
                    findReadContext ? readContextFactory.createDelContext(ref, refPosition, readIndex, record, refBases) : null;
            return new AltRead(refContext, ref, alt, baseQuality, numberOfEvents, readContext);
        }
    }

    return null;
}
 
Example 3
Source File: ClippingUtility.java    From picard with MIT License 6 votes vote down vote up
/**
 * When an adapter is matched in only one end of a pair, we check it again with
 * stricter thresholds.  If it still matches, then we trim both ends of the read
 * at the same location.
  */
private static boolean attemptOneSidedMatch(final SAMRecord read1,
                                         final SAMRecord read2,
                                         final int index1,
                                         final int index2,
                                         final int stricterMinMatchBases) {

    // Save all the data about the read where we found the adapter match
    final int matchedIndex = index1 == NO_MATCH ? index2 : index1;
    final SAMRecord matchedRead = index1 == NO_MATCH ? read2 : read1;


    // If it still matches with a stricter minimum matched bases, then
    // clip both reads
    if (matchedRead.getReadLength() - matchedIndex >= stricterMinMatchBases) {
        if (read1.getReadBases().length > matchedIndex) {
            read1.setAttribute(ReservedTagConstants.XT, matchedIndex + 1);
        }
        if (read2.getReadBases().length > matchedIndex) {
            read2.setAttribute(ReservedTagConstants.XT, matchedIndex + 1);
        }
        return true;
    }
    return false;
}
 
Example 4
Source File: AdapterUtility.java    From picard with MIT License 6 votes vote down vote up
/**
 * Checks the first ADAPTER_MATCH_LENGTH bases of the read against known adapter sequences and returns
 * true if the read matches an adapter sequence with MAX_ADAPTER_ERRORS mismsatches or fewer.
 * <p>
 * Only unmapped reads and reads with MQ=0 are considers eligible for being adapter
 */

 public boolean isAdapter(final SAMRecord record) {

    // If the read mapped with mapping quality > 0 we do not consider it an adapter read.
    if (!record.getReadUnmappedFlag() && record.getMappingQuality() != 0) {
        return false;
    }

    // if the read is mapped and is aligned to the reverse strand, it needs to be
    // rev-comp'ed before calling isAdapterSequence.
    final boolean revComp = !record.getReadUnmappedFlag() &&
            record.getReadNegativeStrandFlag();

    final byte[] readBases = record.getReadBases();
    return isAdapterSequence(readBases, revComp);
}
 
Example 5
Source File: BaseDistributionAtReadPosition.java    From Drop-seq with MIT License 5 votes vote down vote up
BaseDistributionMetricCollection gatherBaseQualities (final File input, final int readNumber) {
	ProgressLogger p = new ProgressLogger(this.log);

	SamReader inputSam = SamReaderFactory.makeDefault().open(input);
	BaseDistributionMetricCollection c = new BaseDistributionMetricCollection();

	MAIN_LOOP:
	for (final SAMRecord samRecord : inputSam) {

		p.record(samRecord);
		if (samRecord.isSecondaryOrSupplementary()) continue;
		boolean readPaired = samRecord.getReadPairedFlag();

		boolean firstRead=false;
		if (!readPaired & readNumber==2)
			continue;
		else if (!readPaired & readNumber==1)
			firstRead=true;
		else
			firstRead = samRecord.getFirstOfPairFlag();

		// if you're looking for the first read and this isn't, or looking for the 2nd read and this isn't, then go to the next read.
		if ((firstRead && readNumber!=1) || (!firstRead && readNumber==1)) continue MAIN_LOOP;

		byte [] bases = samRecord.getReadBases();

		for (int i=0; i<bases.length; i++) {
			char base = (char) (bases[i]);
			int idx=i+1;
			c.addBase(base, idx);
		}
	}

	CloserUtil.close(inputSam);
	return (c);


}
 
Example 6
Source File: QualityCounterCigarHandler.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
@Override
public void handleAlignment(@NotNull final SAMRecord r, @NotNull final CigarElement e, final int startReadIndex, final int refPos) {
    for (int i = 0; i < e.getLength(); i++) {
        int readIndex = startReadIndex + i;
        int position = refPos + i;

        if (position > bounds.end()) {
            return;
        }

        if (position < bounds.start()) {
            continue;
        }

        byte ref = refGenome.base(position);
        byte alt = r.getReadBases()[readIndex];
        byte quality = r.getBaseQualities()[readIndex];
        byte[] trinucleotideContext = refGenome.trinucleotideContext(position);

        if (alt != N && isValid(trinucleotideContext)) {
            final QualityCounterKey key = ImmutableQualityCounterKey.builder()
                    .ref(ref)
                    .alt(alt)
                    .qual(quality)
                    .position(position)
                    .trinucleotideContext(trinucleotideContext)
                    .build();
            qualityMap.computeIfAbsent(key, QualityCounter::new).increment();
        }
    }
}
 
Example 7
Source File: RawContextCigarHandler.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
private static boolean matchesString(@NotNull final SAMRecord record, int index, @NotNull final String expected) {
    for (int i = 0; i < expected.length(); i++) {
        if ((byte) expected.charAt(i) != record.getReadBases()[index + i]) {
            return false;
        }
    }
    return true;
}
 
Example 8
Source File: CramSerilization.java    From cramtools with Apache License 2.0 5 votes vote down vote up
private static void updateTracks(final List<SAMRecord> samRecords, final ReferenceTracks tracks) {
	for (final SAMRecord samRecord : samRecords) {
		if (samRecord.getAlignmentStart() != SAMRecord.NO_ALIGNMENT_START
				&& samRecord.getReferenceIndex() == tracks.getSequenceId()) {
			int refPos = samRecord.getAlignmentStart();
			int readPos = 0;
			for (final CigarElement cigarElement : samRecord.getCigar().getCigarElements()) {
				if (cigarElement.getOperator().consumesReferenceBases()) {
					for (int elementIndex = 0; elementIndex < cigarElement.getLength(); elementIndex++)
						tracks.addCoverage(refPos + elementIndex, 1);
				}
				switch (cigarElement.getOperator()) {
				case M:
				case X:
				case EQ:
					for (int pos = readPos; pos < cigarElement.getLength(); pos++) {
						final byte readBase = samRecord.getReadBases()[readPos + pos];
						final byte refBase = tracks.baseAt(refPos + pos);
						if (readBase != refBase)
							tracks.addMismatches(refPos + pos, 1);
					}
					break;

				default:
					break;
				}

				readPos += cigarElement.getOperator().consumesReadBases() ? cigarElement.getLength() : 0;
				refPos += cigarElement.getOperator().consumesReferenceBases() ? cigarElement.getLength() : 0;
			}
		}
	}
}
 
Example 9
Source File: MultiHitAlignedReadIterator.java    From picard with MIT License 5 votes vote down vote up
/** Replaces hard clips with soft clips and fills in bases and qualities with dummy values as needed. */
 private void replaceHardWithSoftClips(final SAMRecord rec) {
     if (rec.getReadUnmappedFlag()) return;
     if (rec.getCigar().isEmpty()) return;

     List<CigarElement> elements = rec.getCigar().getCigarElements();
     final CigarElement first = elements.get(0);
     final CigarElement last  = elements.size() == 1 ? null : elements.get(elements.size()-1);
     final int startHardClip = first.getOperator() == CigarOperator.H ? first.getLength() : 0;
     final int endHardClip   = (last != null && last.getOperator() == CigarOperator.H) ? last.getLength() : 0;

     if (startHardClip + endHardClip > 0) {
         final int len = rec.getReadBases().length + startHardClip + endHardClip;

         // Fix the basecalls
         final byte[] bases = new byte[len];
         Arrays.fill(bases, (byte) 'N');
         System.arraycopy(rec.getReadBases(), 0, bases, startHardClip, rec.getReadBases().length);

         // Fix the quality scores
         final byte[] quals = new byte[len];
         Arrays.fill(quals, (byte) 2  );
         System.arraycopy(rec.getBaseQualities(), 0, quals, startHardClip, rec.getBaseQualities().length);

         // Fix the cigar!
         elements = new ArrayList<CigarElement>(elements); // make it modifiable
         if (startHardClip > 0) elements.set(0, new CigarElement(first.getLength(), CigarOperator.S));
         if (endHardClip   > 0) elements.set(elements.size()-1, new CigarElement(last.getLength(), CigarOperator.S));

         // Set the update structures on the new record
         rec.setReadBases(bases);
         rec.setBaseQualities(quals);
         rec.setCigar(new Cigar(elements));
     }
}
 
Example 10
Source File: SamRecordComparision.java    From cramtools with Apache License 2.0 5 votes vote down vote up
private static ScoreDiff[] findScoreDiffs(SAMRecord r1, SAMRecord r2, byte[] ref, List<PreservationPolicy> policies) {
	if (!r1.getCigarString().equals(r2.getCigarString()))
		throw new RuntimeException("CIGAR string are different.");

	List<ScoreDiff> diffs = new ArrayList<SamRecordComparision.ScoreDiff>();
	int posInRead = 1, posInRef = r1.getAlignmentStart();

	for (CigarElement ce : r1.getCigar().getCigarElements()) {
		if (ce.getOperator().consumesReadBases()) {
			for (int i = 0; i < ce.getLength(); i++) {
				if (r1.getBaseQualities()[i + posInRead - 1] != r2.getBaseQualities()[i + posInRead - 1]) {
					ScoreDiff d = new ScoreDiff();
					d.base = r1.getReadBases()[i + posInRead - 1];
					d.refBase = ref[i + posInRef - 1];
					ce.getOperator();
					d.cigarOp = CigarOperator.enumToCharacter(ce.getOperator());
					d.oScore = r1.getBaseQualities()[i + posInRead - 1];
					d.score = r2.getBaseQualities()[i + posInRead - 1];
					d.posInRead = i + posInRead;

					if (d.score == Binning.Illumina_binning_matrix[d.oScore])
						d.treatment = QualityScoreTreatmentType.BIN;
					else if (d.score == DEFAULT_SCORE)
						d.treatment = QualityScoreTreatmentType.DROP;

					if (policies == null || !obeysLossyModel(policies, r1, d))
						diffs.add(d);
				}
			}
		}

		posInRead += ce.getOperator().consumesReadBases() ? ce.getLength() : 0;
		posInRef += ce.getOperator().consumesReferenceBases() ? ce.getLength() : 0;
	}
	return diffs.toArray(new ScoreDiff[diffs.size()]);
}
 
Example 11
Source File: Utils.java    From cramtools with Apache License 2.0 4 votes vote down vote up
/**
 * A rip off samtools bam_md.c
 * 
 * @param record
 * @param ref
 * @param calcMD
 * @param calcNM
 */
public static void calculateMdAndNmTags(SAMRecord record, byte[] ref, boolean calcMD, boolean calcNM) {
	if (!calcMD && !calcNM)
		return;

	Cigar cigar = record.getCigar();
	List<CigarElement> cigarElements = cigar.getCigarElements();
	byte[] seq = record.getReadBases();
	int start = record.getAlignmentStart() - 1;
	int i, x, y, u = 0;
	int nm = 0;
	StringBuffer str = new StringBuffer();

	int size = cigarElements.size();
	for (i = y = 0, x = start; i < size; ++i) {
		CigarElement ce = cigarElements.get(i);
		int j, l = ce.getLength();
		CigarOperator op = ce.getOperator();
		if (op == CigarOperator.MATCH_OR_MISMATCH || op == CigarOperator.EQ || op == CigarOperator.X) {
			for (j = 0; j < l; ++j) {
				int z = y + j;

				if (ref.length <= x + j)
					break; // out of boundary

				int c1 = 0;
				int c2 = 0;
				// try {
				c1 = seq[z];
				c2 = ref[x + j];

				if ((c1 == c2 && c1 != 15 && c2 != 15) || c1 == 0) {
					// a match
					++u;
				} else {
					str.append(u);
					str.appendCodePoint(ref[x + j]);
					u = 0;
					++nm;
				}
			}
			if (j < l)
				break;
			x += l;
			y += l;
		} else if (op == CigarOperator.DELETION) {
			str.append(u);
			str.append('^');
			for (j = 0; j < l; ++j) {
				if (ref[x + j] == 0)
					break;
				str.appendCodePoint(ref[x + j]);
			}
			u = 0;
			if (j < l)
				break;
			x += l;
			nm += l;
		} else if (op == CigarOperator.INSERTION || op == CigarOperator.SOFT_CLIP) {
			y += l;
			if (op == CigarOperator.INSERTION)
				nm += l;
		} else if (op == CigarOperator.SKIPPED_REGION) {
			x += l;
		}
	}
	str.append(u);

	if (calcMD)
		record.setAttribute(SAMTag.MD.name(), str.toString());
	if (calcNM)
		record.setAttribute(SAMTag.NM.name(), nm);
}
 
Example 12
Source File: SNPBasePileUp.java    From Drop-seq with MIT License 4 votes vote down vote up
/**
 * For a read on this pileup, get the base and quality of the base that is at the same
 * position as the SNP.  If the read does not overlap the interval, then return an empty array.
 * @param r The read
 * @return A length 2 byte array containing the base and quality.  Empty if the read does not overlap the interval.
 */
public byte [] getBaseAndQualityOverlappingInterval (final SAMRecord r) {
	byte [] result = new byte [2];

	List<CigarElement> elements = r.getCigar().getCigarElements();
	Iterator<AlignmentBlock> blocks = r.getAlignmentBlocks().iterator();

	int finalSNPLocalPosition=-1;
	int lengthTraversed=0;

	for (CigarElement ce: elements) {
		CigarOperator co = ce.getOperator();
		// you're in an alignment block
		if (co==CigarOperator.M) {
			// get the next alignment block
			AlignmentBlock b = blocks.next();
			int refStart = b.getReferenceStart();
			int snpLocalPos=this.getPosition() - refStart +1;
			int blockLength=b.getLength();

			// is the local position inside this alignment block?
			// if not, move onto the next block.
			if (snpLocalPos >0 && snpLocalPos<=blockLength) {
				// found it!  Done.
				finalSNPLocalPosition=snpLocalPos+lengthTraversed;
				break;
			}
		}
		// consume the bases if necessary and move on to the next element.
		if (co.consumesReadBases())
			lengthTraversed+=ce.getLength();
	}

	// if the position is assigned, then add to the pileup.
	if (finalSNPLocalPosition!=-1) {
		// arrays 0 based.
		byte [] quals = r.getBaseQualities();
		byte [] bases = r.getReadBases();
		byte base = bases[finalSNPLocalPosition-1];
		// char baseChar = StringUtil.byteToChar(base);
		byte qual = quals[finalSNPLocalPosition-1];
		result[0]=base;
		result[1]=qual;
		return (result);
	}
	return (ArrayUtils.EMPTY_BYTE_ARRAY);
}
 
Example 13
Source File: AlignmentsTags.java    From cramtools with Apache License 2.0 4 votes vote down vote up
/**
 * Same as {@link htsjdk.samtools.util.SequenceUtil#calculateMdAndNmTags}
 * but allows for reference excerpt instead of a full reference sequence.
 * 
 * @param record
 *            SAMRecord to inject NM and MD tags into
 * @param ref
 *            reference sequence bases, may be a subsequence starting at
 *            refOffest
 * @param refOffset
 *            array offset of the reference bases, 0 is the same as the
 *            whole sequence. This value will be subtracted from the
 *            reference array index in calculations.
 * @param calcMD
 *            calculate MD tag if true
 * @param calcNM
 *            calculate NM tag if true
 */
public static void calculateMdAndNmTags(final SAMRecord record, final byte[] ref, final int refOffset,
		final boolean calcMD, final boolean calcNM) {
	if (!calcMD && !calcNM)
		return;

	final Cigar cigar = record.getCigar();
	final List<CigarElement> cigarElements = cigar.getCigarElements();
	final byte[] seq = record.getReadBases();
	final int start = record.getAlignmentStart() - 1;
	int i, x, y, u = 0;
	int nm = 0;
	final StringBuilder str = new StringBuilder();

	final int size = cigarElements.size();
	for (i = y = 0, x = start; i < size; ++i) {
		final CigarElement ce = cigarElements.get(i);
		int j;
		final int length = ce.getLength();
		final CigarOperator op = ce.getOperator();
		if (op == CigarOperator.MATCH_OR_MISMATCH || op == CigarOperator.EQ || op == CigarOperator.X) {
			for (j = 0; j < length; ++j) {
				final int z = y + j;

				if (refOffset + ref.length <= x + j)
					break; // out of boundary

				int c1 = 0;
				int c2 = 0;
				// try {
				c1 = seq[z];
				c2 = ref[x + j - refOffset];

				if ((c1 == c2) || c1 == 0) {
					// a match
					++u;
				} else {
					str.append(u);
					str.appendCodePoint(ref[x + j - refOffset]);
					u = 0;
					++nm;
				}
			}
			if (j < length)
				break;
			x += length;
			y += length;
		} else if (op == CigarOperator.DELETION) {
			str.append(u);
			str.append('^');
			for (j = 0; j < length; ++j) {
				if (ref[x + j - refOffset] == 0)
					break;
				str.appendCodePoint(ref[x + j - refOffset]);
			}
			u = 0;
			if (j < length)
				break;
			x += length;
			nm += length;
		} else if (op == CigarOperator.INSERTION || op == CigarOperator.SOFT_CLIP) {
			y += length;
			if (op == CigarOperator.INSERTION)
				nm += length;
		} else if (op == CigarOperator.SKIPPED_REGION) {
			x += length;
		}
	}
	str.append(u);

	if (calcMD)
		record.setAttribute(SAMTag.MD.name(), str.toString());
	if (calcNM)
		record.setAttribute(SAMTag.NM.name(), nm);
}
 
Example 14
Source File: RrbsMetricsCollector.java    From picard with MIT License 4 votes vote down vote up
public void acceptRecord(final SAMRecordAndReference args) {
	mappedRecordCount++;

	final SAMRecord samRecord = args.getSamRecord();
	final ReferenceSequence referenceSequence = args.getReferenceSequence();

	final byte[] readBases = samRecord.getReadBases();
	final byte[] readQualities = samRecord.getBaseQualities();
	final byte[] refBases = referenceSequence.getBases();

	if (samRecord.getReadLength() < minReadLength) {
		smallReadCount++;
		return;
	} else if (SequenceUtil.countMismatches(samRecord, refBases, true) > Math.round(samRecord.getReadLength() * maxMismatchRate)) {
		mismatchCount++;
		return;
	}

	// We only record non-CpG C sites if there was at least one CpG in the read, keep track of
	// the values for this record and then apply to the global totals if valid
	int recordCpgs = 0;

	for (final AlignmentBlock alignmentBlock : samRecord.getAlignmentBlocks()) {
		final int blockLength = alignmentBlock.getLength();
		final int refFragmentStart = alignmentBlock.getReferenceStart() - 1;
		final int readFragmentStart = alignmentBlock.getReadStart() - 1;

		final byte[] refFragment = getFragment(refBases, refFragmentStart, blockLength);
		final byte[] readFragment = getFragment(readBases, readFragmentStart, blockLength);
		final byte[] readQualityFragment = getFragment(readQualities, readFragmentStart, blockLength);

		if (samRecord.getReadNegativeStrandFlag()) {
			// In the case of a negative strand, reverse (and complement for base arrays) the reference,
			// reads & qualities so that it can be treated as a positive strand for the rest of the process
			SequenceUtil.reverseComplement(refFragment);
			SequenceUtil.reverseComplement(readFragment);
			SequenceUtil.reverseQualities(readQualityFragment);
		}

		for (int i=0; i < blockLength-1; i++) {
			final int curRefIndex = getCurRefIndex(refFragmentStart, blockLength, i, samRecord.getReadNegativeStrandFlag());

			// Look at a 2-base window to see if we're on a CpG site, and if so check for conversion
			// (CG -> TG). We do not consider ourselves to be on a CpG site if we're on the last base of a read
			if ((SequenceUtil.basesEqual(refFragment[i], SequenceUtil.C)) &&
					(SequenceUtil.basesEqual(refFragment[i+1], SequenceUtil.G))) {
				// We want to catch the case where there's a CpG in the reference, even if it is not valid
				// to prevent the C showing up as a non-CpG C down below. Otherwise this could have been all
				// in one if statement
				if (isValidCpg(refFragment, readFragment, readQualityFragment, i)) {
					recordCpgs++;
					final CpgLocation curLocation = new CpgLocation(samRecord.getReferenceName(), curRefIndex);
					cpgTotal.increment(curLocation);
					if (SequenceUtil.isBisulfiteConverted(readFragment[i], refFragment[i])) {
						cpgConverted.increment(curLocation);
					}
				}
				i++;
			} else if (isC(refFragment[i], readFragment[i]) && isAboveCytoQcThreshold(readQualities, i) &&
					SequenceUtil.bisulfiteBasesEqual(false, readFragment[i+1], refFragment[i+1])) {
				// C base in the reference that's not associated with a CpG
				nCytoTotal++;
				if (SequenceUtil.isBisulfiteConverted(readFragment[i], refFragment[i])) {
					nCytoConverted++;
				}
			}
		}
	}

	if (recordCpgs == 0) {
		noCpgCount++;
	}
}
 
Example 15
Source File: CollectSequencingArtifactMetrics.java    From picard with MIT License 4 votes vote down vote up
@Override
protected void acceptRead(final SAMRecord rec, final ReferenceSequence ref) {
    // see if the whole read should be skipped
    if (recordFilter.filterOut(rec)) return;

    // check read group + library
    final String library = (rec.getReadGroup() == null) ? UNKNOWN_LIBRARY : getOrElse(rec.getReadGroup().getLibrary(), UNKNOWN_LIBRARY);
    if (!libraries.contains(library)) {
        // should never happen if SAM is valid
        throw new PicardException("Record contains library that is missing from header: " + library);
    }

    // set up some constants that don't change in the loop below
    final int contextFullLength = 2 * CONTEXT_SIZE + 1;
    final ArtifactCounter counter = artifactCounters.get(library);
    final byte[] readBases = rec.getReadBases();
    final byte[] readQuals;
    if (USE_OQ) {
        final byte[] tmp = rec.getOriginalBaseQualities();
        readQuals = tmp == null ? rec.getBaseQualities() : tmp;
    } else {
        readQuals = rec.getBaseQualities();
    }

    // iterate over aligned positions
    for (final AlignmentBlock block : rec.getAlignmentBlocks()) {
        for (int offset = 0; offset < block.getLength(); offset++) {
            // remember, these are 1-based!
            final int readPos = block.getReadStart() + offset;
            final int refPos = block.getReferenceStart() + offset;

            // skip low BQ sites
            final byte qual = readQuals[readPos - 1];
            if (qual < MINIMUM_QUALITY_SCORE) continue;

            // skip N bases in read
            final char readBase = Character.toUpperCase((char)readBases[readPos - 1]);
            if (readBase == 'N') continue;

            /**
             * Skip regions outside of intervals.
             *
             * NB: IntervalListReferenceSequenceMask.get() has side-effects which assume
             * that successive ReferenceSequence's passed to this method will be in-order
             * (e.g. it will break if you call acceptRead() with chr1, then chr2, then chr1
             * again). So this only works if the underlying iteration is coordinate-sorted.
             */
            if (intervalMask != null && !intervalMask.get(ref.getContigIndex(), refPos)) continue;

            // skip dbSNP sites
            if (dbSnpMask != null && dbSnpMask.isDbSnpSite(ref.getName(), refPos)) continue;

            // skip the ends of the reference
            final int contextStartIndex = refPos - CONTEXT_SIZE - 1;
            if (contextStartIndex < 0 || contextStartIndex + contextFullLength > ref.length()) continue;

            // skip contexts with N bases
            final String context = getRefContext(ref, contextStartIndex, contextFullLength);
            if (context.contains("N")) continue;

            // skip non-ACGT bases
            if (!SequenceUtil.isUpperACGTN((byte)readBase))
                continue;

            // count the base!
            counter.countRecord(context, readBase, rec);
        }
    }
}
 
Example 16
Source File: CompareToReference2.java    From abra2 with MIT License 4 votes vote down vote up
private char getReadBase(SAMRecord read, int index) {
	return (char) read.getReadBases()[index];
}
 
Example 17
Source File: SimpleCaller.java    From abra2 with MIT License 4 votes vote down vote up
private BaseInfo getBaseAtReferencePosition(SAMRecord read, int refPos) {
	boolean isNearEdge = false;
	boolean isIndel = false;
	int alignmentStart = read.getAlignmentStart();
	Cigar cigar = read.getCigar();
	
	char base = 'N';
	
	int readIdx = 0;
	int currRefPos = alignmentStart;
	
	for (CigarElement element : cigar.getCigarElements()) {
					
		if (element.getOperator() == CigarOperator.M) {
			readIdx += element.getLength();
			currRefPos += element.getLength();
			
			if (currRefPos > refPos) {  // TODO: double check end of read base here...
				
				int offset = currRefPos - refPos;
				
				if ((offset < 3) || (offset+3 >= element.getLength())) {
					// This position is within 3 bases of start/end of alignment or clipping or indel.
					// Tag this base for evaluation downstream.
					isNearEdge = true;
				}
				
				readIdx -= offset;
				if ((readIdx < 0) || (readIdx >= read.getReadBases().length)) {
					System.err.println("Read index out of bounds for read: " + read.getSAMString());
					break;
				}
				
				if (read.getBaseQualities()[readIdx] >= MIN_BASE_QUALITY) {
					base = (char) read.getReadBases()[readIdx];
				}
				break;
			}
		} else if (element.getOperator() == CigarOperator.I) {
			if (currRefPos == refPos) {
				//TODO: Handle insertions
				isIndel = true;
				break;
			}
			readIdx += element.getLength();
		} else if (element.getOperator() == CigarOperator.D) {
			if (refPos >= currRefPos && refPos <= currRefPos+element.getLength()) {
				//TODO: Handle deletions
				isIndel = true;
				break;
			}				
			currRefPos += element.getLength();
		} else if (element.getOperator() == CigarOperator.S) {
			readIdx += element.getLength();
		}			
	}
	
	return new BaseInfo(Character.toUpperCase(base), isNearEdge, isIndel);
}
 
Example 18
Source File: RawContextCigarHandler.java    From hmftools with GNU General Public License v3.0 4 votes vote down vote up
private static boolean matchesFirstBase(@NotNull final SAMRecord record, int index, @NotNull final String expected) {
    return expected.charAt(0) == record.getReadBases()[index];
}
 
Example 19
Source File: RefContextConsumer.java    From hmftools with GNU General Public License v3.0 4 votes vote down vote up
@NotNull
private List<AltRead> processAlignment(@NotNull final SAMRecord record, int readBasesStartIndex, int refPositionStart,
        int alignmentLength, final IndexedBases refBases, int numberOfEvents) {
    final List<AltRead> result = Lists.newArrayList();

    int refIndex = refBases.index(refPositionStart);

    for (int i = 0; i < alignmentLength; i++) {
        int refPosition = refPositionStart + i;
        int readBaseIndex = readBasesStartIndex + i;
        int refBaseIndex = refIndex + i;

        if (!inBounds(refPosition)) {
            continue;
        }

        final byte refByte = refBases.bases()[refBaseIndex];
        final String ref = String.valueOf((char) refByte);
        final byte readByte = record.getReadBases()[readBaseIndex];
        boolean findReadContext = findReadContext(readBaseIndex, record);

        final RefContext refContext = candidates.refContext(record.getContig(), refPosition);
        if (!refContext.reachedLimit()) {
            int baseQuality = record.getBaseQualities()[readBaseIndex];
            if (readByte != refByte) {
                final String alt = String.valueOf((char) readByte);
                final ReadContext readContext =
                        findReadContext ? readContextFactory.createSNVContext(refPosition, readBaseIndex, record, refBases) : null;

                result.add(new AltRead(refContext, ref, alt, baseQuality, numberOfEvents, readContext));

                if (config.mnvEnabled()) {
                    int mnvMaxLength = mnvLength(readBaseIndex, refBaseIndex, record.getReadBases(), refBases.bases());
                    for (int mnvLength = 2; mnvLength <= mnvMaxLength; mnvLength++) {

                        final String mnvRef = new String(refBases.bases(), refBaseIndex, mnvLength);
                        final String mnvAlt = new String(record.getReadBases(), readBaseIndex, mnvLength);

                        // Only check last base because some subsets may not be valid,
                        // ie CA > TA is not a valid subset of CAC > TAT
                        if (mnvRef.charAt(mnvLength - 1) != mnvAlt.charAt(mnvLength - 1)) {
                            final ReadContext mnvReadContext = findReadContext ? readContextFactory.createMNVContext(refPosition,
                                    readBaseIndex,
                                    mnvLength,
                                    record,
                                    refBases) : null;

                            result.add(new AltRead(refContext,
                                    mnvRef,
                                    mnvAlt,
                                    baseQuality,
                                    NumberEvents.numberOfEventsWithMNV(numberOfEvents, mnvRef, mnvAlt),
                                    mnvReadContext));
                        }
                    }
                }
            } else {
                refContext.refRead();
            }
        }
    }

    return result;
}