Java Code Examples for htsjdk.samtools.util.SequenceUtil

The following examples show how to use htsjdk.samtools.util.SequenceUtil. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: Drop-seq   Source File: TrimSequenceTemplate.java    License: MIT License 6 votes vote down vote up
/**
 * Test to see if this read matches this barcode. If any base of a barcode
 * starts with N or n, then ignore that position.
 *
 * @param testString
 *            The read to look for this barcode in. The barcode should be at
 *            the start of the read for this method.  The entire barcode is expected for a match.
 * @return true if this barcode is found in the read.
 */
public boolean hasForwardMatch(final String testString) {
	byte[] testBases = StringUtil.stringToBytes(testString);
	int numBasesCanMatch = 0;
	int numBasesMatch = 0;
	for (int i = 0; i < bases.length; i++) {
		if (isIgnoreBase(this.bases[i]))
			continue;
		numBasesCanMatch++;
		if (SequenceUtil.basesEqual(testBases[i], bases[i]))
			numBasesMatch++;
	}
	if (numBasesCanMatch == numBasesMatch)
		return (true);
	return false;
}
 
Example 2
public static AlignedContig fromPrimarySAMRecordString(final String samRecordStringWithExplicitTabEscapeSequenceAndNoXAField,
                                                       final boolean hasSATag) {
    final AlignmentInterval primaryAlignment = fromSAMRecordString(samRecordStringWithExplicitTabEscapeSequenceAndNoXAField, hasSATag);

    final String[] fields = samRecordStringWithExplicitTabEscapeSequenceAndNoXAField.split("\t");
    final String readName = fields[0];
    final int samFlag = Integer.valueOf( fields[1] ) ;
    final byte[] sequence = fields[9].getBytes();
    final boolean forwardStrand = SAMFlag.READ_REVERSE_STRAND.isUnset(samFlag);
    if (!forwardStrand) {
        SequenceUtil.reverseComplement(sequence);
    }

    if (hasSATag) {
        final String saTag = fields[11];
        final String[] supplements = saTag.substring(3 + saTag.indexOf(":Z:")).split(";");
        final List<AlignmentInterval> alignments = new ArrayList<>(supplements.length + 1);
        alignments.add(primaryAlignment);
        for (final String sup : supplements) {
            alignments.add( new AlignmentInterval(sup) );
        }
        return new AlignedContig(readName, sequence, alignments);
    } else {
        return new AlignedContig(readName, sequence, Collections.singletonList(primaryAlignment));
    }
}
 
Example 3
Source Project: picard   Source File: ReadBaseStratification.java    License: MIT License 6 votes vote down vote up
@Override
public Integer stratify(final SAMRecord sam) {
    int numberOfNsInRead = 0;

    // Check to see if we've already seen this record before.  If not, count the number
    // of Ns in the read, and store it in a transient attribute.  If we have seen it
    // before, simply retrieve the value and avoid doing the computation again.
    if (sam.getTransientAttribute(numberOfNsTag) != null) {
        numberOfNsInRead = (Integer) sam.getTransientAttribute(numberOfNsTag);
    } else {
        final byte[] bases = sam.getReadBases();
        for (final byte base : bases) {
            if (SequenceUtil.isNoCall(base)) {
                numberOfNsInRead++;
            }
        }
        sam.setTransientAttribute(numberOfNsTag, numberOfNsInRead);
    }
    return numberOfNsInRead;
}
 
Example 4
Source Project: picard   Source File: GcBiasUtils.java    License: MIT License 6 votes vote down vote up
public static int calculateGc(final byte[] bases, final int startIndex, final int endIndex, final CalculateGcState state) {
    if (state.init) {
        state.init = false;
        state.gcCount = 0;
        state.nCount = 0;
        for (int i = startIndex; i < endIndex; ++i) {
            final byte base = bases[i];
            if (SequenceUtil.basesEqual(base, (byte)'G') || SequenceUtil.basesEqual(base, (byte)'C')) ++state.gcCount;
            else if (SequenceUtil.basesEqual(base, (byte)'N')) ++state.nCount;
        }
    } else {
        final byte newBase = bases[endIndex - 1];
        if (SequenceUtil.basesEqual(newBase, (byte)'G') || SequenceUtil.basesEqual(newBase, (byte)'C')) ++state.gcCount;
        else if (newBase == 'N') ++state.nCount;

        if (SequenceUtil.basesEqual(state.priorBase, (byte)'G') || SequenceUtil.basesEqual(state.priorBase, (byte)'C')) --state.gcCount;
        else if (SequenceUtil.basesEqual(state.priorBase, (byte)'N')) --state.nCount;
    }
    state.priorBase = bases[startIndex];
    if (state.nCount > 4) return -1;
    else return (state.gcCount * 100) / (endIndex - startIndex);
}
 
Example 5
Source Project: picard   Source File: AdapterUtility.java    License: MIT License 6 votes vote down vote up
/**
 * Checks the first ADAPTER_MATCH_LENGTH bases of the read against known adapter sequences and returns
 * true if the read matches an adapter sequence with MAX_ADAPTER_ERRORS mismsatches or fewer.
 *
 * @param read the basecalls for the read in the order and orientation the machine read them
 * @param  revCompRead When aligned reads are checked for being adapter, this specified if the original
 *                     read had ben rev-comped during alignment.
 * @return true if the read matches an adapter and false otherwise
 */
public boolean isAdapterSequence(final byte[] read, boolean revCompRead) {
    if (read.length < ADAPTER_MATCH_LENGTH) return false;

    for (final byte[] adapter : adapterKmers) {
        int errors = 0;

        for (int i = 0; i < adapter.length; ++i) {
            final byte base = revCompRead ? SequenceUtil.complement(read[read.length - i - 1]) : read[i];
            if (!SequenceUtil.basesEqual(base, adapter[i]) && ++errors > MAX_ADAPTER_ERRORS) {
                break;
            }
        }

        if (errors <= MAX_ADAPTER_ERRORS) {
            return true;
        }
    }

    return false;
}
 
Example 6
Source Project: cramtools   Source File: ReferenceRegionTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void test_Start_1() {
	int start = 1;

	ReferenceRegion region = new ReferenceRegion(data, 0, "chr1", start);
	Assert.assertEquals(start, region.alignmentStart);
	Assert.assertEquals(0, region.arrayPosition(start));

	for (int pos = start, index = 0; pos < start + 10; pos++, index++) {
		Assert.assertEquals(data[index], region.base(pos));
	}

	int len = 10;
	String expectedMD5 = SequenceUtil.calculateMD5String(data, 0, len);
	String md5 = region.md5(start, len);
	Assert.assertEquals(expectedMD5, md5);
}
 
Example 7
Source Project: picard   Source File: RnaSeqMetricsCollector.java    License: MIT License 6 votes vote down vote up
public static OverlapDetector<Interval> makeOverlapDetector(final File samFile, final SAMFileHeader header, final File ribosomalIntervalsFile, final Log log) {

        final OverlapDetector<Interval> ribosomalSequenceOverlapDetector = new OverlapDetector<Interval>(0, 0);
        if (ribosomalIntervalsFile != null) {

            final IntervalList ribosomalIntervals = IntervalList.fromFile(ribosomalIntervalsFile);
            if (ribosomalIntervals.size() == 0) {
                log.warn("The RIBOSOMAL_INTERVALS file, " + ribosomalIntervalsFile.getAbsolutePath() + " does not contain intervals");
            }
            try {
                SequenceUtil.assertSequenceDictionariesEqual(header.getSequenceDictionary(), ribosomalIntervals.getHeader().getSequenceDictionary());
            } catch (SequenceUtil.SequenceListsDifferException e) {
                throw new PicardException("Sequence dictionaries differ in " + samFile.getAbsolutePath() + " and " + ribosomalIntervalsFile.getAbsolutePath(),
                        e);
            }
            final IntervalList uniquedRibosomalIntervals = ribosomalIntervals.uniqued();
            final List<Interval> intervals = uniquedRibosomalIntervals.getIntervals();
            ribosomalSequenceOverlapDetector.addAll(intervals, intervals);
        }
        return ribosomalSequenceOverlapDetector;
    }
 
Example 8
Source Project: picard   Source File: GcBiasMetricsCollector.java    License: MIT License 6 votes vote down vote up
private void addRead(final GcObject gcObj, final SAMRecord rec, final String group, final byte[] gc, final byte[] refBases) {
    if (!rec.getReadPairedFlag() || rec.getFirstOfPairFlag()) ++gcObj.totalClusters;
    final int pos = rec.getReadNegativeStrandFlag() ? rec.getAlignmentEnd() - scanWindowSize : rec.getAlignmentStart();
    ++gcObj.totalAlignedReads;
    if (pos > 0) {
        final int windowGc = gc[pos];
        if (windowGc >= 0) {
            ++gcObj.readsByGc[windowGc];
            gcObj.basesByGc[windowGc] += rec.getReadLength();
            gcObj.errorsByGc[windowGc] +=
                    SequenceUtil.countMismatches(rec, refBases, bisulfite) +
                            SequenceUtil.countInsertedBases(rec) + SequenceUtil.countDeletedBases(rec);
        }
    }
    if (gcObj.group == null) {
        gcObj.group = group;
    }
}
 
Example 9
Source Project: picard   Source File: QualityScoreDistribution.java    License: MIT License 6 votes vote down vote up
@Override
protected void acceptRead(final SAMRecord rec, final ReferenceSequence ref) {
    // Skip unwanted records
    if (PF_READS_ONLY && rec.getReadFailsVendorQualityCheckFlag()) return;
    if (ALIGNED_READS_ONLY && rec.getReadUnmappedFlag()) return;
    if (rec.isSecondaryOrSupplementary()) return;

    final byte[] bases = rec.getReadBases();
    final byte[] quals = rec.getBaseQualities();
    final byte[] oq    = rec.getOriginalBaseQualities();

    final int length = quals.length;

    for (int i=0; i<length; ++i) {
        if (INCLUDE_NO_CALLS || !SequenceUtil.isNoCall(bases[i])) {
            qCounts[quals[i]]++;
            if (oq != null) oqCounts[oq[i]]++;
        }
    }
}
 
Example 10
Source Project: picard   Source File: SingleBarcodeDistanceMetric.java    License: MIT License 6 votes vote down vote up
public int hammingDistance() {
    int numMismatches = 0;
    for (int i = 0; i < barcodeBases.length && i < readBases.length && numMismatches <= maximalInterestingDistance; ++i) {

        if (SequenceUtil.isNoCall(readBases[i])) {
            continue;
        }

        if (!SequenceUtil.basesEqual(barcodeBases[i], readBases[i])) {
            ++numMismatches;
            continue;
        }

        // bases are equal but if quality is low we still penalize.
        // TODO: is it actually useful to penalize barcodes in this way?
        if (readQualities != null && readQualities[i] < minimumBaseQuality) {
            ++numMismatches;
        }
    }
    return numMismatches;
}
 
Example 11
Source Project: cramtools   Source File: ReferenceRegionTest.java    License: Apache License 2.0 6 votes vote down vote up
@Test
public void test_HangingEnd() {
	int start = 2;

	ReferenceRegion region = new ReferenceRegion(data, 0, "chr1", start);
	Assert.assertEquals(start, region.alignmentStart);
	Assert.assertEquals(0, region.arrayPosition(start));

	for (int pos = start, index = 0; pos < start + 10; pos++, index++) {
		Assert.assertEquals(data[index], region.base(pos));
	}

	String expectedMD5 = SequenceUtil.calculateMD5String(data, 0, data.length);
	String md5 = region.md5(start, data.length + 10);
	Assert.assertEquals(expectedMD5, md5);

	md5 = region.md5(start, data.length + 100);
	Assert.assertEquals(expectedMD5, md5);
}
 
Example 12
Source Project: gatk   Source File: PSFilter.java    License: BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Returns input read with alignment-related info cleared
 */
private static GATKRead clearReadAlignment(final GATKRead read, final SAMFileHeader header) {
    final GATKRead newRead = new SAMRecordToGATKReadAdapter(new SAMRecord(header));
    newRead.setName(read.getName());
    newRead.setBases(read.getBases());
    newRead.setBaseQualities(read.getBaseQualities());
    if (read.isReverseStrand()) {
        SequenceUtil.reverseComplement(newRead.getBases());
        SequenceUtil.reverseQualities(newRead.getBaseQualities());
    }
    newRead.setIsUnmapped();
    newRead.setIsPaired(read.isPaired());
    if (read.isPaired()) {
        newRead.setMateIsUnmapped();
        if (read.isFirstOfPair()) {
            newRead.setIsFirstOfPair();
        } else if (read.isSecondOfPair()) {
            newRead.setIsSecondOfPair();
        }
    }
    final String readGroup = read.getReadGroup();
    if (readGroup != null) {
        newRead.setAttribute(SAMTag.RG.name(), readGroup);
    }
    return newRead;
}
 
Example 13
Source Project: cramtools   Source File: Read.java    License: Apache License 2.0 6 votes vote down vote up
void reset(EvidenceRecord evidenceRecord) {
	this.evidenceRecord = evidenceRecord;

	negative = "-".equals(evidenceRecord.Strand);

	baseBuf.clear();
	scoreBuf.clear();
	if (evidenceRecord.Strand.equals("+")) {
		baseBuf.put(evidenceRecord.Sequence.getBytes());
		scoreBuf.put(SAMUtils.fastqToPhred(evidenceRecord.Scores));
	} else {
		byte[] bytes = evidenceRecord.Sequence.getBytes();
		SequenceUtil.reverseComplement(bytes);
		baseBuf.put(bytes);
		bytes = SAMUtils.fastqToPhred(evidenceRecord.Scores);
		SequenceUtil.reverseQualities(bytes);
		scoreBuf.put(bytes);
	}
	baseBuf.flip();
	scoreBuf.flip();

	firstHalf.clear();
	secondHalf.clear();
}
 
Example 14
/**
 * Adds a read to this count object by increasing counts of the base qualities.
 */
Counts addRead(final GATKRead read) {
    final byte[] bases = read.getBases();
    final byte[] quals = read.getBaseQualities();
    final byte[] oq    = ReadUtils.getOriginalBaseQualities(read);

    final int length = quals.length;

    for (int i=0; i<length; ++i) {
        if (includeNoCalls || !SequenceUtil.isNoCall(bases[i])) {
            qCounts[quals[i]]++;
            if (oq != null) {
                oqCounts[oq[i]]++;
            }
        }
    }
    return this;
}
 
Example 15
Source Project: Drop-seq   Source File: AdapterDescriptor.java    License: MIT License 5 votes vote down vote up
@Override
public String getSequence(SAMRecord rec) {
    final String attribute = (String) rec.getAttribute(binaryTag);
    if (reverseComplement) {
        return SequenceUtil.reverseComplement(attribute);
    } else {
        return attribute;
    }
}
 
Example 16
Source Project: Drop-seq   Source File: TrimSequenceTemplate.java    License: MIT License 5 votes vote down vote up
public TrimSequenceTemplate(final String sequence, final String ignoredBases) {
	this.sequence = sequence;
	this.reverseComplement = SequenceUtil.reverseComplement(this.sequence);
	bases = StringUtil.stringToBytes(this.sequence);
	rcBases = StringUtil
			.stringToBytes(this.reverseComplement);
	this.ignoredBases = StringUtil
			.stringToBytes(ignoredBases);
}
 
Example 17
Source Project: Drop-seq   Source File: TrimSequenceTemplate.java    License: MIT License 5 votes vote down vote up
/**
 * Does the testString have part (or all) of the template in it?
 * The test string starting at position 1 may have some portion of the template.
 * For example, ANY bases of the template could have the 8 bases in the middle of a 30 base testString.
 * This is more flexible and slow than getPositionInTemplate, as it will let any portion of the template occur anywhere in the read.
 * @param testString A string to test against the template
 * @param minMatch The number of bases that must match in both
 * @param mismatchesAllowed How many mismatches can there be between the template and the read
 * @return The position in the read, 0 based.
 */
public int getPositionInRead (final String testString, final int minMatch, final int mismatchesAllowed) {
	byte [] read = StringUtil.stringToBytes(testString);
	// If the read's too short we can't possibly match it
       if (read == null || read.length < minMatch) return -1;

       int maxNumMatches=0;
       int bestReadStartPos=0;

       int lastViableReadIndex=read.length-minMatch+1;
       // Walk forwards
       READ_LOOP:  // walks through the read, looking for the template.
       for (int readIndex = 0; readIndex<lastViableReadIndex; readIndex++) {
           int mismatches = 0;
           int numMatches= 0;

           // can only search as far as you have left in the read.
           final int searchLength = Math.min(this.bases.length, read.length-readIndex);
           if (searchLength<minMatch) break;  // if you don't have enough search space left to match a min number of bases you give up.
           for (int templateIndex = 0; templateIndex < searchLength; templateIndex++) {
           	int tempReadIndex=templateIndex+readIndex;
           	char templateBase = (char)this.bases[templateIndex];
           	char readBase = (char) read[tempReadIndex];
               if (SequenceUtil.isNoCall(read[tempReadIndex]) || !SequenceUtil.basesEqual(this.bases[templateIndex], read[tempReadIndex])) {
               	if (++mismatches > mismatchesAllowed) continue READ_LOOP;
               } else
				numMatches++;
               if (numMatches>maxNumMatches) {
               	maxNumMatches=numMatches;
               	bestReadStartPos=readIndex;
               }
           }

       }
       if (maxNumMatches<minMatch) return (-1);
       return bestReadStartPos;
}
 
Example 18
Source Project: genomewarp   Source File: TransformationUnit.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns the target assembly variant representing this query variant.
 *
 * <p>This function performs the coordinate transformation and sequence complementing necessary
 * for variants on either strand. However, it does not perform any left-shifting or changing of
 * anchor bases for indels on the negative strand.
 *
 * @param range the homologous range linking the query and target assemblies
 * @param template the variant template to use in creation of the target variant
 * @param queryStartPosition the start position of the variant on the query assembly
 * @param targetPositiveStrandReferenceAllele the reference allele in the target variant if
 *        it is on the positive strand
 * @param targetPositiveStrandAlternateAlleles the list of alternate alleles in the target variant
 *        if it is on the positive strand
 * @return the variant of interest on the target assembly
 */
@VisibleForTesting
static Variant simpleCoordinateTransform(
    HomologousRange range,
    Variant template,
    long queryStartPosition,
    String targetPositiveStrandReferenceAllele,
    List<String> targetPositiveStrandAlternateAlleles) {
  long convertedQueryStartPosition = positionConversion(range, queryStartPosition);
  switch (range.getTargetStrand()) {
    case POSITIVE_STRAND:
      return Variant.newBuilder(template)
          .setReferenceName(range.getTargetRange().getReferenceName())
          .setStart(convertedQueryStartPosition)
          .setEnd(convertedQueryStartPosition + targetPositiveStrandReferenceAllele.length())
          .setReferenceBases(targetPositiveStrandReferenceAllele)
          .clearAlternateBases()
          .addAllAlternateBases(targetPositiveStrandAlternateAlleles)
          .build();
    case NEGATIVE_STRAND:
      ImmutableList.Builder<String> revcompAlternateAlleles = ImmutableList.builder();
      for (String allele : targetPositiveStrandAlternateAlleles) {
        revcompAlternateAlleles.add(SequenceUtil.reverseComplement(allele));
      }
      return Variant.newBuilder(template)
          .setReferenceName(range.getTargetRange().getReferenceName())
          // Note that start and end are swapped on the negative strand.
          .setStart(convertedQueryStartPosition - targetPositiveStrandReferenceAllele.length())
          .setEnd(convertedQueryStartPosition)
          .setReferenceBases(SequenceUtil.reverseComplement(targetPositiveStrandReferenceAllele))
          .clearAlternateBases()
          .addAllAlternateBases(revcompAlternateAlleles.build())
          .build();
    case UNKNOWN_TARGET_STRAND:
      // Fall through to default exception.
    default:
      throw new IllegalArgumentException("Cannot transform without known strand: " + range);
  }
}
 
Example 19
Source Project: genomewarp   Source File: GenomeWarpUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns true if the input string is a valid DNA string,
 * which we take to mean only containing the characters ACTGactg.
 *
 * @param inString the input string sequence to test
 * @return a boolean indicating whether the input is a DNA sequence
 */
public static boolean isValidDna(String inString) {
  final byte[] in = StringUtil.stringToBytes(inString);
  for (int i = 0; i < in.length; i++) {
    if (!SequenceUtil.isValidBase(in[i])) {
      return false;
    }
  }
  return true;
}
 
Example 20
Source Project: genomewarp   Source File: GenomeWarpSerial.java    License: Apache License 2.0 5 votes vote down vote up
public static RegionType getRegionType(HomologousRange range, Fasta queryFasta,
    Fasta targetFasta) {
  Range queryRange = range.getQueryRange();
  Range targetRange = range.getTargetRange();
  boolean isPositiveStrand = range.getTargetStrand() == TargetStrand.POSITIVE_STRAND;

  if ((queryRange.getEnd() - queryRange.getStart())
      != (targetRange.getEnd() - targetRange.getStart())) {
    return RegionType.ALIGNMENT_REQUIRED;
  }

  String querySeq = queryFasta.get(queryRange.getReferenceName(),
      queryRange.getStart(), queryRange.getEnd());

  String targetSeq = targetFasta.get(targetRange.getReferenceName(),
      targetRange.getStart(), targetRange.getEnd());

  if (Fasta.MISSING_CHROMOSOME.equals(targetSeq)) {
    return RegionType.UNKNOWN_REGION_TYPE;
  }

  if (!GenomeWarpUtils.isValidDna(querySeq) || !GenomeWarpUtils.isValidDna(targetSeq)) {
    // This type will be filtered out
    return RegionType.UNKNOWN_REGION_TYPE;
  }

  if ((isPositiveStrand && querySeq.equals(targetSeq))
      || (!isPositiveStrand && querySeq.equals(SequenceUtil.reverseComplement(targetSeq)))) {
    return RegionType.IDENTICAL;
  }

  return RegionType.MISMATCHED_BASES;
}
 
Example 21
Source Project: VarDictJava   Source File: Utils.java    License: MIT License 5 votes vote down vote up
/**
 * Method returns reverse complemented sequence for the part of the record. Can work with 3' and 5' ends
 * (if start index < 0, then it will found the index in the end of sequence by adding the length of record).
 * @param record read from SAM file to process
 * @param startIndex index where start the sequence
 * @param length length of pert of sequence
 * @return reverse complemented part of record
 */
public static String getReverseComplementedSequence(SAMRecord record, int startIndex, int length) {
    if (startIndex < 0) {
        startIndex = record.getReadLength() + startIndex;
    }
    byte[] rangeBytes = Arrays.copyOfRange(record.getReadBases(), startIndex, startIndex + length);
    SequenceUtil.reverseComplement(rangeBytes);
    return new String(rangeBytes);
}
 
Example 22
Source Project: VarDictJava   Source File: Utils.java    License: MIT License 5 votes vote down vote up
public static void complement(byte[] bases) {
    final int lastIndex = bases.length;

    for (int i = 0; i < lastIndex; i++) {
        bases[i] = SequenceUtil.complement(bases[i]);
    }
}
 
Example 23
Source Project: picard   Source File: CustomAdapterPair.java    License: MIT License 5 votes vote down vote up
CustomAdapterPair(final String fivePrime, final String threePrime) {
    this.threePrime = threePrime;
    this.threePrimeBytes = StringUtil.stringToBytes(threePrime);

    this.fivePrime = fivePrime;
    this.fivePrimeReadOrder = SequenceUtil.reverseComplement(fivePrime);
    this.fivePrimeBytes = StringUtil.stringToBytes(fivePrime);
    this.fivePrimeReadOrderBytes = StringUtil.stringToBytes(fivePrimeReadOrder);
}
 
Example 24
Source Project: picard   Source File: AbstractAlignmentMerger.java    License: MIT License 5 votes vote down vote up
/** Calculates and sets the NM, MD, and and UQ tags from the record and the reference
 *
 * @param record the record to be fixed
 * @param refSeqWalker a ReferenceSequenceWalker that will be used to traverse the reference
 * @param isBisulfiteSequence a flag indicating whether the sequence came from bisulfite-sequencing which would imply a different
 * calculation of the NM tag.
 *
 * No return value, modifies the provided record.
 */
public static void fixNmMdAndUq(final SAMRecord record, final ReferenceSequenceFileWalker refSeqWalker, final boolean isBisulfiteSequence) {
    final byte[] referenceBases = refSeqWalker.get(record.getReferenceIndex()).getBases();
    // only recalculate NM if it isn't bisulfite, since it needs to be treated specially below
    SequenceUtil.calculateMdAndNmTags(record, referenceBases, true, !isBisulfiteSequence);
    if (isBisulfiteSequence) {  // recalculate the NM tag for bisulfite data
        record.setAttribute(SAMTag.NM.name(), SequenceUtil.calculateSamNmTag(record, referenceBases, 0, isBisulfiteSequence));
    }
    fixUq(record, refSeqWalker, isBisulfiteSequence);
}
 
Example 25
Source Project: picard   Source File: AbstractAlignmentMerger.java    License: MIT License 5 votes vote down vote up
private static void moveClippedBasesToTag(final SAMRecord rec, final int clipFrom) {
    if (rec.getAttribute(HARD_CLIPPED_BASES_TAG) != null || rec.getAttribute(HARD_CLIPPED_BASE_QUALITIES_TAG) != null) {
        throw new PicardException("Record " + rec.getReadName() + " already contains tags for restoring hard-clipped bases.  This operation will permanently erase information if it proceeds.");
    }

    final byte[] bases = rec.getReadBases();
    final byte[] baseQualities = rec.getBaseQualities();
    final int readLength = rec.getReadLength();

    final int clipPositionFrom, clipPositionTo;
    if (rec.getReadNegativeStrandFlag()) {
        clipPositionFrom = 0;
        clipPositionTo = readLength - clipFrom + 1;
    } else {
        clipPositionFrom = clipFrom - 1;
        clipPositionTo = readLength;
    }

    String basesToKeepInTag = StringUtil.bytesToString(Arrays.copyOfRange(bases, clipPositionFrom, clipPositionTo));
    String qualitiesToKeepInTag = SAMUtils.phredToFastq(Arrays.copyOfRange(baseQualities, clipPositionFrom, clipPositionTo));

    if (rec.getReadNegativeStrandFlag()) {
        // Ensures that the qualities and bases in the tags are stored in their original order, as produced by the sequencer
        basesToKeepInTag = SequenceUtil.reverseComplement(basesToKeepInTag);
        qualitiesToKeepInTag =  new StringBuilder(qualitiesToKeepInTag).reverse().toString();
    }
    rec.setAttribute(HARD_CLIPPED_BASES_TAG, basesToKeepInTag);
    rec.setAttribute(HARD_CLIPPED_BASE_QUALITIES_TAG, qualitiesToKeepInTag);
}
 
Example 26
Source Project: picard   Source File: FastqToSam.java    License: MIT License 5 votes vote down vote up
/** Creates a simple SAM file from a single fastq file. */
protected int doUnpaired(final FastqReader freader, final SAMFileWriter writer) {
    int readCount = 0;
    final ProgressLogger progress = new ProgressLogger(LOG);
    for ( ; freader.hasNext()  ; readCount++) {
        final FastqRecord frec = freader.next();
        final SAMRecord srec = createSamRecord(writer.getFileHeader(), SequenceUtil.getSamReadNameFromFastqHeader(frec.getReadHeader()) , frec, false) ;
        srec.setReadPairedFlag(false);
        writer.addAlignment(srec);
        progress.record(srec);
    }

    return readCount;
}
 
Example 27
Source Project: picard   Source File: FastqToSam.java    License: MIT License 5 votes vote down vote up
/** More complicated method that takes two fastq files and builds pairing information in the SAM. */
protected int doPaired(final FastqReader freader1, final FastqReader freader2, final SAMFileWriter writer) {
    int readCount = 0;
    final ProgressLogger progress = new ProgressLogger(LOG);
    for ( ; freader1.hasNext() && freader2.hasNext() ; readCount++) {
        final FastqRecord frec1 = freader1.next();
        final FastqRecord frec2 = freader2.next();

        final String frec1Name = SequenceUtil.getSamReadNameFromFastqHeader(frec1.getReadHeader());
        final String frec2Name = SequenceUtil.getSamReadNameFromFastqHeader(frec2.getReadHeader());
        final String baseName = getBaseName(frec1Name, frec2Name, freader1, freader2);

        final SAMRecord srec1 = createSamRecord(writer.getFileHeader(), baseName, frec1, true) ;
        srec1.setFirstOfPairFlag(true);
        srec1.setSecondOfPairFlag(false);
        writer.addAlignment(srec1);
        progress.record(srec1);

        final SAMRecord srec2 = createSamRecord(writer.getFileHeader(), baseName, frec2, true) ;
        srec2.setFirstOfPairFlag(false);
        srec2.setSecondOfPairFlag(true);
        writer.addAlignment(srec2);
        progress.record(srec2);
    }

    if (freader1.hasNext() || freader2.hasNext()) {
        throw new PicardException("Input paired fastq files must be the same length");
    }

    return readCount;
}
 
Example 28
Source Project: picard   Source File: ReadBaseStratification.java    License: MIT License 5 votes vote down vote up
@Override
public Double stratify(final SAMRecord sam) {

    try {
        return gcCache.get(sam, () -> (double) Math.round(100 * SequenceUtil.calculateGc(sam.getReadBases())) / 100D);
    } catch (final ExecutionException e) {
        e.printStackTrace();
        throw new RuntimeException(e);
    }
}
 
Example 29
Source Project: picard   Source File: ReadBaseStratification.java    License: MIT License 5 votes vote down vote up
private static Integer stratifyHomopolymerLength(final RecordAndOffset recordAndOffset, final SAMLocusAndReference locusInfo) {

        final ReadDirection direction = ReadDirection.of(recordAndOffset.getRecord());
        final byte readBases[] = recordAndOffset.getRecord().getReadBases();
        if (SequenceUtil.isNoCall(locusInfo.getReferenceBase())) {
            return null;
        }
        int runLengthOffset = recordAndOffset.getOffset();

        if (runLengthOffset < 0 || runLengthOffset >= recordAndOffset.getRecord().getReadLength()) {
            return null;
        }

        if (direction == ReadDirection.POSITIVE) {
            while (--runLengthOffset >= 0) {
                if (readBases[runLengthOffset] != readBases[recordAndOffset.getOffset() - 1]) {
                    break;
                }
            }
            return recordAndOffset.getOffset() - runLengthOffset - 1;
        } else {
            while (++runLengthOffset < recordAndOffset.getRecord().getReadLength()) {
                if (readBases[runLengthOffset] != readBases[recordAndOffset.getOffset() + 1]) {
                    break;
                }
            }
            return runLengthOffset - recordAndOffset.getOffset() - 1;
        }
    }
 
Example 30
Source Project: picard   Source File: ReadBaseStratification.java    License: MIT License 5 votes vote down vote up
private static Character stratifyReferenceBase(final RecordAndOffset recordAndOffset,
                                               final SAMLocusAndReference locusInfo) {
    final ReadDirection direction = ReadDirection.of(recordAndOffset.getRecord());

    if (SequenceUtil.isNoCall(locusInfo.getReferenceBase())) {
        return null;
    }

    return stratifySequenceBase(locusInfo.getReferenceBase(), direction == ReadDirection.NEGATIVE);
}