Java Code Examples for htsjdk.samtools.util.StringUtil#stringToBytes()

The following examples show how to use htsjdk.samtools.util.StringUtil#stringToBytes(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TrimSequenceTemplate.java    From Drop-seq with MIT License 6 votes vote down vote up
/**
 * Tests whether the given read begins with this barcode.
 *
 * Barcode positions for which {@code isIgnoreBase} reports true are skipped
 * (the original documentation describes these as N/n positions). Every
 * other barcode position must equal the base at the same index of the read.
 *
 * @param testString
 *            The read to look for this barcode in. The barcode should be at
 *            the start of the read for this method. The entire barcode is
 *            expected for a match.
 * @return true if every non-ignored barcode base matches the read; false
 *         otherwise, including when the read ends before a non-ignored
 *         barcode position.
 */
public boolean hasForwardMatch(final String testString) {
	final byte[] testBases = StringUtil.stringToBytes(testString);
	for (int i = 0; i < bases.length; i++) {
		if (isIgnoreBase(this.bases[i]))
			continue;
		// The read has no base at this position, so the whole barcode cannot
		// be present. Previously this case indexed past the end of the array.
		if (testBases == null || i >= testBases.length)
			return false;
		// One mismatching base is enough to reject the read.
		if (!SequenceUtil.basesEqual(testBases[i], bases[i]))
			return false;
	}
	return true;
}
 
Example 2
Source File: CollapseTagWithContextTest.java    From Drop-seq with MIT License 6 votes vote down vote up
/**
 * Returns a copy of {@code baseString} in which {@code numChanges} distinct,
 * randomly chosen positions have been passed through {@code alterBase}.
 *
 * @param baseString the sequence to alter
 * @param numChanges how many distinct positions to alter
 * @return the altered sequence
 * @throws IllegalArgumentException if more changes are requested than the
 *         string has characters
 */
private final String alterBaseString(final String baseString, final int numChanges) {
    final byte[] bases = StringUtil.stringToBytes(baseString);
    if (numChanges > baseString.length()) {
        throw new IllegalArgumentException("Too many changes requested");
    }
    final Set<Integer> mutatedPositions = new HashSet<>();
    for (int changesSoFar = 0; changesSoFar < numChanges; changesSoFar++) {
        // Keep drawing until we hit a position that has not been altered yet;
        // Set.add returns false for a position already in the set.
        int candidate;
        do {
            candidate = random.nextInt(bases.length);
        } while (!mutatedPositions.add(candidate));
        bases[candidate] = alterBase(bases[candidate]);
    }
    return StringUtil.bytesToString(bases);
}
 
Example 3
Source File: FastqToSam.java    From picard with MIT License 6 votes vote down vote up
/**
 * Builds an unmapped SAMRecord from a FASTQ record.
 *
 * The record receives the given name, the FASTQ read string, the read-group
 * attribute and the base qualities after they have been passed through
 * {@code convertQuality}. When {@code paired} is true the paired flag and
 * the mate-unmapped flag are set as well.
 *
 * @param header   header handed to the new SAMRecord
 * @param baseName read name to assign
 * @param frec     FASTQ record supplying bases, qualities and the read header
 * @param paired   whether the read is to be flagged as paired
 * @return the populated record
 * @throws PicardException if any quality, read as an unsigned byte, lies
 *         outside MIN_Q..MAX_Q
 */
private SAMRecord createSamRecord(final SAMFileHeader header, final String baseName, final FastqRecord frec, final boolean paired) {
    final SAMRecord record = new SAMRecord(header);
    record.setReadName(baseName);
    record.setReadString(frec.getReadString());
    record.setReadUnmappedFlag(true);
    record.setAttribute(ReservedTagConstants.READ_GROUP_ID, READ_GROUP_NAME);

    final byte[] qualities = StringUtil.stringToBytes(frec.getBaseQualityString());
    // presumably rewrites the array in place according to QUALITY_FORMAT -- confirm against convertQuality
    convertQuality(qualities, QUALITY_FORMAT);
    for (int i = 0; i < qualities.length; i++) {
        // Mask to read the byte as an unsigned value before range-checking it.
        final int unsignedQuality = qualities[i] & 0xff;
        final boolean withinRange = unsignedQuality >= MIN_Q && unsignedQuality <= MAX_Q;
        if (!withinRange) {
            throw new PicardException("Base quality " + unsignedQuality + " is not in the range " + MIN_Q + ".." +
                    MAX_Q + " for read " + frec.getReadHeader());
        }
    }
    record.setBaseQualities(qualities);

    if (paired) {
        record.setReadPairedFlag(true);
        record.setMateUnmappedFlag(true);
    }
    return record;
}
 
Example 4
Source File: PolyAWithAdapterFinder.java    From Drop-seq with MIT License 5 votes vote down vote up
/**
 * Locates the poly A run in a read, taking a possible adapter into account.
 *
 * The adapter is searched for with {@code ClippingUtility.findIndexOfClipSequence}.
 * If it is found at index 0, {@code new PolyARun(0, 0, 0)} is returned.
 * Otherwise poly A is looked for before the adapter position (or before the
 * end of the read when no adapter was found). If that fails and the adapter
 * match lies within {@code dubiousAdapterMatchLength} of the read end, the
 * search is repeated from the end of the read as if no adapter had matched.
 *
 * @param readString      the read bases
 * @param adapterSequence the adapter to look for
 * @return the result of the poly A search
 */
public PolyARun getPolyAStart(final String readString, final String adapterSequence) {
    final byte[] readBases = StringUtil.stringToBytes(readString);
    final byte[] adapterBases = StringUtil.stringToBytes(adapterSequence);
    final int matchIndex = ClippingUtility.findIndexOfClipSequence(
            readBases, adapterBases, minAdapterMatch, maxAdapterErrorRate);

    final boolean adapterFound = matchIndex != ClippingUtility.NO_MATCH;
    if (adapterFound && matchIndex == 0) {
        return new PolyARun(0, 0, 0);
    }
    final int clipPosition = adapterFound ? matchIndex : readString.length();

    final SimplePolyAFinder.PolyARun beforeAdapter = getPolyARun(readString, clipPosition);
    if (!beforeAdapter.isNoMatch()) {
        return beforeAdapter;
    }

    // If there was a short adapter match, but not enough poly A before it,
    // see if there would be enough poly A if the adapter is considered not to match.
    final boolean adapterMatchIsDubious = clipPosition < readString.length()
            && clipPosition + dubiousAdapterMatchLength >= readString.length();
    if (!adapterMatchIsDubious) {
        return beforeAdapter;
    }

    // Try again, this time looking from the end of the read.
    final SimplePolyAFinder.PolyARun ignoringAdapter = getPolyARun(readString, readString.length());
    return ignoringAdapter.isNoMatch() ? beforeAdapter : ignoringAdapter;
}
 
Example 5
Source File: TrimSequenceTemplate.java    From Drop-seq with MIT License 5 votes vote down vote up
public TrimSequenceTemplate(final String sequence, final String ignoredBases) {
	this.sequence = sequence;
	this.reverseComplement = SequenceUtil.reverseComplement(this.sequence);
	bases = StringUtil.stringToBytes(this.sequence);
	rcBases = StringUtil
			.stringToBytes(this.reverseComplement);
	this.ignoredBases = StringUtil
			.stringToBytes(ignoredBases);
}
 
Example 6
Source File: TrimSequenceTemplate.java    From Drop-seq with MIT License 5 votes vote down vote up
public TrimSequenceTemplate(final String barcode) {
	this.sequence = barcode;
	this.reverseComplement = SequenceUtil.reverseComplement(this.sequence);
	bases = StringUtil.stringToBytes(this.sequence);
	rcBases = StringUtil.stringToBytes(this.reverseComplement);
	this.ignoredBases = StringUtil.stringToBytes("Nn");
}
 
Example 7
Source File: TrimSequenceTemplate.java    From Drop-seq with MIT License 5 votes vote down vote up
/**
 * Does the testString have part (or all) of the template in it?
 * The start of the template is aligned against each read position in turn, so
 * a prefix of the template may occur anywhere in the read.
 * For example, the first 8 bases of the template could sit in the middle of a 30 base testString.
 * This is more flexible and slower than getPositionInTemplate, as it will let a portion of the template occur anywhere in the read.
 * @param testString A string to test against the template
 * @param minMatch The number of bases that must match in both
 * @param mismatchesAllowed How many mismatches can there be between the template and the read
 * @return The position in the read, 0 based, or -1 if the read is null, is shorter than minMatch,
 *         or no alignment reached minMatch matching bases.
 */
public int getPositionInRead (final String testString, final int minMatch, final int mismatchesAllowed) {
	final byte[] read = StringUtil.stringToBytes(testString);
	// If the read's too short we can't possibly match it
	if (read == null || read.length < minMatch) return -1;

	int maxNumMatches = 0;
	int bestReadStartPos = 0;

	// Start positions at or beyond this index leave fewer than minMatch read bases.
	final int lastViableReadIndex = read.length - minMatch + 1;
	// Walk forwards
	READ_LOOP:  // walks through the read, looking for the template.
	for (int readIndex = 0; readIndex < lastViableReadIndex; readIndex++) {
		int mismatches = 0;
		int numMatches = 0;

		// can only search as far as you have left in the read.
		final int searchLength = Math.min(this.bases.length, read.length - readIndex);
		if (searchLength < minMatch) break;  // if you don't have enough search space left to match a min number of bases you give up.
		for (int templateIndex = 0; templateIndex < searchLength; templateIndex++) {
			final int tempReadIndex = templateIndex + readIndex;
			// A no-call in the read counts as a mismatch, as does any unequal base.
			if (SequenceUtil.isNoCall(read[tempReadIndex]) || !SequenceUtil.basesEqual(this.bases[templateIndex], read[tempReadIndex])) {
				if (++mismatches > mismatchesAllowed) continue READ_LOOP;
			} else {
				numMatches++;
			}
			// NOTE(review): the best position is updated while the alignment is still in
			// progress, so an alignment that is later abandoned for too many mismatches
			// can still be the one reported -- confirm this is intended.
			if (numMatches > maxNumMatches) {
				maxNumMatches = numMatches;
				bestReadStartPos = readIndex;
			}
		}
	}
	if (maxNumMatches < minMatch) return (-1);
	return bestReadStartPos;
}
 
Example 8
Source File: GenomeWarpUtils.java    From genomewarp with Apache License 2.0 5 votes vote down vote up
/**
 * Checks whether a string is a valid DNA sequence, meaning every character is
 * accepted by {@code SequenceUtil.isValidBase} (taken here to mean one of
 * ACTGactg).
 *
 * @param inString the input string sequence to test
 * @return false as soon as one invalid base is seen, true otherwise
 */
public static boolean isValidDna(String inString) {
  for (final byte base : StringUtil.stringToBytes(inString)) {
    if (!SequenceUtil.isValidBase(base)) {
      return false;
    }
  }
  return true;
}
 
Example 9
Source File: CustomAdapterPair.java    From picard with MIT License 5 votes vote down vote up
CustomAdapterPair(final String fivePrime, final String threePrime) {
    this.threePrime = threePrime;
    this.threePrimeBytes = StringUtil.stringToBytes(threePrime);

    this.fivePrime = fivePrime;
    this.fivePrimeReadOrder = SequenceUtil.reverseComplement(fivePrime);
    this.fivePrimeBytes = StringUtil.stringToBytes(fivePrime);
    this.fivePrimeReadOrderBytes = StringUtil.stringToBytes(fivePrimeReadOrder);
}
 
Example 10
Source File: AdapterMarker.java    From picard with MIT License 5 votes vote down vote up
/**
 * Returns the prefix of {@code s} that is at most {@code length} characters
 * long, shortened further so that it does not end in a base for which
 * {@code SequenceUtil.isNoCall} is true.
 */
private String substringAndRemoveTrailingNs(final String s, int length) {
    int end = Math.min(length, s.length());
    final byte[] bases = StringUtil.stringToBytes(s);
    // Step back over the trailing no-call bases.
    for (; end > 0; end--) {
        if (!SequenceUtil.isNoCall(bases[end - 1])) {
            break;
        }
    }
    return s.substring(0, end);
}
 
Example 11
Source File: AdapterMarker.java    From picard with MIT License 5 votes vote down vote up
private TruncatedAdapterPair(final String name, final String threePrimeReadOrder, final String fivePrimeReadOrder) {
    this.name = name;
    this.threePrime = threePrimeReadOrder;
    this.threePrimeBytes = StringUtil.stringToBytes(threePrimeReadOrder);
    this.fivePrimeReadOrder = fivePrimeReadOrder;
    this.fivePrimeReadOrderBytes = StringUtil.stringToBytes(fivePrimeReadOrder);
    this.fivePrime = SequenceUtil.reverseComplement(fivePrimeReadOrder);
    this.fivePrimeBytes = StringUtil.stringToBytes(this.fivePrime);
}
 
Example 12
Source File: IlluminaUtil.java    From picard with MIT License 5 votes vote down vote up
private IlluminaAdapterPair(final String fivePrime, final String threePrime) {
    this.threePrime = threePrime;
    this.threePrimeBytes = StringUtil.stringToBytes(threePrime);

    this.fivePrime = fivePrime;
    this.fivePrimeReadOrder = SequenceUtil.reverseComplement(fivePrime);
    this.fivePrimeBytes = StringUtil.stringToBytes(fivePrime);
    this.fivePrimeReadOrderBytes = StringUtil.stringToBytes(fivePrimeReadOrder);
}
 
Example 13
Source File: ClippingUtilityTest.java    From picard with MIT License 5 votes vote down vote up
/**
 * Runs {@code ClippingUtility.findIndexOfClipSequence} on one row of the
 * "clipTestData" provider and checks the returned index. A null read or
 * clip is passed through as a null byte array.
 */
@Test(dataProvider="clipTestData")
public void testBasicClip(final String testName, final String read, final String clip, final int minMatch, final double errRate, final int expected) {
    byte[] readBytes = null;
    if (read != null) {
        readBytes = StringUtil.stringToBytes(read);
    }
    byte[] clipBytes = null;
    if (clip != null) {
        clipBytes = StringUtil.stringToBytes(clip);
    }

    final int actual = ClippingUtility.findIndexOfClipSequence(readBytes, clipBytes, minMatch, errRate);
    Assert.assertEquals(actual, expected, testName);
}
 
Example 14
Source File: TrimSequenceTemplate.java    From Drop-seq with MIT License 4 votes vote down vote up
/**
 * If a barcode has ignore bases, then expand those bases to A/C/G/T.
 * Otherwise, return the barcode. This is recursive, so multiple ignored
 * bases will be expanded.
 *
 * Only the first ignored position is substituted at each level; the
 * recursive calls handle the later ones.
 *
 * @param b            the barcode to expand
 * @param ignoredBases the bases to be replaced by each of A, C, G and T
 * @return a collection holding just {@code b} when it has no ignored base,
 *         otherwise the expansions in A, C, G, T order
 */
public static Collection<TrimSequenceTemplate> expandBarcode(
		final TrimSequenceTemplate b, final byte[] ignoredBases) {
	final Collection<TrimSequenceTemplate> result = new ArrayList<>();
	result.add(b);
	final byte[] bases = StringUtil.stringToBytes(b.getSequence());
	final byte[] substitutes = {A, C, G, T};
	for (int i = 0; i < bases.length; i++) {
		if (!baseInBaseList(bases[i], ignoredBases))
			continue;
		// The barcode is replaced by its expansions.
		result.remove(b);
		for (final byte substitute : substitutes) {
			bases[i] = substitute;
			final TrimSequenceTemplate newBC = new TrimSequenceTemplate(
					StringUtil.bytesToString(bases),
					StringUtil.bytesToString(ignoredBases));
			result.addAll(expandBarcode(newBC, ignoredBases));
		}
		break; // later ignored positions were expanded by the recursion
	}
	return result;
}
 
Example 15
Source File: ClippingUtilityTest.java    From picard with MIT License 4 votes vote down vote up
/** Returns the {@code threePrimeAdapter} string converted to bytes on each call. */
@Override
public byte[] get3PrimeAdapterBytes() {
    final byte[] adapterBytes = StringUtil.stringToBytes(threePrimeAdapter);
    return adapterBytes;
}
 
Example 16
Source File: IlluminaUtil.java    From picard with MIT License 2 votes vote down vote up
/**
 * Converts a slice of Solexa-scaled ASCII qualities to Phred-scaled binary.
 * The only difference is that Solexa qualities have 64 added to the Phred
 * binary value to make them printable.
 *
 * @param solexaQualities Printable ASCII qualities.
 * @param offset Character at which to start conversion.
 * @param length Number of characters to convert.
 * @return binary Phred-scaled qualities.
 */
public static byte[] makePhredBinaryFromSolexaQualityAscii_1_3(final String solexaQualities, final int offset, final int length) {
    final byte[] phredQualities = StringUtil.stringToBytes(solexaQualities, offset, length);
    final SolexaQualityConverter converter = SolexaQualityConverter.getSingleton();
    // The converter's result is read back from the same array.
    converter.convertSolexa_1_3_QualityCharsToPhredBinary(phredQualities);
    return phredQualities;
}