Java Code Examples for htsjdk.samtools.util.StringUtil#toUpperCase()

The following examples show how to use htsjdk.samtools.util.StringUtil#toUpperCase() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: GcBiasUtils.java    From picard with MIT License 6 votes vote down vote up
public static int[] calculateRefWindowsByGc(final int windows, final File referenceSequence, final int windowSize) {
    final ReferenceSequenceFile refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(referenceSequence);
    ReferenceSequence ref;

    final int [] windowsByGc = new int [windows];

    while ((ref = refFile.nextSequence()) != null) {
        final byte[] refBases = ref.getBases();
        StringUtil.toUpperCase(refBases);
        final int refLength = refBases.length;
        final int lastWindowStart = refLength - windowSize;

        final CalculateGcState state = new GcBiasUtils().new CalculateGcState();

        for (int i = 1; i < lastWindowStart; ++i) {
            final int windowEnd = i + windowSize;
            final int gcBin = calculateGc(refBases, i, windowEnd, state);
            if (gcBin != -1) windowsByGc[gcBin]++;
        }
    }

    return windowsByGc;
}
 
Example 2
Source File: SequenceDictionaryUtils.java    From picard with MIT License 6 votes vote down vote up
/**
 * Create one SAMSequenceRecord from a single fasta sequence
 */
private SAMSequenceRecord makeSequenceRecord(final ReferenceSequence refSeq) {
    final SAMSequenceRecord ret = new SAMSequenceRecord(refSeq.getName(), refSeq.length());

    // Compute MD5 of upcased bases
    final byte[] bases = refSeq.getBases();
    for (int i = 0; i < bases.length; ++i) {
        bases[i] = StringUtil.toUpperCase(bases[i]);
    }

    ret.setAttribute(SAMSequenceRecord.MD5_TAG, md5Hash(bases));
    if (genomeAssembly != null) {
        ret.setAttribute(SAMSequenceRecord.ASSEMBLY_TAG, genomeAssembly);
    }
    ret.setAttribute(SAMSequenceRecord.URI_TAG, uri);
    if (species != null) {
        ret.setAttribute(SAMSequenceRecord.SPECIES_TAG, species);
    }
    return ret;
}
 
Example 3
Source File: CreateSequenceDictionary.java    From varsim with BSD 2-Clause "Simplified" License 6 votes vote down vote up
/**
 * Create one SAMSequenceRecord from a single fasta sequence
 */
private SAMSequenceRecord makeSequenceRecord(final ReferenceSequence refSeq) {
  final SAMSequenceRecord ret = new SAMSequenceRecord(refSeq.getName(), refSeq.length());

  // Compute MD5 of upcased bases
  final byte[] bases = refSeq.getBases();
  for (int i = 0; i < bases.length; ++i) {
    bases[i] = StringUtil.toUpperCase(bases[i]);
  }

  ret.setAttribute(SAMSequenceRecord.MD5_TAG, md5Hash(bases));
  if (GENOME_ASSEMBLY != null) {
    ret.setAttribute(SAMSequenceRecord.ASSEMBLY_TAG, GENOME_ASSEMBLY);
  }
  ret.setAttribute(SAMSequenceRecord.URI_TAG, URI);
  if (SPECIES != null) {
    ret.setAttribute(SAMSequenceRecord.SPECIES_TAG, SPECIES);
  }
  return ret;
}
 
Example 4
Source File: Snp.java    From picard with MIT License 5 votes vote down vote up
public Snp(final String name, final String chrom, final int pos, final byte allele1, final byte allele2,
           final double maf, final List<String> fingerprintPanels) {
    this.name = name;
    this.chrom = chrom;
    this.pos = pos;
    this.allele1 = StringUtil.toUpperCase(allele1);
    this.allele2 = StringUtil.toUpperCase(allele2);
    this.maf = maf;
    this.fingerprintPanels = fingerprintPanels == null ? new ArrayList<String>() : fingerprintPanels;

    // Construct the genotypes for ease of comparison
    this.genotypes[0] = DiploidGenotype.fromBases(allele1, allele1);
    this.genotypes[1] = DiploidGenotype.fromBases(allele1, allele2);
    this.genotypes[2] = DiploidGenotype.fromBases(allele2, allele2);
}
 
Example 5
Source File: GcBiasMetricsCollector.java    From picard with MIT License 5 votes vote down vote up
@Override
public void acceptRecord(final GcBiasCollectorArgs args) {
    final SAMRecord rec = args.getRec();
    if (logCounter < 100 && rec.getReadBases().length == 0) {
        log.warn("Omitting read " + rec.getReadName() + " with '*' in SEQ field.");
        if (++logCounter == 100) {
            log.warn("There are more than 100 reads with '*' in SEQ field in file.");
        }
        return;
    }
    if (!rec.getReadUnmappedFlag()) {
        if (referenceIndex != rec.getReferenceIndex() || gc == null) {
            final ReferenceSequence ref = args.getRef();
            refBases = ref.getBases();
            StringUtil.toUpperCase(refBases);
            final int refLength = refBases.length;
            final int lastWindowStart = refLength - scanWindowSize;
            gc = GcBiasUtils.calculateAllGcs(refBases, lastWindowStart, scanWindowSize);
            referenceIndex = rec.getReferenceIndex();
        }

        addReadToGcData(rec, this.gcData);
        if (ignoreDuplicates && !rec.getDuplicateReadFlag()) {
            addReadToGcData(rec, this.gcDataNonDups);
        }
    } else {
        updateTotalClusters(rec, this.gcData);
        if (ignoreDuplicates && !rec.getDuplicateReadFlag()) {
            updateTotalClusters(rec, this.gcDataNonDups);
        }
    }
}
 
Example 6
Source File: FingerprintUtils.java    From picard with MIT License 4 votes vote down vote up
private static VariantContext getVariantContext(final ReferenceSequenceFile reference,
                                                final String sample,
                                                final HaplotypeProbabilities haplotypeProbabilities) {
    final Snp snp = haplotypeProbabilities.getRepresentativeSnp();
    final byte refAllele = StringUtil.toUpperCase(reference.getSubsequenceAt(
            snp.getChrom(),
            snp.getPos(),
            snp.getPos()).getBases()[0]);

    if (snp.getAllele1() != refAllele && snp.getAllele2() != refAllele) {
        throw new PicardException("Don't know how to deal with missing reference allele in fingerprinting map");
    }

    final Allele alleleRef;
    final Allele alleleAlt;
    final int obsRef, obsAlt;
    final boolean swap12 = snp.getAllele2() == refAllele;

    if (swap12) {
        alleleRef = Allele.create(snp.getAllele2(), true);
        alleleAlt = Allele.create(snp.getAllele1(), false);
        obsRef = haplotypeProbabilities.getObsAllele2();
        obsAlt = haplotypeProbabilities.getObsAllele1();
    } else {
        alleleRef = Allele.create(snp.getAllele1(), true);
        alleleAlt = Allele.create(snp.getAllele2(), false);
        obsRef = haplotypeProbabilities.getObsAllele1();
        obsAlt = haplotypeProbabilities.getObsAllele2();
    }

    final double[] origPLs = haplotypeProbabilities.getLogLikelihoods();
    final double[] PLs =  Arrays.copyOf(origPLs,origPLs.length);
    if (swap12) {
        ArrayUtils.reverse(PLs);
    }
    final List<Allele> alleles = Arrays.asList(alleleRef, alleleAlt);

    final Genotype gt = new GenotypeBuilder()
            .DP(haplotypeProbabilities.getTotalObs())
            .noAttributes()
            .PL(PLs)
            .AD(new int[]{obsAlt, obsRef})
            .name(sample)
            .make();
    try {
        return new VariantContextBuilder(
                snp.getName(),
                snp.getChrom(),
                snp.getPos(),
                snp.getPos(),
                alleles)
                .log10PError(VariantContext.NO_LOG10_PERROR)
                .genotypes(gt)
                .unfiltered().make();
    } catch (IllegalArgumentException e) {
        throw new IllegalArgumentException(String.format("Trouble creating variant at %s-%d", snp.getChrom(), snp.getPos()), e);
    }
}
 
Example 7
Source File: NonNFastaSize.java    From picard with MIT License 4 votes vote down vote up
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    // set up the reference and a mask so that we only count the positions requested by the user
    final ReferenceSequenceFile ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(INPUT);
    final ReferenceSequenceMask referenceSequenceMask;
    if (INTERVALS != null) {
        IOUtil.assertFileIsReadable(INTERVALS);
        final IntervalList intervalList = IntervalList.fromFile(INTERVALS);
        referenceSequenceMask = new IntervalListReferenceSequenceMask(intervalList);
    } else {
        final SAMFileHeader header = new SAMFileHeader();
        header.setSequenceDictionary(ref.getSequenceDictionary());
        referenceSequenceMask = new WholeGenomeReferenceSequenceMask(header);
    }

    long nonNbases = 0L;

    for (final SAMSequenceRecord rec : ref.getSequenceDictionary().getSequences()) {
        // pull out the contig and set up the bases
        final ReferenceSequence sequence = ref.getSequence(rec.getSequenceName());
        final byte[] bases = sequence.getBases();
        StringUtil.toUpperCase(bases);

        for (int i = 0; i < bases.length; i++) {
            // only investigate this position if it's within our mask
            if (referenceSequenceMask.get(sequence.getContigIndex(), i+1)) {
                nonNbases += bases[i] == SequenceUtil.N ? 0 : 1;
            }
        }
    }

    try {
        final BufferedWriter out = IOUtil.openFileForBufferedWriting(OUTPUT);
        out.write(nonNbases + "\n");
        out.close();
    }
    catch (IOException ioe) {
        throw new PicardException("Error writing to file " + OUTPUT.getAbsolutePath(), ioe);
    }

    return 0;
}