htsjdk.samtools.SAMUtils Java Examples

The following examples show how to use htsjdk.samtools.SAMUtils. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: BAQ.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
private void initializeCachedData() {
    for ( int i = 0; i < 256; i++ )
        for ( int j = 0; j < 256; j++ )
            for ( int q = 0; q <= SAMUtils.MAX_PHRED_SCORE; q++ ) {
                EPSILONS[i][j][q] = 1.0;
            }

    for ( char b1 : "ACGTacgt".toCharArray() ) {
        for ( char b2 : "ACGTacgt".toCharArray() ) {
            for ( int q = 0; q <= SAMUtils.MAX_PHRED_SCORE; q++ ) {
                double qual = qual2prob[q < minBaseQual ? minBaseQual : q];
                double e = Character.toLowerCase(b1) == Character.toLowerCase(b2) ? 1 - qual : qual * EM;
                EPSILONS[(byte)b1][(byte)b2][q] = e;
            }
        }
    }
}
 
Example #2
Source File: Read.java    From cramtools with Apache License 2.0 6 votes vote down vote up
SAMRecord secondSAMRecord(SAMFileHeader header) {
	SAMRecord r = new SAMRecord(header);
	r.setReadName(evidenceRecord.getReadName());
	r.setReferenceName(evidenceRecord.Chromosome);
	r.setAlignmentStart(Integer.valueOf(evidenceRecord.MateOffsetInReference) + 1);
	r.setMappingQuality(Integer.valueOf(evidenceRecord.ScoreAllele0));
	r.setReadPairedFlag(true);
	r.setReadUnmappedFlag(false);
	r.setReadNegativeStrandFlag(negative);
	r.setFirstOfPairFlag(evidenceRecord.side == 1);
	r.setSecondOfPairFlag(!r.getFirstOfPairFlag());

	r.setCigar(new Cigar(Utils.toCigarOperatorList(secondHalf.samCigarElements)));

	r.setReadBases(Utils.toByteArray(secondHalf.readBasesBuf));
	r.setBaseQualities(Utils.toByteArray(secondHalf.readScoresBuf));

	r.setAttribute("GC", Utils.toString(secondHalf.gcList));
	r.setAttribute("GS", Utils.toString(secondHalf.gsBuf));
	r.setAttribute("GQ", SAMUtils.phredToFastq(Utils.toByteArray(secondHalf.gqBuf)));

	return r;
}
 
Example #3
Source File: Read.java    From cramtools with Apache License 2.0 6 votes vote down vote up
SAMRecord firstSAMRecord(SAMFileHeader header) {
	SAMRecord r = new SAMRecord(header);
	r.setReadName(evidenceRecord.getReadName());
	r.setReferenceName(evidenceRecord.Chromosome);
	r.setAlignmentStart(Integer.valueOf(evidenceRecord.OffsetInReference) + 1);
	r.setMappingQuality(Integer.valueOf(evidenceRecord.ScoreAllele0));
	r.setReadPairedFlag(true);
	r.setReadUnmappedFlag(false);
	r.setReadNegativeStrandFlag(negative);
	r.setFirstOfPairFlag(evidenceRecord.side == 0);
	r.setSecondOfPairFlag(!r.getFirstOfPairFlag());

	r.setCigar(new Cigar(Utils.toCigarOperatorList(firstHalf.samCigarElements)));

	r.setReadBases(Utils.toByteArray(firstHalf.readBasesBuf));
	r.setBaseQualities(Utils.toByteArray(firstHalf.readScoresBuf));

	r.setAttribute("GC", Utils.toString(firstHalf.gcList));
	r.setAttribute("GS", Utils.toString(firstHalf.gsBuf));
	r.setAttribute("GQ", SAMUtils.phredToFastq(Utils.toByteArray(firstHalf.gqBuf)));

	return r;
}
 
Example #4
Source File: Read.java    From cramtools with Apache License 2.0 6 votes vote down vote up
void reset(EvidenceRecord evidenceRecord) {
	this.evidenceRecord = evidenceRecord;

	negative = "-".equals(evidenceRecord.Strand);

	baseBuf.clear();
	scoreBuf.clear();
	if (evidenceRecord.Strand.equals("+")) {
		baseBuf.put(evidenceRecord.Sequence.getBytes());
		scoreBuf.put(SAMUtils.fastqToPhred(evidenceRecord.Scores));
	} else {
		byte[] bytes = evidenceRecord.Sequence.getBytes();
		SequenceUtil.reverseComplement(bytes);
		baseBuf.put(bytes);
		bytes = SAMUtils.fastqToPhred(evidenceRecord.Scores);
		SequenceUtil.reverseQualities(bytes);
		scoreBuf.put(bytes);
	}
	baseBuf.flip();
	scoreBuf.flip();

	firstHalf.clear();
	secondHalf.clear();
}
 
Example #5
Source File: MeanQualityHistogramGeneratorUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test
public void testUsingOriginalQualities() throws Exception {
    final MeanQualityByCycleSpark.HistogramGenerator hg = new MeanQualityByCycleSpark.HistogramGenerator(true);
    Assert.assertEquals(hg.useOriginalQualities, true);

    GATKRead read1 = ArtificialReadUtils.createArtificialRead("aa".getBytes(), new byte[]{50, 50}, "2M");
    hg.addRead(read1);
    Assert.assertEquals(hg.firstReadCountsByCycle, new long[0]);
    assertEqualsDoubleArray(hg.firstReadTotalsByCycle, new double[0], 1e-05);
    Assert.assertEquals(hg.secondReadCountsByCycle, new long[0]);
    assertEqualsDoubleArray(hg.secondReadTotalsByCycle, new double[0], 1e-05);

    GATKRead read2 = ArtificialReadUtils.createArtificialRead("aa".getBytes(), new byte[]{50, 50}, "2M");
    read2.setAttribute(SAMTag.OQ.name(), SAMUtils.phredToFastq(new byte[]{30, 40}));
    hg.addRead(read2);

    Assert.assertEquals(hg.firstReadCountsByCycle, new long[]{0, 1, 1});
    assertEqualsDoubleArray(hg.firstReadTotalsByCycle, new double[]{0, 30, 40}, 1e-05);
    Assert.assertEquals(hg.secondReadCountsByCycle, new long[]{0, 0, 0});
    assertEqualsDoubleArray(hg.secondReadTotalsByCycle, new double[]{0, 0, 0}, 1e-05);
}
 
Example #6
Source File: MultiHitAlignedReadIterator.java    From picard with MIT License 6 votes vote down vote up
/**
 *
 * @param querynameOrderIterator
 * @param primaryAlignmentSelectionStrategy Algorithm for selecting primary alignment when it is not clear from
 *                                          the input what should be primary.
 */
MultiHitAlignedReadIterator(final CloseableIterator<SAMRecord> querynameOrderIterator,
                            final PrimaryAlignmentSelectionStrategy primaryAlignmentSelectionStrategy) {
    this.primaryAlignmentSelectionStrategy = primaryAlignmentSelectionStrategy;
    peekIterator = new PeekableIterator<SAMRecord>(new FilteringSamIterator(querynameOrderIterator,
            new SamRecordFilter() {
                // Filter unmapped reads.
                public boolean filterOut(final SAMRecord record) {
                    return record.getReadUnmappedFlag() || SAMUtils.cigarMapsNoBasesToRef(record.getCigar());
                }
                public boolean filterOut(final SAMRecord first, final SAMRecord second) {
                    return ((first.getReadUnmappedFlag() || SAMUtils.cigarMapsNoBasesToRef(first.getCigar()))
                            && (second.getReadUnmappedFlag() || SAMUtils.cigarMapsNoBasesToRef(second.getCigar())));
                }
            }));


    advance();
}
 
Example #7
Source File: UmiUtil.java    From picard with MIT License 6 votes vote down vote up
/**
 * Determines if the read represented by a SAM record belongs to the top or bottom strand
 * or if it cannot determine strand position due to one of the reads being unmapped.
 * Top strand is defined as having a read 1 unclipped 5' coordinate
 * less than the read 2 unclipped 5' coordinate.  If a read is unmapped
 * we do not attempt to determine the strand to which the read or its mate belongs.
 * If the mate belongs to a different contig from the read, then the reference
 * index for the read and its mate is used in leu of the unclipped 5' coordinate.
 * @param rec Record to determine top or bottom strand
 * @return Top or bottom strand, unknown if it cannot be determined.
 */
static ReadStrand getStrand(final SAMRecord rec) {
    if (rec.getReadUnmappedFlag() || rec.getMateUnmappedFlag()) {
        return ReadStrand.UNKNOWN;
    }

    // If the read pair are aligned to different contigs we use
    // the reference index to determine relative 5' coordinate ordering.
    // Both the read and its mate should not have their unmapped flag set to true.
    if (!rec.getReferenceIndex().equals(rec.getMateReferenceIndex())) {
        if (rec.getFirstOfPairFlag() == (rec.getReferenceIndex() < rec.getMateReferenceIndex())) {
            return ReadStrand.TOP;
        } else {
            return ReadStrand.BOTTOM;
        }
    }

    final int read5PrimeStart = (rec.getReadNegativeStrandFlag()) ? rec.getUnclippedEnd() : rec.getUnclippedStart();
    final int mate5PrimeStart = (rec.getMateNegativeStrandFlag()) ? SAMUtils.getMateUnclippedEnd(rec) : SAMUtils.getMateUnclippedStart(rec);

    if (rec.getFirstOfPairFlag() == (read5PrimeStart <= mate5PrimeStart)) {
        return ReadStrand.TOP;
    } else {
        return ReadStrand.BOTTOM;
    }
}
 
Example #8
Source File: CalculateReadGroupChecksum.java    From picard with MIT License 6 votes vote down vote up
@Override
protected int doWork() {
    final File output =
            OUTPUT == null
                    ? new File(INPUT.getParentFile(), getOutputFileName(INPUT))
                    : OUTPUT;

    IOUtil.assertFileIsWritable(output);
    final String hashText = SAMUtils.calculateReadGroupRecordChecksum(INPUT, REFERENCE_SEQUENCE);

    try {
        final FileWriter outputWriter = new FileWriter(output);
        outputWriter.write(hashText);
        outputWriter.close();
    } catch (final IOException ioe) {
        throw new PicardException(
                "Could not write the computed hash (" + hashText + ") to the output file: " + ioe.getMessage(), ioe);
    }
    return 0;
}
 
Example #9
Source File: BaseQualityReadTransformerTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test(dataProvider = "sequenceStrings")
public void testTest(final String quals_in, final String test_out) throws Exception {
    final BaseQualityReadTransformer filter = new BaseQualityReadTransformer(15);
    final byte[] bases = quals_in.getBytes().clone();
    Arrays.fill(bases,(byte)'A');
    final GATKRead read_in = ArtificialReadUtils.createArtificialRead(bases, SAMUtils.fastqToPhred(quals_in), "*");
    final GATKRead test_i = filter.apply(read_in);
    Assert.assertEquals(test_out,test_i.getBasesString());
}
 
Example #10
Source File: PairHMMLikelihoodCalculationEngine.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private void writeDebugLikelihoods(final GATKRead processedRead, final Haplotype haplotype, final double log10l){
    // Note: the precision of log10l in the debug output is only ~6 digits (ie., not all digits are necessarily printed)
    likelihoodsStream.printf("%s %s %s %s %s %s %f%n",
            haplotype.getBaseString(),
            new String(processedRead.getBases()),
            SAMUtils.phredToFastq(processedRead.getBaseQualities()),
            SAMUtils.phredToFastq(ReadUtils.getBaseInsertionQualities(processedRead)),
            SAMUtils.phredToFastq(ReadUtils.getBaseDeletionQualities(processedRead)),
            SAMUtils.phredToFastq(constantGCP),
            log10l);
}
 
Example #11
Source File: BestEndMapqPrimaryAlignmentStrategy.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Override
public int compare(final SAMRecord rec1, final SAMRecord rec2) {
    if (rec1.getReadUnmappedFlag()) {
        if (rec2.getReadUnmappedFlag()) return 0;
        else return 1;
    } else if (rec2.getReadUnmappedFlag()) {
        return -1;
    }
    return -SAMUtils.compareMapqs(rec1.getMappingQuality(), rec2.getMappingQuality());
}
 
Example #12
Source File: MostDistantPrimaryAlignmentSelectionStrategy.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
public void considerBest(final SAMRecord rec) {
    if (bestMapq == -1) {
        bestMapq = rec.getMappingQuality();
        bestAlignments.add(rec);
    } else {
        final int cmp = SAMUtils.compareMapqs(bestMapq, rec.getMappingQuality());
        if (cmp < 0) {
            bestMapq = rec.getMappingQuality();
            bestAlignments.clear();
            bestAlignments.add(rec);
        } else if (cmp == 0) {
            bestAlignments.add(rec);
        }
    }
}
 
Example #13
Source File: MostDistantPrimaryAlignmentSelectionStrategy.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
public void considerBest(final SAMRecord firstEnd, final SAMRecord secondEnd) {
    final int thisPairMapq = SAMUtils.combineMapqs(firstEnd.getMappingQuality(), secondEnd.getMappingQuality());
    final int thisDistance = CoordMath.getLength(Math.min(firstEnd.getAlignmentStart(), secondEnd.getAlignmentStart()),
            Math.max(firstEnd.getAlignmentEnd(), secondEnd.getAlignmentEnd()));
    if (thisDistance > bestDistance || (thisDistance == bestDistance && thisPairMapq > bestPairMapq)) {
        bestDistance = thisDistance;
        bestPairMapq = thisPairMapq;
        bestAlignmentPairs.clear();
        bestAlignmentPairs.add(new AbstractMap.SimpleEntry<>(firstEnd, secondEnd));
    } else if (thisDistance == bestDistance && thisPairMapq == bestPairMapq) {
        bestAlignmentPairs.add(new AbstractMap.SimpleEntry<>(firstEnd, secondEnd));
    }
}
 
Example #14
Source File: BaseQualityClipReadTransformerTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test(dataProvider = "sequenceStrings")
public void testApply(final String quals_in, final String basesOut, final String qualsOut, final String cigarOut) throws Exception {
    final String basesIn = "CTCAAGTACAAGCTGATCCAGACCTACAGGGTGATGTCATTAGAGGCACTGATAACACACACACTATGGGGTGGGGGTGGACAGTTCCCCACTGCAATCC";
    final BaseQualityClipReadTransformer trans = new BaseQualityClipReadTransformer(15);
    final Cigar cigar = new Cigar();
    cigar.add(new CigarElement(basesIn.length(), CigarOperator.MATCH_OR_MISMATCH));
    final GATKRead readIn = ArtificialReadUtils.createArtificialRead(basesIn.getBytes(),SAMUtils.fastqToPhred(quals_in), cigar.toString());
    final GATKRead readOut = trans.apply(readIn);
    Assert.assertEquals(readOut.getBases(),basesOut.getBytes());
    Assert.assertEquals(readOut.getBaseQualities(),SAMUtils.fastqToPhred(qualsOut));
    Assert.assertEquals(readOut.getCigar().toString(), cigarOut);
}
 
Example #15
Source File: SATagBuilder.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Returns a list of artificial GATKReads corresponding to the reads described by the SA tag in read.
 * Fills as much information as can be inferred from the original read onto the artificial read copies
 * This function is only used for testing (e.g. testGetArtificialReadsBasedOnSATag, testEmptyNMTagSupport)
 *
 * @param header the header to be used in constructing artificial reads
 */
public List<GATKRead> getArtificialReadsBasedOnSATag(SAMFileHeader header) {
    List<GATKRead> output = new ArrayList<>(supplementaryReads.size());
    GATKRead readCopy = read.copy();
    readCopy.setAttribute("SA", getTag());
    SAMRecord record = readCopy.convertToSAMRecord(header);

    List<SAMRecord> readRecords = SAMUtils.getOtherCanonicalAlignments(record);
    for (SAMRecord artificialRead : readRecords) {
        output.add(new SAMRecordToGATKReadAdapter(artificialRead));
    }

    return output;
}
 
Example #16
Source File: BAQUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test
public void testBAQQualRange() {
    BAQ baq = new BAQ(1.0e-3, 0.1, 7, (byte) 4);         // matches current samtools parameters
    final byte ref = (byte) 'A';
    final byte alt = (byte) 'A';

    for (int i = 0; i <= SAMUtils.MAX_PHRED_SCORE; i++) {
        Assert.assertTrue(baq.calcEpsilon(ref, alt, (byte) i) >= 0.0, "Failed to get baq epsilon range");
    }
}
 
Example #17
Source File: QualityScoreDistributionSparkIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test(groups = {"R", "spark"})
public void testAccumulators() throws Exception {
    final long[] qs= new long[128];
    final long[] oqs= new long[128];

    final Counts counts = new Counts(false);
    final long[] initQs = counts.getQualCounts();
    final long[] initOQs = counts.getOrigQualCounts();
    Assert.assertEquals(initQs, qs);
    Assert.assertEquals(initOQs, oqs);

    final GATKRead read1 = ArtificialReadUtils.createArtificialRead("aa".getBytes(), new byte[]{50, 50}, "2M");
    read1.setAttribute(SAMTag.OQ.name(), SAMUtils.phredToFastq(new byte[]{30, 40}));
    counts.addRead(read1);

    qs[50]+=2;//there are two bases in the read with a quality score of 50
    oqs[30]+=1;
    oqs[40]+=1;
    final long[] oneQs = counts.getQualCounts();
    final long[] oneOQs = counts.getOrigQualCounts();
    Assert.assertEquals(oneQs, qs);
    Assert.assertEquals(oneOQs, oqs);

    final Counts counts2 = new Counts(false);

    final GATKRead read2 = ArtificialReadUtils.createArtificialRead("aa".getBytes(), new byte[]{51, 51}, "2M");
    read2.setAttribute(SAMTag.OQ.name(), SAMUtils.phredToFastq(new byte[]{31, 41}));
    counts2.addRead(read2);

    counts.merge(counts2);
    qs[51]+=2;//there are two bases in the read with a quality score of 51
    oqs[31]+=1;  //new read OQ
    oqs[41]+=1;  //new read OQ
    final long[] mergedQs = counts.getQualCounts();
    final long[] mergedOQs = counts.getOrigQualCounts();
    Assert.assertEquals(mergedQs, qs);
    Assert.assertEquals(mergedOQs, oqs);
}
 
Example #18
Source File: EvidenceRecord.java    From cramtools with Apache License 2.0 5 votes vote down vote up
public static EvidenceRecord fromString(String line) {
	String[] words = line.split("\t");

	EvidenceRecord r = new EvidenceRecord();
	r.line = line;
	int i = 0;
	r.IntervalId = words[i++];
	r.Chromosome = words[i++];
	r.Slide = words[i++];
	r.Lane = words[i++];
	r.FileNumInLane = words[i++];
	r.DnbOffsetInLaneFile = words[i++];
	r.AlleleIndex = words[i++];
	r.Side = words[i++];
	r.Strand = words[i++];
	r.OffsetInAllele = words[i++];
	r.AlleleAlignment = words[i++];
	r.OffsetInReference = words[i++];
	r.ReferenceAlignment = words[i++];
	r.MateOffsetInReference = words[i++];
	r.MateReferenceAlignment = words[i++];
	r.MappingQuality = words[i++];
	r.ScoreAllele0 = words[i++];
	r.ScoreAllele1 = words[i++];
	r.ScoreAllele2 = words[i++];
	r.Sequence = words[i++];
	r.Scores = words[i++];

	r.interval = Integer.valueOf(r.IntervalId);
	r.negativeStrand = "+".equals(r.Strand) ? false : true;
	r.side = "L".equals(r.Side) ? 0 : 1;
	r.name = String.format("%s-%s-%s:%s", r.Slide, r.Lane, r.FileNumInLane, r.DnbOffsetInLaneFile);
	r.mapq = Integer.valueOf(SAMUtils.fastqToPhred(r.MappingQuality.charAt(0)));
	r.pos = Integer.valueOf(r.OffsetInReference);

	return r;
}
 
Example #19
Source File: TestQualityScorePreservation.java    From cramtools with Apache License 2.0 5 votes vote down vote up
@Ignore("Broken test.")
@Test
public void test3() {
	String line1 = "98573 1107 20 1 60 100M = 999587 -415 CTGGTCTTAGTTCCGCAAGTGGGTATATATAAAGGCTCAAAATCAATCTTTATATTGACATCTCTCTACTTATTTGTGTTGTCTGATGCTCATATTGTAG ::A<<=D@BBC;C9=7DEEBHDEHHACEEBEEEDEE=EFFHEEFFFEHEF@HFBCEFEHFEHEHFEHDHHHFHHHEHHHHDFHHHHHGHHHHHHHHHHHH";
	String line2 = "98738 1187 20 18 29 99M1S = 1000253 432 AGCGGGGATATATAAAGGCTCAAAATTACTTTTTATATGGACAACTCTCTACTGCTTTGAGATGACTGATACTCATATTGATGGAGCTTTATCAAGAAAT !\"#$%&'()*+-./0'''''''''''#'#'#'''''''#''''#'''''''''##''''#'#''#'''''#'''''''''##''''#''##''''''''?";
	String seqName = "20";
	List<String> lines = Arrays.asList(new String[] { line2, line1 });

	byte[] ref = "CTGGTCTTAGTTCCGCAAGTGGGTATATATAAAGGCTCAAAATCAATCTTTATATTGACATCTCTCTACTTATTTGTGTTGTCTGATGCTCATATTGTAGGAGATTCCTCAAGAAAGG"
			.getBytes();
	ReferenceTracks tracks = new ReferenceTracks(0, seqName, ref);
	QualityScorePreservation p = new QualityScorePreservation("R8-N40-M40-D40");

	for (String line : lines) {
		SAMRecord record = buildSAMRecord(seqName, line);

		Sam2CramRecordFactory f = new Sam2CramRecordFactory(ref, record.getHeader(), CramVersions.CRAM_v3);
		CramCompressionRecord cramRecord = f.createCramRecord(record);

		p.addQualityScores(record, cramRecord, tracks);
		if (!cramRecord.isForcePreserveQualityScores()) {
			CramNormalizer.restoreQualityScores((byte) 30, Collections.singletonList(cramRecord));
		}

		StringBuffer sb = new StringBuffer();
		sb.append(record.getBaseQualityString());
		sb.append("\n");
		sb.append(SAMUtils.phredToFastq(cramRecord.qualityScores));

		assertArrayEquals(sb.toString(), record.getBaseQualities(), cramRecord.qualityScores);
	}
}
 
Example #20
Source File: TestQualityScorePreservation.java    From cramtools with Apache License 2.0 5 votes vote down vote up
@Ignore("Broken test.")
@Test
public void test4() {
	String line2 = "98738 1187 20 18 29 99M1S = 1000253 432 AGCGGGGATATATAAAGGCTCAAAATTACTTTTTATATGGACAACTCTCTACTGCTTTGAGATGACTGATACTCATATTGATGGAGCTTTATCAAGAAAT !\"#$%&'()*+-./0'''''''''''#'#'#'''''''#''''#'''''''''##''''#'#''#'''''#'''''''''##''''#''##''''''''?";
	String seqName = "20";
	List<String> lines = Arrays.asList(new String[] { line2 });

	byte[] ref = "CTGGTCTTAGTTCCGCAAGTGGGTATATATAAAGGCTCAAAATCAATCTTTATATTGACATCTCTCTACTTATTTGTGTTGTCTGATGCTCATATTGTAGGAGATTCCTCAAGAAAGG"
			.getBytes();
	ReferenceTracks tracks = new ReferenceTracks(0, seqName, ref);
	QualityScorePreservation p = new QualityScorePreservation("R40X10-N40-U40");
	for (int i = 0; i < ref.length; i++)
		tracks.addCoverage(i + 1, 66);

	for (String line : lines) {
		SAMRecord record = buildSAMRecord(seqName, line);

		Sam2CramRecordFactory f = new Sam2CramRecordFactory(ref, record.getHeader(), CramVersions.CRAM_v3);
		CramCompressionRecord cramRecord = f.createCramRecord(record);

		p.addQualityScores(record, cramRecord, tracks);
		if (!cramRecord.isForcePreserveQualityScores()) {
			CramNormalizer.restoreQualityScores((byte) 30, Collections.singletonList(cramRecord));
		}

		StringBuffer sb = new StringBuffer();
		sb.append(record.getBaseQualityString());
		sb.append("\n");
		sb.append(SAMUtils.phredToFastq(cramRecord.qualityScores));

		assertArrayEquals(sb.toString(), record.getBaseQualities(), cramRecord.qualityScores);
	}
}
 
Example #21
Source File: CleanSamTester.java    From picard with MIT License 5 votes vote down vote up
protected void test() {
    try {
        final SamFileValidator validator = new SamFileValidator(new PrintWriter(System.out), 8000);

        // Validate it has the expected cigar
        validator.setIgnoreWarnings(true);
        validator.setVerbose(true, 1000);
        validator.setErrorsToIgnore(Arrays.asList(SAMValidationError.Type.MISSING_READ_GROUP));
        SamReaderFactory factory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT);
        SamReader samReader = factory.open(getOutput());
        final SAMRecordIterator iterator = samReader.iterator();
        while (iterator.hasNext()) {
            final SAMRecord rec = iterator.next();
            Assert.assertEquals(rec.getCigarString(), expectedCigar);
            if (SAMUtils.hasMateCigar(rec)) {
                Assert.assertEquals(SAMUtils.getMateCigarString(rec), expectedCigar);
            }
        }
        CloserUtil.close(samReader);

        // Run validation on the output file
        samReader = factory.open(getOutput());
        final boolean validated = validator.validateSamFileVerbose(samReader, null);
        CloserUtil.close(samReader);

        Assert.assertTrue(validated, "ValidateSamFile failed");
    } finally {
        IOUtil.recursiveDelete(getOutputDir().toPath());
    }
}
 
Example #22
Source File: ClusterDataToSamConverter.java    From picard with MIT License 5 votes vote down vote up
private String getBarcodeQuality(ClusterData cluster) {
    final StringJoiner barcodeQ = new StringJoiner(MOLECULAR_INDEX_QUALITY_DELIMITER);

    for (int sampleBarcodeIndex : sampleBarcodeIndices) {
        barcodeQ.add(SAMUtils.phredToFastq(cluster.getRead(sampleBarcodeIndex).getQualities()));
    }
    return barcodeQ.toString();
}
 
Example #23
Source File: IlluminaBasecallsToFastq.java    From picard with MIT License 5 votes vote down vote up
private void makeFastqRecords(final FastqRecord[] recs, final int[] indices,
                              final ClusterData cluster, final boolean appendReadNumberSuffix) {
    for (short i = 0; i < indices.length; ++i) {
        final ReadData readData = cluster.getRead(indices[i]);
        final String readBases = StringUtil.bytesToString(readData.getBases()).replace('.', 'N');
        final String readName = readNameEncoder.generateReadName(cluster, appendReadNumberSuffix ? i + 1 : null);
        recs[i] = new FastqRecord(
                readName,
                readBases,
                null,
                SAMUtils.phredToFastq(readData.getQualities())
        );
    }
}
 
Example #24
Source File: MostDistantPrimaryAlignmentSelectionStrategy.java    From picard with MIT License 5 votes vote down vote up
public void considerBest(final SAMRecord firstEnd, final SAMRecord secondEnd) {
    final int thisPairMapq = SAMUtils.combineMapqs(firstEnd.getMappingQuality(), secondEnd.getMappingQuality());
    final int thisDistance = CoordMath.getLength(Math.min(firstEnd.getAlignmentStart(), secondEnd.getAlignmentStart()),
            Math.max(firstEnd.getAlignmentEnd(), secondEnd.getAlignmentEnd()));
    if (thisDistance > bestDistance || (thisDistance == bestDistance && thisPairMapq > bestPairMapq)) {
        bestDistance = thisDistance;
        bestPairMapq = thisPairMapq;
        bestAlignmentPairs.clear();
        bestAlignmentPairs.add(new AbstractMap.SimpleEntry<SAMRecord, SAMRecord>(firstEnd, secondEnd));
    } else if (thisDistance == bestDistance && thisPairMapq == bestPairMapq) {
        bestAlignmentPairs.add(new AbstractMap.SimpleEntry<SAMRecord, SAMRecord>(firstEnd, secondEnd));
    }
}
 
Example #25
Source File: MostDistantPrimaryAlignmentSelectionStrategy.java    From picard with MIT License 5 votes vote down vote up
public void considerBest(final SAMRecord rec) {
    if (bestMapq == -1) {
        bestMapq = rec.getMappingQuality();
        bestAlignments.add(rec);
    } else {
        final int cmp = SAMUtils.compareMapqs(bestMapq, rec.getMappingQuality());
        if (cmp < 0) {
            bestMapq = rec.getMappingQuality();
            bestAlignments.clear();
            bestAlignments.add(rec);
        } else if (cmp == 0) {
            bestAlignments.add(rec);
        }
    }
}
 
Example #26
Source File: BestEndMapqPrimaryAlignmentStrategy.java    From picard with MIT License 5 votes vote down vote up
public int compare(final SAMRecord rec1, final SAMRecord rec2) {
    if (rec1.getReadUnmappedFlag()) {
        if (rec2.getReadUnmappedFlag()) return 0;
        else return 1;
    } else if (rec2.getReadUnmappedFlag()) {
        return -1;
    }
    return -SAMUtils.compareMapqs(rec1.getMappingQuality(), rec2.getMappingQuality());
}
 
Example #27
Source File: FastqToSam.java    From picard with MIT License 5 votes vote down vote up
/** Based on the type of quality scores coming in, converts them to a numeric byte[] in phred scale. */
void convertQuality(final byte[] quals, final FastqQualityFormat version) {
    switch (version)  {
        case Standard:
            SAMUtils.fastqToPhred(quals);
            break ;
        case Solexa:
            solexaQualityConverter.convertSolexaQualityCharsToPhredBinary(quals);
            break ;
        case Illumina:
            solexaQualityConverter.convertSolexa_1_3_QualityCharsToPhredBinary(quals);
            break ;
        }
}
 
Example #28
Source File: TestBAMSplitGuesser.java    From Hadoop-BAM with MIT License 5 votes vote down vote up
@Test
public void test() throws Exception {
  Configuration conf = new Configuration();
  String bam = getClass().getClassLoader().getResource("test.bam").getFile();
  SeekableStream ss = WrapSeekable.openPath(conf, new Path(bam));
  BAMSplitGuesser bamSplitGuesser = new BAMSplitGuesser(ss, conf);
  long startGuess = bamSplitGuesser.guessNextBAMRecordStart(0, 3 * 0xffff + 0xfffe);
  assertEquals(SAMUtils.findVirtualOffsetOfFirstRecordInBam(new File(bam)), startGuess);
}
 
Example #29
Source File: ReadPileupUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
@Test
public void testSimplePileup(){
    final int readlength = 10;
    final byte[] bases1 = Utils.repeatChars('A', readlength);
    final byte[] quals1 = Utils.repeatBytes((byte) 10, readlength);
    final String cigar1 = "10M";

    final byte[] bases2 = Utils.repeatChars('C', readlength);
    final byte[] quals2 = Utils.repeatBytes((byte) 20, readlength);
    final String cigar2 = "5M3I2M";

    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader();
    final GATKRead read1 = ArtificialReadUtils.createArtificialRead(header, bases1, quals1, cigar1);
    read1.setName("read1");
    final GATKRead read2 = ArtificialReadUtils.createArtificialRead(header, bases2, quals2, cigar2);
    read1.setName("read2");
    final List<GATKRead> reads = Arrays.asList(read1, read2);
    final ReadPileup pu = new ReadPileup(loc, reads, 1);

    Assert.assertNotNull(pu.toString()); //checking non-blowup. We're not making any claims about the format of toString
    Assert.assertEquals(pu.getPileupString('N'), String.format("%s %s N %s%s %s%s",
            loc.getContig(), loc.getStart(), // the position
            (char) read1.getBase(0), (char) read2.getBase(0),  // the bases
            SAMUtils.phredToFastq(read1.getBaseQuality(0)), // base quality in FASTQ format
            SAMUtils.phredToFastq(read2.getBaseQuality(0)))); // base quality in FASTQ format

    Assert.assertEquals(pu.getBases(), new byte[]{(byte) 'A', (byte) 'C'});
    Assert.assertFalse(pu.isEmpty());
    Assert.assertEquals(pu.size(), 2, "size");
    Assert.assertEquals(pu.getBaseCounts(), new int[]{1, 1, 0, 0});
    Assert.assertEquals(pu.getBaseQuals(), new byte[]{10, 20});
    Assert.assertEquals(pu.getLocation(), loc);
    Assert.assertEquals(pu.getMappingQuals(), new int[]{NO_MAPPING_QUALITY, NO_MAPPING_QUALITY});
    Assert.assertEquals(pu.makeFilteredPileup(p -> p.getQual() >= 12).makeFilteredPileup(p -> p.getMappingQual() >= 0).size(), 1, "getBaseAndMappingFilteredPileup");
    Assert.assertEquals(pu.makeFilteredPileup(r -> r.getQual() >= 12).size(), 1, "getBaseFilteredPileup");
    Assert.assertEquals(pu.getNumberOfElements(p -> true), 2);
    Assert.assertEquals(pu.getNumberOfElements(p -> false), 0);
    Assert.assertEquals(pu.getNumberOfElements(p -> p.isDeletion()), 0);
    Assert.assertEquals(pu.getNumberOfElements(p -> p.isBeforeDeletionStart()), 0);
    Assert.assertEquals(pu.getNumberOfElements(p -> p.isBeforeInsertion()), 0);
    Assert.assertEquals(pu.getNumberOfElements(p -> p.getRead().getMappingQuality() == 0), 2);
    Assert.assertEquals(pu.getOffsets(), Arrays.asList(1, 1), "getOffsets");
    Assert.assertEquals(pu.getReadGroupIDs(), new HashSet<String>(Arrays.asList(ArtificialReadUtils.READ_GROUP_ID)), "getReadGroups");
    Assert.assertEquals(pu.getReads(), Arrays.asList(read1, read2), "getReads");
    Assert.assertEquals(pu.getSamples(header), Arrays.asList(new String[]{null}), "getSamples");
    Assert.assertTrue(pu.getPileupForLane("fred").isEmpty());
    Assert.assertTrue(pu.makeFilteredPileup(r -> r.getMappingQual() >= 10).isEmpty());
}
 
Example #30
Source File: PairHMMLikelihoodCalculationEngineUnitTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
@Test
public void testComputeLikelihoods(){
    final LikelihoodEngineArgumentCollection LEAC = new LikelihoodEngineArgumentCollection();

    PairHMMLikelihoodCalculationEngine.writeLikelihoodsToFile = true;

    final ReadLikelihoodCalculationEngine lce = new PairHMMLikelihoodCalculationEngine((byte) SAMUtils.MAX_PHRED_SCORE, new PairHMMNativeArguments(),
            PairHMM.Implementation.LOGLESS_CACHING, MathUtils.logToLog10(QualityUtils.qualToErrorProbLog10(LEAC.phredScaledGlobalReadMismappingRate)),
            PairHMMLikelihoodCalculationEngine.PCRErrorModel.CONSERVATIVE);

    final Map<String, List<GATKRead>> perSampleReadList= new HashMap<>();
    final int n = 10 ;
    final GATKRead read1= ArtificialReadUtils.createArtificialRead(TextCigarCodec.decode(n + "M"));
    read1.setMappingQuality(60);
    final String sample1 = "sample1";
    perSampleReadList.put(sample1, Arrays.asList(read1));

    final SampleList samples = new IndexedSampleList(sample1);

    final AssemblyResultSet assemblyResultSet = new AssemblyResultSet();
    final byte[] bases = Strings.repeat("A", n+1).getBytes();
    final Haplotype hap1 = new Haplotype(bases, true);
    hap1.setGenomeLocation(read1);
    assemblyResultSet.add(hap1);

    final byte[] basesModified= bases;
    basesModified[5] = 'C';//different bases
    final Haplotype hap2 = new Haplotype(basesModified, false);
    hap2.setGenomeLocation(read1);//use same loc
    assemblyResultSet.add(hap2);


    final ReadLikelihoods<Haplotype> likes = lce.computeReadLikelihoods(assemblyResultSet, samples, perSampleReadList);
    final LikelihoodMatrix<Haplotype> mtx = likes.sampleMatrix(0);

    Assert.assertEquals(mtx.numberOfAlleles(), 2);
    Assert.assertEquals(mtx.numberOfReads(), 1);
    final double v1 = mtx.get(0, 0);
    final double v2 = mtx.get(1, 0);

    Assert.assertTrue(v1 > v2, "matching haplotype should have a higher likelihood");
    lce.close();
    new File(PairHMMLikelihoodCalculationEngine.LIKELIHOODS_FILENAME).delete();
}