Java Code Examples for htsjdk.samtools.SAMUtils

The following examples show how to use htsjdk.samtools.SAMUtils. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: picard   Source File: CalculateReadGroupChecksum.java    License: MIT License 6 votes vote down vote up
@Override
protected int doWork() {
    final File output =
            OUTPUT == null
                    ? new File(INPUT.getParentFile(), getOutputFileName(INPUT))
                    : OUTPUT;

    IOUtil.assertFileIsWritable(output);
    final String hashText = SAMUtils.calculateReadGroupRecordChecksum(INPUT, REFERENCE_SEQUENCE);

    try {
        final FileWriter outputWriter = new FileWriter(output);
        outputWriter.write(hashText);
        outputWriter.close();
    } catch (final IOException ioe) {
        throw new PicardException(
                "Could not write the computed hash (" + hashText + ") to the output file: " + ioe.getMessage(), ioe);
    }
    return 0;
}
 
Example 2
Source Project: picard   Source File: UmiUtil.java    License: MIT License 6 votes vote down vote up
/**
 * Determines if the read represented by a SAM record belongs to the top or bottom strand
 * or if it cannot determine strand position due to one of the reads being unmapped.
 * Top strand is defined as having a read 1 unclipped 5' coordinate
 * less than the read 2 unclipped 5' coordinate.  If a read is unmapped
 * we do not attempt to determine the strand to which the read or its mate belongs.
 * If the mate belongs to a different contig from the read, then the reference
 * index for the read and its mate is used in leu of the unclipped 5' coordinate.
 * @param rec Record to determine top or bottom strand
 * @return Top or bottom strand, unknown if it cannot be determined.
 */
static ReadStrand getStrand(final SAMRecord rec) {
    if (rec.getReadUnmappedFlag() || rec.getMateUnmappedFlag()) {
        return ReadStrand.UNKNOWN;
    }

    // If the read pair are aligned to different contigs we use
    // the reference index to determine relative 5' coordinate ordering.
    // Both the read and its mate should not have their unmapped flag set to true.
    if (!rec.getReferenceIndex().equals(rec.getMateReferenceIndex())) {
        if (rec.getFirstOfPairFlag() == (rec.getReferenceIndex() < rec.getMateReferenceIndex())) {
            return ReadStrand.TOP;
        } else {
            return ReadStrand.BOTTOM;
        }
    }

    final int read5PrimeStart = (rec.getReadNegativeStrandFlag()) ? rec.getUnclippedEnd() : rec.getUnclippedStart();
    final int mate5PrimeStart = (rec.getMateNegativeStrandFlag()) ? SAMUtils.getMateUnclippedEnd(rec) : SAMUtils.getMateUnclippedStart(rec);

    if (rec.getFirstOfPairFlag() == (read5PrimeStart <= mate5PrimeStart)) {
        return ReadStrand.TOP;
    } else {
        return ReadStrand.BOTTOM;
    }
}
 
Example 3
Source Project: picard   Source File: MultiHitAlignedReadIterator.java    License: MIT License 6 votes vote down vote up
/**
 *
 * @param querynameOrderIterator
 * @param primaryAlignmentSelectionStrategy Algorithm for selecting primary alignment when it is not clear from
 *                                          the input what should be primary.
 */
MultiHitAlignedReadIterator(final CloseableIterator<SAMRecord> querynameOrderIterator,
                            final PrimaryAlignmentSelectionStrategy primaryAlignmentSelectionStrategy) {
    this.primaryAlignmentSelectionStrategy = primaryAlignmentSelectionStrategy;
    peekIterator = new PeekableIterator<SAMRecord>(new FilteringSamIterator(querynameOrderIterator,
            new SamRecordFilter() {
                // Filter unmapped reads.
                public boolean filterOut(final SAMRecord record) {
                    return record.getReadUnmappedFlag() || SAMUtils.cigarMapsNoBasesToRef(record.getCigar());
                }
                public boolean filterOut(final SAMRecord first, final SAMRecord second) {
                    return ((first.getReadUnmappedFlag() || SAMUtils.cigarMapsNoBasesToRef(first.getCigar()))
                            && (second.getReadUnmappedFlag() || SAMUtils.cigarMapsNoBasesToRef(second.getCigar())));
                }
            }));


    advance();
}
 
Example 4
Source Project: gatk   Source File: BAQ.java    License: BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
private void initializeCachedData() {
    for ( int i = 0; i < 256; i++ )
        for ( int j = 0; j < 256; j++ )
            for ( int q = 0; q <= SAMUtils.MAX_PHRED_SCORE; q++ ) {
                EPSILONS[i][j][q] = 1.0;
            }

    for ( char b1 : "ACGTacgt".toCharArray() ) {
        for ( char b2 : "ACGTacgt".toCharArray() ) {
            for ( int q = 0; q <= SAMUtils.MAX_PHRED_SCORE; q++ ) {
                double qual = qual2prob[q < minBaseQual ? minBaseQual : q];
                double e = Character.toLowerCase(b1) == Character.toLowerCase(b2) ? 1 - qual : qual * EM;
                EPSILONS[(byte)b1][(byte)b2][q] = e;
            }
        }
    }
}
 
Example 5
@Test
public void testUsingOriginalQualities() throws Exception {
    final MeanQualityByCycleSpark.HistogramGenerator hg = new MeanQualityByCycleSpark.HistogramGenerator(true);
    Assert.assertEquals(hg.useOriginalQualities, true);

    GATKRead read1 = ArtificialReadUtils.createArtificialRead("aa".getBytes(), new byte[]{50, 50}, "2M");
    hg.addRead(read1);
    Assert.assertEquals(hg.firstReadCountsByCycle, new long[0]);
    assertEqualsDoubleArray(hg.firstReadTotalsByCycle, new double[0], 1e-05);
    Assert.assertEquals(hg.secondReadCountsByCycle, new long[0]);
    assertEqualsDoubleArray(hg.secondReadTotalsByCycle, new double[0], 1e-05);

    GATKRead read2 = ArtificialReadUtils.createArtificialRead("aa".getBytes(), new byte[]{50, 50}, "2M");
    read2.setAttribute(SAMTag.OQ.name(), SAMUtils.phredToFastq(new byte[]{30, 40}));
    hg.addRead(read2);

    Assert.assertEquals(hg.firstReadCountsByCycle, new long[]{0, 1, 1});
    assertEqualsDoubleArray(hg.firstReadTotalsByCycle, new double[]{0, 30, 40}, 1e-05);
    Assert.assertEquals(hg.secondReadCountsByCycle, new long[]{0, 0, 0});
    assertEqualsDoubleArray(hg.secondReadTotalsByCycle, new double[]{0, 0, 0}, 1e-05);
}
 
Example 6
Source Project: cramtools   Source File: Read.java    License: Apache License 2.0 6 votes vote down vote up
void reset(EvidenceRecord evidenceRecord) {
	this.evidenceRecord = evidenceRecord;

	negative = "-".equals(evidenceRecord.Strand);

	baseBuf.clear();
	scoreBuf.clear();
	if (evidenceRecord.Strand.equals("+")) {
		baseBuf.put(evidenceRecord.Sequence.getBytes());
		scoreBuf.put(SAMUtils.fastqToPhred(evidenceRecord.Scores));
	} else {
		byte[] bytes = evidenceRecord.Sequence.getBytes();
		SequenceUtil.reverseComplement(bytes);
		baseBuf.put(bytes);
		bytes = SAMUtils.fastqToPhred(evidenceRecord.Scores);
		SequenceUtil.reverseQualities(bytes);
		scoreBuf.put(bytes);
	}
	baseBuf.flip();
	scoreBuf.flip();

	firstHalf.clear();
	secondHalf.clear();
}
 
Example 7
Source Project: cramtools   Source File: Read.java    License: Apache License 2.0 6 votes vote down vote up
SAMRecord firstSAMRecord(SAMFileHeader header) {
	SAMRecord r = new SAMRecord(header);
	r.setReadName(evidenceRecord.getReadName());
	r.setReferenceName(evidenceRecord.Chromosome);
	r.setAlignmentStart(Integer.valueOf(evidenceRecord.OffsetInReference) + 1);
	r.setMappingQuality(Integer.valueOf(evidenceRecord.ScoreAllele0));
	r.setReadPairedFlag(true);
	r.setReadUnmappedFlag(false);
	r.setReadNegativeStrandFlag(negative);
	r.setFirstOfPairFlag(evidenceRecord.side == 0);
	r.setSecondOfPairFlag(!r.getFirstOfPairFlag());

	r.setCigar(new Cigar(Utils.toCigarOperatorList(firstHalf.samCigarElements)));

	r.setReadBases(Utils.toByteArray(firstHalf.readBasesBuf));
	r.setBaseQualities(Utils.toByteArray(firstHalf.readScoresBuf));

	r.setAttribute("GC", Utils.toString(firstHalf.gcList));
	r.setAttribute("GS", Utils.toString(firstHalf.gsBuf));
	r.setAttribute("GQ", SAMUtils.phredToFastq(Utils.toByteArray(firstHalf.gqBuf)));

	return r;
}
 
Example 8
Source Project: cramtools   Source File: Read.java    License: Apache License 2.0 6 votes vote down vote up
SAMRecord secondSAMRecord(SAMFileHeader header) {
	SAMRecord r = new SAMRecord(header);
	r.setReadName(evidenceRecord.getReadName());
	r.setReferenceName(evidenceRecord.Chromosome);
	r.setAlignmentStart(Integer.valueOf(evidenceRecord.MateOffsetInReference) + 1);
	r.setMappingQuality(Integer.valueOf(evidenceRecord.ScoreAllele0));
	r.setReadPairedFlag(true);
	r.setReadUnmappedFlag(false);
	r.setReadNegativeStrandFlag(negative);
	r.setFirstOfPairFlag(evidenceRecord.side == 1);
	r.setSecondOfPairFlag(!r.getFirstOfPairFlag());

	r.setCigar(new Cigar(Utils.toCigarOperatorList(secondHalf.samCigarElements)));

	r.setReadBases(Utils.toByteArray(secondHalf.readBasesBuf));
	r.setBaseQualities(Utils.toByteArray(secondHalf.readScoresBuf));

	r.setAttribute("GC", Utils.toString(secondHalf.gcList));
	r.setAttribute("GS", Utils.toString(secondHalf.gsBuf));
	r.setAttribute("GQ", SAMUtils.phredToFastq(Utils.toByteArray(secondHalf.gqBuf)));

	return r;
}
 
Example 9
private void writeDebugLikelihoods(final GATKRead processedRead, final Haplotype haplotype, final double log10l){
    // Note: the precision of log10l in the debug output is only ~6 digits (ie., not all digits are necessarily printed)
    likelihoodsStream.printf("%s %s %s %s %s %s %f%n",
            haplotype.getBaseString(),
            new String(processedRead.getBases()),
            SAMUtils.phredToFastq(processedRead.getBaseQualities()),
            SAMUtils.phredToFastq(ReadUtils.getBaseInsertionQualities(processedRead)),
            SAMUtils.phredToFastq(ReadUtils.getBaseDeletionQualities(processedRead)),
            SAMUtils.phredToFastq(constantGCP),
            log10l);
}
 
Example 10
Source Project: picard   Source File: ClusterDataToSamConverter.java    License: MIT License 5 votes vote down vote up
private String getBarcodeQuality(ClusterData cluster) {
    final StringJoiner barcodeQ = new StringJoiner(MOLECULAR_INDEX_QUALITY_DELIMITER);

    for (int sampleBarcodeIndex : sampleBarcodeIndices) {
        barcodeQ.add(SAMUtils.phredToFastq(cluster.getRead(sampleBarcodeIndex).getQualities()));
    }
    return barcodeQ.toString();
}
 
Example 11
Source Project: picard   Source File: IlluminaBasecallsToFastq.java    License: MIT License 5 votes vote down vote up
private void makeFastqRecords(final FastqRecord[] recs, final int[] indices,
                              final ClusterData cluster, final boolean appendReadNumberSuffix) {
    for (short i = 0; i < indices.length; ++i) {
        final ReadData readData = cluster.getRead(indices[i]);
        final String readBases = StringUtil.bytesToString(readData.getBases()).replace('.', 'N');
        final String readName = readNameEncoder.generateReadName(cluster, appendReadNumberSuffix ? i + 1 : null);
        recs[i] = new FastqRecord(
                readName,
                readBases,
                null,
                SAMUtils.phredToFastq(readData.getQualities())
        );
    }
}
 
Example 12
Source Project: picard   Source File: FastqToSam.java    License: MIT License 5 votes vote down vote up
/** Based on the type of quality scores coming in, converts them to a numeric byte[] in phred scale. */
void convertQuality(final byte[] quals, final FastqQualityFormat version) {
    switch (version)  {
        case Standard:
            SAMUtils.fastqToPhred(quals);
            break ;
        case Solexa:
            solexaQualityConverter.convertSolexaQualityCharsToPhredBinary(quals);
            break ;
        case Illumina:
            solexaQualityConverter.convertSolexa_1_3_QualityCharsToPhredBinary(quals);
            break ;
        }
}
 
Example 13
Source Project: picard   Source File: BestEndMapqPrimaryAlignmentStrategy.java    License: MIT License 5 votes vote down vote up
public int compare(final SAMRecord rec1, final SAMRecord rec2) {
    if (rec1.getReadUnmappedFlag()) {
        if (rec2.getReadUnmappedFlag()) return 0;
        else return 1;
    } else if (rec2.getReadUnmappedFlag()) {
        return -1;
    }
    return -SAMUtils.compareMapqs(rec1.getMappingQuality(), rec2.getMappingQuality());
}
 
Example 14
public void considerBest(final SAMRecord rec) {
    if (bestMapq == -1) {
        bestMapq = rec.getMappingQuality();
        bestAlignments.add(rec);
    } else {
        final int cmp = SAMUtils.compareMapqs(bestMapq, rec.getMappingQuality());
        if (cmp < 0) {
            bestMapq = rec.getMappingQuality();
            bestAlignments.clear();
            bestAlignments.add(rec);
        } else if (cmp == 0) {
            bestAlignments.add(rec);
        }
    }
}
 
Example 15
public void considerBest(final SAMRecord firstEnd, final SAMRecord secondEnd) {
    final int thisPairMapq = SAMUtils.combineMapqs(firstEnd.getMappingQuality(), secondEnd.getMappingQuality());
    final int thisDistance = CoordMath.getLength(Math.min(firstEnd.getAlignmentStart(), secondEnd.getAlignmentStart()),
            Math.max(firstEnd.getAlignmentEnd(), secondEnd.getAlignmentEnd()));
    if (thisDistance > bestDistance || (thisDistance == bestDistance && thisPairMapq > bestPairMapq)) {
        bestDistance = thisDistance;
        bestPairMapq = thisPairMapq;
        bestAlignmentPairs.clear();
        bestAlignmentPairs.add(new AbstractMap.SimpleEntry<SAMRecord, SAMRecord>(firstEnd, secondEnd));
    } else if (thisDistance == bestDistance && thisPairMapq == bestPairMapq) {
        bestAlignmentPairs.add(new AbstractMap.SimpleEntry<SAMRecord, SAMRecord>(firstEnd, secondEnd));
    }
}
 
Example 16
Source Project: picard   Source File: CleanSamTester.java    License: MIT License 5 votes vote down vote up
protected void test() {
    try {
        final SamFileValidator validator = new SamFileValidator(new PrintWriter(System.out), 8000);

        // Validate it has the expected cigar
        validator.setIgnoreWarnings(true);
        validator.setVerbose(true, 1000);
        validator.setErrorsToIgnore(Arrays.asList(SAMValidationError.Type.MISSING_READ_GROUP));
        SamReaderFactory factory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT);
        SamReader samReader = factory.open(getOutput());
        final SAMRecordIterator iterator = samReader.iterator();
        while (iterator.hasNext()) {
            final SAMRecord rec = iterator.next();
            Assert.assertEquals(rec.getCigarString(), expectedCigar);
            if (SAMUtils.hasMateCigar(rec)) {
                Assert.assertEquals(SAMUtils.getMateCigarString(rec), expectedCigar);
            }
        }
        CloserUtil.close(samReader);

        // Run validation on the output file
        samReader = factory.open(getOutput());
        final boolean validated = validator.validateSamFileVerbose(samReader, null);
        CloserUtil.close(samReader);

        Assert.assertTrue(validated, "ValidateSamFile failed");
    } finally {
        IOUtil.recursiveDelete(getOutputDir().toPath());
    }
}
 
Example 17
Source Project: Hadoop-BAM   Source File: TestBAMSplitGuesser.java    License: MIT License 5 votes vote down vote up
@Test
public void test() throws Exception {
  Configuration conf = new Configuration();
  String bam = getClass().getClassLoader().getResource("test.bam").getFile();
  SeekableStream ss = WrapSeekable.openPath(conf, new Path(bam));
  BAMSplitGuesser bamSplitGuesser = new BAMSplitGuesser(ss, conf);
  long startGuess = bamSplitGuesser.guessNextBAMRecordStart(0, 3 * 0xffff + 0xfffe);
  assertEquals(SAMUtils.findVirtualOffsetOfFirstRecordInBam(new File(bam)), startGuess);
}
 
Example 18
Source Project: gatk   Source File: SATagBuilder.java    License: BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Returns a list of artificial GATKReads corresponding to the reads described by the SA tag in read.
 * Fills as much information as can be inferred from the original read onto the artificial read copies
 * This function is only used for testing (e.g. testGetArtificialReadsBasedOnSATag, testEmptyNMTagSupport)
 *
 * @param header the header to be used in constructing artificial reads
 */
public List<GATKRead> getArtificialReadsBasedOnSATag(SAMFileHeader header) {
    List<GATKRead> output = new ArrayList<>(supplementaryReads.size());
    GATKRead readCopy = read.copy();
    readCopy.setAttribute("SA", getTag());
    SAMRecord record = readCopy.convertToSAMRecord(header);

    List<SAMRecord> readRecords = SAMUtils.getOtherCanonicalAlignments(record);
    for (SAMRecord artificialRead : readRecords) {
        output.add(new SAMRecordToGATKReadAdapter(artificialRead));
    }

    return output;
}
 
Example 19
@Override
public int compare(final SAMRecord rec1, final SAMRecord rec2) {
    if (rec1.getReadUnmappedFlag()) {
        if (rec2.getReadUnmappedFlag()) return 0;
        else return 1;
    } else if (rec2.getReadUnmappedFlag()) {
        return -1;
    }
    return -SAMUtils.compareMapqs(rec1.getMappingQuality(), rec2.getMappingQuality());
}
 
Example 20
public void considerBest(final SAMRecord rec) {
    if (bestMapq == -1) {
        bestMapq = rec.getMappingQuality();
        bestAlignments.add(rec);
    } else {
        final int cmp = SAMUtils.compareMapqs(bestMapq, rec.getMappingQuality());
        if (cmp < 0) {
            bestMapq = rec.getMappingQuality();
            bestAlignments.clear();
            bestAlignments.add(rec);
        } else if (cmp == 0) {
            bestAlignments.add(rec);
        }
    }
}
 
Example 21
public void considerBest(final SAMRecord firstEnd, final SAMRecord secondEnd) {
    final int thisPairMapq = SAMUtils.combineMapqs(firstEnd.getMappingQuality(), secondEnd.getMappingQuality());
    final int thisDistance = CoordMath.getLength(Math.min(firstEnd.getAlignmentStart(), secondEnd.getAlignmentStart()),
            Math.max(firstEnd.getAlignmentEnd(), secondEnd.getAlignmentEnd()));
    if (thisDistance > bestDistance || (thisDistance == bestDistance && thisPairMapq > bestPairMapq)) {
        bestDistance = thisDistance;
        bestPairMapq = thisPairMapq;
        bestAlignmentPairs.clear();
        bestAlignmentPairs.add(new AbstractMap.SimpleEntry<>(firstEnd, secondEnd));
    } else if (thisDistance == bestDistance && thisPairMapq == bestPairMapq) {
        bestAlignmentPairs.add(new AbstractMap.SimpleEntry<>(firstEnd, secondEnd));
    }
}
 
Example 22
@Test(dataProvider = "sequenceStrings")
public void testApply(final String quals_in, final String basesOut, final String qualsOut, final String cigarOut) throws Exception {
    final String basesIn = "CTCAAGTACAAGCTGATCCAGACCTACAGGGTGATGTCATTAGAGGCACTGATAACACACACACTATGGGGTGGGGGTGGACAGTTCCCCACTGCAATCC";
    final BaseQualityClipReadTransformer trans = new BaseQualityClipReadTransformer(15);
    final Cigar cigar = new Cigar();
    cigar.add(new CigarElement(basesIn.length(), CigarOperator.MATCH_OR_MISMATCH));
    final GATKRead readIn = ArtificialReadUtils.createArtificialRead(basesIn.getBytes(),SAMUtils.fastqToPhred(quals_in), cigar.toString());
    final GATKRead readOut = trans.apply(readIn);
    Assert.assertEquals(readOut.getBases(),basesOut.getBytes());
    Assert.assertEquals(readOut.getBaseQualities(),SAMUtils.fastqToPhred(qualsOut));
    Assert.assertEquals(readOut.getCigar().toString(), cigarOut);
}
 
Example 23
@Test(dataProvider = "sequenceStrings")
public void testTest(final String quals_in, final String test_out) throws Exception {
    final BaseQualityReadTransformer filter = new BaseQualityReadTransformer(15);
    final byte[] bases = quals_in.getBytes().clone();
    Arrays.fill(bases,(byte)'A');
    final GATKRead read_in = ArtificialReadUtils.createArtificialRead(bases, SAMUtils.fastqToPhred(quals_in), "*");
    final GATKRead test_i = filter.apply(read_in);
    Assert.assertEquals(test_out,test_i.getBasesString());
}
 
Example 24
Source Project: gatk   Source File: BAQUnitTest.java    License: BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test
public void testBAQQualRange() {
    BAQ baq = new BAQ(1.0e-3, 0.1, 7, (byte) 4);         // matches current samtools parameters
    final byte ref = (byte) 'A';
    final byte alt = (byte) 'A';

    for (int i = 0; i <= SAMUtils.MAX_PHRED_SCORE; i++) {
        Assert.assertTrue(baq.calcEpsilon(ref, alt, (byte) i) >= 0.0, "Failed to get baq epsilon range");
    }
}
 
Example 25
@Test(groups = {"R", "spark"})
public void testAccumulators() throws Exception {
    final long[] qs= new long[128];
    final long[] oqs= new long[128];

    final Counts counts = new Counts(false);
    final long[] initQs = counts.getQualCounts();
    final long[] initOQs = counts.getOrigQualCounts();
    Assert.assertEquals(initQs, qs);
    Assert.assertEquals(initOQs, oqs);

    final GATKRead read1 = ArtificialReadUtils.createArtificialRead("aa".getBytes(), new byte[]{50, 50}, "2M");
    read1.setAttribute(SAMTag.OQ.name(), SAMUtils.phredToFastq(new byte[]{30, 40}));
    counts.addRead(read1);

    qs[50]+=2;//there are two bases in the read with a quality score of 50
    oqs[30]+=1;
    oqs[40]+=1;
    final long[] oneQs = counts.getQualCounts();
    final long[] oneOQs = counts.getOrigQualCounts();
    Assert.assertEquals(oneQs, qs);
    Assert.assertEquals(oneOQs, oqs);

    final Counts counts2 = new Counts(false);

    final GATKRead read2 = ArtificialReadUtils.createArtificialRead("aa".getBytes(), new byte[]{51, 51}, "2M");
    read2.setAttribute(SAMTag.OQ.name(), SAMUtils.phredToFastq(new byte[]{31, 41}));
    counts2.addRead(read2);

    counts.merge(counts2);
    qs[51]+=2;//there are two bases in the read with a quality score of 51
    oqs[31]+=1;  //new read OQ
    oqs[41]+=1;  //new read OQ
    final long[] mergedQs = counts.getQualCounts();
    final long[] mergedOQs = counts.getOrigQualCounts();
    Assert.assertEquals(mergedQs, qs);
    Assert.assertEquals(mergedOQs, oqs);
}
 
Example 26
Source Project: cramtools   Source File: EvidenceRecord.java    License: Apache License 2.0 5 votes vote down vote up
public static EvidenceRecord fromString(String line) {
	String[] words = line.split("\t");

	EvidenceRecord r = new EvidenceRecord();
	r.line = line;
	int i = 0;
	r.IntervalId = words[i++];
	r.Chromosome = words[i++];
	r.Slide = words[i++];
	r.Lane = words[i++];
	r.FileNumInLane = words[i++];
	r.DnbOffsetInLaneFile = words[i++];
	r.AlleleIndex = words[i++];
	r.Side = words[i++];
	r.Strand = words[i++];
	r.OffsetInAllele = words[i++];
	r.AlleleAlignment = words[i++];
	r.OffsetInReference = words[i++];
	r.ReferenceAlignment = words[i++];
	r.MateOffsetInReference = words[i++];
	r.MateReferenceAlignment = words[i++];
	r.MappingQuality = words[i++];
	r.ScoreAllele0 = words[i++];
	r.ScoreAllele1 = words[i++];
	r.ScoreAllele2 = words[i++];
	r.Sequence = words[i++];
	r.Scores = words[i++];

	r.interval = Integer.valueOf(r.IntervalId);
	r.negativeStrand = "+".equals(r.Strand) ? false : true;
	r.side = "L".equals(r.Side) ? 0 : 1;
	r.name = String.format("%s-%s-%s:%s", r.Slide, r.Lane, r.FileNumInLane, r.DnbOffsetInLaneFile);
	r.mapq = Integer.valueOf(SAMUtils.fastqToPhred(r.MappingQuality.charAt(0)));
	r.pos = Integer.valueOf(r.OffsetInReference);

	return r;
}
 
Example 27
Source Project: cramtools   Source File: TestQualityScorePreservation.java    License: Apache License 2.0 5 votes vote down vote up
@Ignore("Broken test.")
@Test
public void test3() {
	String line1 = "98573 1107 20 1 60 100M = 999587 -415 CTGGTCTTAGTTCCGCAAGTGGGTATATATAAAGGCTCAAAATCAATCTTTATATTGACATCTCTCTACTTATTTGTGTTGTCTGATGCTCATATTGTAG ::A<<[email protected];[email protected]HHHHDFHHHHHGHHHHHHHHHHHH";
	String line2 = "98738 1187 20 18 29 99M1S = 1000253 432 AGCGGGGATATATAAAGGCTCAAAATTACTTTTTATATGGACAACTCTCTACTGCTTTGAGATGACTGATACTCATATTGATGGAGCTTTATCAAGAAAT !\"#$%&'()*+-./0'''''''''''#'#'#'''''''#''''#'''''''''##''''#'#''#'''''#'''''''''##''''#''##''''''''?";
	String seqName = "20";
	List<String> lines = Arrays.asList(new String[] { line2, line1 });

	byte[] ref = "CTGGTCTTAGTTCCGCAAGTGGGTATATATAAAGGCTCAAAATCAATCTTTATATTGACATCTCTCTACTTATTTGTGTTGTCTGATGCTCATATTGTAGGAGATTCCTCAAGAAAGG"
			.getBytes();
	ReferenceTracks tracks = new ReferenceTracks(0, seqName, ref);
	QualityScorePreservation p = new QualityScorePreservation("R8-N40-M40-D40");

	for (String line : lines) {
		SAMRecord record = buildSAMRecord(seqName, line);

		Sam2CramRecordFactory f = new Sam2CramRecordFactory(ref, record.getHeader(), CramVersions.CRAM_v3);
		CramCompressionRecord cramRecord = f.createCramRecord(record);

		p.addQualityScores(record, cramRecord, tracks);
		if (!cramRecord.isForcePreserveQualityScores()) {
			CramNormalizer.restoreQualityScores((byte) 30, Collections.singletonList(cramRecord));
		}

		StringBuffer sb = new StringBuffer();
		sb.append(record.getBaseQualityString());
		sb.append("\n");
		sb.append(SAMUtils.phredToFastq(cramRecord.qualityScores));

		assertArrayEquals(sb.toString(), record.getBaseQualities(), cramRecord.qualityScores);
	}
}
 
Example 28
Source Project: cramtools   Source File: TestQualityScorePreservation.java    License: Apache License 2.0 5 votes vote down vote up
@Ignore("Broken test.")
@Test
public void test4() {
	String line2 = "98738 1187 20 18 29 99M1S = 1000253 432 AGCGGGGATATATAAAGGCTCAAAATTACTTTTTATATGGACAACTCTCTACTGCTTTGAGATGACTGATACTCATATTGATGGAGCTTTATCAAGAAAT !\"#$%&'()*+-./0'''''''''''#'#'#'''''''#''''#'''''''''##''''#'#''#'''''#'''''''''##''''#''##''''''''?";
	String seqName = "20";
	List<String> lines = Arrays.asList(new String[] { line2 });

	byte[] ref = "CTGGTCTTAGTTCCGCAAGTGGGTATATATAAAGGCTCAAAATCAATCTTTATATTGACATCTCTCTACTTATTTGTGTTGTCTGATGCTCATATTGTAGGAGATTCCTCAAGAAAGG"
			.getBytes();
	ReferenceTracks tracks = new ReferenceTracks(0, seqName, ref);
	QualityScorePreservation p = new QualityScorePreservation("R40X10-N40-U40");
	for (int i = 0; i < ref.length; i++)
		tracks.addCoverage(i + 1, 66);

	for (String line : lines) {
		SAMRecord record = buildSAMRecord(seqName, line);

		Sam2CramRecordFactory f = new Sam2CramRecordFactory(ref, record.getHeader(), CramVersions.CRAM_v3);
		CramCompressionRecord cramRecord = f.createCramRecord(record);

		p.addQualityScores(record, cramRecord, tracks);
		if (!cramRecord.isForcePreserveQualityScores()) {
			CramNormalizer.restoreQualityScores((byte) 30, Collections.singletonList(cramRecord));
		}

		StringBuffer sb = new StringBuffer();
		sb.append(record.getBaseQualityString());
		sb.append("\n");
		sb.append(SAMUtils.phredToFastq(cramRecord.qualityScores));

		assertArrayEquals(sb.toString(), record.getBaseQualities(), cramRecord.qualityScores);
	}
}
 
Example 29
@Test
public void testComputeLikelihoods(){
    final LikelihoodEngineArgumentCollection LEAC = new LikelihoodEngineArgumentCollection();

    PairHMMLikelihoodCalculationEngine.writeLikelihoodsToFile = true;

    final ReadLikelihoodCalculationEngine lce = new PairHMMLikelihoodCalculationEngine((byte) SAMUtils.MAX_PHRED_SCORE, new PairHMMNativeArguments(),
            PairHMM.Implementation.LOGLESS_CACHING, MathUtils.logToLog10(QualityUtils.qualToErrorProbLog10(LEAC.phredScaledGlobalReadMismappingRate)),
            PairHMMLikelihoodCalculationEngine.PCRErrorModel.CONSERVATIVE);

    final Map<String, List<GATKRead>> perSampleReadList= new HashMap<>();
    final int n = 10 ;
    final GATKRead read1= ArtificialReadUtils.createArtificialRead(TextCigarCodec.decode(n + "M"));
    read1.setMappingQuality(60);
    final String sample1 = "sample1";
    perSampleReadList.put(sample1, Arrays.asList(read1));

    final SampleList samples = new IndexedSampleList(sample1);

    final AssemblyResultSet assemblyResultSet = new AssemblyResultSet();
    final byte[] bases = Strings.repeat("A", n+1).getBytes();
    final Haplotype hap1 = new Haplotype(bases, true);
    hap1.setGenomeLocation(read1);
    assemblyResultSet.add(hap1);

    final byte[] basesModified= bases;
    basesModified[5] = 'C';//different bases
    final Haplotype hap2 = new Haplotype(basesModified, false);
    hap2.setGenomeLocation(read1);//use same loc
    assemblyResultSet.add(hap2);


    final ReadLikelihoods<Haplotype> likes = lce.computeReadLikelihoods(assemblyResultSet, samples, perSampleReadList);
    final LikelihoodMatrix<Haplotype> mtx = likes.sampleMatrix(0);

    Assert.assertEquals(mtx.numberOfAlleles(), 2);
    Assert.assertEquals(mtx.numberOfReads(), 1);
    final double v1 = mtx.get(0, 0);
    final double v2 = mtx.get(1, 0);

    Assert.assertTrue(v1 > v2, "matching haplotype should have a higher likelihood");
    lce.close();
    new File(PairHMMLikelihoodCalculationEngine.LIKELIHOODS_FILENAME).delete();
}
 
Example 30
Source Project: picard   Source File: ClusterDataToSamConverter.java    License: MIT License 4 votes vote down vote up
/**
 * Creates the SAMRecord for each read in the cluster
 */
public IlluminaBasecallsToSam.SAMRecordsForCluster convertClusterToOutputRecord(final ClusterData cluster) {

    final IlluminaBasecallsToSam.SAMRecordsForCluster ret = new IlluminaBasecallsToSam.SAMRecordsForCluster(outputRecordsPerCluster);
    final String readName = readNameEncoder.generateReadName(cluster, null); // Use null here to prevent /1 or /2 suffixes on read name.

    // Get and transform the unmatched barcode, if any, to store with the reads
    String unmatchedBarcode = null;
    if (hasSampleBarcode && (this.barcodePopulationStrategy == PopulateBarcode.ALWAYS ||
            this.barcodePopulationStrategy == PopulateBarcode.ORPHANS_ONLY &&
                    cluster.getMatchedBarcode() == null ||
            this.barcodePopulationStrategy == PopulateBarcode.INEXACT_MATCH &&
                    !IlluminaUtil.byteArrayToString(getBarcodeSeqs(cluster),"").equals(cluster.getMatchedBarcode()))) {
        unmatchedBarcode = getUnmatchedBarcode(cluster);
    }

    String barcodeQuality = null;
    if (unmatchedBarcode != null && includeQualitiesWithBarcode) {
        barcodeQuality = getBarcodeQuality(cluster);
    }


    final List<String> molecularIndexes = new ArrayList<>();
    final List<String> molecularIndexQualities = new ArrayList<>();
    if (hasMolecularBarcode) {
        for (int molecularBarcodeIndice : molecularBarcodeIndices) {
            molecularIndexes.add(convertMissingToNoCall(new String(cluster.getRead(molecularBarcodeIndice).getBases())));
            molecularIndexQualities.add(SAMUtils.phredToFastq(cluster.getRead(molecularBarcodeIndice).getQualities()));
        }
    }

    final SAMRecord firstOfPair = createSamRecord(
            cluster.getRead(templateIndices[0]),
            readName,
            cluster.isPf(),
            true,
            unmatchedBarcode,
            barcodeQuality,
            molecularIndexes,
            molecularIndexQualities);
    ret.records[0] = firstOfPair;


    SAMRecord secondOfPair = null;

    if (isPairedEnd) {
        secondOfPair = createSamRecord(
                cluster.getRead(templateIndices[1]),
                readName,
                cluster.isPf(),
                false,
                unmatchedBarcode,
                barcodeQuality,
                molecularIndexes,
                molecularIndexQualities);
        ret.records[1] = secondOfPair;
    }

    if (adapterMarker != null) {
        // Clip the read
        if (isPairedEnd) {
            adapterMarker.adapterTrimIlluminaPairedReads(firstOfPair, secondOfPair);
        } else {
            adapterMarker.adapterTrimIlluminaSingleRead(firstOfPair);
        }
    }
    return ret;
}