htsjdk.samtools.fastq.FastqRecord Java Examples

The following examples show how to use htsjdk.samtools.fastq.FastqRecord. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: FastqToSam.java    From picard with MIT License 6 votes vote down vote up
/**
 * Builds an unmapped {@link SAMRecord} from one FASTQ record.
 *
 * <p>The record is named {@code baseName}, takes its bases from {@code frec}, is flagged unmapped and is
 * tagged with {@code READ_GROUP_NAME} as its read group. The FASTQ quality string is turned into bytes,
 * handed to {@code convertQuality(..., QUALITY_FORMAT)}, and every resulting value (read as an unsigned
 * byte) must lie within {@code MIN_Q..MAX_Q}.
 *
 * @param header   header passed to the new record
 * @param baseName read name to assign
 * @param frec     FASTQ record supplying the bases and qualities
 * @param paired   when {@code true}, the record is also flagged as paired with an unmapped mate
 * @return the populated record
 * @throws PicardException if a quality value falls outside {@code MIN_Q..MAX_Q}
 */
private SAMRecord createSamRecord(final SAMFileHeader header, final String baseName, final FastqRecord frec, final boolean paired) {
    final SAMRecord record = new SAMRecord(header);
    record.setReadName(baseName);
    record.setReadString(frec.getReadString());
    record.setReadUnmappedFlag(true);
    record.setAttribute(ReservedTagConstants.READ_GROUP_ID, READ_GROUP_NAME);

    final byte[] qualities = StringUtil.stringToBytes(frec.getBaseQualityString());
    convertQuality(qualities, QUALITY_FORMAT);
    for (int i = 0; i < qualities.length; i++) {
        // Mask so the byte is range-checked as an unsigned value.
        final int unsignedQual = qualities[i] & 0xff;
        final boolean inRange = unsignedQual >= MIN_Q && unsignedQual <= MAX_Q;
        if (!inRange) {
            throw new PicardException("Base quality " + unsignedQual + " is not in the range " + MIN_Q + ".." +
                    MAX_Q + " for read " + frec.getReadHeader());
        }
    }
    record.setBaseQualities(qualities);

    if (paired) {
        record.setReadPairedFlag(true);
        record.setMateUnmappedFlag(true);
    }
    return record;
}
 
Example #2
Source File: SamToFastqWithTags.java    From picard with MIT License 6 votes vote down vote up
/**
 * Writes one FASTQ record per configured sequence tag group, assembled from the tag values of {@code read}.
 *
 * <p>Does nothing when {@code SEQUENCE_TAG_GROUP} is empty. For group {@code i} the sequence is the values
 * of {@code SPLIT_SEQUENCE_TAGS.get(i)} (each obtained through {@code assertTagExists}) joined with
 * {@code SPLIT_SEPARATOR_TAGS.get(i)}. When {@code QUALITY_TAG_GROUP} is empty the quality string is
 * {@code TAG_SPLIT_QUAL} repeated to the sequence length; otherwise it is the values of
 * {@code SPLIT_QUALITY_TAGS.get(i)} joined with {@code TAG_SPLIT_QUAL} repeated to the separator's length.
 *
 * @param read       record whose tags are read
 * @param mateNumber appended to the read name as {@code "/" + mateNumber} when non-null
 * @param tagWriters writers indexed in parallel with the tag groups
 */
private void writeTagRecords(final SAMRecord read, final Integer mateNumber, final List<FastqWriter> tagWriters) {
    if (SEQUENCE_TAG_GROUP.isEmpty()) {
        return;
    }

    String seqHeader = read.getReadName();
    if (mateNumber != null) {
        seqHeader = seqHeader + "/" + mateNumber;
    }

    for (int groupIdx = 0; groupIdx < SEQUENCE_TAG_GROUP.size(); groupIdx++) {
        final String separator = SPLIT_SEPARATOR_TAGS.get(groupIdx);
        final String[] sequenceTags = SPLIT_SEQUENCE_TAGS.get(groupIdx);
        final String joinedSequence = String.join(separator,
                Arrays.stream(sequenceTags)
                        .map(tag -> assertTagExists(read, tag))
                        .collect(Collectors.toList()));

        // Both lookups below happen unconditionally, before the quality branch is chosen.
        final String qualitySeparator = StringUtils.repeat(TAG_SPLIT_QUAL, separator.length());
        final String[] qualityTags = SPLIT_QUALITY_TAGS.get(groupIdx);

        final String joinedQualities;
        if (QUALITY_TAG_GROUP.isEmpty()) {
            // No quality tags configured: pad with the placeholder quality, one per sequence character.
            joinedQualities = StringUtils.repeat(TAG_SPLIT_QUAL, joinedSequence.length());
        } else {
            joinedQualities = String.join(qualitySeparator,
                    Arrays.stream(qualityTags)
                            .map(tag -> assertTagExists(read, tag))
                            .collect(Collectors.toList()));
        }

        final FastqWriter target = tagWriters.get(groupIdx);
        target.write(new FastqRecord(seqHeader, joinedSequence, "", joinedQualities));
    }
}
 
Example #3
Source File: IlluminaBasecallsToFastq.java    From picard with MIT License 5 votes vote down vote up
/**
 * Fills {@code recs} with one {@link FastqRecord} per entry of {@code indices}.
 *
 * <p>For position {@code i}, the read at {@code cluster.getRead(indices[i])} supplies the bases (with every
 * {@code '.'} replaced by {@code 'N'}) and the qualities (converted with {@code SAMUtils.phredToFastq}).
 * The read name comes from {@code readNameEncoder}.
 *
 * @param recs                   output array, written at positions {@code 0..indices.length-1}
 * @param indices                read indices to fetch from {@code cluster}
 * @param cluster                source of the read data
 * @param appendReadNumberSuffix when {@code true}, {@code i + 1} is passed to the name encoder; otherwise
 *                               {@code null} is passed
 */
private void makeFastqRecords(final FastqRecord[] recs, final int[] indices,
                              final ClusterData cluster, final boolean appendReadNumberSuffix) {
    for (short slot = 0; slot < indices.length; ++slot) {
        final ReadData read = cluster.getRead(indices[slot]);
        final String bases = StringUtil.bytesToString(read.getBases()).replace('.', 'N');

        final Integer readNumber;
        if (appendReadNumberSuffix) {
            readNumber = slot + 1;
        } else {
            readNumber = null;
        }
        final String name = readNameEncoder.generateReadName(cluster, readNumber);
        final String qualities = SAMUtils.phredToFastq(read.getQualities());

        recs[slot] = new FastqRecord(name, bases, null, qualities);
    }
}
 
Example #4
Source File: FastqToSam.java    From picard with MIT License 5 votes vote down vote up
/**
 * Creates a simple SAM file from a single fastq file.
 *
 * <p>Each FASTQ record is converted with {@code createSamRecord}, explicitly flagged as not paired, added
 * to {@code writer} and reported to a {@link ProgressLogger}.
 *
 * @param freader source of FASTQ records
 * @param writer  destination for the generated SAM records
 * @return the number of records read from {@code freader}
 */
protected int doUnpaired(final FastqReader freader, final SAMFileWriter writer) {
    int readCount = 0;
    final ProgressLogger progress = new ProgressLogger(LOG);
    while (freader.hasNext()) {
        final FastqRecord fastq = freader.next();
        final SAMFileHeader header = writer.getFileHeader();
        final String readName = SequenceUtil.getSamReadNameFromFastqHeader(fastq.getReadHeader());

        final SAMRecord sam = createSamRecord(header, readName, fastq, false);
        sam.setReadPairedFlag(false);
        writer.addAlignment(sam);
        progress.record(sam);

        readCount++;
    }

    return readCount;
}
 
Example #5
Source File: FastqToSam.java    From picard with MIT License 5 votes vote down vote up
/**
 * More complicated method that takes two fastq files and builds pairing information in the SAM.
 *
 * <p>Records are consumed from both readers in lock-step. Each pair shares the name returned by
 * {@code getBaseName}; the record from {@code freader1} is flagged first-of-pair and the one from
 * {@code freader2} second-of-pair.
 *
 * @param freader1 source of first-of-pair records
 * @param freader2 source of second-of-pair records
 * @param writer   destination for the generated SAM records
 * @return the number of pairs processed
 * @throws PicardException if one reader still has records once the other is exhausted
 */
protected int doPaired(final FastqReader freader1, final FastqReader freader2, final SAMFileWriter writer) {
    int readCount = 0;
    final ProgressLogger progress = new ProgressLogger(LOG);
    while (freader1.hasNext() && freader2.hasNext()) {
        final FastqRecord firstFastq = freader1.next();
        final FastqRecord secondFastq = freader2.next();

        final String firstName = SequenceUtil.getSamReadNameFromFastqHeader(firstFastq.getReadHeader());
        final String secondName = SequenceUtil.getSamReadNameFromFastqHeader(secondFastq.getReadHeader());
        final String pairName = getBaseName(firstName, secondName, freader1, freader2);

        final SAMRecord firstSam = createSamRecord(writer.getFileHeader(), pairName, firstFastq, true);
        firstSam.setFirstOfPairFlag(true);
        firstSam.setSecondOfPairFlag(false);
        writer.addAlignment(firstSam);
        progress.record(firstSam);

        final SAMRecord secondSam = createSamRecord(writer.getFileHeader(), pairName, secondFastq, true);
        secondSam.setFirstOfPairFlag(false);
        secondSam.setSecondOfPairFlag(true);
        writer.addAlignment(secondSam);
        progress.record(secondSam);

        readCount++;
    }

    // The loop stops as soon as either reader is empty; anything left over means unequal inputs.
    final boolean leftoverRecords = freader1.hasNext() || freader2.hasNext();
    if (leftoverRecords) {
        throw new PicardException("Input paired fastq files must be the same length");
    }

    return readCount;
}
 
Example #6
Source File: SamToFastqTest.java    From picard with MIT License 5 votes vote down vote up
/**
 * Converts the clipping test SAM file into two FASTQ files and checks the bases and qualities of the first
 * two records in each output.
 *
 * @param clippingAction value for {@code CLIPPING_ACTION}; when {@code null} no clipping arguments are passed
 * @param bases1_1       expected bases of the first record in the first-end FASTQ
 * @param quals1_1       expected qualities of the first record in the first-end FASTQ
 * @param bases1_2       expected bases of the second record in the first-end FASTQ
 * @param quals1_2       expected qualities of the second record in the first-end FASTQ
 * @param bases2_1       expected bases of the first record in the second-end FASTQ
 * @param quals2_1       expected qualities of the first record in the second-end FASTQ
 * @param bases2_2       expected bases of the second record in the second-end FASTQ
 * @param quals2_2       expected qualities of the second record in the second-end FASTQ
 * @param testName       label used as the assertion message
 * @throws IOException if the temporary FASTQ files cannot be created
 */
@Test(dataProvider = "clippingTests")
public void testClipping(final String clippingAction, final String bases1_1, final String quals1_1, final String bases1_2, final String quals1_2,
                         final String bases2_1, final String quals2_1, final String bases2_2, final String quals2_2, final String testName) throws IOException {
    final File samFile = new File(TEST_DATA_DIR, CLIPPING_TEST_DATA) ;
    final File f1 = File.createTempFile("clippingtest1", "fastq");
    final File f2 = File.createTempFile("clippingtest2", "fastq");
    f1.deleteOnExit();
    f2.deleteOnExit();

    if (clippingAction != null) {
        convertFile(new String[]{
            "INPUT="            + samFile.getAbsolutePath(),
            "FASTQ="            + f1.getAbsolutePath(),
            "SECOND_END_FASTQ=" + f2.getAbsolutePath(),
            "CLIPPING_ACTION="  + clippingAction,
            "CLIPPING_ATTRIBUTE=" + "XT"
        });
    } else {
        convertFile(new String[]{
            "INPUT="            + samFile.getAbsolutePath(),
            "FASTQ="            + f1.getAbsolutePath(),
            "SECOND_END_FASTQ=" + f2.getAbsolutePath(),
        });
    }

    // Each reader is opened in try-with-resources so it is closed even when an assertion fails.
    try (FastqReader firstEndReader = new FastqReader(f1)) {
        final Iterator<FastqRecord> it = firstEndReader.iterator();
        final FastqRecord first = it.next();
        Assert.assertEquals(first.getReadString(), bases1_1, testName);
        Assert.assertEquals(first.getBaseQualityString(), quals1_1, testName);
        final FastqRecord second = it.next();
        Assert.assertEquals(second.getReadString(), bases1_2, testName);
        Assert.assertEquals(second.getBaseQualityString(), quals1_2, testName);
    }

    try (FastqReader secondEndReader = new FastqReader(f2)) {
        final Iterator<FastqRecord> it = secondEndReader.iterator();
        final FastqRecord first = it.next();
        Assert.assertEquals(first.getReadString(), bases2_1, testName);
        Assert.assertEquals(first.getBaseQualityString(), quals2_1, testName);
        final FastqRecord second = it.next();
        Assert.assertEquals(second.getReadString(), bases2_2, testName);
        Assert.assertEquals(second.getBaseQualityString(), quals2_2, testName);
    }
}
 
Example #7
Source File: SamToFastqTest.java    From picard with MIT License 5 votes vote down vote up
/**
 * Converts a SAM file to paired FASTQ files with the given trim and max-bases settings, then checks that
 * every record in each output has the expected read and quality-string length.
 *
 * @param samFilename         name of the input SAM file under {@code TEST_DATA_DIR}
 * @param read1Trim           value passed as {@code READ1_TRIM}
 * @param read1MaxBases       value passed as {@code READ1_MAX_BASES_TO_WRITE}
 * @param expectedRead1Length expected length of every read in the first-end FASTQ
 * @param read2Trim           value passed as {@code READ2_TRIM}
 * @param read2MaxBases       value passed as {@code READ2_MAX_BASES_TO_WRITE}
 * @param expectedRead2Length expected length of every read in the second-end FASTQ
 * @throws IOException declared by the test signature
 */
@Test(dataProvider = "trimmedData")
public void testTrimming(final String samFilename, final int read1Trim,
                         final int read1MaxBases, final int expectedRead1Length, final int read2Trim,
                         final int read2MaxBases, final int expectedRead2Length) throws IOException {

    final File samFile = new File(TEST_DATA_DIR, samFilename);
    final File pair1File = newTempFastqFile("pair1");
    final File pair2File = newTempFastqFile("pair2");
    pair1File.deleteOnExit();
    pair2File.deleteOnExit();

    convertFile(new String[]{
          "INPUT=" + samFile.getAbsolutePath(),
          "FASTQ=" + pair1File.getAbsolutePath(),
          "SECOND_END_FASTQ=" + pair2File.getAbsolutePath(),
          "READ1_TRIM=" + read1Trim,
          "READ1_MAX_BASES_TO_WRITE=" + read1MaxBases,
          "READ2_TRIM=" + read2Trim,
          "READ2_MAX_BASES_TO_WRITE=" + read2MaxBases
    });

    // Each reader is opened in try-with-resources so it is closed even when an assertion fails.
    try (FastqReader pair1Reader = new FastqReader(pair1File)) {
        for (final FastqRecord first : pair1Reader) {
            Assert.assertEquals(first.getReadString().length(), expectedRead1Length, "Incorrect read length");
            Assert.assertEquals(first.getBaseQualityString().length(), expectedRead1Length, "Incorrect quality string length");
        }
    }
    try (FastqReader pair2Reader = new FastqReader(pair2File)) {
        for (final FastqRecord second : pair2Reader) {
            Assert.assertEquals(second.getReadString().length(), expectedRead2Length, "Incorrect read length");
            Assert.assertEquals(second.getBaseQualityString().length(), expectedRead2Length, "Incorrect quality string length");
        }
    }
}
 
Example #8
Source File: SamToFastqTest.java    From picard with MIT License 5 votes vote down vote up
/**
 * Collects the read name of every record in a FASTQ file.
 *
 * @param file FASTQ file to read
 * @return a set holding the value of {@code getReadName()} for each record in {@code file}
 */
protected static Set<String> createFastqReadHeaderSet(final File file) {
    final Set<String> set = new HashSet<String>();
    // try-with-resources closes the reader once all records are consumed, or if reading fails part-way.
    try (FastqReader freader = new FastqReader(file)) {
        while (freader.hasNext()) {
            final FastqRecord frec = freader.next();
            set.add(frec.getReadName());
        }
    }
    return set;
}
 
Example #9
Source File: PSFilterTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Wraps the contents of a FASTQ record in a {@link GATKRead}.
 *
 * <p>The read is backed by a {@link SAMRecord} constructed with a {@code null} argument; its name, base
 * qualities and bases are copied from {@code rec}, in that order.
 *
 * @param rec FASTQ record to convert
 * @return a new read carrying the record's name, qualities and bases
 */
private static final GATKRead fastqRecordToGATKRead(final FastqRecord rec) {
    final SAMRecord backingRecord = new SAMRecord(null);
    final GATKRead converted = new SAMRecordToGATKReadAdapter(backingRecord);

    converted.setName(rec.getReadName());
    converted.setBaseQualities(rec.getBaseQualities());
    converted.setBases(rec.getReadBases());

    return converted;
}
 
Example #10
Source File: PBSIMFastqReader.java    From varsim with BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * get the next entry of PBSim output
 *
 * <p>One FASTQ record and one MAF record are consumed per call. The MAF record must have exactly two
 * entries: the first with source {@code "ref"} and the second with a source equal to the read id.
 *
 * @return null if no more entries, otherwise a new instance of SimulatedReadPair (read1 is valid and read2 is null)
 * @throws RuntimeException if the MAF input runs out before the FASTQ input, if the MAF record does not
 *                          have the expected shape, or if the read id does not split into two
 *                          {@code '_'}-separated parts after its first character
 */
@Override
public SimulatedReadPair getNextReadPair() throws IOException {
    if (!fastq.hasNext()) {
        return null;
    }
    if (!maf.hasNext()) {
        //should formally use zipped iterator
        throw new RuntimeException("MAF input has no more records but FASTQ input still has reads");
    }

    FastqRecord fastqEntry = fastq.next();
    MafRecord mafEntry = maf.next();

    SimulatedRead read = new SimulatedRead();
    read.fragment = 1; //always read-1 since there is no pair-ended-ness
    read.setReadId(fastqEntry.getReadHeader());

    read.sequence = fastqEntry.getReadString();
    read.quality = fastqEntry.getBaseQualityString();

    if (mafEntry.size() != 2) throw new RuntimeException("unexpected MAF data");
    if (!mafEntry.get(0).src.equals("ref")) throw new RuntimeException("unexpected MAF data");
    if (!mafEntry.get(1).src.equals(read.getReadId())) throw new RuntimeException("unmatched read names");

    //read name is S%d_%d, where the first integer is CHR index and second integer is read number
    final String[] tags = read.getReadId().substring(1).split("_");
    if (tags.length != 2) throw new RuntimeException("unexpected MAF data");

    final GenomeLocation loc = new GenomeLocation(
            idx2Chr.get(Integer.parseInt(tags[0])),
            mafEntry.get(0).start0 + 1, // 0-base to 1-base conversion
            mafEntry.get(0).strand ? 0 : 1); // MAF's "+" maps to 0, "-" to 1

    read.locs1.add(loc);
    read.origLocs1.add(loc);
    read.alignedBases1 = mafEntry.get(1).size;

    return new SimulatedReadPair(read);
}
 
Example #11
Source File: IlluminaBasecallsToFastq.java    From picard with MIT License 4 votes vote down vote up
/**
 * Writes {@code records[i]} to {@code writers[i]} for every index of {@code writers}.
 *
 * @param writers destination writers
 * @param records records to write, indexed in parallel with {@code writers}
 */
private void write(final FastqWriter[] writers, final FastqRecord[] records) {
    int slot = 0;
    while (slot < writers.length) {
        final FastqWriter target = writers[slot];
        target.write(records[slot]);
        slot++;
    }
}
 
Example #12
Source File: IlluminaBasecallsToFastq.java    From picard with MIT License 4 votes vote down vote up
/**
 * Allocates the three record arrays with the requested sizes; their elements start out {@code null}.
 *
 * @param numTemplates         length of {@code templateRecords}
 * @param numSampleBarcodes    length of {@code sampleBarcodeRecords}
 * @param numMolecularBarcodes length of {@code molecularBarcodeRecords}
 */
FastqRecordsForCluster(final int numTemplates, final int numSampleBarcodes, final int numMolecularBarcodes) {
    this.templateRecords = new FastqRecord[numTemplates];
    this.sampleBarcodeRecords = new FastqRecord[numSampleBarcodes];
    this.molecularBarcodeRecords = new FastqRecord[numMolecularBarcodes];
}
 
Example #13
Source File: IlluminaBasecallsToFastq.java    From picard with MIT License 4 votes vote down vote up
/**
 * Writes every element of {@code recs}, in array order, to {@code writer}.
 *
 * @param recs records to write
 */
private void encodeArray(final FastqRecord[] recs) {
    for (int i = 0; i < recs.length; i++) {
        writer.write(recs[i]);
    }
}
 
Example #14
Source File: IlluminaBasecallsToFastq.java    From picard with MIT License 4 votes vote down vote up
/**
 * Fills every slot of {@code recs}, in array order, with successive results of {@code reader.next()}.
 *
 * @param recs array to populate; its length determines how many records are read
 */
private void decodeArray(final FastqRecord[] recs) {
    int slot = 0;
    while (slot < recs.length) {
        recs[slot] = reader.next();
        slot++;
    }
}
 
Example #15
Source File: SamToFastq.java    From picard with MIT License 4 votes vote down vote up
/**
 * Writes {@code read} to {@code writer} as a FASTQ record after applying the configured transformations.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>If {@code CLIPPING_ATTRIBUTE} is set and the read carries that attribute, the bases and/or
 *       qualities are passed through {@code clip(...)} according to {@code CLIPPING_ACTION}. A clip point
 *       below {@code CLIPPING_MIN_LENGTH} is replaced by the smaller of the read length and
 *       {@code CLIPPING_MIN_LENGTH}.</li>
 *   <li>If {@code RE_REVERSE} is set and the read is on the negative strand, the bases are
 *       reverse-complemented and the qualities reversed.</li>
 *   <li>The first {@code basesToTrim} characters are dropped from both strings.</li>
 *   <li>If {@code QUALITY} is set, both strings are cut at the quality trim point, keeping at least one
 *       base.</li>
 *   <li>Both strings are capped at {@code maxBasesToWrite} characters when that limit is non-null.</li>
 * </ol>
 *
 * @param read            record to convert
 * @param mateNumber      appended to the read name as {@code "/" + mateNumber} when non-null
 * @param writer          destination FASTQ writer
 * @param basesToTrim     number of leading bases to drop; ignored unless positive
 * @param maxBasesToWrite maximum number of bases to emit, or {@code null} for no limit
 */
private void writeRecord(final SAMRecord read, final Integer mateNumber, final FastqWriter writer,
                         final int basesToTrim, final Integer maxBasesToWrite) {
    String header = read.getReadName();
    if (mateNumber != null) {
        header = header + "/" + mateNumber;
    }
    String bases = read.getReadString();
    String quals = read.getBaseQualityString();

    // If we're clipping, do the right thing to the bases or qualities
    if (CLIPPING_ATTRIBUTE != null) {
        Integer clipPoint = (Integer) read.getAttribute(CLIPPING_ATTRIBUTE);
        if (clipPoint != null) {
            if (clipPoint < CLIPPING_MIN_LENGTH) {
                clipPoint = Math.min(bases.length(), CLIPPING_MIN_LENGTH);
            }

            final boolean positiveStrand = !read.getReadNegativeStrandFlag();
            if (CLIPPING_ACTION.equalsIgnoreCase(CLIP_TRIM)) {
                bases = clip(bases, clipPoint, null, positiveStrand);
                quals = clip(quals, clipPoint, null, positiveStrand);
            } else if (CLIPPING_ACTION.equalsIgnoreCase(CLIP_TO_N)) {
                bases = clip(bases, clipPoint, CLIP_TO_N.charAt(0), positiveStrand);
            } else {
                // Any other action is parsed as an integer quality value to substitute.
                final byte replacementPhred = (byte) Integer.parseInt(CLIPPING_ACTION);
                final char replacementQual = SAMUtils.phredToFastq(new byte[]{replacementPhred}).charAt(0);
                quals = clip(quals, clipPoint, replacementQual, positiveStrand);
            }
        }
    }

    if (RE_REVERSE && read.getReadNegativeStrandFlag()) {
        bases = SequenceUtil.reverseComplement(bases);
        quals = StringUtil.reverseString(quals);
    }

    if (basesToTrim > 0) {
        bases = bases.substring(basesToTrim);
        quals = quals.substring(basesToTrim);
    }

    // Perform quality trimming if desired, making sure to leave at least one base!
    if (QUALITY != null) {
        final byte[] phredScores = SAMUtils.fastqToPhred(quals);
        final int trimPoint = Math.max(1, TrimmingUtil.findQualityTrimPoint(phredScores, QUALITY));
        if (trimPoint < phredScores.length) {
            bases = bases.substring(0, trimPoint);
            quals = quals.substring(0, trimPoint);
        }
    }

    if (maxBasesToWrite != null && maxBasesToWrite < bases.length()) {
        bases = bases.substring(0, maxBasesToWrite);
        quals = quals.substring(0, maxBasesToWrite);
    }

    writer.write(new FastqRecord(header, bases, "", quals));
}