htsjdk.samtools.fastq.FastqReader Java Examples

The following examples show how to use htsjdk.samtools.fastq.FastqReader. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: FastqToSam.java    From picard with MIT License 6 votes vote down vote up
/**
 * Looks at fastq input(s) and attempts to determine the proper quality format.
 *
 * Closes the reader(s) by side effect. The readers are closed even when sampling
 * records from them fails, so callers never have to clean up after this method.
 *
 * @param reader1 The first fastq input
 * @param reader2 The second fastq input, if necessary. To not use this input, set it to null
 * @param expectedQuality If provided, will be used for sanity checking. If left null, autodetection will occur
 * @return the quality format produced by the detector's best guess
 */
public static FastqQualityFormat determineQualityFormat(final FastqReader reader1, final FastqReader reader2, final FastqQualityFormat expectedQuality) {
    final QualityEncodingDetector detector = new QualityEncodingDetector();

    try {
        if (reader2 == null) {
            detector.add(QualityEncodingDetector.DEFAULT_MAX_RECORDS_TO_ITERATE, reader1);
        } else {
            detector.add(QualityEncodingDetector.DEFAULT_MAX_RECORDS_TO_ITERATE, reader1, reader2);
        }
    } finally {
        // Keep the original close order (reader2, then reader1). The nested finally
        // makes sure reader1 is still closed if closing reader2 throws.
        try {
            if (reader2 != null) {
                reader2.close();
            }
        } finally {
            reader1.close();
        }
    }

    final FastqQualityFormat qualityFormat = detector.generateBestGuess(QualityEncodingDetector.FileContext.FASTQ, expectedQuality);
    if (detector.isDeterminationAmbiguous()) {
        LOG.warn("Making ambiguous determination about fastq's quality encoding; more than one format possible based on observed qualities.");
    }
    LOG.info(String.format("Auto-detected quality format as: %s.", qualityFormat));

    return qualityFormat;
}
 
Example #2
Source File: FastqToSam.java    From picard with MIT License 6 votes vote down vote up
/**
 * Returns read baseName and asserts correct pair read name format:
 * <ul>
 * <li> Paired reads must either have the exact same read names or they must contain at least one "/"
 * <li> and the First pair read name must end with "/1" and second pair read name ends with "/2"
 * <li> The baseName (read name part before the /) must be the same for both read names
 * <li> If the read names are exactly the same but end in "/2" or "/1" then an exception will be thrown
 * </ul>
 *
 * @param readName1 read name taken from the first fastq
 * @param readName2 read name taken from the second fastq
 * @param freader1 reader for the first fastq; used only to build error messages
 * @param freader2 reader for the second fastq; used only to build error messages
 * @return the base name shared by both reads
 * @throws PicardException if the base names differ, if only one of the two names carries a pair
 *         number, or if the pair numbers are not 1 (first read) and 2 (second read)
 */
String getBaseName(final String readName1, final String readName2, final FastqReader freader1, final FastqReader freader2) {
    String[] toks = getReadNameTokens(readName1, 1, freader1);
    final String baseName1 = toks[0];
    final String num1 = toks[1];

    toks = getReadNameTokens(readName2, 2, freader2);
    final String baseName2 = toks[0];
    final String num2 = toks[1];

    if (!baseName1.equals(baseName2)) {
        throw new PicardException(String.format("In paired mode, read name 1 (%s) does not match read name 2 (%s)", baseName1,baseName2));
    }

    final boolean num1Blank = StringUtil.isBlank(num1);
    final boolean num2Blank = StringUtil.isBlank(num2);
    if (num1Blank != num2Blank) {
        // Exactly one read carries a pair number: report the read that is actually missing it,
        // against the file that read came from.
        if (num1Blank) {
            throw new PicardException(error(freader1, "Pair 1 number is missing (" + readName1 + "). Both pair numbers must be present or neither."));
        }
        throw new PicardException(error(freader2, "Pair 2 number is missing (" + readName2 + "). Both pair numbers must be present or neither."));
    }

    // Both blank is valid (identical names without pair numbers); only validate when both are present.
    if (!num1Blank) {
        if (!num1.equals("1")) {
            throw new PicardException(error(freader1,"Pair 1 number must be 1 ("+readName1+")"));
        }
        if (!num2.equals("2")) {
            throw new PicardException(error(freader2,"Pair 2 number must be 2 ("+readName2+")"));
        }
    }

    return baseName1;
}
 
Example #3
Source File: FastqToSam.java    From picard with MIT License 6 votes vote down vote up
/**
 * Splits a read name at its last '/' into a two-element array {baseName, pairNumber}.
 *
 * The pair number is only recognised when the text after the last '/' is exactly "1" or "2";
 * in every other case the whole read name is returned as the base name and the pair number is null.
 *
 * @param readName the read name to split
 * @param pairNum which end of the pair this name belongs to; used only in the error message
 * @param freader the reader the name came from; used only in the error message
 * @return a two-element array: index 0 is the base name, index 1 is "1", "2" or null
 * @throws PicardException if the read name is empty
 */
private String [] getReadNameTokens(final String readName, final int pairNum, final FastqReader freader) {
    if (readName.isEmpty()) {
        throw new PicardException(error(freader,"Pair read name "+pairNum+" cannot be empty: "+readName));
    }

    final int slashPos = readName.lastIndexOf('/');
    if (slashPos >= 0) {
        final String suffix = readName.substring(slashPos + 1);
        if ("1".equals(suffix) || "2".equals(suffix)) {
            return new String[] { readName.substring(0, slashPos), suffix };
        }
    }

    // No '/' at all, or the suffix is not a pair number: the names must match verbatim.
    return new String[] { readName, null };
}
 
Example #4
Source File: FastqToSam.java    From picard with MIT License 5 votes vote down vote up
/**
 * Creates a simple SAM file from a single fastq file.
 *
 * @param freader source of the fastq records
 * @param writer destination the converted records are added to
 * @return the number of fastq records consumed from the reader
 */
protected int doUnpaired(final FastqReader freader, final SAMFileWriter writer) {
    int processed = 0;
    final ProgressLogger progress = new ProgressLogger(LOG);

    while (freader.hasNext()) {
        final FastqRecord fastqRecord = freader.next();
        final SAMRecord samRecord = createSamRecord(
                writer.getFileHeader(),
                SequenceUtil.getSamReadNameFromFastqHeader(fastqRecord.getReadHeader()),
                fastqRecord,
                false);
        samRecord.setReadPairedFlag(false);
        writer.addAlignment(samRecord);
        progress.record(samRecord);
        processed++;
    }

    return processed;
}
 
Example #5
Source File: FastqToSam.java    From picard with MIT License 5 votes vote down vote up
/**
 * Converts two fastq inputs in lock-step, writing each pair of records to the SAM
 * with first-of-pair / second-of-pair flags set.
 *
 * @param freader1 source of the first-of-pair records
 * @param freader2 source of the second-of-pair records
 * @param writer destination the converted records are added to
 * @return the number of pairs processed
 * @throws PicardException if one input still has records after the other is exhausted
 */
protected int doPaired(final FastqReader freader1, final FastqReader freader2, final SAMFileWriter writer) {
    final ProgressLogger progress = new ProgressLogger(LOG);
    int pairCount = 0;

    while (freader1.hasNext() && freader2.hasNext()) {
        final FastqRecord firstFastq = freader1.next();
        final FastqRecord secondFastq = freader2.next();

        final String firstName = SequenceUtil.getSamReadNameFromFastqHeader(firstFastq.getReadHeader());
        final String secondName = SequenceUtil.getSamReadNameFromFastqHeader(secondFastq.getReadHeader());
        // Both mates are written under the shared base name.
        final String sharedName = getBaseName(firstName, secondName, freader1, freader2);

        final SAMRecord firstSam = createSamRecord(writer.getFileHeader(), sharedName, firstFastq, true);
        firstSam.setFirstOfPairFlag(true);
        firstSam.setSecondOfPairFlag(false);
        writer.addAlignment(firstSam);
        progress.record(firstSam);

        final SAMRecord secondSam = createSamRecord(writer.getFileHeader(), sharedName, secondFastq, true);
        secondSam.setFirstOfPairFlag(false);
        secondSam.setSecondOfPairFlag(true);
        writer.addAlignment(secondSam);
        progress.record(secondSam);

        pairCount++;
    }

    // The loop stops as soon as either input runs dry; leftovers mean the inputs were unequal.
    final boolean leftovers = freader1.hasNext() || freader2.hasNext();
    if (leftovers) {
        throw new PicardException("Input paired fastq files must be the same length");
    }

    return pairCount;
}
 
Example #6
Source File: SamToFastqTest.java    From picard with MIT License 5 votes vote down vote up
/**
 * Converts the clipping test SAM to a pair of fastq files and checks the bases and
 * qualities of the first two records in each output against the expected values.
 *
 * The fastq readers are opened in try-with-resources so the temp files are not left
 * open after the assertions (or after an assertion failure).
 */
@Test(dataProvider = "clippingTests")
public void testClipping(final String clippingAction, final String bases1_1, final String quals1_1, final String bases1_2, final String quals1_2,
                         final String bases2_1, final String quals2_1, final String bases2_2, final String quals2_2, final String testName) throws IOException {
    final File samFile = new File(TEST_DATA_DIR, CLIPPING_TEST_DATA) ;
    final File f1 = File.createTempFile("clippingtest1", "fastq");
    final File f2 = File.createTempFile("clippingtest2", "fastq");
    f1.deleteOnExit();
    f2.deleteOnExit();

    if (clippingAction != null) {
        convertFile(new String[]{
            "INPUT="            + samFile.getAbsolutePath(),
            "FASTQ="            + f1.getAbsolutePath(),
            "SECOND_END_FASTQ=" + f2.getAbsolutePath(),
            "CLIPPING_ACTION="  + clippingAction,
            "CLIPPING_ATTRIBUTE=" + "XT"
        });
    } else {
        convertFile(new String[]{
            "INPUT="            + samFile.getAbsolutePath(),
            "FASTQ="            + f1.getAbsolutePath(),
            "SECOND_END_FASTQ=" + f2.getAbsolutePath(),
        });
    }

    // First-end output: check the first two records.
    try (final FastqReader reader1 = new FastqReader(f1)) {
        final Iterator<FastqRecord> it = reader1.iterator();
        final FastqRecord first = it.next();
        Assert.assertEquals(first.getReadString(), bases1_1, testName);
        Assert.assertEquals(first.getBaseQualityString(), quals1_1, testName);
        final FastqRecord second = it.next();
        Assert.assertEquals(second.getReadString(), bases1_2, testName);
        Assert.assertEquals(second.getBaseQualityString(), quals1_2, testName);
    }

    // Second-end output: check the first two records.
    try (final FastqReader reader2 = new FastqReader(f2)) {
        final Iterator<FastqRecord> it = reader2.iterator();
        final FastqRecord first = it.next();
        Assert.assertEquals(first.getReadString(), bases2_1, testName);
        Assert.assertEquals(first.getBaseQualityString(), quals2_1, testName);
        final FastqRecord second = it.next();
        Assert.assertEquals(second.getReadString(), bases2_2, testName);
        Assert.assertEquals(second.getBaseQualityString(), quals2_2, testName);
    }
}
 
Example #7
Source File: SamToFastqTest.java    From picard with MIT License 5 votes vote down vote up
/**
 * Converts a SAM file to paired fastq output with trimming / max-bases options and checks
 * that every record in each output has the expected read and quality string length.
 *
 * The fastq readers are opened in try-with-resources so the temp files are closed
 * once they have been checked, even if an assertion fails.
 */
@Test(dataProvider = "trimmedData")
public void testTrimming(final String samFilename, final int read1Trim,
                         final int read1MaxBases, final int expectedRead1Length, final int read2Trim,
                         final int read2MaxBases, final int expectedRead2Length) throws IOException {

    final File samFile = new File(TEST_DATA_DIR, samFilename);
    final File pair1File = newTempFastqFile("pair1");
    final File pair2File = newTempFastqFile("pair2");
    pair1File.deleteOnExit();
    pair2File.deleteOnExit();

    convertFile(new String[]{
          "INPUT=" + samFile.getAbsolutePath(),
          "FASTQ=" + pair1File.getAbsolutePath(),
          "SECOND_END_FASTQ=" + pair2File.getAbsolutePath(),
          "READ1_TRIM=" + read1Trim,
          "READ1_MAX_BASES_TO_WRITE=" + read1MaxBases,
          "READ2_TRIM=" + read2Trim,
          "READ2_MAX_BASES_TO_WRITE=" + read2MaxBases
    });

    try (final FastqReader reader1 = new FastqReader(pair1File)) {
        for (final FastqRecord first : reader1) {
            Assert.assertEquals(first.getReadString().length(), expectedRead1Length, "Incorrect read length");
            Assert.assertEquals(first.getBaseQualityString().length(), expectedRead1Length, "Incorrect quality string length");
        }
    }
    try (final FastqReader reader2 = new FastqReader(pair2File)) {
        for (final FastqRecord second : reader2) {
            Assert.assertEquals(second.getReadString().length(), expectedRead2Length, "Incorrect read length");
            Assert.assertEquals(second.getBaseQualityString().length(), expectedRead2Length, "Incorrect quality string length");
        }
    }
}
 
Example #8
Source File: SamToFastqTest.java    From picard with MIT License 5 votes vote down vote up
/**
 * Reads every record of a fastq file and collects the read names into a set.
 *
 * The reader is closed before returning so the file handle is not leaked.
 *
 * @param file the fastq file to read
 * @return the set of read names found in the file
 */
protected static Set<String> createFastqReadHeaderSet(final File file) {
    final Set<String> set = new HashSet<>();
    try (final FastqReader freader = new FastqReader(file)) {
        while (freader.hasNext()) {
            final FastqRecord frec = freader.next();
            set.add(frec.getReadName());
        }
    }
    return set;
}
 
Example #9
Source File: PSFilterTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Loads all remaining records of a fastq file, converting each one with
 * {@code fastqRecordToGATKRead}. The reader is closed before the method returns.
 *
 * @param fastqFile the fastq file to read
 * @return the converted reads, in the order the reader supplied them
 */
private static final List<GATKRead> getReadsFromFastq(final File fastqFile) {
    final List<GATKRead> converted = new ArrayList<>();
    try (final FastqReader fastqReader = new FastqReader(fastqFile)) {
        fastqReader.forEachRemaining(record -> {
            converted.add(fastqRecordToGATKRead(record));
        });
    }
    return converted;
}
 
Example #10
Source File: IlluminaBasecallsToFastq.java    From picard with MIT License 4 votes vote down vote up
/**
 * Replaces the current reader with a FastqReader over the given stream.
 * The bytes are decoded with the platform default charset and buffered.
 *
 * @param is the stream to read fastq text from
 */
@Override
public void setInputStream(final InputStream is) {
    final InputStreamReader decoded = new InputStreamReader(is);
    final BufferedReader buffered = new BufferedReader(decoded);
    reader = new FastqReader(buffered);
}
 
Example #11
Source File: FastqToSam.java    From picard with MIT License 4 votes vote down vote up
/**
 * Opens a FastqReader on the given file, passing this tool's
 * ALLOW_AND_IGNORE_EMPTY_LINES setting as the reader's second constructor argument.
 *
 * @param file the fastq file to open
 * @return a new reader over the file; the caller is expected to close it
 */
private FastqReader fileToFastqReader(final File file) {
    final FastqReader opened = new FastqReader(file, ALLOW_AND_IGNORE_EMPTY_LINES);
    return opened;
}
 
Example #12
Source File: FastqToSam.java    From picard with MIT License 4 votes vote down vote up
/**
 * Decorates an error message with the reader's current line number and the absolute
 * path of the file it is reading, so the problem can be located in the input.
 *
 * @param freader the reader whose position and file are reported
 * @param str the message to decorate
 * @return the message followed by " at line N in file PATH"
 */
private String error(final FastqReader freader, final String str) {
    final StringBuilder message = new StringBuilder();
    message.append(str);
    message.append(" at line ").append(freader.getLineNumber());
    message.append(" in file ").append(freader.getFile().getAbsolutePath());
    return message.toString();
}
 
Example #13
Source File: FastqToSamTest.java    From picard with MIT License 4 votes vote down vote up
/**
 * Creates one temp file and opens both shared readers on it; the readers
 * are stored in static fields for use by the tests in this class.
 */
@BeforeClass
public static void beforeClass() throws IOException {
    // Both readers deliberately point at the same file.
    final File sharedInput = newTempFile("dummy");
    freader1 = new FastqReader(sharedInput);
    freader2 = new FastqReader(sharedInput);
}
 
Example #14
Source File: PBSIMFastqReader.java    From varsim with BSD 2-Clause "Simplified" License 4 votes vote down vote up
/**
 * Builds the reader from three already-opened text sources.
 *
 * @param brRef source handed to the {@code Idx2Chr} index-to-chromosome lookup
 * @param brMAF source handed to the MAF reader
 * @param brFastq source handed to the fastq reader
 * @param forceFiveBaseEncoding not used by this constructor
 * @throws IOException declared for the component readers; which of them can raise it is not visible here
 */
public PBSIMFastqReader(final BufferedReader brRef, final BufferedReader brMAF, final BufferedReader brFastq, final boolean forceFiveBaseEncoding) throws IOException {
    // Construction order is kept as MAF, fastq, reference.
    this.maf = new MafReader(brMAF);
    this.fastq = new FastqReader(brFastq);
    this.idx2Chr = new Idx2Chr(brRef);
}
 
Example #15
Source File: FastqToSam.java    From picard with MIT License 2 votes vote down vote up
/**
 * Runs the FastqToSam conversion directly on the supplied FastqReader(s).
 *
 * This entry point exists for callers that want to bypass the command line based
 * instantiation of this class. No argument validation is performed on their behalf.
 *
 * The reader(s) are not closed here; closing them is the caller's responsibility.
 *
 * @param reader1 The FastqReader for the first fastq file
 * @param reader2 The second FastqReader if applicable. Pass in null if only using a single reader
 * @param writer The SAMFileWriter where the new SAM file is written
 *
 */
public void makeItSo(final FastqReader reader1, final FastqReader reader2, final SAMFileWriter writer) {
    final int readCount;
    if (reader2 != null) {
        readCount = doPaired(reader1, reader2, writer);
    } else {
        readCount = doUnpaired(reader1, writer);
    }
    LOG.info("Processed " + readCount + " fastq reads");
}