htsjdk.samtools.util.CloserUtil Java Examples

The following examples show how to use htsjdk.samtools.util.CloserUtil. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: MaskReferenceSequence.java    From Drop-seq with MIT License 6 votes vote down vote up
@Override
protected int doWork() {
	IOUtil.assertFileIsReadable(this.REFERENCE_SEQUENCE);
	IOUtil.assertFileIsWritable(this.OUTPUT);
	// validate that an index is present for the reference sequence, since it's required.
	final ReferenceSequenceFile ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE, true, true);
	if (!ref.isIndexed())
		throw new IllegalStateException ("Input fasta must be indexed.  You can do this by using samtools faidx to create an index");

	FastaSequenceFileWriter writer = new FastaSequenceFileWriter(OUTPUT, OUTPUT_LINE_LENGTH);
	if (this.CONTIG_PATTERN_TO_IGNORE!=null && !this.CONTIG_PATTERN_TO_IGNORE.isEmpty()) processByWholeContig(ref, writer, this.CONTIG_PATTERN_TO_IGNORE);
	if (this.INTERVALS!=null) processByPartialContig(ref, writer, this.INTERVALS);

	CloserUtil.close(ref);
	CloserUtil.close(writer);
	return 0;

}
 
Example #2
Source File: MMapBackedIteratorFactory.java    From picard with MIT License 6 votes vote down vote up
private static ByteBuffer getBuffer(final File binaryFile) {
    final ByteBuffer buf;
    try {
        final FileInputStream is = new FileInputStream(binaryFile);
        final FileChannel channel = is.getChannel();
        final long fileSize = channel.size();
        buf = channel.map(FileChannel.MapMode.READ_ONLY, 0, fileSize);
        buf.order(ByteOrder.LITTLE_ENDIAN);
        CloserUtil.close(channel);
        CloserUtil.close(is);
    } catch (IOException e) {
        throw new PicardException("IOException opening cluster binary file " + binaryFile, e);
    }

    return buf;
}
 
Example #3
Source File: CountUnmatchedSampleIndices.java    From Drop-seq with MIT License 6 votes vote down vote up
@Override
public Boolean call() throws Exception {
    for (final File barcodeFile: new IterableAdapter<>(barcodeFiles)) {
        LOG.info("Processing", barcodeFile);
        final TabbedInputParser parser = new TabbedInputParser(false, barcodeFile);
        for (final String[] row : parser) {
            ++totalReads;
            if ("N".equals(row[1])) {
                ++totalUnmatchedReads;
                map.computeIfAbsent(row[0], k -> new LongAdder()).increment();
            }
        }
        CloserUtil.close(parser);
    }
    return true;
}
 
Example #4
Source File: GatherReadQualityMetrics.java    From Drop-seq with MIT License 6 votes vote down vote up
public Map<String, ReadQualityMetrics> gatherMetrics(final File inputSamOrBamFile) {
	ProgressLogger p = new ProgressLogger(this.log);
       Map<String, ReadQualityMetrics> result = new TreeMap<>();

	ReadQualityMetrics globalMetrics = new ReadQualityMetrics(this.MINIMUM_MAPPING_QUALITY, this.GLOBAL, true);

	SamReader in = SamReaderFactory.makeDefault().open(INPUT);

	for (final SAMRecord r : in)
		if (!r.getReadFailsVendorQualityCheckFlag() || INCLUDE_NON_PF_READS) {
               p.record(r);

               globalMetrics.addRead(r);
               // gather per tag metrics if required.
               result = addMetricsPerTag(r, result);
           }

	CloserUtil.close(in);
	result.put(this.GLOBAL, globalMetrics);
	return (result);
}
 
Example #5
Source File: DetectBeadSynthesisErrors.java    From Drop-seq with MIT License 6 votes vote down vote up
/**
 * For each problematic cell, replace cell barcodes positions with N.
 * Take the replaced bases and prepend them to the UMI, and trim the last <X> bases off the end of the UMI.
 */
private void cleanBAM (final Map<String, BeadSynthesisErrorData> errorBarcodesWithPositions, final Map<String, String> intendedSequenceMap) {
	log.info("Cleaning BAM");
       final SamHeaderAndIterator headerAndIterator = SamFileMergeUtil.mergeInputs(INPUT, true);
	SamHeaderUtil.addPgRecord(headerAndIterator.header, this);

	SAMFileWriter writer= new SAMFileWriterFactory().setCreateIndex(CREATE_INDEX).makeSAMOrBAMWriter(headerAndIterator.header, true, OUTPUT);
	ProgressLogger pl = new ProgressLogger(log);
	for (SAMRecord r: new IterableAdapter<>(headerAndIterator.iterator)) {
		pl.record(r);
		r=padCellBarcodeFix(r, errorBarcodesWithPositions, intendedSequenceMap, this.CELL_BARCODE_TAG, this.MOLECULAR_BARCODE_TAG, this.EXTREME_BASE_RATIO);
		if (r!=null)
			writer.addAlignment(r);
	}
	CloserUtil.close(headerAndIterator.iterator);
	writer.close();
}
 
Example #6
Source File: SamFormatConverter.java    From picard with MIT License 6 votes vote down vote up
/**
 * Convert a file from one of sam/bam/cram format to another based on the extension of output.
 *
 * @param input             input file in one of sam/bam/cram format
 * @param output            output to write converted file to, the conversion is based on the extension of this filename
 * @param referenceSequence the reference sequence to use, necessary when reading/writing cram
 * @param createIndex       whether or not an index should be written alongside the output file
 */
public static void convert(final File input, final File output, final File referenceSequence, final Boolean createIndex) {
    IOUtil.assertFileIsReadable(input);
    IOUtil.assertFileIsWritable(output);
    final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(referenceSequence).open(input);
    final SAMFileWriter writer = new SAMFileWriterFactory().makeWriter(reader.getFileHeader(), true, output, referenceSequence);
    if (createIndex && writer.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
        throw new PicardException("Can't CREATE_INDEX unless sort order is coordinate");
    }

    final ProgressLogger progress = new ProgressLogger(Log.getInstance(SamFormatConverter.class));
    for (final SAMRecord rec : reader) {
        writer.addAlignment(rec);
        progress.record(rec);
    }
    CloserUtil.close(reader);
    writer.close();
}
 
Example #7
Source File: SequenceDictionaryIntersectionTest.java    From Drop-seq with MIT License 6 votes vote down vote up
@Test
public void testNoMatch() {
    final File leftDictFile = new File(TESTDATA_DIR, CHR_BASENAME + "sam");
    final SamReader leftDict = SamReaderFactory.makeDefault().open(leftDictFile);
    try {
        final SAMSequenceDictionary nonMatchingDict = new SAMSequenceDictionary(
                leftDict.getFileHeader().getSequenceDictionary().getSequences().stream().
                        map((s) -> new SAMSequenceRecord("xyz_" + s.getSequenceName(), s.getSequenceLength())).
                        collect(Collectors.toList()));
        SequenceDictionaryIntersection sdi = new SequenceDictionaryIntersection(
                leftDict, leftDictFile.getName(), nonMatchingDict, "with prefixes");
        Assert.assertEquals(sdi.getIntersection().size(), 0);
        System.out.println("Terse with descriptions: " + sdi.message(false));
        System.out.println("Verbose with descriptions: " + sdi.message(true));

        sdi = new SequenceDictionaryIntersection(leftDict, nonMatchingDict);
        Assert.assertEquals(sdi.getIntersection().size(), 0);
        System.out.println("Terse without descriptions: " + sdi.message(false) + "\n");
        System.out.println("Verbose without descriptions: " + sdi.message(true) + "\n");
    } finally {
        CloserUtil.close(leftDict);
    }
}
 
Example #8
Source File: ReplaceSamHeader.java    From picard with MIT License 6 votes vote down vote up
private void standardReheader(final SAMFileHeader replacementHeader) {
    final SamReader recordReader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).validationStringency(ValidationStringency.SILENT).open(INPUT);
    if (replacementHeader.getSortOrder() != recordReader.getFileHeader().getSortOrder()) {
        throw new PicardException("Sort orders of INPUT (" + recordReader.getFileHeader().getSortOrder().name() +
                ") and HEADER (" + replacementHeader.getSortOrder().name() + ") do not agree.");
    }
    final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(replacementHeader, true, OUTPUT);

    final ProgressLogger progress = new ProgressLogger(Log.getInstance(ReplaceSamHeader.class));
    for (final SAMRecord rec : recordReader) {
        rec.setHeader(replacementHeader);
        writer.addAlignment(rec);
        progress.record(rec);
    }
    writer.close();
    CloserUtil.close(recordReader);
}
 
Example #9
Source File: GatherBamFiles.java    From picard with MIT License 6 votes vote down vote up
/**
 * Simple implementation of a gather operations that uses SAMFileReaders and Writers in order to concatenate
 * multiple BAM files.
 */
private static void gatherNormally(final List<File> inputs, final File output, final boolean createIndex, final boolean createMd5,
                                   final File referenceFasta) {
    final SAMFileHeader header;
    {
        header = SamReaderFactory.makeDefault().referenceSequence(referenceFasta).getFileHeader(inputs.get(0));
    }

    final SAMFileWriter out = new SAMFileWriterFactory().setCreateIndex(createIndex).setCreateMd5File(createMd5).makeSAMOrBAMWriter(header, true, output);

    for (final File f : inputs) {
        log.info("Gathering " + f.getAbsolutePath());
        final SamReader in = SamReaderFactory.makeDefault().referenceSequence(referenceFasta).open(f);
        for (final SAMRecord rec : in) out.addAlignment(rec);
        CloserUtil.close(in);
    }

    out.close();
}
 
Example #10
Source File: BciFileFaker.java    From picard with MIT License 6 votes vote down vote up
public void fakeBciFile(final File bci, final List<Integer> expectedTiles) throws IOException {
    tiles = expectedTiles;
    final FileOutputStream fileOutputStream = new FileOutputStream(bci);
    final FileChannel channel = fileOutputStream.getChannel();
    final ByteBuffer buffer = ByteBuffer.allocate(8 * expectedTiles.size());
    buffer.order(ByteOrder.LITTLE_ENDIAN);

    fakeFile(buffer);
    buffer.flip();

    channel.write(buffer);
    channel.force(true);

    CloserUtil.close(channel);
    CloserUtil.close(fileOutputStream);
}
 
Example #11
Source File: TestUtils.java    From Drop-seq with MIT License 6 votes vote down vote up
/**
 * Test if two text files are the same, ignoring "#" character lines.
 * @param expected
 * @param actual
 * @return
 */
public static boolean testFilesSame (final File expected, final File actual) {
	TabbedInputParser e = new TabbedInputParser(true, expected);
	TabbedInputParser a = new TabbedInputParser(true, actual);
	while (e.hasNext() && a.hasNext()) {
		e.next();
		a.next();
		String le = e.getCurrentLine();
		String la = a.getCurrentLine();
		if (!le.equals(la)) {
			CloserUtil.close(e);
			CloserUtil.close(a);
			return false;
		}
	}
	CloserUtil.close(e);
	CloserUtil.close(a);
	// one of the files is incomplete.
	if (e.hasNext() || a.hasNext()) 
		return false;		
	return true;
}
 
Example #12
Source File: GTFReader.java    From Drop-seq with MIT License 6 votes vote down vote up
public OverlapDetector<GeneFromGTF> load() {
     final FilteringGTFParser parser = new FilteringGTFParser(gtfFlatFile);
     final GeneFromGTFBuilder geneBuilder = new GeneFromGTFBuilder(parser);
     CloserUtil.close(parser);
     final OverlapDetector<GeneFromGTF> overlapDetector = new OverlapDetector<>(0, 0);

     int longestInterval = 0;
     int numIntervalsOver1MB = 0;

     while (geneBuilder.hasNext())
try {
             // Can throw AnnotationException
             GeneFromGTF gene = geneBuilder.next();
             overlapDetector.addLhs(gene, gene);
             if (gene.length() > longestInterval) longestInterval = gene.length();
             if (gene.length() > 1000000) ++numIntervalsOver1MB;
         } catch (AnnotationException e) {
             LOG.info(e.getMessage() + " -- skipping");
         }
     LOG.debug("Longest gene: " + longestInterval + "; number of genes > 1MB: " + numIntervalsOver1MB);
     LOG.debug("Total number of genes loaded [" + overlapDetector.getAll().size() +"]");
     return overlapDetector;
 }
 
Example #13
Source File: AggregatedTagOrderIteratorTest.java    From Drop-seq with MIT License 6 votes vote down vote up
@Test (enabled=true)
public void testGetCellBatch() {
	File f = new File("testdata/org/broadinstitute/transcriptome/barnyard/5cell3gene.bam");
	String cellTag = "ZC";
       final Iterator<List<SAMRecord>> iter = filterSortAndGroupByTagsAndQuality(f, cellTag);

	List<String>sortingTags = new ArrayList<String>();
	
	sortingTags.add(cellTag);

	while (iter.hasNext()) {
		List<SAMRecord> recs=iter.next();
		SAMRecord r = recs.iterator().next();
		int setSize = recs.size();
		String cell = r.getStringAttribute(cellTag);
		int expectedSize = getCellBatchExpectedSize(cell);
		
		Assert.assertTrue(testAllRecordsSamTags(recs, sortingTags, expectedSize, setSize));
		
		Assert.assertEquals(expectedSize, setSize);
	}
       CloserUtil.close(iter);
}
 
Example #14
Source File: DetectBeadSubstitutionErrorsTest.java    From Drop-seq with MIT License 6 votes vote down vote up
/**
  * Very lame test -- just confirms that program doesn't crash and return 0 exit status.
  */
 @Test
 public void testBasic() throws IOException {
     final DetectBeadSubstitutionErrors clp = new DetectBeadSubstitutionErrors();
     clp.INPUT = Arrays.asList(TEST_FILE);
     clp.OUTPUT_REPORT = File.createTempFile("DetectBeadSubstitutionErrorsTest.", ".substitution_report.txt");
     clp.OUTPUT_REPORT.deleteOnExit();
     clp.OUTPUT = File.createTempFile("DetectBeadSubstitutionErrorsTest.", ".sam");
     clp.OUTPUT.deleteOnExit();
     clp.OUTPUT_SUMMARY = File.createTempFile("DetectBeadSubstitutionErrorsTest.", "substitution_summary.txt");
     clp.OUTPUT_SUMMARY.deleteOnExit();
     Assert.assertEquals(clp.doWork(), 0);
     final TabbedTextFileWithHeaderParser parser = new TabbedTextFileWithHeaderParser(clp.OUTPUT_REPORT);
     int numRows = 0;
     for (final TabbedTextFileWithHeaderParser.Row row : parser) {
         ++numRows;
         Assert.assertEquals(row.getField("intended_barcode"), BIG_BARCODE);
         final String neighbor_barcode = row.getField("neighbor_barcode");
         Assert.assertTrue(SMALL_BARCODES.contains(neighbor_barcode));
         Assert.assertTrue(Boolean.parseBoolean(row.getField("repaired")));
     }
     final SamReader reader = SamReaderFactory.makeDefault().open(clp.OUTPUT);
     for (final SAMRecord rec : reader)
Assert.assertEquals(rec.getStringAttribute("XC"), BIG_BARCODE);
     CloserUtil.close(reader);
 }
 
Example #15
Source File: CollapseBarcodesInPlace.java    From Drop-seq with MIT License 6 votes vote down vote up
public void processOnlyPrimary () {
       final SamHeaderAndIterator inputs = openInputs();
	CloseableIterator<SAMRecord> inputSam = inputs.iterator;
	SAMFileHeader header = inputs.header;
	header.addComment("Edit distance collapsed tag " +  this.PRIMARY_BARCODE + " to new tag " + this.OUT_BARCODE+ " with edit distance "+ this.EDIT_DISTANCE);
       SAMFileWriter writer= new SAMFileWriterFactory().makeSAMOrBAMWriter(header, true, this.OUTPUT);

	// gather up the barcodes that exist in the BAM
       final SamHeaderAndIterator inputs2 = openInputs();
	ObjectCounter<String> barcodes = new BamTagHistogram().getBamTagCounts(inputs2.iterator, this.PRIMARY_BARCODE,this.MINIMUM_MAPPING_QUALITY, this.FILTER_PCR_DUPLICATES);
       CloserUtil.close(inputs2.iterator);

	// filter barcodes by #reds in each barcode.
	barcodes=filterBarcodesByNumReads(barcodes, this.MIN_NUM_READS_NONCORE);

	// collapse them
	Map<String, String> childParentBarcodes=collapseBarcodes(this.MIN_NUM_READS_CORE, this.NUM_CORE_BARCODES, barcodes, this.FIND_INDELS, this.EDIT_DISTANCE);
	// iterate through the reads and retag with the proper reads.
	// log.info("STUFF");
	retagReads(inputSam, writer, childParentBarcodes, this.PRIMARY_BARCODE, this.OUT_BARCODE);
	// collapsed.size();

	CloserUtil.close(inputSam);
	writer.close();
}
 
Example #16
Source File: BaseDistributionAtReadPosition.java    From Drop-seq with MIT License 6 votes vote down vote up
BaseDistributionMetricCollection gatherBaseQualities (final File input, final String tag) {
	ProgressLogger pl = new ProgressLogger(this.log);
	SamReader inputSam = SamReaderFactory.makeDefault().open(input);

	BaseDistributionMetricCollection c = new BaseDistributionMetricCollection();
	// MAIN_LOOP:
	for (final SAMRecord samRecord : inputSam) {
		pl.record(samRecord);
		if (samRecord.isSecondaryOrSupplementary()) continue;
		String b = samRecord.getStringAttribute(tag);
		if (b==null) continue;

		byte [] bases = b.getBytes();
		for (int i=0; i<bases.length; i++) {
			char base = (char) (bases[i]);
			int idx=i+1;
			c.addBase(base, idx);
		}
	}

	CloserUtil.close(inputSam);
	return (c);

}
 
Example #17
Source File: PeekableGroupingIteratorTest.java    From Drop-seq with MIT License 6 votes vote down vote up
@Test(enabled=true)
public void testVsGrouping () {
	Comparator<String> cmp = String.CASE_INSENSITIVE_ORDER;

	String [] data = {"A", "A", "A", "B", "B", "C", "C", "C", "D"};

	GroupingIterator<String> gi = new GroupingIterator<>(Arrays.stream(data).iterator(), cmp);

	PeekableGroupingIterator<String> pi = new PeekableGroupingIterator<>(Arrays.stream(data).iterator(), cmp);

	while (gi.hasNext()){
		List<String> giResult = gi.next();
		List<String> piResult = new ArrayList<>();
		// load up the first result if available.
		if (pi.hasNext()) piResult.add(pi.next());
		// deal with subsequent entries in the group.
		while (pi.hasNextInGroup())
			piResult.add(pi.next());
		Assert.assertEquals(giResult.size(), piResult.size());
		Assert.assertEquals(giResult, piResult);
	}
	CloserUtil.close(gi);
	CloserUtil.close(pi);


}
 
Example #18
Source File: SortingIteratorFactory.java    From Drop-seq with MIT License 6 votes vote down vote up
/**
 *
 * @param componentType Required because of Java generic syntax limitations.
 * @param underlyingIterator All records are pulled from this iterator, which is then closed if closeable.
 * @param comparator Defines sort order.
 * @param codec For spilling to temp files
 * @param maxRecordsInRam
 * @param progressLogger Pass null if not interested in logging.
 * @return An iterator in the order defined by comparator, that will produce all the records from underlyingIterator.
 */
public static <T> CloseableIterator<T> create(final Class<T> componentType,
                                              final Iterator<T> underlyingIterator,
                                              final Comparator<T> comparator,
                                              final SortingCollection.Codec<T> codec,
                                              final int maxRecordsInRam,
                                              final ProgressCallback progressLogger) {

    SortingCollection<T> sortingCollection =
            SortingCollection.newInstance(componentType, codec, comparator, maxRecordsInRam);

    while (underlyingIterator.hasNext()) {
        final T rec = underlyingIterator.next();
        if (progressLogger != null)
progressLogger.logProgress(rec);
        sortingCollection.add(rec);
    }
    CloseableIterator<T> ret = sortingCollection.iterator();
    CloserUtil.close(underlyingIterator);
    return ret;
}
 
Example #19
Source File: DetectBeadSubstitutionErrors.java    From Drop-seq with MIT License 6 votes vote down vote up
private void repairBAM (final BottomUpCollapseResult result) {

		final SamHeaderAndIterator headerAndIterator = SamFileMergeUtil.mergeInputs(this.INPUT, true);
		SamHeaderUtil.addPgRecord(headerAndIterator.header, this);
		headerAndIterator.header.addComment("Bottom-up edit distance collapse tag " + this.CELL_BARCODE_TAG +" with edit distance " + this.EDIT_DISTANCE+ " filtering ambiguous neighbors=" + this.FILTER_AMBIGUOUS);
		SAMFileWriter writer= new SAMFileWriterFactory().setCreateIndex(CREATE_INDEX).makeSAMOrBAMWriter(headerAndIterator.header, true, OUTPUT);

		ProgressLogger pl = new ProgressLogger(log);
		log.info("Repairing BAM");
		for (SAMRecord r: new IterableAdapter<>(headerAndIterator.iterator)) {
			pl.record(r);
			r=repairBarcode(r, result);
			if (r!=null)
				writer.addAlignment(r);
		}
		CloserUtil.close(headerAndIterator.iterator);
		writer.close();
		log.info("Repair Complete");
	}
 
Example #20
Source File: SetNmMdAndUqTags.java    From picard with MIT License 6 votes vote down vote up
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);

    if (reader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
        throw new SAMException("Input must be coordinate-sorted for this program to run. Found: " + reader.getFileHeader().getSortOrder());
    }

    final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(reader.getFileHeader(), true, OUTPUT);
    writer.setProgressLogger(
            new ProgressLogger(log, (int) 1e7, "Wrote", "records"));

    final ReferenceSequenceFileWalker refSeqWalker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE);

    StreamSupport.stream(reader.spliterator(), false)
            .peek(rec -> fixRecord(rec, refSeqWalker))
            .forEach(writer::addAlignment);
    CloserUtil.close(reader);
    writer.close();
    return 0;
}
 
Example #21
Source File: ClusterIntensityFileReader.java    From picard with MIT License 6 votes vote down vote up
/**
 * Prepare to parse a CIF or CNF file.
 * @param file The file to be parsed.
 */
public ClusterIntensityFileReader(final File file) {
    try {
        this.file = file;
        final FileInputStream is = new FileInputStream(this.file);
        final FileChannel channel = is.getChannel();
        final long fileSize = channel.size();
        buf = channel.map(FileChannel.MapMode.READ_ONLY, 0, fileSize);
        buf.order(ByteOrder.LITTLE_ENDIAN);
        CloserUtil.close(channel);
        CloserUtil.close(is);
        final byte [] headerBytes = new byte[HEADER_SIZE];
        buf.get(headerBytes);
        this.header = new ClusterIntensityFileHeader(headerBytes, this.file);
    } catch (IOException e) {
        throw new PicardException("IOException opening cluster intensity file " + file, e);
    }
    cycleSize = NUM_CHANNELS * header.numClusters * header.elementSize;
    channelSize = header.numClusters * header.elementSize;
}
 
Example #22
Source File: TileIndex.java    From picard with MIT License 6 votes vote down vote up
TileIndex(final File tileIndexFile) {
    try {
        this.tileIndexFile = tileIndexFile;
        final InputStream is = IOUtil.maybeBufferInputStream(new FileInputStream(tileIndexFile));
        final ByteBuffer buf = ByteBuffer.allocate(8);
        buf.order(ByteOrder.LITTLE_ENDIAN);
        int absoluteRecordIndex = 0;
        int numTiles = 0;
        while (readTileIndexRecord(buf.array(), buf.capacity(), is)) {
            buf.rewind();
            buf.limit(buf.capacity());
            final int tile = buf.getInt();
            // Note: not handling unsigned ints > 2^31, but could if one of these exceptions is thrown.
            if (tile < 0) throw new PicardException("Tile number too large in " + tileIndexFile.getAbsolutePath());
            final int numClusters = buf.getInt();
            if (numClusters < 0) throw new PicardException("Cluster size too large in " + tileIndexFile.getAbsolutePath());
            tiles.add(new TileIndexRecord(tile, numClusters, absoluteRecordIndex, numTiles++));
            absoluteRecordIndex += numClusters;
        }
        CloserUtil.close(is);
    } catch (final IOException e) {
        throw new PicardException("Problem reading " + tileIndexFile.getAbsolutePath(), e);
    }
}
 
Example #23
Source File: TrimStartingSequenceTest.java    From Drop-seq with MIT License 5 votes vote down vote up
@Test
public void testTrimStartingSequenceClp() throws IOException {
    final TrimStartingSequence clp = new TrimStartingSequence();
    clp.INPUT = INPUT;
    clp.OUTPUT = File.createTempFile("TrimStartingSequenceTest.", ".sam");
    clp.OUTPUT.deleteOnExit();
    clp.OUTPUT_SUMMARY = File.createTempFile("TrimStartingSequenceTest.", ".summary");
    clp.OUTPUT_SUMMARY.deleteOnExit();
    clp.SEQUENCE = "AAGCAGTGGTATCAACGCAGAGTGAATGGG";
    clp.MISMATCHES=0;
    clp.NUM_BASES=5;
    Assert.assertEquals(clp.doWork(), 0);

    // Confirm that the expected stuff is trimmed.
    final SamReader outputReader = SamReaderFactory.makeDefault().open(clp.OUTPUT);
    final SAMRecordIterator outputIterator = outputReader.iterator();
    final SamReader inputReader = SamReaderFactory.makeDefault().open(clp.INPUT);
    for (final SAMRecord inputRec: inputReader) {
        Assert.assertTrue(outputIterator.hasNext());
        final SAMRecord outputRec = outputIterator.next();
        final String inputBases = inputRec.getReadString();
        final String outputBases = outputRec.getReadString();
        Assert.assertTrue(inputBases.endsWith(outputBases));
        final String trimmedBases = inputBases.substring(0, inputBases.length() - outputBases.length());
        Assert.assertTrue(clp.SEQUENCE.endsWith(trimmedBases));
    }
    Assert.assertFalse(outputIterator.hasNext());
    CloserUtil.close(Arrays.asList(outputReader, inputReader));
}
 
Example #24
Source File: CleanSam.java    From picard with MIT License 5 votes vote down vote up
/**
 * Do the work after command line has been parsed.
 * RuntimeException may be thrown by this method, and are reported appropriately.
 *
 * @return program exit status.
 */
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    final SamReaderFactory factory = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE);
    if (VALIDATION_STRINGENCY == ValidationStringency.STRICT) {
        factory.validationStringency(ValidationStringency.LENIENT);
    }
    final SamReader reader = factory.open(INPUT);
    final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(reader.getFileHeader(), true, OUTPUT);
    final CloseableIterator<SAMRecord> it = reader.iterator();
    final ProgressLogger progress = new ProgressLogger(Log.getInstance(CleanSam.class));

    // If the read (or its mate) maps off the end of the alignment, clip it
    while (it.hasNext()) {
        final SAMRecord rec = it.next();

        // If the read (or its mate) maps off the end of the alignment, clip it
        AbstractAlignmentMerger.createNewCigarsIfMapsOffEndOfReference(rec);

        // check the read's mapping quality
        if (rec.getReadUnmappedFlag() && 0 != rec.getMappingQuality()) {
            rec.setMappingQuality(0);
        }

        writer.addAlignment(rec);
        progress.record(rec);
    }

    writer.close();
    it.close();
    CloserUtil.close(reader);
    return 0;
}
 
Example #25
Source File: SequenceDictionaryIntersectionTest.java    From Drop-seq with MIT License 5 votes vote down vote up
@Test(expectedExceptions = IllegalArgumentException.class)
public void testBadObject() {
    final File leftDictFile = new File(TESTDATA_DIR, CHR_BASENAME + "sam");
    final SamReader leftDict = SamReaderFactory.makeDefault().open(leftDictFile);
    try {
        SequenceDictionaryIntersection sdi = new SequenceDictionaryIntersection(
                leftDict, leftDictFile.getName(), "Just a string", "not a sequence dictionary");

    } finally {
        CloserUtil.close(leftDict);;
    }
}
 
Example #26
Source File: DetectBeadSubstitutionErrors.java    From Drop-seq with MIT License 5 votes vote down vote up
private void writeReport (final BottomUpCollapseResult result, final BottomUpCollapseResult resultClean, final UMIsPerCellResult umiResult) {
	PrintStream outReport = new ErrorCheckingPrintStream(IOUtil.openFileForWriting(this.OUTPUT_REPORT));

	// write comments section, each line starts with a "#"
	outReport.println("# FILTER_AMBIGUOUS="+FILTER_AMBIGUOUS);
	outReport.println("# MIN_UMIS_PER_CELL="+MIN_UMIS_PER_CELL);
	outReport.println("# UMI_BIAS_THRESHOLD="+MIN_UMIS_PER_CELL);
	outReport.println("# EDIT_DISTANCE="+MIN_UMIS_PER_CELL);
	outReport.println("#");
	outReport.println("# TOTAL_BARCODES_TESTED="+umiResult.getNumCellBarocodesTested());
	outReport.println("# BARCODES_COLLAPSED="+result.getUnambiguousSmallBarcodes().size());
	outReport.println("# ESTIMATED_UMIS_COLLAPSED="+getTotalAmbiguousUMIs(result.getUnambiguousSmallBarcodes(), umiResult.getUmisPerCell()));
	outReport.println("# AMBIGUOUS_BARCODES="+result.getAmbiguousBarcodes().size());
	outReport.println("# ESTIMATED_AMBIGUOUS_UMIS="+getTotalAmbiguousUMIs(result.getAmbiguousBarcodes(), umiResult.getUmisPerCell()));
	outReport.println("# POLY_T_BIASED_BARCODES="+umiResult.getPolyTBiasedBarcodes());
	outReport.println("# POLY_T_BIASED_BARRCODE_UMIS="+umiResult.getPolyTBiasedUMIs());
	outReport.println("# POLY_T_POSITION="+umiResult.getPolyTPosition());

	/// write header
	String [] header= {"intended_barcode", "neighbor_barcode", "intended_size", "neighbor_size", "position", "intended_base", "neighbor_base", "repaired"};
	outReport.println(StringUtil.join("\t", header));

	ObjectCounter<String> umiCounts=umiResult.getUmisPerCell();
	Iterator<String> smalls = result.getUnambiguousSmallBarcodes().iterator();
	while (smalls.hasNext()) {
		String small=smalls.next();
		String large = result.getLargerRelatedBarcode(small);
		BarcodeSubstitutionPair p = new BarcodeSubstitutionPair(large, small);
		String cleanLarger = resultClean.getLargerRelatedBarcode(small);
		boolean repaired = cleanLarger!=null;

		String [] body = {large, small, Integer.toString(umiCounts.getCountForKey(large)), Integer.toString(umiCounts.getCountForKey(small)),
				Integer.toString(p.getPosition()+1), p.getIntendedBase(), p.getNeighborBase(), Boolean.toString(repaired).toUpperCase()};
		outReport.println(StringUtil.join("\t", body));
	}

	CloserUtil.close(outReport);
}
 
Example #27
Source File: AggregatedTagOrderIteratorTest.java    From Drop-seq with MIT License 5 votes vote down vote up
/****
 * SOME TESTS BY #READS ON A LARGER BAM
 */

@Test (enabled=true)
public void testGetCellGeneBatch() {
	File f = new File("testdata/org/broadinstitute/transcriptome/barnyard/5cell3gene.bam");
       String cellTag = "ZC";
       String geneExonTag = "GE";
       final Iterator<List<SAMRecord>> iter = filterSortAndGroupByTagsAndQuality(f, geneExonTag, cellTag);

	List<String>sortingTags = new ArrayList<String>();

	sortingTags.add(geneExonTag);
	sortingTags.add(cellTag);


	while (iter.hasNext()) {
		Collection<SAMRecord> recs=iter.next();
		SAMRecord r = recs.iterator().next();
		int setSize = recs.size();
		String cell = r.getStringAttribute(cellTag);
		String geneExon = r.getStringAttribute(geneExonTag); 
		int expectedSize = getCellGeneBatchExpectedSize(geneExon, cell);
		Assert.assertTrue(testAllRecordsSamTags(recs, sortingTags, expectedSize, setSize));
		Assert.assertEquals(expectedSize, setSize);
	}
	CloserUtil.close(iter);
}
 
Example #28
Source File: MultiLevelCollectorTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test(dataProvider = "variedAccumulationLevels")
public void multilevelCollectorTest(final Set<MetricAccumulationLevel> accumulationLevels) {
    final SamReader in = SamReaderFactory.makeDefault().open(TESTFILE);
    final RecordCountMultiLevelCollector collector = new RecordCountMultiLevelCollector(accumulationLevels, in.getFileHeader().getReadGroups());

    for (final SAMRecord rec : in) {
        collector.acceptRecord(rec, null);
    }

    collector.finish();

    int totalProcessed = 0;
    int totalMetrics = 0;
    for(final MetricAccumulationLevel level : accumulationLevels) {
        final Map<String, Integer> keyToMetrics = accumulationLevelToPerUnitReads.get(level);
        for(final Map.Entry<String, Integer> entry : keyToMetrics.entrySet()) {
            final TotalNumberMetric metric = collector.getUnitsToMetrics().get(entry.getKey());
            Assert.assertEquals(entry.getValue(), metric.TALLY);
            Assert.assertTrue(metric.FINISHED);
            totalProcessed += metric.TALLY;
            totalMetrics   += 1;
        }
    }

    Assert.assertEquals(collector.getUnitsToMetrics().size(), totalMetrics);
    Assert.assertEquals(totalProcessed, collector.getNumProcessed());
    CloserUtil.close(in);
}
 
Example #29
Source File: CreateSnpIntervalFromVcf.java    From Drop-seq with MIT License 5 votes vote down vote up
private static SAMSequenceDictionary getSequenceDictionary(final File sd) {
	SamReader reader = SamReaderFactory.makeDefault().open(sd);
	SAMSequenceDictionary dict = reader.getFileHeader().getSequenceDictionary();
	// final SAMFileReader r = new SAMFileReader(sd);
	// SAMSequenceDictionary dict = r.getFileHeader().getSequenceDictionary();
	CloserUtil.close(reader);

	return dict;
}
 
Example #30
Source File: MatrixMarketReader.java    From Drop-seq with MIT License 5 votes vote down vote up
@Override
public void close() throws IOException {
    if (!closed) {
        closed = true;
        CloserUtil.close(reader);
    }
}