Java Code Examples for htsjdk.samtools.util.ProgressLogger

The following examples show how to use htsjdk.samtools.util.ProgressLogger. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: Drop-seq   Source File: EnhanceGTFRecords.java    License: MIT License 6 votes vote down vote up
/**
 * @param records GTFRecords, potentially from many genes
 * @return The input records, plus gene, transcript, intron and conserved_intron records.
 */
public List<GTFRecord> enhanceGTFRecords (Iterator<GTFRecord> records) {
       final ProgressLogger progressLogger = new ProgressLogger(LOG, 100, "enhancing", "genes");
       List<GTFRecord> result = new ArrayList<>();
       final GeneFromGTFBuilder geneBuilder = new GeneFromGTFBuilder(records);
       while (geneBuilder.hasNext()) {
           try {
               progressLogger.record();
               GeneFromGTF gene = geneBuilder.next();
               progressLogger.record(gene.getName(), 0);
               LOG.debug("Enhancing Gene [" + gene.getName() + "]");
               result.addAll(enhanceGene(gene));
           } catch (AnnotationException e) {
               LOG.info(e.getMessage() + " -- skipping");
           }
       }
	return result;
}
 
Example 2
Source Project: Drop-seq   Source File: GatherReadQualityMetrics.java    License: MIT License 6 votes vote down vote up
public Map<String, ReadQualityMetrics> gatherMetrics(final File inputSamOrBamFile) {
	ProgressLogger p = new ProgressLogger(this.log);
       Map<String, ReadQualityMetrics> result = new TreeMap<>();

	ReadQualityMetrics globalMetrics = new ReadQualityMetrics(this.MINIMUM_MAPPING_QUALITY, this.GLOBAL, true);

	SamReader in = SamReaderFactory.makeDefault().open(INPUT);

	for (final SAMRecord r : in)
		if (!r.getReadFailsVendorQualityCheckFlag() || INCLUDE_NON_PF_READS) {
               p.record(r);

               globalMetrics.addRead(r);
               // gather per tag metrics if required.
               result = addMetricsPerTag(r, result);
           }

	CloserUtil.close(in);
	result.put(this.GLOBAL, globalMetrics);
	return (result);
}
 
Example 3
Source Project: Drop-seq   Source File: BamTagHistogram.java    License: MIT License 6 votes vote down vote up
public ObjectCounter<String> getBamTagCounts (final Iterator<SAMRecord> iterator, final String tag, final int readQuality, final boolean filterPCRDuplicates) {
    ProgressLogger pl = new ProgressLogger(log, 10000000);

    ObjectCounter<String> counter = new ObjectCounter<>();

    for (final SAMRecord r : new IterableAdapter<>(iterator)) {
        pl.record(r);
        if (filterPCRDuplicates && r.getDuplicateReadFlag()) continue;
        if (r.getMappingQuality()<readQuality) continue;
        if (r.isSecondaryOrSupplementary()) continue;
        //String s1 = r.getStringAttribute(tag);
        String s1 = getAnyTagAsString(r, tag);
        if (s1!=null && s1!="") counter.increment(s1); // if the tag doesn't have a value, don't increment it.

    }
    return (counter);
}
 
Example 4
Source Project: Drop-seq   Source File: SingleCellRnaSeqMetricsCollector.java    License: MIT License 6 votes vote down vote up
RnaSeqMetricsCollector getRNASeqMetricsCollector(final String cellBarcodeTag, final List<String> cellBarcodes, final File inBAM,
  		final RnaSeqMetricsCollector.StrandSpecificity strand, final double rRNAFragmentPCT, final int readMQ,
  		final File annotationsFile, final File rRNAIntervalsFile) {

  	CollectorFactory factory = new CollectorFactory(inBAM, strand, rRNAFragmentPCT, annotationsFile, rRNAIntervalsFile);
RnaSeqMetricsCollector collector=  factory.getCollector(cellBarcodes);
List<SAMReadGroupRecord> rg = factory.getReadGroups(cellBarcodes);

      // iterate by cell barcodes.  Skip all the reads without cell barcodes.
CloseableIterator<SAMRecord> iter = getReadsInTagOrder (inBAM, cellBarcodeTag, rg, cellBarcodes, readMQ);

      ProgressLogger p = new ProgressLogger(log, 1000000, "Accumulating metrics");
while (iter.hasNext()) {
	SAMRecord r = iter.next();
	String cellBarcode = r.getStringAttribute(cellBarcodeTag);
	r.setAttribute("RG", cellBarcode);
          p.record(r);
   	collector.acceptRecord(r, null);
}

collector.finish();
return (collector);
  }
 
Example 5
Source Project: Drop-seq   Source File: BaseDistributionAtReadPosition.java    License: MIT License 6 votes vote down vote up
BaseDistributionMetricCollection gatherBaseQualities (final File input, final String tag) {
	ProgressLogger pl = new ProgressLogger(this.log);
	SamReader inputSam = SamReaderFactory.makeDefault().open(input);

	BaseDistributionMetricCollection c = new BaseDistributionMetricCollection();
	// MAIN_LOOP:
	for (final SAMRecord samRecord : inputSam) {
		pl.record(samRecord);
		if (samRecord.isSecondaryOrSupplementary()) continue;
		String b = samRecord.getStringAttribute(tag);
		if (b==null) continue;

		byte [] bases = b.getBytes();
		for (int i=0; i<bases.length; i++) {
			char base = (char) (bases[i]);
			int idx=i+1;
			c.addBase(base, idx);
		}
	}

	CloserUtil.close(inputSam);
	return (c);

}
 
Example 6
Source Project: Drop-seq   Source File: FilterBamByTag.java    License: MIT License 6 votes vote down vote up
/**
 * Just work through the reads one at a time.
 * @param in
 * @param out
 */
void processUnpairedMode (final SamReader in, final SAMFileWriter out, final Set<String> values, final File summaryFile, Integer mapQuality ,boolean allowPartial) {
	FilteredReadsMetric m = new FilteredReadsMetric();
	ProgressLogger progLog = new ProgressLogger(log);
	for (final SAMRecord r : in) {
		progLog.record(r);
		boolean filterFlag = filterRead(r, this.TAG, values, this.ACCEPT_TAG, mapQuality, allowPartial);
		if (!filterFlag) { 
			out.addAlignment(r);
			m.READS_ACCEPTED++;
		}
		else {
			m.READS_REJECTED++;
		}
	}
	writeSummary(summaryFile, m);
	CloserUtil.close(in);
	out.close();
	reportAndCheckFilterResults(m);
}
 
Example 7
Source Project: Drop-seq   Source File: SamRecordSortingIteratorFactory.java    License: MIT License 6 votes vote down vote up
/**
   * @param progressLogger pass null if not interested in progress.
   * @return An iterator with all the records from underlyingIterator, in order defined by comparator.
   */
  public static CloseableIterator<SAMRecord> create(final SAMFileHeader header,
                                         final Iterator<SAMRecord> underlyingIterator,
                                         final Comparator<SAMRecord> comparator,
                                         final ProgressLogger progressLogger) {
      final SortingIteratorFactory.ProgressCallback<SAMRecord> progressCallback;
      if (progressLogger != null)
	progressCallback = new SortingIteratorFactory.ProgressCallback<SAMRecord>() {
              @Override
              public void logProgress(final SAMRecord record) {
                  progressLogger.record(record);
              }
          };
else
	progressCallback = null;
      return SortingIteratorFactory.create(SAMRecord.class,
              underlyingIterator, comparator, new BAMRecordCodec(header),
              SAMFileWriterImpl.getDefaultMaxRecordsInRam(),
              progressCallback);
  }
 
Example 8
Source Project: Drop-seq   Source File: CustomBAMIterators.java    License: MIT License 6 votes vote down vote up
public static CloseableIterator<SAMRecord> getReadsInTagOrder (SamReader reader, String primaryTag) {
	// SamReader reader = SamReaderFactory.makeDefault().open(bamFile);
	SAMSequenceDictionary dict= reader.getFileHeader().getSequenceDictionary();
	List<SAMProgramRecord> programs =reader.getFileHeader().getProgramRecords();
	
	final SAMFileHeader writerHeader = new SAMFileHeader();
       writerHeader.setSortOrder(SAMFileHeader.SortOrder.queryname);
       writerHeader.setSequenceDictionary(dict);
       for (SAMProgramRecord spr : programs) {
       	writerHeader.addProgramRecord(spr);
       }
       final ProgressLogger progressLogger = new ProgressLogger(log, 1000000);
       log.info("Reading in records for TAG name sorting");
       final CloseableIterator<SAMRecord> result =
               SamRecordSortingIteratorFactory.create(writerHeader, reader.iterator(), new StringTagComparator(primaryTag), progressLogger);

	log.info("Sorting finished.");
	return (result);
}
 
Example 9
Source Project: Drop-seq   Source File: CustomBAMIterators.java    License: MIT License 6 votes vote down vote up
/**
 * If the file is sorter in query name order, return an iterator over
 * the file.  Otherwise, sort records in queryname order and return an iterator over those records.
 * @return An iterator over the records in the file, in queryname order.
 */

public static CloseableIterator<SAMRecord> getQuerynameSortedRecords(SamReader reader) {
	if (reader.getFileHeader().getSortOrder().equals(SortOrder.queryname)) {
		return reader.iterator();
	}
	log.info("Input SAM/BAM not in queryname order, sorting...");
	SAMSequenceDictionary dict= reader.getFileHeader().getSequenceDictionary();
	List<SAMProgramRecord> programs =reader.getFileHeader().getProgramRecords();
	
	final SAMFileHeader writerHeader = new SAMFileHeader();
       writerHeader.setSortOrder(SAMFileHeader.SortOrder.queryname);
       writerHeader.setSequenceDictionary(dict);
       for (SAMProgramRecord spr : programs) {
       	writerHeader.addProgramRecord(spr);
       }
       log.info("Reading in records for query name sorting");
       final ProgressLogger progressLogger = new ProgressLogger(log, 1000000, "Sorting reads in query name order");
       final CloseableIterator<SAMRecord> result =
               SamRecordSortingIteratorFactory.create(writerHeader, reader.iterator(), new SAMRecordQueryNameComparator(), progressLogger);
       log.info("Sorting finished.");
       return result; 
}
 
Example 10
Source Project: Drop-seq   Source File: CollapseTagWithContext.java    License: MIT License 6 votes vote down vote up
private PeekableGroupingIterator<SAMRecord> orderReadsByTagsPeekable (final SamReader reader, final String collapseTag, final List<String> contextTag, final int mapQuality, String outTag, ObjectSink<SAMRecord> uninformativeReadsSink) {
	// SORT on the context tags.
	StringTagComparator [] comparators = contextTag.stream().map(x -> new StringTagComparator(x)).toArray(StringTagComparator[]::new);
	final MultiComparator<SAMRecord> multiComparator = new MultiComparator<>(comparators);
	
	// set up filters.
       MapQualityPredicate mapQualityPredicate = CollapseTagWithContext.getMapQualityPredicate(mapQuality);
       RequiredTagPredicate requiredTagPredicate = CollapseTagWithContext.getRequiredTagPredicate(collapseTag, contextTag);
       
       // log progress on read iteration
       ProgressLogger progressLogger = new ProgressLogger(log);
       ProgressLoggingIterator progressLoggingIter = new ProgressLoggingIterator(reader.iterator(), progressLogger);
       
       // apply a default result tag to all reads - this is useful for reads that are not in the analysis and automatically sunk to the writer.
       DefaultTaggingIterator iter = new DefaultTaggingIterator(progressLoggingIter.iterator(), collapseTag, outTag);
       
       // reads that don't pass the filter are sunk, reads that pass are sorted and grouped.
	InformativeReadFilter filter = new InformativeReadFilter(iter, uninformativeReadsSink, mapQualityPredicate, requiredTagPredicate);				
			
	// sort and group the relevant data
	CloseableIterator<SAMRecord> sortedIter = SamRecordSortingIteratorFactory.create(
               reader.getFileHeader(), filter.iterator(), multiComparator, null);
	PeekableGroupingIterator<SAMRecord> groupedIterator = new PeekableGroupingIterator<>(sortedIter, multiComparator);		
	return groupedIterator;
	
}
 
Example 11
Source Project: Drop-seq   Source File: DetectBeadSubstitutionErrors.java    License: MIT License 6 votes vote down vote up
private void repairBAM (final BottomUpCollapseResult result) {

		final SamHeaderAndIterator headerAndIterator = SamFileMergeUtil.mergeInputs(this.INPUT, true);
		SamHeaderUtil.addPgRecord(headerAndIterator.header, this);
		headerAndIterator.header.addComment("Bottom-up edit distance collapse tag " + this.CELL_BARCODE_TAG +" with edit distance " + this.EDIT_DISTANCE+ " filtering ambiguous neighbors=" + this.FILTER_AMBIGUOUS);
		SAMFileWriter writer= new SAMFileWriterFactory().setCreateIndex(CREATE_INDEX).makeSAMOrBAMWriter(headerAndIterator.header, true, OUTPUT);

		ProgressLogger pl = new ProgressLogger(log);
		log.info("Repairing BAM");
		for (SAMRecord r: new IterableAdapter<>(headerAndIterator.iterator)) {
			pl.record(r);
			r=repairBarcode(r, result);
			if (r!=null)
				writer.addAlignment(r);
		}
		CloserUtil.close(headerAndIterator.iterator);
		writer.close();
		log.info("Repair Complete");
	}
 
Example 12
Source Project: Drop-seq   Source File: DetectBeadSynthesisErrors.java    License: MIT License 6 votes vote down vote up
/**
 * For a single cell barcode, gather up all the reads/UMIs to test for UMI errors.
 * @param umiCollectionList A collection of UMIs for a cell.
 * @param umiStringCache A cache to save space on UMIs - there are only 4^length possible UMIs, and length is usually 8-9, so UMIs are often repeated.
 * @param prog A progress logger.
 * @return A BeadSynthesisErrorData object for a single cell barcode across all UMIs.
 */
private BeadSynthesisErrorData buildBeadSynthesisErrorData (final List<UMICollection> umiCollectionList, final StringInterner umiStringCache, final ProgressLogger prog) {
	final String cellBarcode = umiCollectionList.get(0).getCellBarcode();
	BeadSynthesisErrorData bsed = new BeadSynthesisErrorData(cellBarcode);
	for (final UMICollection umis : umiCollectionList) {
		int transcriptCounts = umis.getDigitalExpression(1, 1, false);
		int readCounts = umis.getDigitalExpression(1, 1, true);
		Collection<String> umiCol = umis.getMolecularBarcodes();
		umiCol = getUMIsFromCache(umiCol, umiStringCache);
		bsed.addUMI(umiCol);
		bsed.incrementReads(readCounts);
		bsed.incrementTranscripts(transcriptCounts);
		prog.record(null, 0);
	}
	return bsed;
}
 
Example 13
Source Project: Drop-seq   Source File: DetectBeadSynthesisErrors.java    License: MIT License 6 votes vote down vote up
/**
 * For each problematic cell, replace cell barcodes positions with N.
 * Take the replaced bases and prepend them to the UMI, and trim the last <X> bases off the end of the UMI.
 */
private void cleanBAM (final Map<String, BeadSynthesisErrorData> errorBarcodesWithPositions, final Map<String, String> intendedSequenceMap) {
	log.info("Cleaning BAM");
       final SamHeaderAndIterator headerAndIterator = SamFileMergeUtil.mergeInputs(INPUT, true);
	SamHeaderUtil.addPgRecord(headerAndIterator.header, this);

	SAMFileWriter writer= new SAMFileWriterFactory().setCreateIndex(CREATE_INDEX).makeSAMOrBAMWriter(headerAndIterator.header, true, OUTPUT);
	ProgressLogger pl = new ProgressLogger(log);
	for (SAMRecord r: new IterableAdapter<>(headerAndIterator.iterator)) {
		pl.record(r);
		r=padCellBarcodeFix(r, errorBarcodesWithPositions, intendedSequenceMap, this.CELL_BARCODE_TAG, this.MOLECULAR_BARCODE_TAG, this.EXTREME_BASE_RATIO);
		if (r!=null)
			writer.addAlignment(r);
	}
	CloserUtil.close(headerAndIterator.iterator);
	writer.close();
}
 
Example 14
Source Project: picard   Source File: GtcToVcf.java    License: MIT License 6 votes vote down vote up
private void fillContexts(final SortingCollection<VariantContext> contexts, final InfiniumGTCFile gtcFile,
                          final Build37ExtendedIlluminaManifest manifest, final InfiniumEGTFile egtFile) {
    final ProgressLogger progressLogger = new ProgressLogger(log, 100000, "sorted");

    final Iterator<Build37ExtendedIlluminaManifestRecord> iterator = manifest.extendedIterator();
    int gtcIndex = 0;

    int numVariantsWritten = 0;

    while (iterator.hasNext()) {
        final Build37ExtendedIlluminaManifestRecord record = iterator.next();

        if (!record.isBad()) {
            InfiniumGTCRecord gtcRecord = gtcFile.getRecord(gtcIndex);
            VariantContext context = makeVariantContext(record, gtcRecord, egtFile, progressLogger);
            numVariantsWritten++;
            contexts.add(context);
        }
        gtcIndex++;
    }

    log.info(numVariantsWritten + " Variants were written to file");
    log.info(gtcFile.getNumberOfSnps() + " SNPs in the GTC file");
    log.info(manifest.getNumAssays() + " Variants on the " + manifest.getDescriptorFileName() + " genotyping array manifest file");
}
 
Example 15
Source Project: picard   Source File: SetNmMdAndUqTags.java    License: MIT License 6 votes vote down vote up
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);

    if (reader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
        throw new SAMException("Input must be coordinate-sorted for this program to run. Found: " + reader.getFileHeader().getSortOrder());
    }

    final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(reader.getFileHeader(), true, OUTPUT);
    writer.setProgressLogger(
            new ProgressLogger(log, (int) 1e7, "Wrote", "records"));

    final ReferenceSequenceFileWalker refSeqWalker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE);

    StreamSupport.stream(reader.spliterator(), false)
            .peek(rec -> fixRecord(rec, refSeqWalker))
            .forEach(writer::addAlignment);
    CloserUtil.close(reader);
    writer.close();
    return 0;
}
 
Example 16
Source Project: picard   Source File: SamFormatConverter.java    License: MIT License 6 votes vote down vote up
/**
 * Convert a file from one of sam/bam/cram format to another based on the extension of output.
 *
 * @param input             input file in one of sam/bam/cram format
 * @param output            output to write converted file to, the conversion is based on the extension of this filename
 * @param referenceSequence the reference sequence to use, necessary when reading/writing cram
 * @param createIndex       whether or not an index should be written alongside the output file
 */
public static void convert(final File input, final File output, final File referenceSequence, final Boolean createIndex) {
    IOUtil.assertFileIsReadable(input);
    IOUtil.assertFileIsWritable(output);
    final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(referenceSequence).open(input);
    final SAMFileWriter writer = new SAMFileWriterFactory().makeWriter(reader.getFileHeader(), true, output, referenceSequence);
    if (createIndex && writer.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
        throw new PicardException("Can't CREATE_INDEX unless sort order is coordinate");
    }

    final ProgressLogger progress = new ProgressLogger(Log.getInstance(SamFormatConverter.class));
    for (final SAMRecord rec : reader) {
        writer.addAlignment(rec);
        progress.record(rec);
    }
    CloserUtil.close(reader);
    writer.close();
}
 
Example 17
Source Project: picard   Source File: SortSam.java    License: MIT License 6 votes vote down vote up
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
    ;
    reader.getFileHeader().setSortOrder(SORT_ORDER.getSortOrder());
    final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(reader.getFileHeader(), false, OUTPUT);
    writer.setProgressLogger(
            new ProgressLogger(log, (int) 1e7, "Wrote", "records from a sorting collection"));

    final ProgressLogger progress = new ProgressLogger(log, (int) 1e7, "Read");
    for (final SAMRecord rec : reader) {
        writer.addAlignment(rec);
        progress.record(rec);
    }

    log.info("Finished reading inputs, merging and writing to output now.");

    CloserUtil.close(reader);
    writer.close();
    return 0;
}
 
Example 18
Source Project: picard   Source File: ReplaceSamHeader.java    License: MIT License 6 votes vote down vote up
private void standardReheader(final SAMFileHeader replacementHeader) {
    final SamReader recordReader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).validationStringency(ValidationStringency.SILENT).open(INPUT);
    if (replacementHeader.getSortOrder() != recordReader.getFileHeader().getSortOrder()) {
        throw new PicardException("Sort orders of INPUT (" + recordReader.getFileHeader().getSortOrder().name() +
                ") and HEADER (" + replacementHeader.getSortOrder().name() + ") do not agree.");
    }
    final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(replacementHeader, true, OUTPUT);

    final ProgressLogger progress = new ProgressLogger(Log.getInstance(ReplaceSamHeader.class));
    for (final SAMRecord rec : recordReader) {
        rec.setHeader(replacementHeader);
        writer.addAlignment(rec);
        progress.record(rec);
    }
    writer.close();
    CloserUtil.close(recordReader);
}
 
Example 19
Source Project: picard   Source File: OpticalDuplicateFinder.java    License: MIT License 6 votes vote down vote up
private void fillGraphFromAGroup(final List<? extends PhysicalLocation> wholeList, final List<Integer> groupList, final boolean logProgress, final ProgressLogger progressLoggerForKeeper, final int distance, final GraphUtils.Graph<Integer> opticalDistanceRelationGraph) {

        for (int i = 0; i < groupList.size(); i++) {
            final int iIndex = groupList.get(i);
            final PhysicalLocation currentLoc = wholeList.get(iIndex);
            // The main point of adding this log and if statement (also below) is a workaround a bug in the JVM
            // which causes a deep exception (https://github.com/broadinstitute/picard/issues/472).
            // It seems that this is related to https://bugs.openjdk.java.net/browse/JDK-8033717 which
            // was closed due to non-reproducibility. We came across a bam file that evoked this error
            // every time we tried to duplicate-mark it. The problem seemed to be a duplicate-set of size 500,000,
            // and this loop seemed to kill the JVM for some reason. This logging statement (and the one in the
            // loop below) solved the problem.
            if (logProgress) {
                progressLoggerForKeeper.record(String.format("%d", currentLoc.getReadGroup()), currentLoc.getX());
            }

            for (int j = i + 1; j < groupList.size(); j++) {
                final int jIndex = groupList.get(j);
                final PhysicalLocation other = wholeList.get(jIndex);

                if (closeEnoughShort(currentLoc, other, distance)) {
                    opticalDistanceRelationGraph.addEdge(iIndex, jIndex);
                }
            }
        }
    }
 
Example 20
Source Project: picard   Source File: SortVcf.java    License: MIT License 6 votes vote down vote up
/**
 * Merge the inputs and sort them by adding each input's content to a single SortingCollection.
 * <p/>
 * NB: It would be better to have a merging iterator as in MergeSamFiles, as this would perform better for pre-sorted inputs.
 * Here, we are assuming inputs are unsorted, and so adding their VariantContexts iteratively is fine for now.
 * MergeVcfs exists for simple merging of presorted inputs.
 *
 * @param readers      - a list of VCFFileReaders, one for each input VCF
 * @param outputHeader - The merged header whose information we intend to use in the final output file
 */
private SortingCollection<VariantContext> sortInputs(final List<VCFFileReader> readers, final VCFHeader outputHeader) {
    final ProgressLogger readProgress = new ProgressLogger(log, 25000, "read", "records");

    // NB: The default MAX_RECORDS_IN_RAM may not be appropriate here. VariantContexts are smaller than SamRecords
    // We would have to play around empirically to find an appropriate value. We are not performing this optimization at this time.
    final SortingCollection<VariantContext> sorter =
            SortingCollection.newInstance(
                    VariantContext.class,
                    new VCFRecordCodec(outputHeader, VALIDATION_STRINGENCY != ValidationStringency.STRICT),
                    outputHeader.getVCFRecordComparator(),
                    MAX_RECORDS_IN_RAM,
                    TMP_DIR);
    int readerCount = 1;
    for (final VCFFileReader reader : readers) {
        log.info("Reading entries from input file " + readerCount);
        for (final VariantContext variantContext : reader) {
            sorter.add(variantContext);
            readProgress.record(variantContext.getContig(), variantContext.getStart());
        }
        reader.close();
        readerCount++;
    }
    return sorter;
}
 
Example 21
Source Project: Drop-seq   Source File: TagReadWithInterval.java    License: MIT License 5 votes vote down vote up
@Override
protected int doWork() {
	IOUtil.assertFileIsReadable(INPUT);
	IOUtil.assertFileIsWritable(OUTPUT);
	SamReader inputSam = SamReaderFactory.makeDefault().open(INPUT);
	SAMFileHeader header = inputSam.getFileHeader();
       header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
	SamHeaderUtil.addPgRecord(header, this);

	SAMFileWriter writer= new SAMFileWriterFactory().makeSAMOrBAMWriter(header, true, OUTPUT);

	IntervalList loci = IntervalList.fromFile(this.INTERVALS);
	OverlapDetector<Interval> od =getOverlapDetector (loci);
	ProgressLogger processLogger = new ProgressLogger(log);

	for (SAMRecord record: inputSam) {
		processLogger.record(record);

		if (record.getReadUnmappedFlag()==false)
			// use alignment blocks instead of start/end to properly deal with split reads mapped over exon/exon boundaries.
			record=tagRead(record, od);
		else
			record.setAttribute(this.TAG, null);
		writer.addAlignment(record);

	}

	CloserUtil.close(inputSam);
	writer.close();

	return(0);


}
 
Example 22
Source Project: Drop-seq   Source File: CompareDropSeqAlignments.java    License: MIT License 5 votes vote down vote up
private Iterator<SAMRecord> getQueryNameSortedData (final List<File> bamFiles) {
	SamReaderFactory factory= SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.EAGERLY_DECODE);
	
	SamHeaderAndIterator headerIterator= SamFileMergeUtil.mergeInputs(bamFiles, false, factory);
			
	if (headerIterator.header.getSortOrder().equals(SortOrder.queryname))
		return headerIterator.iterator;
	log.info("Input SAM/BAM not in queryname order, sorting...");
       final ProgressLogger progressLogger = new ProgressLogger(log, 1000000, "Sorting reads in query name order");
       final CloseableIterator<SAMRecord> result = SamRecordSortingIteratorFactory.create(headerIterator.header, headerIterator.iterator, READ_NAME_COMPARATOR, progressLogger);
       log.info("Sorting finished.");
       return result;
}
 
Example 23
Source Project: Drop-seq   Source File: BaseDistributionAtReadPosition.java    License: MIT License 5 votes vote down vote up
BaseDistributionMetricCollection gatherBaseQualities (final File input, final int readNumber) {
	ProgressLogger p = new ProgressLogger(this.log);

	SamReader inputSam = SamReaderFactory.makeDefault().open(input);
	BaseDistributionMetricCollection c = new BaseDistributionMetricCollection();

	MAIN_LOOP:
	for (final SAMRecord samRecord : inputSam) {

		p.record(samRecord);
		if (samRecord.isSecondaryOrSupplementary()) continue;
		boolean readPaired = samRecord.getReadPairedFlag();

		boolean firstRead=false;
		if (!readPaired & readNumber==2)
			continue;
		else if (!readPaired & readNumber==1)
			firstRead=true;
		else
			firstRead = samRecord.getFirstOfPairFlag();

		// if you're looking for the first read and this isn't, or looking for the 2nd read and this isn't, then go to the next read.
		if ((firstRead && readNumber!=1) || (!firstRead && readNumber==1)) continue MAIN_LOOP;

		byte [] bases = samRecord.getReadBases();

		for (int i=0; i<bases.length; i++) {
			char base = (char) (bases[i]);
			int idx=i+1;
			c.addBase(base, idx);
		}
	}

	CloserUtil.close(inputSam);
	return (c);


}
 
Example 24
Source Project: Drop-seq   Source File: FilterBamByTag.java    License: MIT License 5 votes vote down vote up
/**
 *
 * @param in
 * @param out
 * @param values
 */
void processPairedMode (final SamReader in, final SAMFileWriter out, final Set<String> values, final File summaryFile, Integer mapQuality, boolean allowPartial) {
	ProgressLogger progLog = new ProgressLogger(log);
	FilteredReadsMetric m = new FilteredReadsMetric();
	
	PeekableIterator<SAMRecord> iter = new PeekableIterator<>(CustomBAMIterators.getQuerynameSortedRecords(in));
	while (iter.hasNext()) {
		SAMRecord r1 = iter.next();
		progLog.record(r1);
		boolean filterFlag1 = filterRead(r1, this.TAG, values, this.ACCEPT_TAG, mapQuality, allowPartial);
		
		SAMRecord r2 = null;
		if (iter.hasNext()) r2 = iter.peek();
		// check for r2 being null in case the last read is unpaired.
		if (r2!=null && r1.getReadName().equals(r2.getReadName())) {
			// paired read found.
			progLog.record(r2);
			r2=iter.next();
			boolean filterFlag2 = filterRead(r2, this.TAG, values, this.ACCEPT_TAG, mapQuality, allowPartial);
			// if in accept tag mode, if either read shouldn't be filterd accept the pair
			// if in reject mode, if both reads shouldn't be filtered to accept the pair.
			if ((!filterFlag1 || !filterFlag2 & this.ACCEPT_TAG) || (!filterFlag1 && !filterFlag2 & !this.ACCEPT_TAG)) {
				out.addAlignment(r1);
				out.addAlignment(r2);
				m.READS_ACCEPTED+=2;
			} else 
				m.READS_REJECTED+=2;
		} else if (!filterFlag1) {
			out.addAlignment(r1);
			m.READS_ACCEPTED++;
		} else {
			m.READS_REJECTED++;
		}
	}
	CloserUtil.close(in);
	writeSummary(summaryFile, m);
	out.close();
	reportAndCheckFilterResults(m);
}
 
Example 25
Source Project: picard   Source File: FastqToSam.java    License: MIT License 5 votes vote down vote up
/** Creates a simple SAM file from a single fastq file. */
protected int doUnpaired(final FastqReader freader, final SAMFileWriter writer) {
    int readCount = 0;
    final ProgressLogger progress = new ProgressLogger(LOG);
    for ( ; freader.hasNext()  ; readCount++) {
        final FastqRecord frec = freader.next();
        final SAMRecord srec = createSamRecord(writer.getFileHeader(), SequenceUtil.getSamReadNameFromFastqHeader(frec.getReadHeader()) , frec, false) ;
        srec.setReadPairedFlag(false);
        writer.addAlignment(srec);
        progress.record(srec);
    }

    return readCount;
}
 
Example 26
Source Project: picard   Source File: FastqToSam.java    License: MIT License 5 votes vote down vote up
/** More complicated method that takes two fastq files and builds pairing information in the SAM. */
protected int doPaired(final FastqReader freader1, final FastqReader freader2, final SAMFileWriter writer) {
    int readCount = 0;
    final ProgressLogger progress = new ProgressLogger(LOG);
    for ( ; freader1.hasNext() && freader2.hasNext() ; readCount++) {
        final FastqRecord frec1 = freader1.next();
        final FastqRecord frec2 = freader2.next();

        final String frec1Name = SequenceUtil.getSamReadNameFromFastqHeader(frec1.getReadHeader());
        final String frec2Name = SequenceUtil.getSamReadNameFromFastqHeader(frec2.getReadHeader());
        final String baseName = getBaseName(frec1Name, frec2Name, freader1, freader2);

        final SAMRecord srec1 = createSamRecord(writer.getFileHeader(), baseName, frec1, true) ;
        srec1.setFirstOfPairFlag(true);
        srec1.setSecondOfPairFlag(false);
        writer.addAlignment(srec1);
        progress.record(srec1);

        final SAMRecord srec2 = createSamRecord(writer.getFileHeader(), baseName, frec2, true) ;
        srec2.setFirstOfPairFlag(false);
        srec2.setSecondOfPairFlag(true);
        writer.addAlignment(srec2);
        progress.record(srec2);
    }

    if (freader1.hasNext() || freader2.hasNext()) {
        throw new PicardException("Input paired fastq files must be the same length");
    }

    return readCount;
}
 
Example 27
Source Project: picard   Source File: CleanSam.java    License: MIT License 5 votes vote down vote up
/**
 * Do the work after command line has been parsed.
 * RuntimeException may be thrown by this method, and are reported appropriately.
 *
 * @return program exit status.
 */
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    final SamReaderFactory factory = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE);
    if (VALIDATION_STRINGENCY == ValidationStringency.STRICT) {
        factory.validationStringency(ValidationStringency.LENIENT);
    }
    final SamReader reader = factory.open(INPUT);
    final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(reader.getFileHeader(), true, OUTPUT);
    final CloseableIterator<SAMRecord> it = reader.iterator();
    final ProgressLogger progress = new ProgressLogger(Log.getInstance(CleanSam.class));

    // If the read (or its mate) maps off the end of the alignment, clip it
    while (it.hasNext()) {
        final SAMRecord rec = it.next();

        // If the read (or its mate) maps off the end of the alignment, clip it
        AbstractAlignmentMerger.createNewCigarsIfMapsOffEndOfReference(rec);

        // check the read's mapping quality
        if (rec.getReadUnmappedFlag() && 0 != rec.getMappingQuality()) {
            rec.setMappingQuality(0);
        }

        writer.addAlignment(rec);
        progress.record(rec);
    }

    writer.close();
    it.close();
    CloserUtil.close(reader);
    return 0;
}
 
Example 28
Source Project: picard   Source File: VcfFormatConverter.java    License: MIT License 5 votes vote down vote up
@Override
protected int doWork() {
    final ProgressLogger progress = new ProgressLogger(LOG, 10000);
    
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    final VCFFileReader reader = new VCFFileReader(INPUT, REQUIRE_INDEX);
    final VCFHeader header = new VCFHeader(reader.getFileHeader());
    final SAMSequenceDictionary sequenceDictionary = header.getSequenceDictionary();
    if (CREATE_INDEX && sequenceDictionary == null) {
        throw new PicardException("A sequence dictionary must be available in the input file when creating indexed output.");
    }

    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
            .setOutputFile(OUTPUT)
            .setReferenceDictionary(sequenceDictionary);
    if (CREATE_INDEX)
        builder.setOption(Options.INDEX_ON_THE_FLY);
    else
        builder.unsetOption(Options.INDEX_ON_THE_FLY);
    final VariantContextWriter writer = builder.build();
    writer.writeHeader(header);
    final CloseableIterator<VariantContext> iterator = reader.iterator();

    while (iterator.hasNext()) {
        final VariantContext context = iterator.next();
        writer.add(context);
        progress.record(context.getContig(), context.getStart());
    }

    CloserUtil.close(iterator);
    CloserUtil.close(reader);
    writer.close();

    return 0;
}
 
Example 29
Source Project: picard   Source File: MendelianViolationDetector.java    License: MIT License 5 votes vote down vote up
MendelianViolationDetector(final Set<String> skip_chroms, final Set<String> male_chroms, final Set<String> female_chroms,
                       final double min_het_fraction, final int min_gq, final int min_dp, final List<MendelianViolationMetrics> trios,
                       final List<Interval> parIntervals, final ProgressLogger logger) {
SKIP_CHROMS = skip_chroms;
MALE_CHROMS = male_chroms;
FEMALE_CHROMS = female_chroms;
MIN_HET_FRACTION = min_het_fraction;
MIN_GQ = min_gq;
MIN_DP = min_dp;
this.trios = trios;
this.parIntervals = parIntervals;
this.logger = logger;
familyToViolations = new MendelianViolationsByFamily();
}
 
Example 30
Source Project: picard   Source File: SortVcf.java    License: MIT License 5 votes vote down vote up
private void writeSortedOutput(final VCFHeader outputHeader, final SortingCollection<VariantContext> sortedOutput) {
    final ProgressLogger writeProgress = new ProgressLogger(log, 25000, "wrote", "records");
    final EnumSet<Options> options = CREATE_INDEX ? EnumSet.of(Options.INDEX_ON_THE_FLY) : EnumSet.noneOf(Options.class);
    final VariantContextWriter out = new VariantContextWriterBuilder().
            setReferenceDictionary(outputHeader.getSequenceDictionary()).
            setOptions(options).
            setOutputFile(OUTPUT).build();
    out.writeHeader(outputHeader);
    for (final VariantContext variantContext : sortedOutput) {
        out.add(variantContext);
        writeProgress.record(variantContext.getContig(), variantContext.getStart());
    }
    out.close();
}