Java Code Examples for htsjdk.samtools.metrics.MetricsFile#write()

The following examples show how to use htsjdk.samtools.metrics.MetricsFile#write() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: GatherReadQualityMetrics.java    From Drop-seq with MIT License 6 votes vote down vote up
/**
 * Do the work after command line has been parsed. RuntimeException may be
 * thrown by this method, and are reported appropriately.
 *
 * @return program exit status.
 */
@Override
protected int doWork() {
	IOUtil.assertFileIsReadable(INPUT);
	IOUtil.assertFileIsWritable(OUTPUT);
	Map<String, ReadQualityMetrics> metricsMap = gatherMetrics(INPUT);
	MetricsFile<ReadQualityMetrics, Integer> outFile = new MetricsFile<>();
	outFile.addHistogram(metricsMap.get(this.GLOBAL).getHistogram());
       // Make sure the GLOBAL metrics is added first
       outFile.addMetric(metricsMap.remove(this.GLOBAL));
	for (ReadQualityMetrics metrics: metricsMap.values())
		outFile.addMetric(metrics);
	BufferedWriter w = IOUtil.openFileForBufferedWriting(OUTPUT);
	outFile.write(w);
	// close properly.
	try {
		w.close();
	} catch (IOException io) {
		throw new TranscriptomeException("Problem writing file", io);
	}
	return 0;
}
 
Example 2
Source File: CollectBaseDistributionByCycle.java    From picard with MIT License 6 votes vote down vote up
@Override
protected void finish() {
    final MetricsFile<BaseDistributionByCycleMetrics, ?> metrics = getMetricsFile();
    hist.addToMetricsFile(metrics);
    metrics.write(OUTPUT);
    if (hist.isEmpty()) {
        log.warn("No valid bases found in input file. No plot will be produced.");
    } else {
        final int rResult = RExecutor.executeFromClasspath("picard/analysis/baseDistributionByCycle.R",
                OUTPUT.getAbsolutePath(),
                CHART_OUTPUT.getAbsolutePath(),
                INPUT.getName(),
                plotSubtitle);
        if (rResult != 0) {
            throw new PicardException("R script nucleotideDistributionByCycle.R failed with return code " + rResult);
        }
    }
}
 
Example 3
Source File: UmiAwareMarkDuplicatesWithMateCigar.java    From picard with MIT License 6 votes vote down vote up
@Override
protected int doWork() {
    // Before we do anything, make sure the UMI_METRICS_FILE can be written to.
    IOUtil.assertFileIsWritable(UMI_METRICS_FILE);

    // Perform Mark Duplicates work
    final int retval = super.doWork();

    // Add results in metrics to the metricsFile
    final MetricsFile<UmiMetrics, Double> metricsFile = getMetricsFile();
    for (final UmiMetrics metric : metrics.values()) {
        metricsFile.addMetric(metric);
    }

    metricsFile.write(UMI_METRICS_FILE);
    return retval;
}
 
Example 4
Source File: CollectIlluminaBasecallingMetrics.java    From picard with MIT License 6 votes vote down vote up
/**
 * Handles completion of metric collection. Metrics are computed from counts of data and written out to a file.
 */
private void onComplete() {
    try {
        final MetricsFile<IlluminaBasecallingMetrics, Comparable<?>> file = getMetricsFile();
        final IlluminaMetricCounts allLaneCounts = new IlluminaMetricCounts(null, null, LANE);
        for (final String s : barcodeToMetricCounts.keySet()) {
            final IlluminaMetricCounts counts = barcodeToMetricCounts.get(s);
            counts.addMetricsToFile(file);
            allLaneCounts.addIlluminaMetricCounts(counts);
        }
        if (!barcodeToMetricCounts.keySet().contains("")) {
            allLaneCounts.addMetricsToFile(file);  // detect non-indexed case
        }
        file.write(OUTPUT);
    } catch (final Exception ex) {
        throw new PicardException("Error writing output file " + OUTPUT.getPath(), ex);
    }
}
 
Example 5
Source File: CollectGcBiasMetrics.java    From picard with MIT License 6 votes vote down vote up
private void writeResultsToFiles() {
    final MetricsFile<GcBiasMetrics, Integer> file = getMetricsFile();
    final MetricsFile<GcBiasDetailMetrics, ?> detailMetricsFile = getMetricsFile();
    final MetricsFile<GcBiasSummaryMetrics, ?> summaryMetricsFile = getMetricsFile();
    multiCollector.addAllLevelsToFile(file);
    final List<GcBiasMetrics> gcBiasMetricsList = file.getMetrics();
    for (final GcBiasMetrics gcbm : gcBiasMetricsList) {
        final List<GcBiasDetailMetrics> gcDetailList = gcbm.DETAILS.getMetrics();
        for (final GcBiasDetailMetrics d : gcDetailList) {
            detailMetricsFile.addMetric(d);
        }
        summaryMetricsFile.addMetric(gcbm.SUMMARY);
    }
    detailMetricsFile.write(OUTPUT);
    summaryMetricsFile.write(SUMMARY_OUTPUT);

    final NumberFormat fmt = NumberFormat.getIntegerInstance();
    fmt.setGroupingUsed(true);
    RExecutor.executeFromClasspath(R_SCRIPT,
            OUTPUT.getAbsolutePath(),
            SUMMARY_OUTPUT.getAbsolutePath(),
            CHART_OUTPUT.getAbsolutePath(),
            String.valueOf(SCAN_WINDOW_SIZE));
}
 
Example 6
Source File: CollectAlignmentSummaryMetrics.java    From picard with MIT License 5 votes vote down vote up
@Override protected void finish() {
    collector.finish();

    final MetricsFile<AlignmentSummaryMetrics, Comparable<?>> file = getMetricsFile();
    collector.addAllLevelsToFile(file);

    file.write(OUTPUT);
}
 
Example 7
Source File: SamComparison.java    From picard with MIT License 5 votes vote down vote up
public void writeReport(final File output, final List<Header> headers) {
    final MetricsFile<SamComparisonMetric, ?> comparisonMetricFile = new MetricsFile<>();

    headers.forEach(comparisonMetricFile::addHeader);
    comparisonMetricFile.addAllMetrics(Collections.singletonList(comparisonMetric));
    comparisonMetricFile.write(output);
}
 
Example 8
Source File: CollectQualityYieldMetrics.java    From picard with MIT License 5 votes vote down vote up
@Override
protected void finish() {
    final MetricsFile<QualityYieldMetrics, Integer> metricsFile = getMetricsFile();
    this.collector.finish();
    this.collector.addMetricsToFile(metricsFile);
    metricsFile.write(OUTPUT);
}
 
Example 9
Source File: DigitalExpression.java    From Drop-seq with MIT License 5 votes vote down vote up
public static void writeSummary(final Collection<DESummary> summaryCollection, final MetricsFile<DESummary, Integer> summaryMetricsFile, final File outFile) {
    List<DESummary> sc = new ArrayList<>(summaryCollection);
    Collections.sort(sc, DigitalExpression.TRANSCRIPT_ORDER_DESCENDING);
    for (DESummary z: sc)
        summaryMetricsFile.addMetric(z);
    summaryMetricsFile.write(outFile);
}
 
Example 10
Source File: FilterBamByTag.java    From Drop-seq with MIT License 5 votes vote down vote up
/**
 * Write the summary output file of reads accepted/rejected.
 * @param summaryOutputFile
 * @param metrics
 */
private void writeSummary (File summaryOutputFile, FilteredReadsMetric metrics) {
	if (summaryOutputFile!=null) {
		MetricsFile<FilteredReadsMetric, Integer> outSummary = getMetricsFile();
		outSummary.addMetric(metrics);
		outSummary.write(summaryOutputFile);		
	}
}
 
Example 11
Source File: FilterBam.java    From Drop-seq with MIT License 5 votes vote down vote up
/**
 * Write the summary output file of reads accepted/rejected.
 * @param summaryOutputFile
 * @param metrics
 */
private void writeSummary (File summaryOutputFile, FilteredReadsMetric metrics) {
	if (summaryOutputFile!=null) {
		MetricsFile<FilteredReadsMetric, Integer> outSummary = getMetricsFile();
		outSummary.addMetric(metrics);
		outSummary.write(summaryOutputFile);		
	}
}
 
Example 12
Source File: CollectSamErrorMetrics.java    From picard with MIT License 5 votes vote down vote up
/**
 * Given a "full" BaseCalculator, get all of its metrics and put them in the appropriate Metrics file.
 *
 * @param locusAggregator a BaseCalculator that has been "loaded up" with bases.
 */
private void writeMetricsFileForAggregator(final BaseErrorAggregation locusAggregator) {
    final MetricsFile<ErrorMetric, Integer> file = getMetricsFile();

    ErrorMetric.setPriorError(QualityUtil.getErrorProbabilityFromPhredScore(PRIOR_Q));

    for (final ErrorMetric metric : locusAggregator.getMetrics()) {
        metric.calculateDerivedFields();
        file.addMetric(metric);
    }

    file.write(new File(OUTPUT + "." + locusAggregator.getSuffix()));
}
 
Example 13
Source File: CollectInsertSizeMetrics.java    From picard with MIT License 5 votes vote down vote up
@Override protected void finish() {
    multiCollector.finish();

    final MetricsFile<InsertSizeMetrics, Integer> file = getMetricsFile();
    multiCollector.addAllLevelsToFile(file);

    if(file.getNumHistograms() == 0) {
        //can happen if user sets MINIMUM_PCT = 0.5, etc.
        log.warn("All data categories were discarded because they contained < " + MINIMUM_PCT +
                 " of the total aligned paired data.");
        final InsertSizeMetricsCollector.PerUnitInsertSizeMetricsCollector allReadsCollector = (InsertSizeMetricsCollector.PerUnitInsertSizeMetricsCollector) multiCollector.getAllReadsCollector();
        log.warn("Total mapped pairs in all categories: " + (allReadsCollector == null ? allReadsCollector : allReadsCollector.getTotalInserts()));
    }
    else  {
        file.write(OUTPUT);

        final List<String> plotArgs = new ArrayList<>();
        Collections.addAll(plotArgs, OUTPUT.getAbsolutePath(), Histogram_FILE.getAbsolutePath(), INPUT.getName());

        if (HISTOGRAM_WIDTH != null) {
            plotArgs.add(String.valueOf(HISTOGRAM_WIDTH));
        }
        else if (MIN_HISTOGRAM_WIDTH != null) {
            final int max = (int) file.getAllHistograms().stream().mapToDouble(Histogram::getMax).max().getAsDouble();
            plotArgs.add(String.valueOf(Math.max(max, MIN_HISTOGRAM_WIDTH)));
        }

        final int rResult = RExecutor.executeFromClasspath(Histogram_R_SCRIPT, plotArgs.toArray(new String[0]));
        if (rResult != 0) {
            throw new PicardException("R script " + Histogram_R_SCRIPT + " failed with return code " + rResult);
        }
    }
}
 
Example 14
Source File: CreateVerifyIDIntensityContaminationMetricsFile.java    From picard with MIT License 5 votes vote down vote up
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    final File metricsFile = new File(OUTPUT + "." + FILE_EXTENSION);
    IOUtil.assertFileIsWritable(metricsFile);

    final MetricsFile<VerifyIDIntensityContaminationMetrics, ?> verifyIDIntensityContaminationMetricsFile = getMetricsFile();

    final Pattern HEADER_PATTERN = Pattern.compile("^ID\\s+%Mix\\s+LLK\\s+LLK0\\s*$");
    final Pattern DASHES_PATTERN = Pattern.compile("^[-]+$");
    final Pattern DATA_PATTERN = Pattern.compile("^(\\d+)\\s+([0-9]*\\.?[0-9]+)\\s+([-0-9]*\\.?[0-9]+)\\s+([-0-9]*\\.?[0-9]+)\\s*$");
    try (BufferedReader br = new BufferedReader(new FileReader(INPUT))) {
        String line;
        line = br.readLine();
        lineMatch(line, HEADER_PATTERN);
        line = br.readLine();
        lineMatch(line, DASHES_PATTERN);
        while ((line = br.readLine()) != null) {
            final Matcher m = lineMatch(line, DATA_PATTERN);
            // Load up and store the metrics
            final VerifyIDIntensityContaminationMetrics metrics = new VerifyIDIntensityContaminationMetrics();
            metrics.ID = Integer.parseInt(m.group(1));
            metrics.PCT_MIX = Double.parseDouble(m.group(2));
            metrics.LLK = Double.parseDouble(m.group(3));
            metrics.LLK0 = Double.parseDouble(m.group(4));

            verifyIDIntensityContaminationMetricsFile.addMetric(metrics);
        }
        verifyIDIntensityContaminationMetricsFile.write(metricsFile);
    } catch (IOException e) {
        throw new PicardException("Error parsing VerifyIDIntensity Output", e);
    }
    return 0;
}
 
Example 15
Source File: CollectRrbsMetrics.java    From picard with MIT License 4 votes vote down vote up
@Override
protected int doWork() {
    if (!METRICS_FILE_PREFIX.endsWith(".")) {
        METRICS_FILE_PREFIX = METRICS_FILE_PREFIX + ".";
    }
    final File SUMMARY_OUT = new File(METRICS_FILE_PREFIX + SUMMARY_FILE_EXTENSION);
    final File DETAILS_OUT = new File(METRICS_FILE_PREFIX + DETAIL_FILE_EXTENSION);
    final File PLOTS_OUT = new File(METRICS_FILE_PREFIX + PDF_FILE_EXTENSION);
    assertIoFiles(SUMMARY_OUT, DETAILS_OUT, PLOTS_OUT);

    final SamReader samReader = SamReaderFactory.makeDefault().open(INPUT);
    if (!ASSUME_SORTED && samReader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
        throw new PicardException("The input file " + INPUT.getAbsolutePath() + " does not appear to be coordinate sorted");
    }

    final ReferenceSequenceFileWalker refWalker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE);
    final ProgressLogger progressLogger = new ProgressLogger(log);

    final RrbsMetricsCollector metricsCollector = new RrbsMetricsCollector(METRIC_ACCUMULATION_LEVEL, samReader.getFileHeader().getReadGroups(),
            C_QUALITY_THRESHOLD, NEXT_BASE_QUALITY_THRESHOLD, MINIMUM_READ_LENGTH, MAX_MISMATCH_RATE);

    for (final SAMRecord samRecord : samReader) {
        progressLogger.record(samRecord);
        if (!samRecord.getReadUnmappedFlag() && !isSequenceFiltered(samRecord.getReferenceName())) {
            final ReferenceSequence referenceSequence = refWalker.get(samRecord.getReferenceIndex());
            metricsCollector.acceptRecord(samRecord, referenceSequence);
        }
    }
    metricsCollector.finish();
    final MetricsFile<RrbsMetrics, Comparable<?>> rrbsMetrics = getMetricsFile();
    metricsCollector.addAllLevelsToFile(rrbsMetrics);

    // Using RrbsMetrics as a way to get both of the metrics objects through the MultiLevelCollector. Once
    // we get it out split it apart to the two separate MetricsFiles and write them to file
    final MetricsFile<RrbsSummaryMetrics, ?> summaryFile = getMetricsFile();
    final MetricsFile<RrbsCpgDetailMetrics, ?> detailsFile = getMetricsFile();
    for (final RrbsMetrics rrbsMetric : rrbsMetrics.getMetrics()) {
        summaryFile.addMetric(rrbsMetric.getSummaryMetrics());
        for (final RrbsCpgDetailMetrics detailMetric : rrbsMetric.getDetailMetrics()) {
            detailsFile.addMetric(detailMetric);
        }
    }
    summaryFile.write(SUMMARY_OUT);
    detailsFile.write(DETAILS_OUT);
    RExecutor.executeFromClasspath(R_SCRIPT, DETAILS_OUT.getAbsolutePath(), SUMMARY_OUT.getAbsolutePath(), PLOTS_OUT.getAbsolutePath());
    CloserUtil.close(samReader);
    return 0;
}
 
Example 16
Source File: MarkIlluminaAdapters.java    From picard with MIT License 4 votes vote down vote up
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(METRICS);

    final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
    final SAMFileHeader.SortOrder order = in.getFileHeader().getSortOrder();
    SAMFileWriter out = null;
    if (OUTPUT != null) {
        IOUtil.assertFileIsWritable(OUTPUT);
        out = new SAMFileWriterFactory().makeSAMOrBAMWriter(in.getFileHeader(), true, OUTPUT);
    }

    final Histogram<Integer> histo = new Histogram<Integer>("clipped_bases", "read_count");

    // Combine any adapters and custom adapter pairs from the command line into an array for use in clipping
    final AdapterPair[] adapters;
    {
        final List<AdapterPair> tmp = new ArrayList<AdapterPair>();
        tmp.addAll(ADAPTERS);
        if (FIVE_PRIME_ADAPTER != null && THREE_PRIME_ADAPTER != null) {
            tmp.add(new CustomAdapterPair(FIVE_PRIME_ADAPTER, THREE_PRIME_ADAPTER));
        }
        adapters = tmp.toArray(new AdapterPair[tmp.size()]);
    }

    ////////////////////////////////////////////////////////////////////////
    // Main loop that consumes reads, clips them and writes them to the output
    ////////////////////////////////////////////////////////////////////////
    final ProgressLogger progress = new ProgressLogger(log, 1000000, "Read");
    final SAMRecordIterator iterator = in.iterator();

    final AdapterMarker adapterMarker = new AdapterMarker(ADAPTER_TRUNCATION_LENGTH, adapters).
            setMaxPairErrorRate(MAX_ERROR_RATE_PE).setMinPairMatchBases(MIN_MATCH_BASES_PE).
            setMaxSingleEndErrorRate(MAX_ERROR_RATE_SE).setMinSingleEndMatchBases(MIN_MATCH_BASES_SE).
            setNumAdaptersToKeep(NUM_ADAPTERS_TO_KEEP).
            setThresholdForSelectingAdaptersToKeep(PRUNE_ADAPTER_LIST_AFTER_THIS_MANY_ADAPTERS_SEEN);

    while (iterator.hasNext()) {
        final SAMRecord rec = iterator.next();
        final SAMRecord rec2 = rec.getReadPairedFlag() && iterator.hasNext() ? iterator.next() : null;
        rec.setAttribute(ReservedTagConstants.XT, null);

        // Do the clipping one way for PE and another for SE reads
        if (rec.getReadPairedFlag()) {
            // Assert that the input file is in query name order only if we see some PE reads
            if (order != SAMFileHeader.SortOrder.queryname) {
                throw new PicardException("Input BAM file must be sorted by queryname");
            }

            if (rec2 == null) throw new PicardException("Missing mate pair for paired read: " + rec.getReadName());
            rec2.setAttribute(ReservedTagConstants.XT, null);

            // Assert that we did in fact just get two mate pairs
            if (!rec.getReadName().equals(rec2.getReadName())) {
                throw new PicardException("Adjacent reads expected to be mate-pairs have different names: " +
                        rec.getReadName() + ", " + rec2.getReadName());
            }

            // establish which of pair is first and which second
            final SAMRecord first, second;

            if (rec.getFirstOfPairFlag() && rec2.getSecondOfPairFlag()) {
                first = rec;
                second = rec2;
            } else if (rec.getSecondOfPairFlag() && rec2.getFirstOfPairFlag()) {
                first = rec2;
                second = rec;
            } else {
                throw new PicardException("Two reads with same name but not correctly marked as 1st/2nd of pair: " + rec.getReadName());
            }

            adapterMarker.adapterTrimIlluminaPairedReads(first, second);
        } else {
            adapterMarker.adapterTrimIlluminaSingleRead(rec);
        }

        // Then output the records, update progress and metrics
        for (final SAMRecord r : new SAMRecord[]{rec, rec2}) {
            if (r != null) {
                progress.record(r);
                if (out != null) out.addAlignment(r);

                final Integer clip = r.getIntegerAttribute(ReservedTagConstants.XT);
                if (clip != null) histo.increment(r.getReadLength() - clip + 1);
            }
        }
    }

    if (out != null) out.close();

    // Lastly output the metrics to file
    final MetricsFile<?, Integer> metricsFile = getMetricsFile();
    metricsFile.setHistogram(histo);
    metricsFile.write(METRICS);

    CloserUtil.close(in);
    return 0;
}
 
Example 17
Source File: CollectTargetedMetrics.java    From picard with MIT License 4 votes vote down vote up
/**
 * Asserts that files are readable and writable and then fires off an
 * HsMetricsCalculator instance to do the real work.
 */
protected int doWork() {
    for (final File targetInterval : TARGET_INTERVALS) IOUtil.assertFileIsReadable(targetInterval);
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    if (PER_TARGET_COVERAGE != null) IOUtil.assertFileIsWritable(PER_TARGET_COVERAGE);

    final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
    final IntervalList targetIntervals = IntervalList.fromFiles(TARGET_INTERVALS);

    // Validate that the targets and baits have the same references as the reads file
    SequenceUtil.assertSequenceDictionariesEqual(
            reader.getFileHeader().getSequenceDictionary(),
            targetIntervals.getHeader().getSequenceDictionary());
    SequenceUtil.assertSequenceDictionariesEqual(
            reader.getFileHeader().getSequenceDictionary(),
            getProbeIntervals().getHeader().getSequenceDictionary()
    );

    ReferenceSequenceFile ref = null;
    if (REFERENCE_SEQUENCE != null) {
        IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
        ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(REFERENCE_SEQUENCE);
        SequenceUtil.assertSequenceDictionariesEqual(
                reader.getFileHeader().getSequenceDictionary(), ref.getSequenceDictionary(),
                INPUT, REFERENCE_SEQUENCE
        );
    }

    final COLLECTOR collector = makeCollector(
            METRIC_ACCUMULATION_LEVEL,
            reader.getFileHeader().getReadGroups(),
            ref,
            PER_TARGET_COVERAGE,
            PER_BASE_COVERAGE,
            targetIntervals,
            getProbeIntervals(),
            getProbeSetName(),
            NEAR_DISTANCE
    );

    final ProgressLogger progress = new ProgressLogger(log);
    for (final SAMRecord record : reader) {
        collector.acceptRecord(record, null);
        progress.record(record);
    }

    // Write the output file
    final MetricsFile<METRIC, Integer> metrics = getMetricsFile();
    collector.finish();

    collector.addAllLevelsToFile(metrics);

    metrics.write(OUTPUT);

    if (THEORETICAL_SENSITIVITY_OUTPUT != null) {
        // Write out theoretical sensitivity results.
        final MetricsFile<TheoreticalSensitivityMetrics, ?> theoreticalSensitivityMetrics = getMetricsFile();
        log.info("Calculating theoretical sentitivity at " + ALLELE_FRACTION.size() + " allele fractions.");
        List<TheoreticalSensitivityMetrics> tsm = TheoreticalSensitivity.calculateSensitivities(SAMPLE_SIZE, collector.getDepthHistogram(), collector.getBaseQualityHistogram(), ALLELE_FRACTION);
        theoreticalSensitivityMetrics.addAllMetrics(tsm);
        theoreticalSensitivityMetrics.write(THEORETICAL_SENSITIVITY_OUTPUT);
    }

    CloserUtil.close(reader);
    return 0;
}
 
Example 18
Source File: CollectWgsMetrics.java    From picard with MIT License 4 votes vote down vote up
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
    INTERVALS = intervalArugmentCollection.getIntervalFile();
    if (INTERVALS != null) {
        IOUtil.assertFileIsReadable(INTERVALS);
    }
    if (THEORETICAL_SENSITIVITY_OUTPUT != null) {
        IOUtil.assertFileIsWritable(THEORETICAL_SENSITIVITY_OUTPUT);
    }

    // it doesn't make sense for the locus accumulation cap to be lower than the coverage cap
    if (LOCUS_ACCUMULATION_CAP < COVERAGE_CAP) {
        log.warn("Setting the LOCUS_ACCUMULATION_CAP to be equal to the COVERAGE_CAP (" + COVERAGE_CAP + ") because it should not be lower");
        LOCUS_ACCUMULATION_CAP = COVERAGE_CAP;
    }

    // Setup all the inputs
    final ProgressLogger progress = new ProgressLogger(log, 10000000, "Processed", "loci");
    final ReferenceSequenceFileWalker refWalker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE);
    final SamReader in = getSamReader();
    final AbstractLocusIterator iterator = getLocusIterator(in);

    final List<SamRecordFilter> filters = new ArrayList<>();
    final CountingFilter adapterFilter = new CountingAdapterFilter();
    final CountingFilter mapqFilter = new CountingMapQFilter(MINIMUM_MAPPING_QUALITY);
    final CountingFilter dupeFilter = new CountingDuplicateFilter();
    final CountingPairedFilter pairFilter = new CountingPairedFilter();
    // The order in which filters are added matters!
    filters.add(new SecondaryAlignmentFilter()); // Not a counting filter because we never want to count reads twice
    filters.add(adapterFilter);
    filters.add(mapqFilter);
    filters.add(dupeFilter);
    if (!COUNT_UNPAIRED) {
        filters.add(pairFilter);
    }
    iterator.setSamFilters(filters);
    iterator.setMappingQualityScoreCutoff(0); // Handled separately because we want to count bases
    iterator.setIncludeNonPfReads(false);

    final AbstractWgsMetricsCollector<?> collector = getCollector(COVERAGE_CAP, getIntervalsToExamine());
    final WgsMetricsProcessor processor = getWgsMetricsProcessor(progress, refWalker, iterator, collector);
    processor.processFile();

    final MetricsFile<WgsMetrics, Integer> out = getMetricsFile();
    processor.addToMetricsFile(out, INCLUDE_BQ_HISTOGRAM, dupeFilter, adapterFilter, mapqFilter, pairFilter);
    out.write(OUTPUT);

    if (THEORETICAL_SENSITIVITY_OUTPUT != null) {
        // Write out theoretical sensitivity results.
        final MetricsFile<TheoreticalSensitivityMetrics, ?> theoreticalSensitivityMetrics = getMetricsFile();
        log.info("Calculating theoretical sentitivity at " + ALLELE_FRACTION.size() + " allele fractions.");
        List<TheoreticalSensitivityMetrics> tsm = TheoreticalSensitivity.calculateSensitivities(SAMPLE_SIZE, collector.getUnfilteredDepthHistogram(), collector.getUnfilteredBaseQHistogram(), ALLELE_FRACTION);
        theoreticalSensitivityMetrics.addAllMetrics(tsm);
        theoreticalSensitivityMetrics.write(THEORETICAL_SENSITIVITY_OUTPUT);
    }

    return 0;
}
 
Example 19
Source File: CollectVariantCallingMetrics.java    From picard with MIT License 4 votes vote down vote up
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsReadable(DBSNP);
    if (TARGET_INTERVALS != null) IOUtil.assertFileIsReadable(TARGET_INTERVALS);
    if (SEQUENCE_DICTIONARY != null) IOUtil.assertFileIsReadable(SEQUENCE_DICTIONARY.toPath());

    final boolean requiresIndex = this.TARGET_INTERVALS != null || this.THREAD_COUNT > 1;
    final VCFFileReader variantReader = new VCFFileReader(INPUT, requiresIndex);
    final VCFHeader vcfHeader = variantReader.getFileHeader();
    CloserUtil.close(variantReader);

    final SAMSequenceDictionary sequenceDictionary =
            SAMSequenceDictionaryExtractor.extractDictionary(SEQUENCE_DICTIONARY == null ? INPUT.toPath() : SEQUENCE_DICTIONARY.toPath());

    final IntervalList targetIntervals = (TARGET_INTERVALS == null) ? null : IntervalList.fromFile(TARGET_INTERVALS).uniqued();

    log.info("Loading dbSNP file ...");
    final DbSnpBitSetUtil.DbSnpBitSets dbsnp = DbSnpBitSetUtil.createSnpAndIndelBitSets(DBSNP, sequenceDictionary, targetIntervals, Optional.of(log));

    log.info("Starting iteration of variants.");

    final VariantProcessor.Builder<CallingMetricAccumulator, CallingMetricAccumulator.Result> builder =
            VariantProcessor.Builder
                    .generatingAccumulatorsBy(() -> {
                        CallingMetricAccumulator accumulator = GVCF_INPUT ? new GvcfMetricAccumulator(dbsnp) : new CallingMetricAccumulator(dbsnp);
                        accumulator.setup(vcfHeader);
                        return accumulator;
                    })
                    .combiningResultsBy(CallingMetricAccumulator.Result::merge)
                    .withInput(INPUT)
                    .multithreadingBy(THREAD_COUNT);

    if (targetIntervals != null) {
        builder.limitingProcessedRegionsTo(targetIntervals);
    }

    final CallingMetricAccumulator.Result result = builder.build().process();

    // Fetch and write the metrics.
    final MetricsFile<CollectVariantCallingMetrics.VariantCallingDetailMetrics, Integer> detail = getMetricsFile();
    final MetricsFile<CollectVariantCallingMetrics.VariantCallingSummaryMetrics, Integer> summary = getMetricsFile();
    summary.addMetric(result.summary);
    result.details.forEach(detail::addMetric);

    final String outputPrefix = OUTPUT.getAbsolutePath() + ".";
    detail.write(new File(outputPrefix + CollectVariantCallingMetrics.VariantCallingDetailMetrics.getFileExtension()));
    summary.write(new File(outputPrefix + CollectVariantCallingMetrics.VariantCallingSummaryMetrics.getFileExtension()));

    return 0;
}
 
Example 20
Source File: ComputeUMISharing.java    From Drop-seq with MIT License 4 votes vote down vote up
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    // Make sure this is modifiable
    EDIT_DISTANCE = new ArrayList<>(EDIT_DISTANCE);
    while (EDIT_DISTANCE.size() < COUNT_TAG.size()) {
        EDIT_DISTANCE.add(0);
    }
    parentEditDistanceMatcher = new ParentEditDistanceMatcher(this.COUNT_TAG, this.EDIT_DISTANCE, this.FIND_INDELS, this.NUM_THREADS);

    SamReader reader = SamReaderFactory.makeDefault().open(INPUT);
    final ProgressLogger progressLogger = new ProgressLogger(log, 1000000, "Sorting");
    Iterator<SAMRecord> iter = reader.iterator();
    if (LOCUS_FUNCTION_LIST.size() > 0) {
        iter = new GeneFunctionIteratorWrapper(iter, this.GENE_NAME_TAG,
                this.GENE_STRAND_TAG, this.GENE_FUNCTION_TAG, false, this.STRAND_STRATEGY,
                this.LOCUS_FUNCTION_LIST);
    }

    CloseableIterator<SAMRecord> sortedIter = SortingIteratorFactory.create(SAMRecord.class, iter,
            PARENT_CHILD_COMPARATOR, new BAMRecordCodec(reader.getFileHeader()), MAX_RECORDS_IN_RAM,
            (SortingIteratorFactory.ProgressCallback<SAMRecord>) progressLogger::record);

    PeekableIterator<List<SAMRecord>> subgroupIterator =
            new PeekableIterator<List<SAMRecord>>(new GroupingIterator<SAMRecord>(
                    new ProgressLoggingIterator(sortedIter, new ProgressLogger(log, 1000000, "Grouping")),
                    GROUPING_COMPARATOR));

    MetricsFile<UmiSharingMetrics, Integer> outFile = getMetricsFile();
    List<SAMRecord> parentSubgroup = null;
    Set<TagValues> parentTuples = new HashSet<>();

    while (subgroupIterator.hasNext()) {
        if (parentSubgroup == null ||
        !parentSubgroup.get(0).getAttribute(COLLAPSE_TAG).equals(subgroupIterator.peek().get(0).getAttribute(COLLAPSE_TAG))) {
            parentSubgroup = subgroupIterator.next();
            parentTuples = parentEditDistanceMatcher.getValues(parentSubgroup);                
        } else {                
            final List<SAMRecord> childSubgroup = subgroupIterator.next();
            final Set<TagValues> childTuples = parentEditDistanceMatcher.getValues(childSubgroup);                
            final UmiSharingMetrics metrics = new UmiSharingMetrics();
            metrics.PARENT = parentSubgroup.get(0).getAttribute(COLLAPSE_TAG).toString();
            metrics.CHILD = childSubgroup.get(0).getAttribute(UNCOLLAPSED_TAG).toString();
            metrics.NUM_PARENT = parentTuples.size();
            metrics.NUM_CHILD = childTuples.size();
            metrics.NUM_SHARED = parentEditDistanceMatcher.computeNumShared(parentTuples, childTuples);
            metrics.FRAC_SHARED = metrics.NUM_SHARED/(double)metrics.NUM_CHILD;
            outFile.addMetric(metrics);
        }
    }
    BufferedWriter w = IOUtil.openFileForBufferedWriting(OUTPUT);
    outFile.write(w);
    try {
        w.close();
    } catch (IOException e) {
        throw new RuntimeIOException("Problem writing " + OUTPUT.getAbsolutePath(), e);
    }
    CloserUtil.close(reader);
    return 0;
}