Java Code Examples for htsjdk.samtools.metrics.MetricsFile#getAllHistograms()

The following examples show how to use htsjdk.samtools.metrics.MetricsFile#getAllHistograms() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: CollectRnaSeqMetrics.java    From picard with MIT License 6 votes vote down vote up
@Override
protected void finish() {
    collector.finish();

    final MetricsFile<RnaSeqMetrics, Integer> file = getMetricsFile();
    collector.addAllLevelsToFile(file);
    file.write(OUTPUT);

    boolean atLeastOneHistogram = false;
    for (final Histogram<Integer> histo : file.getAllHistograms()) {
        atLeastOneHistogram = atLeastOneHistogram || !histo.isEmpty();
    }
    // Generate the coverage by position plot
    if (CHART_OUTPUT != null && atLeastOneHistogram) {
        final int rResult = RExecutor.executeFromClasspath("picard/analysis/rnaSeqCoverage.R",
                                                           OUTPUT.getAbsolutePath(),
                                                           CHART_OUTPUT.getAbsolutePath(),
                                                           INPUT.getName(),
                                                           this.plotSubtitle);

        if (rResult != 0) {
            throw new PicardException("Problem invoking R to generate plot.");
        }
    }
}
 
Example 2
Source File: MarkDuplicatesSetSizeHistogramTester.java    From picard with MIT License 6 votes vote down vote up
@Override
public void test() throws IOException {
    final MetricsFile<DuplicationMetrics, Double> metricsOutput = testMetrics();

    // Check contents of set size bin against expected values
    if (!expectedSetSizeMap.isEmpty()) {
        boolean checked = false;
        for (final Histogram<Double> histo : metricsOutput.getAllHistograms()) {
            final String label = histo.getValueLabel();
            for (final Double bin : histo.keySet()) {
                final String binStr = String.valueOf(bin);
                final List<String> labelBinStr = Arrays.asList(label, binStr);
                if (expectedSetSizeMap.containsKey(labelBinStr)) {
                    checked = true;
                    Histogram.Bin<Double> binValue = histo.get(bin);
                    final double actual = binValue.getValue();
                    final double expected = expectedSetSizeMap.get(labelBinStr);
                    Assert.assertEquals(actual, expected);
                }
            }
        }
        if (!checked) {
            Assert.fail("Could not not find matching entry for expectedSetSizeMap in metrics.");
        }
    }
}
 
Example 3
Source File: TheoreticalSensitivityTest.java    From picard with MIT License 6 votes vote down vote up
@Test(dataProvider = "TheoreticalSensitivityConstantDepthDataProvider")
public void testSensitivityAtConstantDepth(final double expected, final File metricsFile, final double alleleFraction, final int depth, final int sampleSize, final double tolerance) throws Exception {
    // This tests Theoretical Sensitivity assuming a uniform depth with a distribution of base quality scores.
    // Because this only tests sensitivity at a constant depth, we use this for testing the code at high depths.
    final MetricsFile<?, Integer> metrics = new MetricsFile<>();
    try (final FileReader metricsFileReader = new FileReader(metricsFile)) {
        metrics.read(metricsFileReader);
    }

    final List<Histogram<Integer>> histograms = metrics.getAllHistograms();
    final Histogram<Integer> qualityHistogram = histograms.get(1);

    // We ensure that even using different random seeds we converge to roughly the same value.
    for (int i = 0; i < 3; i++) {
        double result = TheoreticalSensitivity.sensitivityAtConstantDepth(depth, qualityHistogram, 3, sampleSize, alleleFraction, i);
        Assert.assertEquals(result, expected, tolerance);
    }
}
 
Example 4
Source File: TheoreticalSensitivityTest.java    From picard with MIT License 6 votes vote down vote up
@Test(dataProvider = "TheoreticalSensitivityDataProvider")
public void testSensitivity(final double expected, final File metricsFile, final double alleleFraction, final int sampleSize) throws Exception {
    // This tests Theoretical Sensitivity using distributions on both base quality scores
    // and the depth histogram.

    // We use a pretty forgiving tolerance here because for these tests
    // we are not using large enough sample sizes to converge.
    final double tolerance = 0.02;

    final MetricsFile<?, Integer> metrics = new MetricsFile<>();
    try (final FileReader metricsFileReader = new FileReader(metricsFile)) {
        metrics.read(metricsFileReader);
    }

    final List<Histogram<Integer>> histograms = metrics.getAllHistograms();
    final Histogram<Integer> depthHistogram = histograms.get(0);
    final Histogram<Integer> qualityHistogram = histograms.get(1);

    final double result = TheoreticalSensitivity.theoreticalSensitivity(depthHistogram, qualityHistogram, sampleSize, 3, alleleFraction);

    Assert.assertEquals(result, expected, tolerance);
}
 
Example 5
Source File: TheoreticalSensitivityTest.java    From picard with MIT License 6 votes vote down vote up
@Test(dataProvider = "equivalanceHetVsArbitrary")
public void testHetVsArbitrary(final File metricsFile, final double tolerance, final int sampleSize) throws Exception {
    // This test compares Theoretical Sensitivity for arbitrary allele fractions with the theoretical het sensitivity
    // model.  Since allele fraction of 0.5 is equivalent to a het, these should provide the same answer.
    final MetricsFile<?, Integer> metrics = new MetricsFile<>();
    try (final FileReader metricsFileReader = new FileReader(metricsFile)) {
        metrics.read(metricsFileReader);
    }

    final List<Histogram<Integer>> histograms = metrics.getAllHistograms();
    final Histogram<Integer> depthHistogram = histograms.get(0);
    final Histogram<Integer> qualityHistogram = histograms.get(1);

    final double[] qualityDistribution = TheoreticalSensitivity.normalizeHistogram(qualityHistogram);
    final double[] depthDistribution = TheoreticalSensitivity.normalizeHistogram(depthHistogram);

    final double resultFromTS = TheoreticalSensitivity.theoreticalSensitivity(depthHistogram, qualityHistogram, sampleSize, 3, 0.5);
    final double resultFromTHS = TheoreticalSensitivity.hetSNPSensitivity(depthDistribution, qualityDistribution, sampleSize, 3);

    Assert.assertEquals(resultFromTS, resultFromTHS, tolerance);
}
 
Example 6
Source File: TheoreticalSensitivityTest.java    From picard with MIT License 6 votes vote down vote up
@Test(dataProvider = "sumOfGaussiansDataProvider")
public void testDrawSumOfQScores(final File metricsFile, final int altDepth, final double tolerance) throws Exception {
    final MetricsFile<TheoreticalSensitivityMetrics, Integer> metrics = new MetricsFile<>();
    try (final FileReader metricsFileReader = new FileReader(metricsFile)) {
        metrics.read(metricsFileReader);
    }

    final List<Histogram<Integer>> histograms = metrics.getAllHistograms();

    final Histogram<Integer> qualityHistogram = histograms.get(1);
    final TheoreticalSensitivity.RouletteWheel qualityRW = new TheoreticalSensitivity.RouletteWheel(TheoreticalSensitivity.trimDistribution(TheoreticalSensitivity.normalizeHistogram(qualityHistogram)));

    final Random randomNumberGenerator = new Random(51);

    // Calculate mean and deviation of quality score distribution to enable Gaussian sampling below
    final double averageQuality = qualityHistogram.getMean();
    final double standardDeviationQuality = qualityHistogram.getStandardDeviation();

    for (int k = 0; k < 1; k++) {
        int sumOfQualitiesFull = IntStream.range(0, altDepth).map(n -> qualityRW.draw()).sum();
        int sumOfQualities = TheoreticalSensitivity.drawSumOfQScores(altDepth, averageQuality, standardDeviationQuality, randomNumberGenerator.nextGaussian());

        Assert.assertEquals(sumOfQualitiesFull, sumOfQualities, sumOfQualitiesFull * tolerance);
    }
}
 
Example 7
Source File: CollectWgsMetricsWithNonZeroCoverageTest.java    From picard with MIT License 4 votes vote down vote up
@Test
public void testWithoutIntervals() throws IOException {
    final File input = new File(TEST_DIR, "forMetrics.sam");
    final File outfile = File.createTempFile("test", ".wgs_metrics");
    final File pdffile = File.createTempFile("test", ".wgs_metrics.pdf");
    final File ref = new File(TEST_DIR, "merger.fasta");
    final int sampleSize = 1000;
    outfile.deleteOnExit();
    pdffile.deleteOnExit();
    final String[] args = new String[] {
            "INPUT="  + input.getAbsolutePath(),
            "OUTPUT=" + outfile.getAbsolutePath(),
            "REFERENCE_SEQUENCE=" + ref.getAbsolutePath(),
            "SAMPLE_SIZE=" + sampleSize,
            "CHART=" + pdffile.getAbsolutePath()
    };
    Assert.assertEquals(runPicardCommandLine(args), 0);

    final MetricsFile<WgsMetricsWithNonZeroCoverage , Integer> output = new MetricsFile<>();
    output.read(new FileReader(outfile));

    for (final WgsMetricsWithNonZeroCoverage metrics : output.getMetrics()) {
        if (metrics.CATEGORY == WgsMetricsWithNonZeroCoverage.Category.WHOLE_GENOME) {
            Assert.assertEquals(metrics.GENOME_TERRITORY, 1210);
            Assert.assertEquals(metrics.PCT_EXC_MAPQ, 0.271403);
            Assert.assertEquals(metrics.PCT_EXC_DUPE, 0.182149);
            Assert.assertEquals(metrics.PCT_EXC_UNPAIRED, 0.091075);
            Assert.assertEquals(metrics.PCT_1X, 0.107438);
        } else {
            Assert.assertEquals(metrics.GENOME_TERRITORY, 130);
            Assert.assertEquals(metrics.PCT_EXC_MAPQ, 0.271403);
            Assert.assertEquals(metrics.PCT_EXC_DUPE, 0.182149);
            Assert.assertEquals(metrics.PCT_EXC_UNPAIRED, 0.091075);
            Assert.assertEquals(metrics.PCT_1X, 1.0);
        }
    }

    for (final Histogram<Integer> histogram : output.getAllHistograms()) {
        if (histogram.getValueLabel().equals("count_WHOLE_GENOME")) {
            Assert.assertEquals(histogram.get(0).getValue(), 1080d);
        } else {
            Assert.assertEquals(histogram.get(0).getValue(), 0d);
        }
        Assert.assertEquals(histogram.get(1).getValue(), 9d);
        Assert.assertEquals(histogram.get(2).getValue(), 35d);
        Assert.assertEquals(histogram.get(3).getValue(), 86d);
        Assert.assertEquals(histogram.get(4).getValue(), 0d);
    }
}
 
Example 8
Source File: CollectWgsMetricsWithNonZeroCoverageTest.java    From picard with MIT License 4 votes vote down vote up
@Test
public void testWithIntervals() throws IOException {
    final File input = new File(TEST_DIR, "forMetrics.sam");
    final File outfile = File.createTempFile("test", ".wgs_metrics");
    final File pdffile = File.createTempFile("test", ".wgs_metrics.pdf");
    final File ref = new File(TEST_DIR, "merger.fasta");
    final File intervals = new File(TEST_DIR, "largeIntervals.interval_list");
    final int sampleSize = 1000;
    outfile.deleteOnExit();
    pdffile.deleteOnExit();
    final String[] args = new String[] {
            "INPUT="  + input.getAbsolutePath(),
            "OUTPUT=" + outfile.getAbsolutePath(),
            "REFERENCE_SEQUENCE=" + ref.getAbsolutePath(),
            "INTERVALS=" + intervals.getAbsolutePath(),
            "SAMPLE_SIZE=" + sampleSize,
            "CHART=" + pdffile.getAbsolutePath()
    };
    Assert.assertEquals(runPicardCommandLine(args), 0);

    final MetricsFile<WgsMetricsWithNonZeroCoverage , Integer> output = new MetricsFile<>();
    output.read(new FileReader(outfile));

    for (final WgsMetricsWithNonZeroCoverage metrics : output.getMetrics()) {
        if (metrics.CATEGORY == WgsMetricsWithNonZeroCoverage.Category.WHOLE_GENOME) {
            Assert.assertEquals(metrics.GENOME_TERRITORY, 404);
            Assert.assertEquals(metrics.PCT_EXC_MAPQ, 0.271403);
            Assert.assertEquals(metrics.PCT_EXC_DUPE, 0.182149);
            Assert.assertEquals(metrics.PCT_EXC_UNPAIRED, 0.091075);
            Assert.assertEquals(metrics.PCT_1X, 0.321782);
        } else {
            Assert.assertEquals(metrics.GENOME_TERRITORY, 130);
            Assert.assertEquals(metrics.PCT_EXC_MAPQ, 0.271403);
            Assert.assertEquals(metrics.PCT_EXC_DUPE, 0.182149);
            Assert.assertEquals(metrics.PCT_EXC_UNPAIRED, 0.091075);
            Assert.assertEquals(metrics.PCT_1X, 1.0);
        }
    }

    for (final Histogram<Integer> histogram : output.getAllHistograms()) {
        if (histogram.getValueLabel().equals("count_WHOLE_GENOME")) {
            Assert.assertEquals(histogram.get(0).getValue(), 274d);
        } else {
            Assert.assertEquals(histogram.get(0).getValue(), 0d);
        }
        Assert.assertEquals(histogram.get(1).getValue(), 9d);
        Assert.assertEquals(histogram.get(2).getValue(), 35d);
        Assert.assertEquals(histogram.get(3).getValue(), 86d);
        Assert.assertEquals(histogram.get(4).getValue(), 0d);

    }
}
 
Example 9
Source File: CollectF1R2CountsIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
@Test
public void testHistograms() throws IOException {
    final File outputTarGz = createTempFile("f1r2", ".tar.gz");
    final String sample = "SAMPLE";
    final File sam = createSyntheticSam(30, 1, sample);

    final String[] args = {
            "-R", hg19_chr1_1M_Reference,
            "-I", sam.getAbsolutePath(),
            "-O", outputTarGz.getAbsolutePath()
    };

    runCommandLine(args);

    final File extractedDir = createTempDir("extracted");
    IOUtils.extractTarGz(outputTarGz.toPath(), extractedDir.toPath());

    final File altHistogramFile = F1R2CountsCollector.getAltHistogramsFromExtractedTar(extractedDir).stream().findFirst().get();

    final MetricsFile<?, Integer> referenceSiteMetrics = new MetricsFile<>();
    final Reader in = IOUtil.openFileForBufferedReading(altHistogramFile);
    referenceSiteMetrics.read(in);
    CloserUtil.close(in);

    final MetricsFile<?, Integer> altSiteMetrics = new MetricsFile<>();
    final Reader altMetricsReader = IOUtil.openFileForBufferedReading(altHistogramFile);
    altSiteMetrics.read(altMetricsReader);
    CloserUtil.close(altMetricsReader);

    final List<Histogram<Integer>> histograms = altSiteMetrics.getAllHistograms();

    // TODO: should there be 64*3*2 = 384 histograms or just the non-zero ones?
    final String[] expectedTransitions = new String[]{"CAC_T_F2R1", "TAA_G_F2R1", "AGC_C_F2R1", "ACA_A_F2R1"};
    // Assert.assertEquals(histograms.size(), expectedTransitions.length, "alt histogram must contain the expected number of contexts");

    for (String transition : expectedTransitions ){
        Optional<Histogram<Integer>> histogram = histograms.stream()
                .filter(h -> h.getValueLabel().equals(transition))
                .findFirst();
        Assert.assertTrue(histogram.isPresent(), "histogram must exist");
        Assert.assertEquals((int) histogram.get().getSumOfValues(), 1, "histogram must only contain one read");
    }
}