Java Code Examples for htsjdk.samtools.metrics.MetricsFile#addMetric()

The following examples show how to use htsjdk.samtools.metrics.MetricsFile#addMetric() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: GatherReadQualityMetrics.java    From Drop-seq with MIT License 6 votes vote down vote up
/**
 * Do the work after command line has been parsed. RuntimeException may be
 * thrown by this method, and are reported appropriately.
 *
 * @return program exit status.
 */
@Override
protected int doWork() {
	IOUtil.assertFileIsReadable(INPUT);
	IOUtil.assertFileIsWritable(OUTPUT);
	Map<String, ReadQualityMetrics> metricsMap = gatherMetrics(INPUT);
	MetricsFile<ReadQualityMetrics, Integer> outFile = new MetricsFile<>();
	outFile.addHistogram(metricsMap.get(this.GLOBAL).getHistogram());
       // Make sure the GLOBAL metrics is added first
       outFile.addMetric(metricsMap.remove(this.GLOBAL));
	for (ReadQualityMetrics metrics: metricsMap.values())
		outFile.addMetric(metrics);
	BufferedWriter w = IOUtil.openFileForBufferedWriting(OUTPUT);
	outFile.write(w);
	// close properly.
	try {
		w.close();
	} catch (IOException io) {
		throw new TranscriptomeException("Problem writing file", io);
	}
	return 0;
}
 
Example 2
Source File: AlignmentSummaryMetricsCollector.java    From picard with MIT License 6 votes vote down vote up
@Override
public void addMetricsToFile(final MetricsFile<AlignmentSummaryMetrics, Comparable<?>> file) {
    if (firstOfPairCollector.getMetrics().TOTAL_READS > 0) {
        // override how bad cycle is determined for paired reads, it should be
        // the sum of first and second reads
        pairCollector.getMetrics().BAD_CYCLES = firstOfPairCollector.getMetrics().BAD_CYCLES +
                secondOfPairCollector.getMetrics().BAD_CYCLES;

        file.addMetric(firstOfPairCollector.getMetrics());
        file.addMetric(secondOfPairCollector.getMetrics());
        file.addMetric(pairCollector.getMetrics());
    }

    //if there are no reads in any category then we will returned an unpaired alignment summary metric with all zero values
    if (unpairedCollector.getMetrics().TOTAL_READS > 0 || firstOfPairCollector.getMetrics().TOTAL_READS == 0) {
        file.addMetric(unpairedCollector.getMetrics());
    }
}
 
Example 3
Source File: MarkDuplicatesSparkUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Saves the metrics to a file.
 * Note: the SamFileHeader is needed in order to include libraries that didn't have any duplicates.
 * @param result metrics object, potentially pre-initialized with headers,
 */
public static void saveMetricsRDD(final MetricsFile<GATKDuplicationMetrics, Double> result, final SAMFileHeader header, final JavaPairRDD<String, GATKDuplicationMetrics> metricsRDD, final String metricsOutputPath) {
    final LibraryIdGenerator libraryIdGenerator = new LibraryIdGenerator(header);

    final Map<String, GATKDuplicationMetrics> nonEmptyMetricsByLibrary = metricsRDD.collectAsMap();           //Unknown Library
    final Map<String, GATKDuplicationMetrics> emptyMapByLibrary = libraryIdGenerator.getMetricsByLibraryMap();//with null

    final List<String> sortedListOfLibraryNames = new ArrayList<>(Sets.union(emptyMapByLibrary.keySet(), nonEmptyMetricsByLibrary.keySet()));
    sortedListOfLibraryNames.sort(Utils.COMPARE_STRINGS_NULLS_FIRST);
    for (final String library : sortedListOfLibraryNames) {
        //if a non-empty exists, take it, otherwise take from the the empties. This is done to include libraries with zero data in them.
        //But not all libraries are listed in the header (esp in testing data) so we union empty and non-empty
        final GATKDuplicationMetrics metricsToAdd = nonEmptyMetricsByLibrary.containsKey(library) ? nonEmptyMetricsByLibrary.get(library) : emptyMapByLibrary.get(library);
        metricsToAdd.calculateDerivedFields();
        result.addMetric(metricsToAdd);
    }

    if (nonEmptyMetricsByLibrary.size() == 1) {
        result.setHistogram(nonEmptyMetricsByLibrary.values().iterator().next().calculateRoiHistogram());
    }

    MetricsUtils.saveMetrics(result, metricsOutputPath);
}
 
Example 4
Source File: PolyATrimmer.java    From Drop-seq with MIT License 5 votes vote down vote up
private void writeSummary(final Histogram<Integer> h) {

		MetricsFile<TrimMetric, Integer> mf = new MetricsFile<>();
		mf.addHistogram(h);
		TrimMetric tm = new TrimMetric(h);
		mf.addMetric(tm);
		mf.write(this.OUTPUT_SUMMARY);
	}
 
Example 5
Source File: FindMendelianViolations.java    From picard with MIT License 5 votes vote down vote up
@Override
protected int doWork() {
    ///////////////////////////////////////////////////////////////////////
    // Test and then load up the inputs
    ///////////////////////////////////////////////////////////////////////
    final boolean outputVcfs = VCF_DIR != null;
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsReadable(TRIOS);
    IOUtil.assertFileIsWritable(OUTPUT);
    if (outputVcfs) IOUtil.assertDirectoryIsWritable(VCF_DIR);

    LOG.info("Loading and filtering trios.");

    final MendelianViolationDetector.Result result =
            VariantProcessor.Builder
                    .generatingAccumulatorsBy(this::buildDetector)
                    .withInput(INPUT)
                    .combiningResultsBy(MendelianViolationDetector.Result::merge)
                    .multithreadingBy(THREAD_COUNT)
                    .build()
                    .process();

    // Write out the metrics!
    final MetricsFile<MendelianViolationMetrics, ?> metricsFile = getMetricsFile();
    for (final MendelianViolationMetrics m : result.metrics()) {
        m.calculateDerivedFields();
        metricsFile.addMetric(m);
    }

    metricsFile.write(OUTPUT);
    writeAllViolations(result);

    return 0;
}
 
Example 6
Source File: AlleleFrequencyQC.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Override
public Object  onTraversalSuccess() {

    super.onTraversalSuccess();

    final GATKReportTable table= new GATKReport(outFile).getTable(MODULES_TO_USE.get(0));
    final List<String> columnNames = table.getColumnInfo().stream().map(c -> c.getColumnName()).collect(Collectors.toList());

    // this is a map of allele frequency bin : length 2 list of observed allele frequencies ( one for comp, one for eval )

    final Map<Object, List<Object>> afMap = IntStream.range(0, table.getNumRows()).mapToObj(i -> table.getRow(i)).
            filter(r -> r[columnNames.indexOf("Filter")].equals("called")).
            collect(Collectors.groupingBy(r -> r[columnNames.indexOf("AlleleFrequency")],
                    Collectors.mapping(r -> r[columnNames.indexOf("avgVarAF")], Collectors.toList())));

    final ChiSquaredDistribution dist = new ChiSquaredDistribution(afMap.size()-1);
    final double chiSqValue = calculateChiSquaredStatistic(afMap, allowedVariance);
    final double pVal = 1- dist.cumulativeProbability(chiSqValue);
    final MetricsFile<AlleleFrequencyQCMetric, Integer> metricsFile = new MetricsFile<>();
    final AlleleFrequencyQCMetric metric = new AlleleFrequencyQCMetric();

    metric.SAMPLE = sample;
    metric.CHI_SQ_VALUE =  chiSqValue;
    metric.METRIC_TYPE = "Allele Frequency";
    metric.METRIC_VALUE = pVal;

    metricsFile.addMetric(metric);
    MetricsUtils.saveMetrics(metricsFile,   metricOutput.getAbsolutePath());

    // need the file returned from variant eval in order to run the plotting stuff
    final RScriptExecutor executer = new RScriptExecutor();
    executer.addScript(new Resource(R_SCRIPT, AlleleFrequencyQC.class));
    executer.addArgs(outFile.getAbsolutePath() , metricOutput.getAbsolutePath(), sample);
    executer.exec();

    if (pVal < threshold) {
        logger.error("Allele frequencies between your array VCF and the expected VCF do not match with a significant pvalue of " + pVal);
    }
    return null;
}
 
Example 7
Source File: CreateVerifyIDIntensityContaminationMetricsFile.java    From picard with MIT License 5 votes vote down vote up
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    final File metricsFile = new File(OUTPUT + "." + FILE_EXTENSION);
    IOUtil.assertFileIsWritable(metricsFile);

    final MetricsFile<VerifyIDIntensityContaminationMetrics, ?> verifyIDIntensityContaminationMetricsFile = getMetricsFile();

    final Pattern HEADER_PATTERN = Pattern.compile("^ID\\s+%Mix\\s+LLK\\s+LLK0\\s*$");
    final Pattern DASHES_PATTERN = Pattern.compile("^[-]+$");
    final Pattern DATA_PATTERN = Pattern.compile("^(\\d+)\\s+([0-9]*\\.?[0-9]+)\\s+([-0-9]*\\.?[0-9]+)\\s+([-0-9]*\\.?[0-9]+)\\s*$");
    try (BufferedReader br = new BufferedReader(new FileReader(INPUT))) {
        String line;
        line = br.readLine();
        lineMatch(line, HEADER_PATTERN);
        line = br.readLine();
        lineMatch(line, DASHES_PATTERN);
        while ((line = br.readLine()) != null) {
            final Matcher m = lineMatch(line, DATA_PATTERN);
            // Load up and store the metrics
            final VerifyIDIntensityContaminationMetrics metrics = new VerifyIDIntensityContaminationMetrics();
            metrics.ID = Integer.parseInt(m.group(1));
            metrics.PCT_MIX = Double.parseDouble(m.group(2));
            metrics.LLK = Double.parseDouble(m.group(3));
            metrics.LLK0 = Double.parseDouble(m.group(4));

            verifyIDIntensityContaminationMetricsFile.addMetric(metrics);
        }
        verifyIDIntensityContaminationMetricsFile.write(metricsFile);
    } catch (IOException e) {
        throw new PicardException("Error parsing VerifyIDIntensity Output", e);
    }
    return 0;
}
 
Example 8
Source File: TrimStartingSequence.java    From Drop-seq with MIT License 5 votes vote down vote up
private void writeSummary (final Histogram<Integer> h) {

		MetricsFile<TrimMetric, Integer> mf = new MetricsFile<TrimMetric, Integer>();
		mf.addHistogram(h);
		TrimMetric tm=new TrimMetric(h);
		mf.addMetric(tm);
		mf.write(this.OUTPUT_SUMMARY);
	}
 
Example 9
Source File: AbstractWgsMetricsCollector.java    From picard with MIT License 5 votes vote down vote up
/**
 * Adds collected metrics and depth histogram to file
 * @param file MetricsFile for result of collector's work
 * @param dupeFilter         counting filter for duplicate reads
 * @param adapterFilter      counting filter for adapter reads
 * @param mapqFilter         counting filter for mapping quality
 * @param pairFilter         counting filter for reads without a mapped mate pair
 */
public void addToMetricsFile(final MetricsFile<WgsMetrics, Integer> file,
        final boolean includeBQHistogram,
        final CountingFilter dupeFilter,
        final CountingFilter adapterFilter,
        final CountingFilter mapqFilter,
        final CountingPairedFilter pairFilter) {
    final WgsMetrics metrics = getMetrics(dupeFilter, adapterFilter, mapqFilter, pairFilter);

    // add them to the file
    file.addMetric(metrics);
    file.addHistogram(getHighQualityDepthHistogram());
    if (includeBQHistogram) addBaseQHistogram(file);
}
 
Example 10
Source File: TagReadWithGeneExonFunction.java    From Drop-seq with MIT License 5 votes vote down vote up
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(this.INPUT);
    IOUtil.assertFileIsReadable(this.ANNOTATIONS_FILE);
    if (this.SUMMARY!=null) IOUtil.assertFileIsWritable(this.SUMMARY);
    IOUtil.assertFileIsWritable(this.OUTPUT);

    SamReader inputSam = SamReaderFactory.makeDefault().open(INPUT);

    SAMFileHeader header = inputSam.getFileHeader();
    SamHeaderUtil.addPgRecord(header, this);
    SAMSequenceDictionary bamDict = header.getSequenceDictionary();

    final OverlapDetector<Gene> geneOverlapDetector = GeneAnnotationReader.loadAnnotationsFile(ANNOTATIONS_FILE, bamDict);
    SAMFileWriter writer= new SAMFileWriterFactory().makeSAMOrBAMWriter(header, true, OUTPUT);

    for (SAMRecord r: inputSam) {
        pl.record(r);

        if (!r.getReadUnmappedFlag())
            r=	setAnnotations(r, geneOverlapDetector);
        writer.addAlignment(r);
    }

    CloserUtil.close(inputSam);
    writer.close();
    if (this.USE_STRAND_INFO) log.info(this.metrics.toString());
    if (SUMMARY==null) return 0;

    //process summary
    MetricsFile<ReadTaggingMetric, Integer> outFile = new MetricsFile<>();
    outFile.addMetric(this.metrics);
    outFile.write(this.SUMMARY);
    return 0;

}
 
Example 11
Source File: TagReadWithGeneFunction.java    From Drop-seq with MIT License 5 votes vote down vote up
@Override
protected int doWork() {
	IOUtil.assertFileIsReadable(this.INPUT);
	IOUtil.assertFileIsReadable(this.ANNOTATIONS_FILE);
	if (this.SUMMARY!=null) IOUtil.assertFileIsWritable(this.SUMMARY);
	IOUtil.assertFileIsWritable(this.OUTPUT);

	SamReader inputSam = SamReaderFactory.makeDefault().open(INPUT);

	SAMFileHeader header = inputSam.getFileHeader();
	SamHeaderUtil.addPgRecord(header, this);
	SAMSequenceDictionary bamDict = header.getSequenceDictionary();

       final OverlapDetector<Gene> geneOverlapDetector = GeneAnnotationReader.loadAnnotationsFile(ANNOTATIONS_FILE, bamDict);
       SAMFileWriter writer= new SAMFileWriterFactory().makeSAMOrBAMWriter(header, true, OUTPUT);

       for (SAMRecord r: inputSam) {
       	pl.record(r);

       	if (!r.getReadUnmappedFlag())
			// r=	setGeneExons(r, geneOverlapDetector, this.ALLOW_MULTI_GENE_READS);
       		r= setAnnotations(r, geneOverlapDetector, this.ALLOW_MULTI_GENE_READS);
       	writer.addAlignment(r);
       }

	CloserUtil.close(inputSam);
	writer.close();
	// if (this.USE_STRAND_INFO) log.info(this.metrics.toString());
	if (SUMMARY==null) return 0;

	//process summary
	MetricsFile<ReadTaggingMetric, Integer> outFile = new MetricsFile<ReadTaggingMetric, Integer>();
	outFile.addMetric(this.metrics);
	outFile.write(this.SUMMARY);
	return 0;

}
 
Example 12
Source File: GcBiasMetricsCollector.java    From picard with MIT License 4 votes vote down vote up
private void addGcDataToFile(final MetricsFile<GcBiasMetrics, Integer> file, final Map<String, GcObject> gcData,
                             final boolean includeDuplicates) {
    for (final Map.Entry<String, GcObject> entry : gcData.entrySet()) {
        final GcObject gcCur = entry.getValue();
        final String gcType = entry.getKey();

        final int[] readsByGc = gcCur.readsByGc;
        final long[] errorsByGc = gcCur.errorsByGc;
        final long[] basesByGc = gcCur.basesByGc;
        final long totalClusters = gcCur.totalClusters;
        final long totalAlignedReads = gcCur.totalAlignedReads;
        final String group = gcCur.group;

        final GcBiasMetrics metrics = new GcBiasMetrics();

        final double totalWindows = sum(windowsByGc);
        final double totalReads = sum(readsByGc);
        final double meanReadsPerWindow = totalReads / totalWindows;

        if (totalAlignedReads > 0) {
            for (int i = 0; i < windowsByGc.length; ++i) {
                final GcBiasDetailMetrics detail = new GcBiasDetailMetrics();
                detail.GC = i;
                detail.WINDOWS = windowsByGc[i];
                detail.READ_STARTS = readsByGc[i];
                if (errorsByGc[i] > 0) {
                    detail.MEAN_BASE_QUALITY = QualityUtil.getPhredScoreFromObsAndErrors(basesByGc[i], errorsByGc[i]);
                }
                if (windowsByGc[i] != 0) {
                    detail.NORMALIZED_COVERAGE = (detail.READ_STARTS / (double) detail.WINDOWS) / meanReadsPerWindow;
                    detail.ERROR_BAR_WIDTH = (Math.sqrt(detail.READ_STARTS) / (double) detail.WINDOWS) / meanReadsPerWindow;
                } else {
                    detail.NORMALIZED_COVERAGE = 0;
                    detail.ERROR_BAR_WIDTH = 0;
                }
                detail.ACCUMULATION_LEVEL = group;
                if (group.equals(ACCUMULATION_LEVEL_READ_GROUP)) {detail.READ_GROUP = gcType;}
                else if (group.equals(ACCUMULATION_LEVEL_SAMPLE)) {detail.SAMPLE = gcType;}
                else if (group.equals(ACCUMULATION_LEVEL_LIBRARY)) {detail.LIBRARY = gcType;}

                detail.READS_USED = includeDuplicates ? READS_USED_ALL : READS_USED_UNIQUE;

                metrics.DETAILS.addMetric(detail);
            }

            // Synthesize the high level summary metrics
            final GcBiasSummaryMetrics summary = new GcBiasSummaryMetrics();
            if (group.equals(ACCUMULATION_LEVEL_READ_GROUP)) {summary.READ_GROUP = gcType;}
            else if (group.equals(ACCUMULATION_LEVEL_SAMPLE)) {summary.SAMPLE = gcType;}
            else if (group.equals(ACCUMULATION_LEVEL_LIBRARY)) {summary.LIBRARY = gcType;}

            summary.READS_USED = includeDuplicates ? READS_USED_ALL : READS_USED_UNIQUE;

            summary.ACCUMULATION_LEVEL = group;
            summary.WINDOW_SIZE = scanWindowSize;
            summary.TOTAL_CLUSTERS = totalClusters;
            summary.ALIGNED_READS = totalAlignedReads;
            summary.GC_NC_0_19 = calculateGcNormCoverage(meanReadsPerWindow, readsByGc, 0, 19);
            summary.GC_NC_20_39 = calculateGcNormCoverage(meanReadsPerWindow, readsByGc, 20, 39);
            summary.GC_NC_40_59 = calculateGcNormCoverage(meanReadsPerWindow, readsByGc, 40, 59);
            summary.GC_NC_60_79 = calculateGcNormCoverage(meanReadsPerWindow, readsByGc, 60, 79);
            summary.GC_NC_80_100 = calculateGcNormCoverage(meanReadsPerWindow, readsByGc, 80, 100);

            calculateDropoutMetrics(metrics.DETAILS.getMetrics(), summary);

            metrics.SUMMARY = summary;

            file.addMetric(metrics);
        }
    }
}
 
Example 13
Source File: InsertSizeMetricsCollector.java    From picard with MIT License 4 votes vote down vote up
public void addMetricsToFile(final MetricsFile<InsertSizeMetrics,Integer> file) {
    // get the number of inserts, and the maximum and minimum keys across, across all orientations
    for (final Histogram<Integer> h : this.histograms.values()) {
        totalInserts += h.getCount();
    }
    if (0 == totalInserts) return; // nothing to store

    for(final Map.Entry<SamPairUtil.PairOrientation, Histogram<Integer>> entry : histograms.entrySet()) {
        final SamPairUtil.PairOrientation pairOrientation = entry.getKey();
        final Histogram<Integer> histogram = entry.getValue();
        final double total = histogram.getCount();

        // Only include a category if it has a sufficient percentage of the data in it
        if( total >= totalInserts * minimumPct ) {
            final InsertSizeMetrics metrics = new InsertSizeMetrics();
            metrics.SAMPLE             = this.sample;
            metrics.LIBRARY            = this.library;
            metrics.READ_GROUP         = this.readGroup;
            metrics.PAIR_ORIENTATION   = pairOrientation;
            if (!histogram.isEmpty()) {
                metrics.READ_PAIRS = (long) total;
                metrics.MAX_INSERT_SIZE = (int) histogram.getMax();
                metrics.MIN_INSERT_SIZE = (int) histogram.getMin();
                metrics.MEDIAN_INSERT_SIZE = histogram.getMedian();
                metrics.MODE_INSERT_SIZE = histogram.getMode();
                metrics.MEDIAN_ABSOLUTE_DEVIATION = histogram.getMedianAbsoluteDeviation();

                final double median = histogram.getMedian();
                double covered = 0;
                double low = median;
                double high = median;

                while (low >= histogram.getMin()-1 || high <= histogram.getMax()+1) {
                    final Histogram.Bin<Integer> lowBin = histogram.get((int) low);
                    if (lowBin != null) covered += lowBin.getValue();

                    if (low != high) {
                        final Histogram.Bin<Integer> highBin = histogram.get((int) high);
                        if (highBin != null) covered += highBin.getValue();
                    }

                    final double percentCovered = covered / total;
                    final int distance = (int) (high - low) + 1;
                    if (percentCovered >= 0.1 && metrics.WIDTH_OF_10_PERCENT == 0) metrics.WIDTH_OF_10_PERCENT = distance;
                    if (percentCovered >= 0.2 && metrics.WIDTH_OF_20_PERCENT == 0) metrics.WIDTH_OF_20_PERCENT = distance;
                    if (percentCovered >= 0.3 && metrics.WIDTH_OF_30_PERCENT == 0) metrics.WIDTH_OF_30_PERCENT = distance;
                    if (percentCovered >= 0.4 && metrics.WIDTH_OF_40_PERCENT == 0) metrics.WIDTH_OF_40_PERCENT = distance;
                    if (percentCovered >= 0.5 && metrics.WIDTH_OF_50_PERCENT == 0) metrics.WIDTH_OF_50_PERCENT = distance;
                    if (percentCovered >= 0.6 && metrics.WIDTH_OF_60_PERCENT == 0) metrics.WIDTH_OF_60_PERCENT = distance;
                    if (percentCovered >= 0.7 && metrics.WIDTH_OF_70_PERCENT == 0) metrics.WIDTH_OF_70_PERCENT = distance;
                    if (percentCovered >= 0.8 && metrics.WIDTH_OF_80_PERCENT == 0) metrics.WIDTH_OF_80_PERCENT = distance;
                    if (percentCovered >= 0.9 && metrics.WIDTH_OF_90_PERCENT == 0) metrics.WIDTH_OF_90_PERCENT = distance;
                    if (percentCovered >= 0.95 && metrics.WIDTH_OF_95_PERCENT == 0) metrics.WIDTH_OF_95_PERCENT = distance;
                    if (percentCovered >= 0.99 && metrics.WIDTH_OF_99_PERCENT == 0) metrics.WIDTH_OF_99_PERCENT = distance;

                    --low;
                    ++high;
                }
            }

            // Trim the Histogram down to get rid of outliers that would make the chart useless.
            final Histogram<Integer> trimmedHistogram = histogram; // alias it
            trimmedHistogram.trimByWidth(getWidthToTrimTo(metrics));

            if (!trimmedHistogram.isEmpty()) {
                metrics.MEAN_INSERT_SIZE = trimmedHistogram.getMean();
                metrics.STANDARD_DEVIATION = trimmedHistogram.getStandardDeviation();
            }

            file.addHistogram(trimmedHistogram);
            file.addMetric(metrics);
        }
    }
}
 
Example 14
Source File: PerUnitExampleMultiMetricsCollector.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
@Override
public void addMetricsToFile(final MetricsFile<ExampleMultiMetrics, Integer> file) {
    file.addMetric(metrics);
}
 
Example 15
Source File: DetectBeadSynthesisErrors.java    From Drop-seq with MIT License 4 votes vote down vote up
private void writeSummary(final BeadSynthesisErrorsSummaryMetric summary, final File out) {
	MetricsFile<BeadSynthesisErrorsSummaryMetric, Integer> outFile = new MetricsFile<>();
	outFile.addMetric(summary);
	outFile.addHistogram(summary.getHistogram());
	outFile.write(out);
}
 
Example 16
Source File: SelectCellsByNumTranscripts.java    From Drop-seq with MIT License 4 votes vote down vote up
@Override
  protected int doWork() {
      IOUtil.assertFileIsWritable(OUTPUT);
      if (MIN_READS_PER_CELL == null || MIN_READS_PER_CELL < MIN_TRANSCRIPTS_PER_CELL)
	MIN_READS_PER_CELL = MIN_TRANSCRIPTS_PER_CELL;
      final List<String> cellBarcodes;
      if (INPUT_INTERIM_CELLS == null)
	cellBarcodes = new BarcodeListRetrieval().getListCellBarcodesByReadCount(
                  this.INPUT, this.CELL_BARCODE_TAG, this.READ_MQ, this.MIN_READS_PER_CELL, null);
else
	cellBarcodes = readBarcodes(INPUT_INTERIM_CELLS);

      if (OUTPUT_INTERIM_CELLS != null)
	writeBarcodes(OUTPUT_INTERIM_CELLS, cellBarcodes);

      SamHeaderAndIterator headerAndIterator = SamFileMergeUtil.mergeInputs(Collections.singletonList(this.INPUT), false);

      final MapContainer mapContainer;
      if (ORGANISM != null && !ORGANISM.isEmpty()) {
          headerAndIterator = new SamHeaderAndIterator(headerAndIterator.header, new PrefixGeneWithOrganismIterator(headerAndIterator.iterator));
          mapContainer = new MultiOrganismMapContainer(cellBarcodes);
      } else
	mapContainer = new SingleOrganismMapContainer(cellBarcodes);

      // gene/exon tags are sorted first, followed by cells
      UMIIterator umiIterator = new UMIIterator(headerAndIterator, GENE_NAME_TAG, GENE_STRAND_TAG, GENE_FUNCTION_TAG,
      		this.STRAND_STRATEGY, this.LOCUS_FUNCTION_LIST, this.CELL_BARCODE_TAG, this.MOLECULAR_BARCODE_TAG,
      		this.READ_MQ, false, cellBarcodes);


      String gene = null;

      UMICollection batch;
      while ((batch=umiIterator.next())!=null) {
          if (batch.isEmpty())
		continue;
          String currentGene = batch.getGeneName();
          // if just starting the loop
          if (gene==null) gene=currentGene;
          // you've gathered all the data for the gene, write it out and start on the next.
          if (!gene.equals(currentGene))
		mapContainer.addToSummary(gene);
          // Accumulate this gene
          gene=currentGene;
          mapContainer.countExpression(gene, batch.getCellBarcode(), batch.getDigitalExpression(0, this.EDIT_DISTANCE, false));

      }
      // write out remainder
      mapContainer.addToSummary(gene);

      final Map<String, Integer> transcriptsPerCell = mapContainer.getTranscriptCountForCellBarcodesOverTranscriptThreshold(MIN_TRANSCRIPTS_PER_CELL);

      log.info("Found " + transcriptsPerCell.size() + " cells with enough transcripts");

      final Map.Entry<String, Integer> transcriptsPerCellArray[] = transcriptsPerCell.entrySet().toArray(new Map.Entry[transcriptsPerCell.size()]);
      Arrays.sort(transcriptsPerCellArray, new EntryComparator());

      final List<String> finalBarcodes = new ArrayList<>(transcriptsPerCellArray.length);
      for (final Map.Entry<String, Integer> entry: transcriptsPerCellArray)
	finalBarcodes.add(entry.getKey());

      writeBarcodes(OUTPUT, finalBarcodes);
      log.info("Wrote cell barcodes to " + OUTPUT.getAbsolutePath());
      CloserUtil.close(umiIterator);

      if (METRICS != null) {
          final Metrics m = mapContainer.accumulateMetrics(transcriptsPerCell.keySet());
          MetricsFile<Metrics, Integer> out = getMetricsFile();
          out.addMetric(m);
          out.write(METRICS);
      }
      return 0;
  }
 
Example 17
Source File: MultiLevelCollectorTest.java    From picard with MIT License 4 votes vote down vote up
@Override
public void addMetricsToFile(final MetricsFile<TotalNumberMetric, Integer> totalNumberMetricIntegerMetricsFile) {
    totalNumberMetricIntegerMetricsFile.addMetric(metric);
}
 
Example 18
Source File: CollectQualityYieldMetrics.java    From picard with MIT License 4 votes vote down vote up
public void addMetricsToFile(final MetricsFile<QualityYieldMetrics, Integer> metricsFile) {
    metricsFile.addMetric(metrics);
}
 
Example 19
Source File: MultiLevelReducibleCollectorUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
@Override
public void addMetricsToFile(final MetricsFile<TotalNumberMetric, Integer> totalNumberMetricIntegerMetricsFile) {
    totalNumberMetricIntegerMetricsFile.addMetric(metric);
}
 
Example 20
Source File: CollectVariantCallingMetrics.java    From picard with MIT License 4 votes vote down vote up
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsReadable(DBSNP);
    if (TARGET_INTERVALS != null) IOUtil.assertFileIsReadable(TARGET_INTERVALS);
    if (SEQUENCE_DICTIONARY != null) IOUtil.assertFileIsReadable(SEQUENCE_DICTIONARY.toPath());

    final boolean requiresIndex = this.TARGET_INTERVALS != null || this.THREAD_COUNT > 1;
    final VCFFileReader variantReader = new VCFFileReader(INPUT, requiresIndex);
    final VCFHeader vcfHeader = variantReader.getFileHeader();
    CloserUtil.close(variantReader);

    final SAMSequenceDictionary sequenceDictionary =
            SAMSequenceDictionaryExtractor.extractDictionary(SEQUENCE_DICTIONARY == null ? INPUT.toPath() : SEQUENCE_DICTIONARY.toPath());

    final IntervalList targetIntervals = (TARGET_INTERVALS == null) ? null : IntervalList.fromFile(TARGET_INTERVALS).uniqued();

    log.info("Loading dbSNP file ...");
    final DbSnpBitSetUtil.DbSnpBitSets dbsnp = DbSnpBitSetUtil.createSnpAndIndelBitSets(DBSNP, sequenceDictionary, targetIntervals, Optional.of(log));

    log.info("Starting iteration of variants.");

    final VariantProcessor.Builder<CallingMetricAccumulator, CallingMetricAccumulator.Result> builder =
            VariantProcessor.Builder
                    .generatingAccumulatorsBy(() -> {
                        CallingMetricAccumulator accumulator = GVCF_INPUT ? new GvcfMetricAccumulator(dbsnp) : new CallingMetricAccumulator(dbsnp);
                        accumulator.setup(vcfHeader);
                        return accumulator;
                    })
                    .combiningResultsBy(CallingMetricAccumulator.Result::merge)
                    .withInput(INPUT)
                    .multithreadingBy(THREAD_COUNT);

    if (targetIntervals != null) {
        builder.limitingProcessedRegionsTo(targetIntervals);
    }

    final CallingMetricAccumulator.Result result = builder.build().process();

    // Fetch and write the metrics.
    final MetricsFile<CollectVariantCallingMetrics.VariantCallingDetailMetrics, Integer> detail = getMetricsFile();
    final MetricsFile<CollectVariantCallingMetrics.VariantCallingSummaryMetrics, Integer> summary = getMetricsFile();
    summary.addMetric(result.summary);
    result.details.forEach(detail::addMetric);

    final String outputPrefix = OUTPUT.getAbsolutePath() + ".";
    detail.write(new File(outputPrefix + CollectVariantCallingMetrics.VariantCallingDetailMetrics.getFileExtension()));
    summary.write(new File(outputPrefix + CollectVariantCallingMetrics.VariantCallingSummaryMetrics.getFileExtension()));

    return 0;
}