Java Code Examples for htsjdk.samtools.SAMFileHeader#getReadGroups()

The following examples show how to use htsjdk.samtools.SAMFileHeader#getReadGroups() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: QualityScoreDistributionSpark.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
private void saveResults(final MetricsFile<?, Byte> metrics,  final SAMFileHeader readsHeader, final String inputFileName) {
    MetricsUtils.saveMetrics(metrics, out);

    if (metrics.getAllHistograms().isEmpty()) {
        logger.warn("No valid bases found in input file.");
    } else if(chartOutput != null){
        // If we're working with a single library, assign that library's name
        // as a suffix to the plot title
        String plotSubtitle = "";
        final List<SAMReadGroupRecord> readGroups = readsHeader.getReadGroups();
        if (readGroups.size() == 1) {
            plotSubtitle = readGroups.get(0).getLibrary();
            if (null == plotSubtitle) {
                plotSubtitle = "";
            }
        }

        // Now run R to generate a chart
        //out is the metrics file
        //chartOutput is the pdf file with the graph
        final RScriptExecutor executor = new RScriptExecutor();
        executor.addScript(getQualityScoreDistributionRScriptResource());
        executor.addArgs(out, chartOutput.getAbsolutePath(), inputFileName, plotSubtitle);
        executor.exec();
    }
}
 
Example 2
Source File: MeanQualityByCycleSpark.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
private void saveResults(final MetricsFile<?, Integer> metrics, final SAMFileHeader readsHeader, final String inputFileName){
    MetricsUtils.saveMetrics(metrics, out);

    if (metrics.getAllHistograms().isEmpty()) {
        logger.warn("No valid bases found in input file.");
    } else if (chartOutput != null){
        // Now run R to generate a chart

        // If we're working with a single library, assign that library's name
        // as a suffix to the plot title
        final List<SAMReadGroupRecord> readGroups = readsHeader.getReadGroups();

        /*
         * A subtitle for the plot, usually corresponding to a library.
         */
        String plotSubtitle = "";
        if (readGroups.size() == 1) {
            plotSubtitle = StringUtil.asEmptyIfNull(readGroups.get(0).getLibrary());
        }
        final RScriptExecutor executor = new RScriptExecutor();
        executor.addScript(getMeanQualityByCycleRScriptResource());
        executor.addArgs(out, chartOutput.getAbsolutePath(), inputFileName, plotSubtitle);
        executor.exec();
    }
}
 
Example 3
Source File: ReadMetadata.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/** This constructor is for testing only.  It applies a single LibraryStatistics object to all libraries. */
@VisibleForTesting
ReadMetadata( final Set<Integer> crossContigIgnoreSet, final SAMFileHeader header,
              final LibraryStatistics stats, final PartitionBounds[] partitionBounds,
              final long nReads, final long maxReadsInPartition, final float coverage ) {
    this.crossContigIgnoreSet = crossContigIgnoreSet;
    contigNameToID = buildContigNameToIDMap(header.getSequenceDictionary());
    contigIDToName = buildContigIDToNameArray(contigNameToID);
    readGroupToLibrary = buildGroupToLibMap(header);
    this.nReads = nReads;
    nRefBases = header.getSequenceDictionary().getSequences()
            .stream().mapToLong(SAMSequenceRecord::getSequenceLength).sum();
    avgReadLen = (int)(coverage * nRefBases / nReads);
    this.maxReadsInPartition = maxReadsInPartition;
    this.coverage = coverage;
    meanBaseQuality = DEFAULT_MEAN_BASE_QUALITY_FOR_TESTING;
    this.partitionBounds = partitionBounds;
    libraryToFragmentStatistics = new HashMap<>(6);
    libraryToFragmentStatistics.put(null, stats);
    for ( final SAMReadGroupRecord readGroupRecord : header.getReadGroups() ) {
        libraryToFragmentStatistics.put(readGroupRecord.getLibrary(), stats);
    }
}
 
Example 4
Source File: Merge.java    From cramtools with Apache License 2.0 6 votes vote down vote up
private static SAMFileHeader mergeHeaders(List<RecordSource> sources) {
	SAMFileHeader header = new SAMFileHeader();
	for (RecordSource source : sources) {
		SAMFileHeader h = source.reader.getFileHeader();

		for (SAMSequenceRecord seq : h.getSequenceDictionary().getSequences()) {
			if (header.getSequenceDictionary().getSequence(seq.getSequenceName()) == null)
				header.addSequence(seq);
		}

		for (SAMProgramRecord pro : h.getProgramRecords()) {
			if (header.getProgramRecord(pro.getProgramGroupId()) == null)
				header.addProgramRecord(pro);
		}

		for (String comment : h.getComments())
			header.addComment(comment);

		for (SAMReadGroupRecord rg : h.getReadGroups()) {
			if (header.getReadGroup(rg.getReadGroupId()) == null)
				header.addReadGroup(rg);
		}
	}

	return header;
}
 
Example 5
Source File: CollectBaseDistributionByCycleSpark.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
protected void saveResults(final MetricsFile<?, Integer> metrics, final SAMFileHeader readsHeader, final String inputFileName) {
    MetricsUtils.saveMetrics(metrics, out);

    if (metrics.getAllHistograms().isEmpty()) {
        logger.warn("No valid bases found in input file.");
    } else if (chartOutput != null) {
        // Now run R to generate a chart

        // If we're working with a single library, assign that library's name
        // as a suffix to the plot title
        final List<SAMReadGroupRecord> readGroups = readsHeader.getReadGroups();

        /*
         * A subtitle for the plot, usually corresponding to a library.
         */
        String plotSubtitle = "";
        if (readGroups.size() == 1) {
            plotSubtitle = StringUtil.asEmptyIfNull(readGroups.get(0).getLibrary());
        }
        final RScriptExecutor executor = new RScriptExecutor();
        executor.addScript(getBaseDistributionByCycleRScriptResource());
        executor.addArgs(out, chartOutput.getAbsolutePath(), inputFileName, plotSubtitle);
        executor.exec();
    }
}
 
Example 6
Source File: CollectRnaSeqMetrics.java    From picard with MIT License 6 votes vote down vote up
@Override
protected void setup(final SAMFileHeader header, final File samFile) {

    if (CHART_OUTPUT != null) IOUtil.assertFileIsWritable(CHART_OUTPUT);

    final OverlapDetector<Gene> geneOverlapDetector = GeneAnnotationReader.loadRefFlat(REF_FLAT, header.getSequenceDictionary());
    LOG.info("Loaded " + geneOverlapDetector.getAll().size() + " genes.");

    final Long ribosomalBasesInitialValue = RIBOSOMAL_INTERVALS != null ? 0L : null;
    final OverlapDetector<Interval> ribosomalSequenceOverlapDetector = RnaSeqMetricsCollector.makeOverlapDetector(samFile, header, RIBOSOMAL_INTERVALS, LOG);

    final HashSet<Integer> ignoredSequenceIndices = RnaSeqMetricsCollector.makeIgnoredSequenceIndicesSet(header, IGNORE_SEQUENCE);

    collector = new RnaSeqMetricsCollector(METRIC_ACCUMULATION_LEVEL, header.getReadGroups(), ribosomalBasesInitialValue,
            geneOverlapDetector, ribosomalSequenceOverlapDetector, ignoredSequenceIndices, MINIMUM_LENGTH, STRAND_SPECIFICITY, RRNA_FRAGMENT_PERCENTAGE,
            true);

    // If we're working with a single library, assign that library's name as a suffix to the plot title
    final List<SAMReadGroupRecord> readGroups = header.getReadGroups();
    if (readGroups.size() == 1) {
        this.plotSubtitle = readGroups.get(0).getLibrary();
        if (null == this.plotSubtitle) this.plotSubtitle = "";
    }
}
 
Example 7
Source File: CollectAlignmentSummaryMetrics.java    From picard with MIT License 6 votes vote down vote up
@Override
protected void setup(final SAMFileHeader header, final File samFile) {
    IOUtil.assertFileIsWritable(OUTPUT);

    if (header.getSequenceDictionary().isEmpty()) {
        log.warn(INPUT.getAbsoluteFile() + " has no sequence dictionary.  If any reads " +
                "in the file are aligned, then alignment summary metrics collection will fail.");
    }

    if(REFERENCE_SEQUENCE == null && COLLECT_ALIGNMENT_INFORMATION) {
        log.warn("Without a REFERENCE_SEQUENCE, metrics pertaining to mismatch rates will not be collected!");
    }

    collector = new AlignmentSummaryMetricsCollector(METRIC_ACCUMULATION_LEVEL, header.getReadGroups(), COLLECT_ALIGNMENT_INFORMATION,
            ADAPTER_SEQUENCE, MAX_INSERT_SIZE, EXPECTED_PAIR_ORIENTATIONS, IS_BISULFITE_SEQUENCED);
}
 
Example 8
Source File: CollectBaseDistributionByCycle.java    From picard with MIT License 5 votes vote down vote up
@Override
protected void setup(final SAMFileHeader header, final File samFile) {
    IOUtil.assertFileIsWritable(CHART_OUTPUT);
    final List<SAMReadGroupRecord> readGroups = header.getReadGroups();
    if (readGroups.size() == 1) {
        plotSubtitle = StringUtil.asEmptyIfNull(readGroups.get(0).getLibrary());
    }
    hist = new HistogramGenerator();
}
 
Example 9
Source File: CollectInsertSizeMetrics.java    From picard with MIT License 5 votes vote down vote up
@Override protected void setup(final SAMFileHeader header, final File samFile) {
    IOUtil.assertFileIsWritable(OUTPUT);
    IOUtil.assertFileIsWritable(Histogram_FILE);

    //Delegate actual collection to InsertSizeMetricCollector
    multiCollector = new InsertSizeMetricsCollector(METRIC_ACCUMULATION_LEVEL, header.getReadGroups(), MINIMUM_PCT,
            HISTOGRAM_WIDTH, MIN_HISTOGRAM_WIDTH, DEVIATIONS, INCLUDE_DUPLICATES);
}
 
Example 10
Source File: RevertSam.java    From picard with MIT License 5 votes vote down vote up
/**
 * If we are going to override SAMPLE_ALIAS or LIBRARY_NAME, make sure all the read
 * groups have the same values.
 */
static void validateHeaderOverrides(
        final SAMFileHeader inHeader,
        final String sampleAlias,
        final String libraryName) {

    final List<SAMReadGroupRecord> rgs = inHeader.getReadGroups();
    if (sampleAlias != null || libraryName != null) {
        boolean allSampleAliasesIdentical = true;
        boolean allLibraryNamesIdentical = true;
        for (int i = 1; i < rgs.size(); i++) {
            if (!rgs.get(0).getSample().equals(rgs.get(i).getSample())) {
                allSampleAliasesIdentical = false;
            }
            if (!rgs.get(0).getLibrary().equals(rgs.get(i).getLibrary())) {
                allLibraryNamesIdentical = false;
            }
        }
        if (sampleAlias != null && !allSampleAliasesIdentical) {
            throw new PicardException("Read groups have multiple values for sample.  " +
                    "A value for SAMPLE_ALIAS cannot be supplied.");
        }
        if (libraryName != null && !allLibraryNamesIdentical) {
            throw new PicardException("Read groups have multiple values for library name.  " +
                    "A value for library name cannot be supplied.");
        }
    }
}
 
Example 11
Source File: SamUtils.java    From rtg-tools with BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * @param header combined sam header
 * @return creates a map of read group to sample id
 */
public static Map<String, String> getReadGroupToSampleId(final SAMFileHeader header) {
  final HashMap<String, String> readGroupToSampleMap = new HashMap<>();
  for (final SAMReadGroupRecord rec : header.getReadGroups()) {
    //System.err.println("k=" + rec.getReadGroupId() + " v=" + rec.getSample());
    if (rec.getSample() == null) {
      throw new NoTalkbackSlimException("Read group with ID \"" + rec.getReadGroupId() + "\" does not contain a sample tag.");
    }
    readGroupToSampleMap.put(rec.getReadGroupId(), rec.getSample());
  }
  return readGroupToSampleMap;
}
 
Example 12
Source File: QualityScoreDistribution.java    From picard with MIT License 5 votes vote down vote up
@Override
protected void setup(final SAMFileHeader header, final File samFile) {
    IOUtil.assertFileIsWritable(OUTPUT);
    IOUtil.assertFileIsWritable(CHART_OUTPUT);

    // If we're working with a single library, assign that library's name
    // as a suffix to the plot title
    final List<SAMReadGroupRecord> readGroups = header.getReadGroups();
    if (readGroups.size() == 1) {
        this.plotSubtitle = readGroups.get(0).getLibrary();
        if (null == this.plotSubtitle) this.plotSubtitle = "";
    }
}
 
Example 13
Source File: MeanQualityByCycle.java    From picard with MIT License 5 votes vote down vote up
@Override
protected void setup(final SAMFileHeader header, final File samFile) {
    IOUtil.assertFileIsWritable(CHART_OUTPUT);
    // If we're working with a single library, assign that library's name
    // as a suffix to the plot title
    final List<SAMReadGroupRecord> readGroups = header.getReadGroups();
    if (readGroups.size() == 1) {
        plotSubtitle = StringUtil.asEmptyIfNull(readGroups.get(0).getLibrary());
    }
}
 
Example 14
Source File: BAMDiff.java    From dataflow-java with Apache License 2.0 5 votes vote down vote up
private boolean compareReadGroups(final SAMFileHeader h1, final SAMFileHeader h2) throws Exception {
  final List<SAMReadGroupRecord> l1 = h1.getReadGroups();
  final List<SAMReadGroupRecord> l2 = h2.getReadGroups();
  if (!compareValues(l1.size(), l2.size(), "Number of read groups")) {
      return false;
  }
  boolean ret = true;
  for (int i = 0; i < l1.size(); ++i) {
      ret = compareReadGroup(l1.get(i), l2.get(i)) && ret;
  }
  return ret;
}
 
Example 15
Source File: LibraryIdGenerator.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
public LibraryIdGenerator(final SAMFileHeader header) {
    this.header = header;

    for (final SAMReadGroupRecord readGroup : header.getReadGroups()) {
        final String library = getReadGroupLibraryName(readGroup);
        GATKDuplicationMetrics metrics = metricsByLibrary.get(library);
        if (metrics == null) {
            metrics = new GATKDuplicationMetrics();
            metrics.LIBRARY = library;
            metricsByLibrary.put(library, metrics);
        }
    }
}
 
Example 16
Source File: MarkDuplicatesSparkUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Method which generates a map of the readgroups from the header so they can be serialized as indexes
 */
private static Map<String, Short> getHeaderReadGroupIndexMap(final SAMFileHeader header) {
    final List<SAMReadGroupRecord> readGroups = header.getReadGroups();
    if (readGroups.size() > 65535) {
        throw new GATKException("Detected too many read groups in the header, currently MarkDuplicatesSpark only supports up to 65535 unique readgroup IDs but " + readGroups.size() + " were found");
    }
    if (readGroups.size()==0) {
        throw new UserException.BadInput("Sam file header missing Read Group fields. MarkDuplicatesSpark currently requires reads to be labeled with read group tags, please add read groups tags to your reads");
    }
    final Iterator<Short> iterator = IntStream.range(0, readGroups.size()).boxed().map(Integer::shortValue).iterator();
    return Maps.uniqueIndex(iterator, idx -> readGroups.get(idx).getId() );
}
 
Example 17
Source File: RevertSam.java    From picard with MIT License 5 votes vote down vote up
private Map<String, SAMFileHeader> createHeaderMap(
        final SAMFileHeader inHeader,
        final SortOrder sortOrder,
        final boolean removeAlignmentInformation) {

    final Map<String, SAMFileHeader> headerMap = new HashMap<>();
    for (final SAMReadGroupRecord readGroup : inHeader.getReadGroups()) {
        final SAMFileHeader header = createOutHeader(inHeader, sortOrder, removeAlignmentInformation);
        header.addReadGroup(readGroup);
        headerMap.put(readGroup.getId(), header);
    }
    return headerMap;
}
 
Example 18
Source File: ReadMetadata.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
public static Map<String, String> buildGroupToLibMap( final SAMFileHeader header ) {
    final List<SAMReadGroupRecord> readGroups = header.getReadGroups();
    final int mapCapacity = SVUtils.hashMapCapacity(header.getReadGroups().size());
    final Map<String, String> readGroupToLibraryMap = new HashMap<>(mapCapacity);
    for ( final SAMReadGroupRecord groupRecord : readGroups ) {
        readGroupToLibraryMap.put(groupRecord.getId(), groupRecord.getLibrary());
    }
    return readGroupToLibraryMap;
}
 
Example 19
Source File: SamUtils.java    From rtg-tools with BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Get a list of the sample names mentioned in the header in sorted order.
 * @param header combined sam header
 * @return creates an array of sample names
 */
public static String[] getSampleNames(final SAMFileHeader header) {
  final Set<String> sampleNames = new TreeSet<>();
  for (final SAMReadGroupRecord rec : header.getReadGroups()) {
    if (rec.getSample() != null) {
      sampleNames.add(rec.getSample());
    }
  }
  return sampleNames.toArray(new String[0]);
}
 
Example 20
Source File: RevertSam.java    From picard with MIT License 4 votes vote down vote up
private Map<SAMReadGroupRecord, FastqQualityFormat> createReadGroupFormatMap(
        final SAMFileHeader inHeader,
        final File referenceSequence,
        final ValidationStringency validationStringency,
        final File input,
        final boolean restoreOriginalQualities) {

    final Map<SAMReadGroupRecord, FastqQualityFormat> readGroupToFormat = new HashMap<>();

    final SamReaderFactory readerFactory =
            SamReaderFactory.makeDefault().referenceSequence(referenceSequence).validationStringency(validationStringency);
    // Figure out the quality score encoding scheme for each read group.
    for (final SAMReadGroupRecord rg : inHeader.getReadGroups()) {
        final SamRecordFilter filter = new SamRecordFilter() {
            public boolean filterOut(final SAMRecord rec) {
                return !rec.getReadGroup().getId().equals(rg.getId());
            }

            public boolean filterOut(final SAMRecord first, final SAMRecord second) {
                throw new UnsupportedOperationException();
            }
        };
        try (final SamReader reader = readerFactory.open(input)) {
            final FilteringSamIterator filterIterator = new FilteringSamIterator(reader.iterator(), filter);
            if (filterIterator.hasNext()) {
                readGroupToFormat.put(rg, QualityEncodingDetector.detect(
                        QualityEncodingDetector.DEFAULT_MAX_RECORDS_TO_ITERATE, filterIterator, restoreOriginalQualities));
            } else {
                log.warn("Skipping read group " + rg.getReadGroupId() + " with no reads");
            }
        } catch (IOException e) {
            throw new RuntimeIOException(e);
        }
    }

    for (final SAMReadGroupRecord r : readGroupToFormat.keySet()) {
        log.info("Detected quality format for " + r.getReadGroupId() + ": " + readGroupToFormat.get(r));
    }
    if (readGroupToFormat.values().contains(FastqQualityFormat.Solexa)) {
        throw new PicardException("No quality score encoding conversion implemented for " + FastqQualityFormat.Solexa);
    }

    return readGroupToFormat;
}