htsjdk.samtools.SAMReadGroupRecord Java Examples

The following examples show how to use htsjdk.samtools.SAMReadGroupRecord. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: SingleCellRnaSeqMetricsCollector.java    From Drop-seq with MIT License 7 votes vote down vote up
RnaSeqMetricsCollector getRNASeqMetricsCollector(final String cellBarcodeTag, final List<String> cellBarcodes, final File inBAM,
  		final RnaSeqMetricsCollector.StrandSpecificity strand, final double rRNAFragmentPCT, final int readMQ,
  		final File annotationsFile, final File rRNAIntervalsFile) {

  	CollectorFactory factory = new CollectorFactory(inBAM, strand, rRNAFragmentPCT, annotationsFile, rRNAIntervalsFile);
RnaSeqMetricsCollector collector=  factory.getCollector(cellBarcodes);
List<SAMReadGroupRecord> rg = factory.getReadGroups(cellBarcodes);

      // iterate by cell barcodes.  Skip all the reads without cell barcodes.
CloseableIterator<SAMRecord> iter = getReadsInTagOrder (inBAM, cellBarcodeTag, rg, cellBarcodes, readMQ);

      ProgressLogger p = new ProgressLogger(log, 1000000, "Accumulating metrics");
while (iter.hasNext()) {
	SAMRecord r = iter.next();
	String cellBarcode = r.getStringAttribute(cellBarcodeTag);
	r.setAttribute("RG", cellBarcode);
          p.record(r);
   	collector.acceptRecord(r, null);
}

collector.finish();
return (collector);
  }
 
Example #2
Source File: SamToFastq.java    From picard with MIT License 6 votes vote down vote up
private File makeReadGroupFile(final SAMReadGroupRecord readGroup, final String preExtSuffix) {
    String fileName = null;
    if (RG_TAG.equalsIgnoreCase("PU")) {
        fileName = readGroup.getPlatformUnit();
    } else if (RG_TAG.equalsIgnoreCase("ID")) {
        fileName = readGroup.getReadGroupId();
    }
    if (fileName == null) {
        throw new PicardException("The selected RG_TAG: " + RG_TAG + " is not present in the bam header.");
    }
    fileName = IOUtil.makeFileNameSafe(fileName);
    if (preExtSuffix != null) {
        fileName += preExtSuffix;
    }
    fileName += COMPRESS_OUTPUTS_PER_RG ? ".fastq.gz" : ".fastq";

    final File result = (OUTPUT_DIR != null)
            ? new File(OUTPUT_DIR, fileName)
            : new File(fileName);
    IOUtil.assertFileIsWritable(result);
    return result;
}
 
Example #3
Source File: HsMetricCollector.java    From picard with MIT License 6 votes vote down vote up
public HsMetricCollector(final Set<MetricAccumulationLevel> accumulationLevels,
                         final List<SAMReadGroupRecord> samRgRecords,
                         final ReferenceSequenceFile refFile,
                         final File perTargetCoverage,
                         final File perBaseCoverage,
                         final IntervalList targetIntervals,
                         final IntervalList probeIntervals,
                         final String probeSetName,
                         final int nearProbeDistance,
                         final int minimumMappingQuality,
                         final int minimumBaseQuality,
                         final boolean clipOverlappingReads,
                         final int coverageCap,
                         final int sampleSize) {
    super(accumulationLevels, samRgRecords, refFile, perTargetCoverage, perBaseCoverage, targetIntervals, probeIntervals, probeSetName, nearProbeDistance, minimumMappingQuality, minimumBaseQuality, clipOverlappingReads, coverageCap, sampleSize);
}
 
Example #4
Source File: CollectRnaSeqMetrics.java    From picard with MIT License 6 votes vote down vote up
@Override
protected void setup(final SAMFileHeader header, final File samFile) {

    if (CHART_OUTPUT != null) IOUtil.assertFileIsWritable(CHART_OUTPUT);

    final OverlapDetector<Gene> geneOverlapDetector = GeneAnnotationReader.loadRefFlat(REF_FLAT, header.getSequenceDictionary());
    LOG.info("Loaded " + geneOverlapDetector.getAll().size() + " genes.");

    final Long ribosomalBasesInitialValue = RIBOSOMAL_INTERVALS != null ? 0L : null;
    final OverlapDetector<Interval> ribosomalSequenceOverlapDetector = RnaSeqMetricsCollector.makeOverlapDetector(samFile, header, RIBOSOMAL_INTERVALS, LOG);

    final HashSet<Integer> ignoredSequenceIndices = RnaSeqMetricsCollector.makeIgnoredSequenceIndicesSet(header, IGNORE_SEQUENCE);

    collector = new RnaSeqMetricsCollector(METRIC_ACCUMULATION_LEVEL, header.getReadGroups(), ribosomalBasesInitialValue,
            geneOverlapDetector, ribosomalSequenceOverlapDetector, ignoredSequenceIndices, MINIMUM_LENGTH, STRAND_SPECIFICITY, RRNA_FRAGMENT_PERCENTAGE,
            true);

    // If we're working with a single library, assign that library's name as a suffix to the plot title
    final List<SAMReadGroupRecord> readGroups = header.getReadGroups();
    if (readGroups.size() == 1) {
        this.plotSubtitle = readGroups.get(0).getLibrary();
        if (null == this.plotSubtitle) this.plotSubtitle = "";
    }
}
 
Example #5
Source File: MeanQualityByCycleSpark.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
private void saveResults(final MetricsFile<?, Integer> metrics, final SAMFileHeader readsHeader, final String inputFileName){
    MetricsUtils.saveMetrics(metrics, out);

    if (metrics.getAllHistograms().isEmpty()) {
        logger.warn("No valid bases found in input file.");
    } else if (chartOutput != null){
        // Now run R to generate a chart

        // If we're working with a single library, assign that library's name
        // as a suffix to the plot title
        final List<SAMReadGroupRecord> readGroups = readsHeader.getReadGroups();

        /*
         * A subtitle for the plot, usually corresponding to a library.
         */
        String plotSubtitle = "";
        if (readGroups.size() == 1) {
            plotSubtitle = StringUtil.asEmptyIfNull(readGroups.get(0).getLibrary());
        }
        final RScriptExecutor executor = new RScriptExecutor();
        executor.addScript(getMeanQualityByCycleRScriptResource());
        executor.addArgs(out, chartOutput.getAbsolutePath(), inputFileName, plotSubtitle);
        executor.exec();
    }
}
 
Example #6
Source File: FragmentUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
private GATKRead makeOverlappingRead(final String leftFlank, final int leftQual, final String overlapBases,
                                          final byte[] overlapQuals, final String rightFlank, final int rightQual,
                                          final int alignmentStart) {
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader();
    header.addReadGroup(new SAMReadGroupRecord("RG1"));
    final String bases = leftFlank + overlapBases + rightFlank;
    final int readLength = bases.length();
    final GATKRead read = ArtificialReadUtils.createArtificialRead(header, "myRead", 0, alignmentStart, readLength);
    final byte[] leftQuals = Utils.dupBytes((byte) leftQual, leftFlank.length());
    final byte[] rightQuals = Utils.dupBytes((byte) rightQual, rightFlank.length());
    final byte[] quals = Utils.concat(leftQuals, overlapQuals, rightQuals);
    read.setCigar(readLength + "M");
    read.setBases(bases.getBytes());
    read.setBaseQualities(quals);
    read.setReadGroup("RG1");
    read.setMappingQuality(60);
    return read;
}
 
Example #7
Source File: ReadGroupUtils.java    From rtg-tools with BSD 2-Clause "Simplified" License 6 votes vote down vote up
/**
 * Acquire machine type from read group if possible. Assumes platform has been set in read group
 * @param srgr the read group record.
 * @param paired if reads are paired or not
 * @return machine type if recognized, otherwise null
 */
public static MachineType platformToMachineType(SAMReadGroupRecord srgr, boolean paired) {
  if (MachineType.COMPLETE_GENOMICS.compatiblePlatform(srgr.getPlatform())) {
    return MachineType.COMPLETE_GENOMICS;
  } else if (MachineType.COMPLETE_GENOMICS_2.compatiblePlatform(srgr.getPlatform())) {
    return MachineType.COMPLETE_GENOMICS_2;
  } else if (MachineType.FOURFIVEFOUR_PE.compatiblePlatform(srgr.getPlatform()) || MachineType.FOURFIVEFOUR_SE.compatiblePlatform(srgr.getPlatform())) {
    if (paired) {
      return MachineType.FOURFIVEFOUR_PE;
    } else {
      return MachineType.FOURFIVEFOUR_SE;
    }
  } else if (MachineType.ILLUMINA_PE.compatiblePlatform(srgr.getPlatform()) || MachineType.ILLUMINA_SE.compatiblePlatform(srgr.getPlatform())) {
    if (paired) {
      return MachineType.ILLUMINA_PE;
    } else {
      return MachineType.ILLUMINA_SE;
    }
  } else if (MachineType.IONTORRENT.compatiblePlatform(srgr.getPlatform())) {
    return MachineType.IONTORRENT;
  }
  return null;
}
 
Example #8
Source File: SamCommandHelper.java    From rtg-tools with BSD 2-Clause "Simplified" License 6 votes vote down vote up
/**
 * This validates an retrieves from a sam file the read group matching the {@code selectReadGroup} parameter
 * @param rgFile sam file containing the read group
 * @param selectReadGroup the read group ID to locate
 * @return a {@code SAM ReadGroupRecord} that corresponds to the requested id
 * @throws java.io.IOException if IO falls over when reading the SAM file
 */
public static SAMReadGroupRecord validateSelectedSamRG(File rgFile, String selectReadGroup) throws IOException {
  try (BufferedInputStream bis = FileUtils.createInputStream(rgFile, false)) {
    final SamReader sfr = SamUtils.makeSamReader(bis);
    final List<SAMReadGroupRecord> readGroups = sfr.getFileHeader().getReadGroups();
    if (readGroups.isEmpty()) {
      throw new InvalidParamsException("No read group information matching \"" + selectReadGroup + "\" present in the input file \"" + rgFile.getPath() + "\"");
    }
    for (SAMReadGroupRecord r : readGroups) {
      if (selectReadGroup.equals(r.getId())) {
        if (r.getSample() == null) {
          Diagnostic.warning("Sample not specified in read group, it is recommended to set the sample tag.");
        }
        return r;
      }
    }
    throw new InvalidParamsException("No read group information matching \"" + selectReadGroup + "\" present in the input file \"" + rgFile.getPath() + "\"");
  }

}
 
Example #9
Source File: HalvadeReducer.java    From halvade with GNU General Public License v3.0 6 votes vote down vote up
protected SAMReadGroupRecord createReadGroupRecord(
        String RGID, String RGLB, String RGPL, 
        String RGPU, String RGSM, String RGCN, 
        String RGDS, Iso8601Date RGDT, Integer RGPI) {
    SAMReadGroupRecord rg = new SAMReadGroupRecord(RGID);
    rg.setLibrary(RGLB);
    rg.setPlatform(RGPL);
    rg.setSample(RGSM);
    rg.setPlatformUnit(RGPU);
    if(RGCN != null)
        rg.setSequencingCenter(RGCN);
    if(RGDS != null)
        rg.setDescription(RGDS);
    if(RGDT != null)
        rg.setRunDate(RGDT);
    if(RGPI != null)
        rg.setPredictedMedianInsertSize(RGPI);
    return rg;
}
 
Example #10
Source File: FormatCli.java    From rtg-tools with BSD 2-Clause "Simplified" License 6 votes vote down vote up
/**
 * @param protein true if processing as protein
 * @param dusting true if dusting should be performed
 * @param inputDescription the input file format
 * @param outDir where the result is to be placed.
 * @param out Print Stream to print summary to.
 * @param namesToExclude names of sequences to be excluded
 * @param includeQuality true if quality data to be included in output, false otherwise
 * @param includeNames true if name data to be included in output, false otherwise
 * @param compressed whether <code>SDF</code> should be compressed
 * @param mappedSam true to use the mapped SAM implementation
 * @param samReadGroup the read group ID as selected by the user. May be null if there is only one read group, not mapping from SAM/BAM, or no read group has been given.
 * @param readGroupRecord the read group for the sam file
 * @param dedupSecondary true to de-duplicate secondary alignments by name. Useful for processing RTG's SAM/BAM output.
 */
PrereadExecutor(boolean protein, boolean dusting, DataSourceDescription inputDescription, File outDir, PrintStream out,
                Collection<String> namesToExclude, boolean includeQuality, boolean includeNames, boolean compressed,
                boolean mappedSam, String samReadGroup, SAMReadGroupRecord readGroupRecord, boolean dedupSecondary) {
  mProtein = protein;
  mDusting = dusting;
  mInputDescription = inputDescription;
  mOutDir = outDir;
  mOut = out;
  mNamesToExclude = namesToExclude;
  mIncludeQuality = includeQuality;
  mIncludeNames = includeNames;
  mCompressed = compressed;
  mMappedSam = mappedSam;
  mSamReadGroup = samReadGroup;
  mReadGroupRecord = readGroupRecord;
  mDedupSecondary = dedupSecondary;
}
 
Example #11
Source File: MultiLevelCollector.java    From picard with MIT License 6 votes vote down vote up
/** Call acceptRecord(args) on the record collector identified by getKey */
public void acceptRecord(final ARGTYPE args, final SAMReadGroupRecord rg) {

    String key = UNKNOWN;
    if(rg != null) {
        final String computedKey = getKey(rg);
        if(computedKey != null) {
            key = computedKey;
        }
    }
    PerUnitMetricCollector<METRIC_TYPE, Histogram_KEY, ARGTYPE> collector = collectors.get(key);
    if (collector == null) {
        if (!UNKNOWN.equals(key)) {
            throw new PicardException("Could not find collector for " + key);
        }
        collector = makeUnknownCollector();
        collectors.put(key, collector);
    }
    collector.acceptRecord(args);
}
 
Example #12
Source File: HaplotypeBAMDestination.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Create a new HaplotypeBAMDestination
 *
 * @param sourceHeader SAMFileHeader used to seed the output SAMFileHeader for this destination.
 * @param haplotypeReadGroupID read group ID used when writing haplotypes as reads
 */
protected HaplotypeBAMDestination(SAMFileHeader sourceHeader, final String haplotypeReadGroupID) {
    Utils.nonNull(sourceHeader, "sourceHeader cannot be null");
    Utils.nonNull(haplotypeReadGroupID, "haplotypeReadGroupID cannot be null");
    this.haplotypeReadGroupID = haplotypeReadGroupID;

    bamOutputHeader = new SAMFileHeader();
    bamOutputHeader.setSequenceDictionary(sourceHeader.getSequenceDictionary());
    bamOutputHeader.setSortOrder(SAMFileHeader.SortOrder.coordinate);

    final List<SAMReadGroupRecord> readGroups = new ArrayList<>();
    readGroups.addAll(sourceHeader.getReadGroups()); // include the original read groups

    // plus an artificial read group for the haplotypes
    final SAMReadGroupRecord rgRec = new SAMReadGroupRecord(getHaplotypeReadGroupID());
    rgRec.setSample(haplotypeSampleTag);
    rgRec.setSequencingCenter("BI");
    readGroups.add(rgRec);
    bamOutputHeader.setReadGroups(readGroups);
    final List<SAMProgramRecord> programRecords = new ArrayList<>(sourceHeader.getProgramRecords());
    programRecords.add(new SAMProgramRecord("HaplotypeBAMWriter"));
    bamOutputHeader.setProgramRecords(programRecords);
}
 
Example #13
Source File: Merge.java    From cramtools with Apache License 2.0 6 votes vote down vote up
private static SAMFileHeader mergeHeaders(List<RecordSource> sources) {
	SAMFileHeader header = new SAMFileHeader();
	for (RecordSource source : sources) {
		SAMFileHeader h = source.reader.getFileHeader();

		for (SAMSequenceRecord seq : h.getSequenceDictionary().getSequences()) {
			if (header.getSequenceDictionary().getSequence(seq.getSequenceName()) == null)
				header.addSequence(seq);
		}

		for (SAMProgramRecord pro : h.getProgramRecords()) {
			if (header.getProgramRecord(pro.getProgramGroupId()) == null)
				header.addProgramRecord(pro);
		}

		for (String comment : h.getComments())
			header.addComment(comment);

		for (SAMReadGroupRecord rg : h.getReadGroups()) {
			if (header.getReadGroup(rg.getReadGroupId()) == null)
				header.addReadGroup(rg);
		}
	}

	return header;
}
 
Example #14
Source File: DefaultSamFilterTest.java    From rtg-tools with BSD 2-Clause "Simplified" License 6 votes vote down vote up
public void testFilterReadGroup() {
  final SamFilterParams.SamFilterParamsBuilder builder = SamFilterParams.builder().selectReadGroups(Collections.singleton("rg1"));
  final DefaultSamFilter f = new DefaultSamFilter(builder.create());
  final DefaultSamFilter notf = new DefaultSamFilter(builder.invertFilters(true).create());
  final SAMFileHeader h = new SAMFileHeader();
  final SAMReadGroupRecord r1 = new SAMReadGroupRecord("rg1");
  final SAMReadGroupRecord r2 = new SAMReadGroupRecord("rg2");
  h.addReadGroup(r1);
  h.addReadGroup(r2);
  final SAMRecord rec = new SAMRecord(h); // Not unmapped but alignment position == 0
  assertFalse(f.acceptRecord(rec));
  assertTrue(notf.acceptRecord(rec));
  rec.setAttribute(ReadGroupUtils.RG_ATTRIBUTE, r1.getReadGroupId());
  assertTrue(f.acceptRecord(rec));
  assertFalse(notf.acceptRecord(rec));
  rec.setAttribute(ReadGroupUtils.RG_ATTRIBUTE, r2.getReadGroupId());
  assertFalse(f.acceptRecord(rec));
  assertTrue(notf.acceptRecord(rec));
}
 
Example #15
Source File: FingerprintChecker.java    From picard with MIT License 6 votes vote down vote up
private FingerprintIdDetails createUnknownFP(final Path samFile, final SAMRecord rec) {
    final PicardException e = new PicardException("Found read with no readgroup: " + rec.getReadName() + " in file: " + samFile);
    if (validationStringency != ValidationStringency.STRICT) {
        final SAMReadGroupRecord readGroupRecord = new SAMReadGroupRecord("<UNKNOWN>:::" + samFile.toUri().toString());
        readGroupRecord.setLibrary("<UNKNOWN>");
        readGroupRecord.setSample(defaultSampleID);
        readGroupRecord.setPlatformUnit("<UNKNOWN>.0.ZZZ");

        if (validationStringency != ValidationStringency.SILENT && missingRGFiles.add(samFile)) {
            log.warn(e.getMessage());
            log.warn("further messages from this file will be suppressed");
        }

        return new FingerprintIdDetails(readGroupRecord, samFile.toUri().toString());
    } else {
        log.error(e.getMessage());
        throw e;
    }
}
 
Example #16
Source File: AlignmentContextUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test(expectedExceptions = UserException.ReadMissingReadGroup.class)
public void testNoSample() throws Exception {
    final SAMReadGroupRecord readGroupOne = new SAMReadGroupRecord("rg1");

    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader(1, 1, 1000);
    header.addReadGroup(readGroupOne);

    final Locatable loc = new SimpleInterval("chr1", 1, 1);
    final GATKRead read1 = ArtificialReadUtils.createArtificialRead(header,"read1",0,1,10);
    read1.setReadGroup(readGroupOne.getId());

    final ReadPileup pileup = new ReadPileup(loc, Arrays.asList(read1), 1);
    final AlignmentContext ac = new AlignmentContext(loc, pileup);
    ac.splitContextBySampleName(header);

}
 
Example #17
Source File: FastqToSam.java    From picard with MIT License 6 votes vote down vote up
/** Creates a simple header with the values provided on the command line. */
public SAMFileHeader createSamFileHeader() {
    final SAMReadGroupRecord rgroup = new SAMReadGroupRecord(this.READ_GROUP_NAME);
    rgroup.setSample(this.SAMPLE_NAME);
    if (this.LIBRARY_NAME != null) rgroup.setLibrary(this.LIBRARY_NAME);
    if (this.PLATFORM != null) rgroup.setPlatform(this.PLATFORM);
    if (this.PLATFORM_UNIT != null) rgroup.setPlatformUnit(this.PLATFORM_UNIT);
    if (this.SEQUENCING_CENTER != null) rgroup.setSequencingCenter(SEQUENCING_CENTER);
    if (this.PREDICTED_INSERT_SIZE != null) rgroup.setPredictedMedianInsertSize(PREDICTED_INSERT_SIZE);
    if (this.DESCRIPTION != null) rgroup.setDescription(this.DESCRIPTION);
    if (this.RUN_DATE != null) rgroup.setRunDate(this.RUN_DATE);
    if (this.PLATFORM_MODEL != null) rgroup.setPlatformModel(this.PLATFORM_MODEL);
    if (this.PROGRAM_GROUP != null) rgroup.setProgramGroup(this.PROGRAM_GROUP);

    final SAMFileHeader header = new SAMFileHeader();
    header.addReadGroup(rgroup);

    for (final String comment : COMMENT) {
        header.addComment(comment);
    }

    header.setSortOrder(this.SORT_ORDER);
    return header ;
}
 
Example #18
Source File: BAMDiff.java    From dataflow-java with Apache License 2.0 6 votes vote down vote up
private boolean compareReadGroup(final SAMReadGroupRecord samReadGroupRecord1, final SAMReadGroupRecord samReadGroupRecord2) throws Exception {
  boolean ret = compareValues(samReadGroupRecord1.getReadGroupId(), samReadGroupRecord2.getReadGroupId(),
          "Read Group ID");
  ret = compareValues(samReadGroupRecord1.getSample(), samReadGroupRecord2.getSample(),
          "Sample for read group " + samReadGroupRecord1.getReadGroupId()) && ret;
  ret = compareValues(samReadGroupRecord1.getLibrary(), samReadGroupRecord2.getLibrary(),
          "Library for read group " + samReadGroupRecord1.getReadGroupId()) && ret;
  final String[] attributes = {"DS", "PU", "PI", "CN", "DT", "PL"};
  for (final String attribute : attributes) {
      String a1 = samReadGroupRecord1.getAttribute(attribute);
      String a2 = samReadGroupRecord2.getAttribute(attribute);
      if (options.ignoreNullVsZeroPI && attribute.equals("PI")) {
        if (a1 == null) {
          a1 = "0";
        }
        if (a2 == null) {
          a2 = "0";
        }
      }
      ret = compareValues(a1, a2,
              attribute + " for read group " + samReadGroupRecord1.getReadGroupId()) && ret;
  }
  return ret;
}
 
Example #19
Source File: ReadGroupBlackListReadFilter.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Override
public boolean test( final GATKRead read ) {
    final SAMReadGroupRecord readGroup = ReadUtils.getSAMReadGroupRecord(read, samHeader);
    if ( readGroup == null ) {
        return true;
    }

    for (final String attributeType : blacklistEntries.keySet()) {

        final String attribute;
        if (SAMReadGroupRecord.READ_GROUP_ID_TAG.equals(attributeType) || SAMTag.RG.name().equals(attributeType)) {
            attribute = readGroup.getId();
        } else {
            attribute = readGroup.getAttribute(attributeType);
        }
        if (attribute != null && blacklistEntries.get(attributeType).contains(attribute)) {
            return false;
        }
    }

    return true;
}
 
Example #20
Source File: AlignmentContextIteratorBuilder.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 *  Create the appropriate instance of an alignment context spliterator based on the input parameters.
 *
 *  Please note that this wrapper is still tied to {@link LocusIteratorByState} and some parameters are being passed directly to that class.
 *
 * @param intervalsForTraversal the intervals to generate alignment contexts over.
 * @param header SAM file header to use
 * @param readIterator iterator of sorted GATK reads
 * @param dictionary the SAMSequenceDictionary being used for this traversal.  This can be the same as the reference.  {@code null} is supported, but will often lead to invalid parameter combinations.
 * @param downsamplingInfo how to downsample (for {@link LocusIteratorByState})
 * @param isReference the dictionary specified above is a reference, {@code false} if no reference being used or it is not a reference.
 * @param emitEmptyLoci whether loci with no coverage should be emitted.  In this case, the AlignmentContext will be empty (not null).
 * @param isKeepUniqueReadListInLibs if true, we will keep the unique reads from the samIterator and make them
 *                                       available via the transferReadsFromAllPreviousPileups interface (this parameter is specific to {@link LocusIteratorByState})
 * @param isIncludeDeletions include reads with deletion on the loci in question
 * @param isIncludeNs include reads with N on the loci in question
 * @return iterator that produces AlignmentContexts ready for consumption (e.g. by a {@link org.broadinstitute.hellbender.engine.LocusWalker})
 */
private static Iterator<AlignmentContext> createAlignmentContextIterator(final List<SimpleInterval> intervalsForTraversal,
                                                                         final SAMFileHeader header,
                                                                         final Iterator<GATKRead> readIterator,
                                                                         final SAMSequenceDictionary dictionary,
                                                                         final LIBSDownsamplingInfo downsamplingInfo,
                                                                         final boolean isReference,
                                                                         boolean emitEmptyLoci,
                                                                         boolean isKeepUniqueReadListInLibs,
                                                                         boolean isIncludeDeletions,
                                                                         boolean isIncludeNs) {

    // get the samples from the read groups
    final Set<String> samples = header.getReadGroups().stream()
            .map(SAMReadGroupRecord::getSample)
            .collect(Collectors.toSet());

    // get the LIBS
    final LocusIteratorByState libs = new LocusIteratorByState(readIterator, downsamplingInfo, isKeepUniqueReadListInLibs, samples, header, isIncludeDeletions, isIncludeNs);

    List<SimpleInterval> finalIntervals = intervalsForTraversal;
    validateEmitEmptyLociParameters(emitEmptyLoci, dictionary, intervalsForTraversal, isReference);
    if (emitEmptyLoci) {

        // If no intervals were specified, then use the entire reference (or best available sequence dictionary).
        if (!areIntervalsSpecified(finalIntervals)) {
            finalIntervals = IntervalUtils.getAllIntervalsForReference(dictionary);
        }
        final IntervalLocusIterator intervalLocusIterator = new IntervalLocusIterator(finalIntervals.iterator());
        return new IntervalAlignmentContextIterator(libs, intervalLocusIterator, header.getSequenceDictionary());
    } else if (areIntervalsSpecified(finalIntervals)) {
        return new IntervalOverlappingIterator<>(libs, finalIntervals, header.getSequenceDictionary());
    } else {
        // prepare the iterator
        return libs;
    }
}
 
Example #21
Source File: XGBoostEvidenceFilterUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private static SAMFileHeader initSAMFileHeader() {
    final SAMFileHeader samHeader = createArtificialSamHeader();
    SAMReadGroupRecord readGroup = new SAMReadGroupRecord(readGroupName);
    readGroup.setSample(DEFAULT_SAMPLE_NAME);
    samHeader.addReadGroup(readGroup);
    return samHeader;
}
 
Example #22
Source File: MultiLevelCollector.java    From picard with MIT License 5 votes vote down vote up
public Distributor(final List<SAMReadGroupRecord> rgRecs) {
    collectors = new LinkedHashMap<String, PerUnitMetricCollector<METRIC_TYPE, Histogram_KEY, ARGTYPE>>();
    for(final SAMReadGroupRecord rg : rgRecs) {
        final String key = getKey(rg);
        if(!collectors.containsKey(key)) {
            collectors.put(key, makeCollector(rg));
        }
    }
}
 
Example #23
Source File: ReadGroupCovariateUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test
public void testMissingPlatformUnit() {
    final String expected = "MY.7";
    final ReadGroupCovariate covariate = new ReadGroupCovariate(new RecalibrationArgumentCollection(), Arrays.asList(expected));
    SAMReadGroupRecord rg = new SAMReadGroupRecord(expected);
    runTest(rg, expected, covariate);
}
 
Example #24
Source File: CollectTargetedMetrics.java    From picard with MIT License 5 votes vote down vote up
/**
 * A factory method for the TargetMetricsCollector to use this time.  Examples of TargetMetricsCollector:
 * (TargetedPcrMetricsCollector, HsMetricsCalculator)
 *
 * @return A TargetMetricsCollector to which we will pass SAMRecords
 */
protected abstract COLLECTOR makeCollector(final Set<MetricAccumulationLevel> accumulationLevels,
                                           final List<SAMReadGroupRecord> samRgRecords,
                                           final ReferenceSequenceFile refFile,
                                           final File perTargetCoverage,
                                           final File perBaseCoverage,
                                           final IntervalList targetIntervals,
                                           final IntervalList probeIntervals,
                                           final String probeSetName,
                                           final int nearProbeDistance);
 
Example #25
Source File: LibraryIdGenerator.java    From picard with MIT License 5 votes vote down vote up
/**
 * Gets the library name from the header for the record. If the RG tag is not present on
 * the record, or the library isn't denoted on the read group, a constant string is
 * returned.
 */
public static String getLibraryName(final SAMFileHeader header, final SAMRecord rec) {
    final String readGroupId = (String) rec.getAttribute(ReservedTagConstants.READ_GROUP_ID);

    if (readGroupId != null) {
        final SAMReadGroupRecord rg = header.getReadGroup(readGroupId);
        if (rg != null) {
            final String libraryName = rg.getLibrary();
            if (null != libraryName) return libraryName;
        }
    }

    return UNKNOWN_LIBRARY;
}
 
Example #26
Source File: ReadGroupCovariateUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test
public void testMaxValue() {
    final String id = "MY.ID";
    final String expected = "SAMPLE.1";
    final ReadGroupCovariate covariate = new ReadGroupCovariate(new RecalibrationArgumentCollection(), Arrays.asList(expected));
    SAMReadGroupRecord rg = new SAMReadGroupRecord(id);
    rg.setPlatformUnit(expected);
    Assert.assertEquals(covariate.maximumKeyValue(), 0);//there's just 1 read group, so 0 is the max value
}
 
Example #27
Source File: MeanQualityByCycle.java    From picard with MIT License 5 votes vote down vote up
@Override
protected void setup(final SAMFileHeader header, final File samFile) {
    IOUtil.assertFileIsWritable(CHART_OUTPUT);
    // If we're working with a single library, assign that library's name
    // as a suffix to the plot title
    final List<SAMReadGroupRecord> readGroups = header.getReadGroups();
    if (readGroups.size() == 1) {
        plotSubtitle = StringUtil.asEmptyIfNull(readGroups.get(0).getLibrary());
    }
}
 
Example #28
Source File: DepthOfCoverage.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private HashSet<String> getSamplesByPartitionFromReadHeader(DoCOutputType.Partition type) {
    final HashSet<String> partition = new HashSet<String>();
    final SAMFileHeader header = getHeaderForReads();
    if (type == DoCOutputType.Partition.sample) {
        partition.addAll(ReadUtils.getSamplesFromHeader(header));
    } else {
        for (SAMReadGroupRecord rg : header.getReadGroups()) {
            partition.add(CoverageUtils.getTypeID(rg, type));
        }
    }
    return partition;
}
 
Example #29
Source File: RevertSamSparkUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test
public void testAssertAllReadGroupsMappedSuccess() {
    final SAMReadGroupRecord rg1 = new SAMReadGroupRecord("rg1");
    final SAMReadGroupRecord rg2 = new SAMReadGroupRecord("rg2");

    final Map<String, Path> outputMap = new HashMap<>();
    outputMap.put("rg1", IOUtils.getPath(new File("/foo/bar/rg1.bam").getAbsolutePath()));
    outputMap.put("rg2", IOUtils.getPath(new File("/foo/bar/rg2.bam").getAbsolutePath()));
    RevertSamSpark.assertAllReadGroupsMapped(outputMap, Arrays.asList(rg1, rg2));
    RevertSamSpark.assertAllReadGroupsMapped(outputMap, Arrays.asList(rg1));
    RevertSamSpark.assertAllReadGroupsMapped(outputMap, Arrays.asList(rg2));
}
 
Example #30
Source File: LocusIteratorByStateUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test(enabled = true, dataProvider = "LIBS_NotHoldingTooManyReads")
public void testLIBS_NotHoldingTooManyReads(final int nReadsPerLocus, final int downsampleTo, final int payloadInBytes) {
    logger.warn(String.format("testLIBS_NotHoldingTooManyReads %d %d %d", nReadsPerLocus, downsampleTo, payloadInBytes));
    final int readLength = 10;

    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader(1, 1, 100000);
    final int nSamples = 1;
    final List<String> samples = new ArrayList<>(nSamples);
    for ( int i = 0; i < nSamples; i++ ) {
        final SAMReadGroupRecord rg = new SAMReadGroupRecord("rg" + i);
        final String sample = "sample" + i;
        samples.add(sample);
        rg.setSample(sample);
        rg.setPlatform(NGSPlatform.ILLUMINA.getDefaultPlatform());
        header.addReadGroup(rg);
    }

    final boolean downsample = downsampleTo != -1;
    final DownsamplingMethod downsampler = downsample
            ? new DownsamplingMethod(DownsampleType.BY_SAMPLE, downsampleTo, null)
            : new DownsamplingMethod(DownsampleType.NONE, null, null);

    final WeakReadTrackingIterator iterator = new WeakReadTrackingIterator(nReadsPerLocus, readLength, payloadInBytes, header);

    final LocusIteratorByState li;
    li = new LocusIteratorByState(
            iterator,
            downsampler,
            false,
            samples,
            header,
            true
    );

    while ( li.hasNext() ) {
        final AlignmentContext next = li.next();
        Assert.assertTrue(next.getBasePileup().size() <= downsampleTo, "Too many elements in pileup " + next);
        // TODO -- assert that there are <= X reads in memory after GC for some X
    }
}