Java Code Examples for htsjdk.samtools.SAMFileHeader

The following examples show how to use htsjdk.samtools.SAMFileHeader. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
 * Create a new ReferenceConfidenceModel
 * @param samples the list of all samples we'll be considering with this model
 * @param header the SAMFileHeader describing the read information (used for debugging)
 * @param indelInformativeDepthIndelSize the max size of indels to consider when calculating indel informative depths
// Constructor for a ReferenceConfidenceModel over the given samples/header.
// NOTE(review): scraped snippet — the body is cut off mid-way through the
// PosteriorProbabilitiesOptions argument list; trailing arguments and the
// closing parenthesis/brace are missing from the page.
public ReferenceConfidenceModel(final SampleList samples,
                                final SAMFileHeader header,
                                final int indelInformativeDepthIndelSize,
                                final int numRefForPrior) {
    Utils.nonNull(samples, "samples cannot be null");
    Utils.validateArg( samples.numberOfSamples() > 0, "samples cannot be empty");
    // NOTE(review): message says "cannot be empty" but this is a null check.
    Utils.nonNull(header, "header cannot be empty");
    //TODO: code and comment disagree -- which is right?
    // NOTE(review): the check accepts 0 while the message demands >= 1 — one of
    // the two is wrong; confirm the intended minimum against callers.
    Utils.validateArg( indelInformativeDepthIndelSize >= 0, () -> "indelInformativeDepthIndelSize must be >= 1 but got " + indelInformativeDepthIndelSize);

    this.samples = samples;
    this.indelInformativeDepthIndelSize = indelInformativeDepthIndelSize;
    this.numRefSamplesForPrior = numRefForPrior;
    this.options = new PosteriorProbabilitiesUtils.PosteriorProbabilitiesOptions(HomoSapiensConstants.SNP_HETEROZYGOSITY,
            HomoSapiensConstants.INDEL_HETEROZYGOSITY, useInputSamplesAlleleCounts, useMLEAC, ignoreInputSamplesForMissingVariants,
Example 2
Source Project: Drop-seq   Source File:    License: MIT License 6 votes vote down vote up
// Edits the header's sequence dictionary in place: optionally drops rejected
// references and strips a reference-name prefix from the remaining sequences.
// NOTE(review): scraped snippet — the assignment on the "sequence =;" line
// (presumably "="), the DROP_REJECTED_REF branch body (presumably
// "it.remove()"), and several closing braces are missing from the page;
// recover the full method from the Drop-seq source before using.
private SAMFileHeader editSequenceDictionary(final SAMFileHeader fileHeader) {
     // Make mutable copy of sequence list
        final ArrayList<SAMSequenceRecord> sequences = new ArrayList<>(fileHeader.getSequenceDictionary().getSequences());
        final ListIterator<SAMSequenceRecord> it = sequences.listIterator();
        while (it.hasNext()) {
            final SAMSequenceRecord sequence =;
            if (DROP_REJECTED_REF && filterReference(sequence.getSequenceName()))
else {
                final String editedSequenceName = stripReferencePrefix(sequence.getSequenceName());
                if (editedSequenceName != null)
		it.set(cloneWithNewName(sequence, editedSequenceName));
 return fileHeader;
Example 3
 * Create a new file destination.
 * @param outPath path where output is written (doesn't have to be local)
 * @param createBamOutIndex true to create an index file for the bamout
 * @param createBamOutMD5 true to create an md5 file for the bamout
 * @param sourceHeader SAMFileHeader used to seed the output SAMFileHeader for this destination, must not be null
 * @param haplotypeReadGroupID  read group ID used when writing haplotypes as reads
// Constructor for a file-backed haplotype BAM destination; delegates header
// derivation to the HaplotypeBAMDestination superclass and opens a writer.
// NOTE(review): scraped snippet — the createCommonSAMWriter(...) argument list
// and the constructor's closing braces are cut off by the page.
public SAMFileDestination(
        final Path outPath,
        final boolean createBamOutIndex,
        final boolean createBamOutMD5,
        final SAMFileHeader sourceHeader,
        final String haplotypeReadGroupID)
    super(sourceHeader, haplotypeReadGroupID);
    samWriter = new SAMFileGATKReadWriter(ReadUtils.createCommonSAMWriter(
            getBAMOutputHeader(), // use the header derived from the source header by HaplotypeBAMDestination
Example 4
Source Project: rtg-tools   Source File:    License: BSD 2-Clause "Simplified" License 6 votes vote down vote up
// Verifies run-id handling in SAM header comments: adds a READ-SDF-ID comment
// and a RUN_ID_ATTRIBUTE comment, then scans the comments for the run-id entry.
// NOTE(review): scraped snippet — the loop body (presumably an assertion on
// the found comment) and the closing braces are missing from the page.
public void testRunCommentUpdate() {
  final SAMFileHeader sf = new SAMFileHeader();
  sf.addComment("READ-SDF-ID:" + Long.toHexString(123456));
  final UUID uuid = UUID.randomUUID();
  sf.addComment(SamUtils.RUN_ID_ATTRIBUTE + uuid);


  for (final String s : sf.getComments()) {
    if (s.startsWith(SamUtils.RUN_ID_ATTRIBUTE)) {
Example 5
Source Project: picard   Source File:    License: MIT License 6 votes vote down vote up
/** Creates a simple header with the values provided on the command line. */
public SAMFileHeader createSamFileHeader() {
    final SAMReadGroupRecord rgroup = new SAMReadGroupRecord(this.READ_GROUP_NAME);
    if (this.LIBRARY_NAME != null) rgroup.setLibrary(this.LIBRARY_NAME);
    if (this.PLATFORM != null) rgroup.setPlatform(this.PLATFORM);
    if (this.PLATFORM_UNIT != null) rgroup.setPlatformUnit(this.PLATFORM_UNIT);
    if (this.SEQUENCING_CENTER != null) rgroup.setSequencingCenter(SEQUENCING_CENTER);
    if (this.PREDICTED_INSERT_SIZE != null) rgroup.setPredictedMedianInsertSize(PREDICTED_INSERT_SIZE);
    if (this.DESCRIPTION != null) rgroup.setDescription(this.DESCRIPTION);
    if (this.RUN_DATE != null) rgroup.setRunDate(this.RUN_DATE);
    if (this.PLATFORM_MODEL != null) rgroup.setPlatformModel(this.PLATFORM_MODEL);
    if (this.PROGRAM_GROUP != null) rgroup.setProgramGroup(this.PROGRAM_GROUP);

    final SAMFileHeader header = new SAMFileHeader();

    for (final String comment : COMMENT) {

    return header ;
Example 6
// Expects parseIntervalArguments to reject every invalid interval supplied by
// the data provider with one of the listed UserException subtypes.
// NOTE(review): scraped snippet — the step that writes picardIntervals into
// picardIntervalFile is missing, as are the method's closing braces.
@Test(expectedExceptions={UserException.CouldNotReadInputFile.class, UserException.MalformedGenomeLoc.class, UserException.MalformedFile.class}, dataProvider="invalidIntervalTestData")
public void testIntervalFileToListInvalidPicardIntervalHandling(GenomeLocParser genomeLocParser,
                                   String contig, int intervalStart, int intervalEnd ) throws Exception {

    final SAMFileHeader picardFileHeader = new SAMFileHeader();
    picardFileHeader.addSequence(new SAMSequenceRecord("2", 100000));

    final IntervalList picardIntervals = new IntervalList(picardFileHeader);
    picardIntervals.add(new Interval(contig, intervalStart, intervalEnd, true, "dummyname"));

    final File picardIntervalFile = createTempFile("testIntervalFileToListInvalidPicardIntervalHandling", ".interval_list");

    // loadIntervals() will validate all intervals against the sequence dictionary in our genomeLocParser,
    // and should throw for all intervals in our invalidIntervalTestData set
    IntervalUtils.parseIntervalArguments(genomeLocParser, picardIntervalFile.getAbsolutePath());
Example 7
Source Project: gatk   Source File:    License: BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
 * Calculates the distance between two genomeLocs across contigs (if necessary).
 * Returns minDistance(other) if in same contig.
 * Works with intervals!
 * Uses the SAMFileHeader to extract the size of the contigs and follows the order in the dictionary.
 * @param other         the genome loc to compare to
 * @param samFileHeader the contig information
 * @return the sum of all the bases in between the genomeLocs, including entire contigs
public long distanceAcrossContigs(final GenomeLoc other, final SAMFileHeader samFileHeader) {
    if (onSameContig(other)) {
        return minDistance(other);

    // add the distance from the first genomeLoc to the end of it's contig and the distance from the
    // second genomeLoc to the beginning of it's contig.
    long distance = 0;
    if (contigIndex < other.contigIndex) {
        distance += samFileHeader.getSequence(contigIndex).getSequenceLength() - stop;
        distance += other.start;
    } else {
        distance += samFileHeader.getSequence(other.contigIndex).getSequenceLength() - other.stop;
        distance += start;

    // add any contig (in its entirety) in between the two genomeLocs
    for (int i=Math.min(this.contigIndex, other.contigIndex) + 1; i < Math.max(this.contigIndex, other.contigIndex); i++) {
        distance += samFileHeader.getSequence(i).getSequenceLength();
    return distance;
Example 8
Source Project: picard   Source File:    License: MIT License 6 votes vote down vote up
 * Simple implementation of a gather operations that uses SAMFileReaders and Writers in order to concatenate
 * multiple BAM files.
// Concatenates the records of all input BAM/SAM files into one output file,
// reusing the first input's header.
// NOTE(review): scraped snippet — a log call was fused onto the for-line
// (""Gathering ...")" lost its line break/receiver context), and the
// reader/writer close() calls plus closing braces are missing from the page.
private static void gatherNormally(final List<File> inputs, final File output, final boolean createIndex, final boolean createMd5,
                                   final File referenceFasta) {
    final SAMFileHeader header;
        header = SamReaderFactory.makeDefault().referenceSequence(referenceFasta).getFileHeader(inputs.get(0));

    final SAMFileWriter out = new SAMFileWriterFactory().setCreateIndex(createIndex).setCreateMd5File(createMd5).makeSAMOrBAMWriter(header, true, output);

    for (final File f : inputs) {"Gathering " + f.getAbsolutePath());
        final SamReader in = SamReaderFactory.makeDefault().referenceSequence(referenceFasta).open(f);
        for (final SAMRecord rec : in) out.addAlignment(rec);

Example 9
Source Project: abra2   Source File:    License: MIT License 6 votes vote down vote up
public static int findFirstOverlappingRegion(SAMFileHeader samHeader, SAMRecordWrapper read, int readStart, int readEnd, List<Feature> regions, int start) {
	if (start < 0) {
		start = 0;

	for (int idx=start; idx<regions.size(); idx++) {
		Feature region = regions.get(idx);
		if ( (read.getSamRecord().getReferenceIndex() < samHeader.getSequenceDictionary().getSequenceIndex(region.getSeqname())) ||
			 (read.getSamRecord().getReferenceName().equals(region.getSeqname()) && readStart < region.getStart()) ) {
			// This read is in between regions
			// TODO: adjust start region here
			return -1;
		} else if (region.overlaps(read.getSamRecord().getReferenceName(), readStart, readEnd)) {
			return idx;
	// This read is beyond all regions
	return -1;
Example 10
Source Project: abra2   Source File:    License: MIT License 6 votes vote down vote up
// Collects the indexes of all regions overlapped by any spanning region of
// the read, beginning the scan at the given region index.
// NOTE(review): scraped snippet — the statements that add indexes to
// overlappingRegions (after findFirstOverlappingRegion and inside the while
// loop) are missing, along with several closing braces; recover from the
// abra2 source before using.
public static List<Integer> findAllOverlappingRegions(SAMFileHeader samHeader, SAMRecordWrapper read, List<Feature> regions, int start) {
	List<Integer> overlappingRegions = new ArrayList<Integer>();
	for (Span span : read.getSpanningRegions()) {
		int idx = findFirstOverlappingRegion(samHeader, read, span.start, span.end, regions, start);
		if (idx > -1) {
			boolean isOverlap = true;
			idx += 1;
			while (isOverlap && idx < regions.size()) {
				Feature region = regions.get(idx);
				if (region.overlaps(read.getSamRecord().getReferenceName(), span.start, span.end)) {
				} else {
					isOverlap = false;
				idx += 1;
	return overlappingRegions;
Example 11
Source Project: halvade   Source File:    License: GNU General Public License v3.0 6 votes vote down vote up
// Writes the SAM records to a local file and then runs the external elPrep
// tool on it, returning the number of reads written.
// NOTE(review): scraped snippet — the loop body writing/counting records, the
// SAMTag constant in "sam.setAttribute(, RGID)", the receiver of
// "" and the body of the final if/timing report are all missing
// from the page; recover from the halvade source before using.
public int callElPrep(String input, String output, String rg, int threads, 
        SAMRecordIterator SAMit,
        SAMFileHeader header, String dictFile, boolean updateRG, boolean keepDups, String RGID) throws InterruptedException, QualityException {
    SAMRecord sam;
    SAMFileWriterFactory factory = new SAMFileWriterFactory();
    SAMFileWriter Swriter = factory.makeSAMWriter(header, true, new File(input));
    int reads = 0;
    while(SAMit.hasNext()) {
        sam =;
            sam.setAttribute(, RGID);
    String customArgs = HalvadeConf.getCustomArgs(context.getConfiguration(), "elprep", "");  
    String[] command = CommandGenerator.elPrep(bin, input, output, threads, true, rg, null, !keepDups, customArgs);
    long estimatedTime = runProcessAndWait("elPrep", command);
    if(context != null)
    return reads;
Example 12
// Exercises SATagBuilder tag handling across several artificial reads.
// NOTE(review): scraped snippet — the calls that actually add tags via the
// builders and all assertions are missing; the method is cut off after
// creating read3.
public void testAddTag() {
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader();
    GATKRead read = ArtificialReadUtils.createArtificialRead("40M");
    SATagBuilder builder = new SATagBuilder(read);

    GATKRead read1 = ArtificialReadUtils.createArtificialRead(header, "read1", 1, 10000, new byte[] {}, new byte[] {}, "4M");
    GATKRead read2 = ArtificialReadUtils.createArtificialRead(header, "read2", 2, 10001, new byte[] {}, new byte[] {}, "4S");
    SATagBuilder read2builder = new SATagBuilder(read2);
    GATKRead read3 = ArtificialReadUtils.createArtificialRead(header, "read3", 3, 10003, new byte[] {}, new byte[] {}, "4D");

Example 13
Source Project: cramtools   Source File:    License: Apache License 2.0 6 votes vote down vote up
private static SAMFileHeader mergeHeaders(List<RecordSource> sources) {
	SAMFileHeader header = new SAMFileHeader();
	for (RecordSource source : sources) {
		SAMFileHeader h = source.reader.getFileHeader();

		for (SAMSequenceRecord seq : h.getSequenceDictionary().getSequences()) {
			if (header.getSequenceDictionary().getSequence(seq.getSequenceName()) == null)

		for (SAMProgramRecord pro : h.getProgramRecords()) {
			if (header.getProgramRecord(pro.getProgramGroupId()) == null)

		for (String comment : h.getComments())

		for (SAMReadGroupRecord rg : h.getReadGroups()) {
			if (header.getReadGroup(rg.getReadGroupId()) == null)

	return header;
Example 14
Source Project: gatk   Source File:    License: BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
 * Writes RDD of reads to path. Note writeReads() is not used because there are separate paired/unpaired outputs.
 * Header sequence dictionary is reduced to only those that were aligned to.
private void writeBam(final JavaRDD<GATKRead> reads, final String inputBamPath, final boolean isPaired,
                      final JavaSparkContext ctx, SAMFileHeader header) {

    //Only retain header sequences that were aligned to.
    //This invokes an action and therefore the reads must be cached.
    header = PSBwaUtils.removeUnmappedHeaderSequences(header, reads, logger);

    final String outputPath = isPaired ? outputPaired : outputUnpaired;
    try {
        ReadsSparkSink.writeReads(ctx, outputPath, null, reads, header,
                shardedOutput ? ReadsWriteFormat.SHARDED : ReadsWriteFormat.SINGLE,
                PSUtils.pathseqGetRecommendedNumReducers(inputBamPath, numReducers, getTargetPartitionSize()), shardedPartsDir, true, splittingIndexGranularity);
    } catch (final IOException e) {
        throw new UserException.CouldNotCreateOutputFile(outputPath, "Writing failed", e);
Example 15
static SVIntervalTree<SVInterval> findGenomewideHighCoverageIntervalsToIgnore(final FindBreakpointEvidenceSparkArgumentCollection params,
                                                                              final ReadMetadata readMetadata,
                                                                              final JavaSparkContext ctx,
                                                                              final SAMFileHeader header,
                                                                              final JavaRDD<GATKRead> unfilteredReads,
                                                                              final SVReadFilter filter,
                                                                              final Logger logger,
                                                                              final Broadcast<ReadMetadata> broadcastMetadata) {
    final int capacity = header.getSequenceDictionary().getSequences().stream()
            .mapToInt(seqRec -> (seqRec.getSequenceLength() + DEPTH_WINDOW_SIZE - 1)/DEPTH_WINDOW_SIZE).sum();
    final List<SVInterval> depthIntervals = new ArrayList<>(capacity);
    for (final SAMSequenceRecord sequenceRecord : header.getSequenceDictionary().getSequences()) {
        final int contigID = readMetadata.getContigID(sequenceRecord.getSequenceName());
        final int contigLength = sequenceRecord.getSequenceLength();
        for (int i = 1; i < contigLength; i = i + DEPTH_WINDOW_SIZE) {
            depthIntervals.add(new SVInterval(contigID, i, Math.min(contigLength, i + DEPTH_WINDOW_SIZE)));

    final List<SVInterval> highCoverageSubintervals = findHighCoverageSubintervalsAndLog(
            params, ctx, broadcastMetadata, depthIntervals, unfilteredReads, filter, logger);
    final SVIntervalTree<SVInterval> highCoverageSubintervalTree = new SVIntervalTree<>();
    highCoverageSubintervals.forEach(i -> highCoverageSubintervalTree.put(i, i));

    return highCoverageSubintervalTree;
Example 16
 * Loads ADAM reads stored as Parquet.
 * @param inputPathSpecifier path to the Parquet data
 * @return RDD of (ADAM-backed) GATKReads from the file.
// Loads ADAM reads stored as Parquet into a JavaRDD of GATKReads, filtering
// to records overlapping the traversal parameters.
// NOTE(review): scraped snippet — the closing brace of the if/else, the
// trailing call on the newAPIHadoopFile(...) chain (presumably ".values()"),
// and the method's closing brace are missing from the page.
public JavaRDD<GATKRead> getADAMReads(final GATKPath inputPathSpecifier, final TraversalParameters traversalParameters, final SAMFileHeader header) throws IOException {
    Job job = Job.getInstance(ctx.hadoopConfiguration());
    AvroParquetInputFormat.setAvroReadSchema(job, AlignmentRecord.getClassSchema());
    // Broadcast the header (possibly null) so executors can adapt records.
    Broadcast<SAMFileHeader> bHeader;
    if (header == null) {
        bHeader= ctx.broadcast(null);
    } else {
        bHeader = ctx.broadcast(header);
    JavaRDD<AlignmentRecord> recordsRdd = ctx.newAPIHadoopFile(
            inputPathSpecifier.getRawInputString(), AvroParquetInputFormat.class, Void.class, AlignmentRecord.class, job.getConfiguration())
    JavaRDD<GATKRead> readsRdd = -> new BDGAlignmentRecordToGATKReadAdapter(record, bHeader.getValue()));
    JavaRDD<GATKRead> filteredRdd = readsRdd.filter(record -> samRecordOverlaps(record.convertToSAMRecord(header), traversalParameters));

    return fixPartitionsIfQueryGrouped(ctx, header, filteredRdd);
Example 17
Source Project: chipster   Source File:    License: MIT License 6 votes vote down vote up
// Copies the records of a SAM/BAM file into a BAM writer; presorted=false so
// the writer performs the sort using the reader's header sort order.
// NOTE(review): scraped snippet — the while-loop body (presumably
// "writer.addAlignment(") and the finally block that closes
// reader/writer are missing from the page.
public static void sortSamBam(File samBamFile, File sortedBamFile) {
	SAMFileReader reader = new SAMFileReader(IOUtil.openFileForReading(samBamFile));
	SAMFileWriter writer = null;
	try {
		writer = new SAMFileWriterFactory().makeBAMWriter(reader.getFileHeader(), false, sortedBamFile);
		Iterator<SAMRecord> iterator = reader.iterator();
		while (iterator.hasNext()) {
	} finally {
/**
 * Checks that ReadPileup.sortedIterator() yields pileup elements ordered by
 * read position: read1 (starting at 200) must come before read2 (208), even
 * though read2's sample is inserted into the stratified map first.
 */
public void testSortedIterator() throws Exception {
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader();
    final GATKRead read1 = ArtificialReadUtils.createArtificialRead(header, "10M");
    final GATKRead read2 = ArtificialReadUtils.createArtificialRead(header, "10M");
    read1.setPosition(new SimpleInterval("22", 200, 210));
    read2.setPosition(new SimpleInterval("22", 208, 218));
    final Locatable loc = new SimpleInterval("22", 208, 208);
    final Map<String, ReadPileup> stratified = new LinkedHashMap<>();
    stratified.put("sample1", new ReadPileup(loc, Arrays.asList(read2), 0));
    stratified.put("sample2", new ReadPileup(loc, Arrays.asList(read1), 9));
    final ReadPileup combined = new ReadPileup(loc, stratified);
    final Iterator<PileupElement> sortedIterator = combined.sortedIterator();
    Assert.assertSame(, read1);
    Assert.assertSame(, read2);
    // NOTE(review): the scraped original was cut off after these assertions;
    // any further checks from the source test are not recoverable from the page.
}
Example 19
 * Initialize the collector with input arguments;
public void initialize(
        final InsertSizeMetricsArgumentCollection inputArgs,
        final SAMFileHeader samHeader,
        final List<Header> defaultHeaders)
    this.inputArgs = inputArgs;
    collector.initialize(inputArgs, samHeader);
    metricsFile = new MetricsFile<>();
    if (defaultHeaders != null) { -> metricsFile.addHeader(h));
Example 20
 * Create an activity profile that implements a band pass filter on the states
 *  @param maxProbPropagationDistance region probability propagation distance beyond it's maximum size
 * @param activeProbThreshold  threshold for the probability of a profile state being active
 * @param maxFilterSize the maximum size of the band pass filter we are allowed to create, regardless of sigma
 * @param sigma the variance of the Gaussian kernel for this band pass filter
 * @param adaptiveFilterSize if true, use the kernel itself to determine the best filter size
public BandPassActivityProfile(final int maxProbPropagationDistance,
                               final double activeProbThreshold, final int maxFilterSize, final double sigma, final boolean adaptiveFilterSize,
                               final SAMFileHeader samHeader) {
    super(maxProbPropagationDistance, activeProbThreshold, samHeader);

    Utils.validateArg(sigma >= 0, "Sigma must be greater than or equal to 0");

    // setup the Gaussian kernel for the band pass filter
    this.sigma = sigma;
    final double[] fullKernel = makeKernel(maxFilterSize, sigma);
    this.filterSize = adaptiveFilterSize ? determineFilterSize(fullKernel, MIN_PROB_TO_KEEP_IN_FILTER) : maxFilterSize;
    this.gaussianKernel = makeKernel(this.filterSize, sigma);
Example 21
Source Project: rtg-tools   Source File:    License: BSD 2-Clause "Simplified" License 5 votes vote down vote up
// Streams the records (or just the header) of a SAM/BAM file restricted to
// the given reference ranges into the output stream.
// NOTE(review): scraped snippet — the loop body that writes each record
// (presumably "writer.addAlignment(") and all closing braces are
// missing from the page.
private static void extractSamBam(File f, ReferenceRanges<String> regions, OutputStream out, boolean printHeader, boolean headerOnly) throws IOException {
  final SAMFileHeader header = SamUtils.getSingleHeader(f);
  try (final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMWriter(header, true, out, printHeader)) {
    if (!headerOnly) {
      try (RecordIterator<SAMRecord> samfr = new SkipInvalidRecordsIterator(f.getPath(), new SamClosedFileReader(f, regions, null, header))) {
        while (samfr.hasNext()) {
Source Project: gatk   Source File:    License: BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
// Validates that the CNV call VCF is single-sample, contains calls for the
// given sample id, and carries a valid sequence dictionary.
// NOTE(review): scraped snippet — the final dangling string literal is the
// message of a truncated Utils.validate(...) that presumably compared the VCF
// sequence dictionary against headerForReads; the check itself and the
// closing brace are missing from the page.
private static void validateCNVcallDataSource(final SAMFileHeader headerForReads,
                                              final String sampleId,
                                              final FeatureDataSource<VariantContext> dataSource) {
    final VCFHeader cnvCallHeader = (VCFHeader) dataSource.getHeader();
    final ArrayList<String> sampleNamesInOrder = cnvCallHeader.getSampleNamesInOrder();
    Utils.validate(sampleNamesInOrder.size() == 1, "CNV call VCF should be single sample");
    Utils.validate(sampleNamesInOrder.contains(sampleId), ("CNV call VCF does not contain calls for sample " + sampleId));
    Utils.validate(cnvCallHeader.getSequenceDictionary() != null,
            "CNV calls file does not have a valid sequence dictionary");
            "CNV calls file does not have the same sequence dictionary as the read evidence");
 * Initialize the collector with input arguments.
public void initialize(
        final ExampleMultiMetricsArgumentCollection inputArgs,
        final SAMFileHeader samHeader,
        final List<Header> defaultHeaders)
    this.inputArgs = inputArgs;
    collector.initialize(inputArgs, samHeader);
    metricsFile = new MetricsFile<>();
    if (defaultHeaders != null) { -> metricsFile.addHeader(h));
Example 24
protected void collectMetrics(
        final JavaRDD<GATKRead> filteredReads,
        final SAMFileHeader samHeader)
    insertSizeCollector.collectMetrics(filteredReads, samHeader);
Example 25
@Test(groups = "sv")
public void testGetLeadingMismatches() {
    final SAMFileHeader samHeader = ArtificialReadUtils.createArtificialSamHeader();
    final ReadMetadata metadata = new ReadMetadata(Collections.emptySet(), samHeader, stats, null, 2L, 2L, 1);
    final GATKRead read = ArtificialReadUtils.createArtificialRead(samHeader, "saRead", 0, 140828201,
            ArtificialReadUtils.createRandomReadBases(151, false), ArtificialReadUtils.createRandomReadQuals(151), "151M");
    read.setAttribute( "MD", "AC152T");
    Assert.assertEquals(2, BreakpointEvidence.ReadEvidence.getLeadingMismatches(read, true));
    Assert.assertEquals(1, BreakpointEvidence.ReadEvidence.getLeadingMismatches(read, false));
    read.setAttribute( "MD", "0AC152T");
    Assert.assertEquals(2, BreakpointEvidence.ReadEvidence.getLeadingMismatches(read, true));
Example 26
// Test fixture setup: builds a GenomeLocParser, a start location, and a
// SAMFileHeader mirroring the b37 20/21 reference's sequence dictionary.
// NOTE(review): scraped snippet — the closing braces of the try-with-resources
// and the method are missing from the page.
public void init() throws IOException {
    // sequence
    try(final ReferenceSequenceFile seq = new CachingIndexedFastaSequenceFile(IOUtils.getPath(b37_reference_20_21))) {
        genomeLocParser = new GenomeLocParser(seq);
        startLoc = genomeLocParser.createGenomeLoc("20", 0, 1, 100);
        header = new SAMFileHeader();
        seq.getSequenceDictionary().getSequences().forEach(sequence -> header.addSequence(sequence));
Example 27
Source Project: gatk   Source File:    License: BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
protected void runTool(final JavaSparkContext ctx) {
    final JavaRDD<GATKRead> reads = getReads();
    final int numReducers = getRecommendedNumReducers();"Using {} reducers", numReducers);

    final SAMFileHeader header = getHeaderForReads();

    writeReads(ctx, outputFile, reads, header, true);
Example 28
// Builds a per-allele read map with the requested counts and checks that
// allele-biased downsampling removes the expected number of reads (±1).
// NOTE(review): scraped snippet — the statement adding newRead to
// readMap.get(nonRefAllele) and several closing braces are missing from the
// page.
@Test(dataProvider= "oneCase")
public void testOneCaseUsingPublicAPI(final int addA, final int addC, final int addG, final int addT, final double contaminationFraction,
                                      final int pileupSize, final int[] initialCounts, final int[] targetCounts) {

    final int[] actualCounts = initialCounts.clone();
    actualCounts[0] += addA;
    actualCounts[1] += addC;
    actualCounts[2] += addG;
    actualCounts[3] += addT;

    final long actualCount = MathUtils.sum(actualCounts);
    final long expectedCount = MathUtils.sum(targetCounts);

    final SAMFileHeader header= ArtificialReadUtils.createArtificialSamHeader();

    final char[] bases= {'A', 'C', 'G', 'T'};   //note: hardwired to use same order as actualCounts
    final byte[] quals = {30};
    final Map<Allele, List<GATKRead>> readMap= new LinkedHashMap<>(bases.length);
    for (int idx = 0; idx < bases.length; idx++) {
        final Allele nonRefAllele = Allele.create(String.valueOf(bases[idx]));
        readMap.put(nonRefAllele, new ArrayList<>());
        for (int i = 0; i < actualCounts[idx]; i++) {
            final byte[] readBases = {(byte) bases[idx]};
            final GATKRead newRead = ArtificialReadUtils.createArtificialRead(header, "read" + i, 0, 1, readBases, quals, "1M");

    final List<GATKRead> results = AlleleBiasedDownsamplingUtils.selectAlleleBiasedEvidence(readMap, contaminationFraction);
    //TODO should add assertions to test that the reads are downsampled properly by allele.
    final long toRemove = actualCount - expectedCount;
    Assert.assertEquals(results.size(), toRemove, 1);
Example 29
 * Method which generates a map of the readgroups from the header so they can be serialized as indexes
private static Map<String, Short> getHeaderReadGroupIndexMap(final SAMFileHeader header) {
    final List<SAMReadGroupRecord> readGroups = header.getReadGroups();
    if (readGroups.size() > 65535) {
        throw new GATKException("Detected too many read groups in the header, currently MarkDuplicatesSpark only supports up to 65535 unique readgroup IDs but " + readGroups.size() + " were found");
    if (readGroups.size()==0) {
        throw new UserException.BadInput("Sam file header missing Read Group fields. MarkDuplicatesSpark currently requires reads to be labeled with read group tags, please add read groups tags to your reads");
    final Iterator<Short> iterator = IntStream.range(0, readGroups.size()).boxed().map(Integer::shortValue).iterator();
    return Maps.uniqueIndex(iterator, idx -> readGroups.get(idx).getId() );
Example 30
Source Project: cramtools   Source File:    License: Apache License 2.0 5 votes vote down vote up
// Lazily opens an overflow BAM writer (using a clone of the header) and
// appends the evicted read to it.
// NOTE(review): scraped snippet — a log call was fused onto the if-line
// (missing its receiver before ".info"), and the trailing
// writer.addAlignment(r) plus closing braces are missing from the page.
protected void kickedFromCache(FastqRead read) {
	if (writer == null) {"Creating overflow BAM file.");
		headerForOverflowWriter = header.clone();

		writer = new SAMFileWriterFactory().makeBAMWriter(headerForOverflowWriter, false, cacheOverFlowStream);
	SAMRecord r = read.toSAMRecord(writer.getFileHeader());