htsjdk.samtools.SAMFileHeader Java Examples

The following examples show how to use htsjdk.samtools.SAMFileHeader. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: IntervalUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test(expectedExceptions={UserException.CouldNotReadInputFile.class, UserException.MalformedGenomeLoc.class, UserException.MalformedFile.class}, dataProvider="invalidIntervalTestData")
public void testIntervalFileToListInvalidPicardIntervalHandling(GenomeLocParser genomeLocParser,
                                   String contig, int intervalStart, int intervalEnd ) throws Exception {

    final SAMFileHeader picardFileHeader = new SAMFileHeader();
    picardFileHeader.addSequence(genomeLocParser.getContigInfo("1"));
    picardFileHeader.addSequence(new SAMSequenceRecord("2", 100000));

    final IntervalList picardIntervals = new IntervalList(picardFileHeader);
    picardIntervals.add(new Interval(contig, intervalStart, intervalEnd, true, "dummyname"));

    final File picardIntervalFile = createTempFile("testIntervalFileToListInvalidPicardIntervalHandling", ".interval_list");
    picardIntervals.write(picardIntervalFile);

    // loadIntervals() will validate all intervals against the sequence dictionary in our genomeLocParser,
    // and should throw for all intervals in our invalidIntervalTestData set
    IntervalUtils.parseIntervalArguments(genomeLocParser, picardIntervalFile.getAbsolutePath());
}
 
Example #2
Source File: SAMFileDestination.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Create a new file destination.
 *
 * @param outPath path where output is written (doesn't have to be local)
 * @param createBamOutIndex true to create an index file for the bamout
 * @param createBamOutMD5 true to create an md5 file for the bamout
 * @param sourceHeader SAMFileHeader used to seed the output SAMFileHeader for this destination, must not be null
 * @param haplotypeReadGroupID  read group ID used when writing haplotypes as reads
 */
public SAMFileDestination(
        final Path outPath,
        final boolean createBamOutIndex,
        final boolean createBamOutMD5,
        final SAMFileHeader sourceHeader,
        final String haplotypeReadGroupID)
{
    super(sourceHeader, haplotypeReadGroupID);
    samWriter = new SAMFileGATKReadWriter(ReadUtils.createCommonSAMWriter(
            outPath,
            null,
            getBAMOutputHeader(), // use the header derived from the source header by HaplotypeBAMDestination
            false,
            createBamOutIndex,
            createBamOutMD5
    ));
}
 
Example #3
Source File: SATagBuilderUnitTests.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test
public void testAddTag() {
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader();
    GATKRead read = ArtificialReadUtils.createArtificialRead("40M");
    read.setAttribute("SA","2,500,+,3S2=1X2=2S,60,1;");
    SATagBuilder builder = new SATagBuilder(read);

    GATKRead read1 = ArtificialReadUtils.createArtificialRead(header, "read1", 1, 10000, new byte[] {}, new byte[] {}, "4M");
    read1.setIsSupplementaryAlignment(true);
    GATKRead read2 = ArtificialReadUtils.createArtificialRead(header, "read2", 2, 10001, new byte[] {}, new byte[] {}, "4S");
    read2.setIsSupplementaryAlignment(true);
    SATagBuilder read2builder = new SATagBuilder(read2);
    GATKRead read3 = ArtificialReadUtils.createArtificialRead(header, "read3", 3, 10003, new byte[] {}, new byte[] {}, "4D");
    read3.setIsSupplementaryAlignment(false);

    builder.addTag(read1).addTag(read3).addTag(read2builder).setSATag();
    Assert.assertEquals(read.getAttributeAsString("SA"),"4,10003,+,4D,0,*;2,500,+,3S2=1X2=2S,60,1;2,10000,+,4M,0,*;3,10001,+,4S,0,*;");
}
 
Example #4
Source File: Merge.java    From cramtools with Apache License 2.0 6 votes vote down vote up
private static SAMFileHeader mergeHeaders(List<RecordSource> sources) {
	SAMFileHeader header = new SAMFileHeader();
	for (RecordSource source : sources) {
		SAMFileHeader h = source.reader.getFileHeader();

		for (SAMSequenceRecord seq : h.getSequenceDictionary().getSequences()) {
			if (header.getSequenceDictionary().getSequence(seq.getSequenceName()) == null)
				header.addSequence(seq);
		}

		for (SAMProgramRecord pro : h.getProgramRecords()) {
			if (header.getProgramRecord(pro.getProgramGroupId()) == null)
				header.addProgramRecord(pro);
		}

		for (String comment : h.getComments())
			header.addComment(comment);

		for (SAMReadGroupRecord rg : h.getReadGroups()) {
			if (header.getReadGroup(rg.getReadGroupId()) == null)
				header.addReadGroup(rg);
		}
	}

	return header;
}
 
Example #5
Source File: PathSeqBwaSpark.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Writes RDD of reads to path. Note writeReads() is not used because there are separate paired/unpaired outputs.
 * Header sequence dictionary is reduced to only those that were aligned to.
 */
private void writeBam(final JavaRDD<GATKRead> reads, final String inputBamPath, final boolean isPaired,
                      final JavaSparkContext ctx, SAMFileHeader header) {

    //Only retain header sequences that were aligned to.
    //This invokes an action and therefore the reads must be cached.
    reads.persist(StorageLevel.MEMORY_AND_DISK_SER());
    header = PSBwaUtils.removeUnmappedHeaderSequences(header, reads, logger);

    final String outputPath = isPaired ? outputPaired : outputUnpaired;
    try {
        ReadsSparkSink.writeReads(ctx, outputPath, null, reads, header,
                shardedOutput ? ReadsWriteFormat.SHARDED : ReadsWriteFormat.SINGLE,
                PSUtils.pathseqGetRecommendedNumReducers(inputBamPath, numReducers, getTargetPartitionSize()), shardedPartsDir, true, splittingIndexGranularity);
    } catch (final IOException e) {
        throw new UserException.CouldNotCreateOutputFile(outputPath, "Writing failed", e);
    }
}
 
Example #6
Source File: FindBreakpointEvidenceSpark.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
static SVIntervalTree<SVInterval> findGenomewideHighCoverageIntervalsToIgnore(final FindBreakpointEvidenceSparkArgumentCollection params,
                                                                              final ReadMetadata readMetadata,
                                                                              final JavaSparkContext ctx,
                                                                              final SAMFileHeader header,
                                                                              final JavaRDD<GATKRead> unfilteredReads,
                                                                              final SVReadFilter filter,
                                                                              final Logger logger,
                                                                              final Broadcast<ReadMetadata> broadcastMetadata) {
    final int capacity = header.getSequenceDictionary().getSequences().stream()
            .mapToInt(seqRec -> (seqRec.getSequenceLength() + DEPTH_WINDOW_SIZE - 1)/DEPTH_WINDOW_SIZE).sum();
    final List<SVInterval> depthIntervals = new ArrayList<>(capacity);
    for (final SAMSequenceRecord sequenceRecord : header.getSequenceDictionary().getSequences()) {
        final int contigID = readMetadata.getContigID(sequenceRecord.getSequenceName());
        final int contigLength = sequenceRecord.getSequenceLength();
        for (int i = 1; i < contigLength; i = i + DEPTH_WINDOW_SIZE) {
            depthIntervals.add(new SVInterval(contigID, i, Math.min(contigLength, i + DEPTH_WINDOW_SIZE)));
        }
    }

    final List<SVInterval> highCoverageSubintervals = findHighCoverageSubintervalsAndLog(
            params, ctx, broadcastMetadata, depthIntervals, unfilteredReads, filter, logger);
    final SVIntervalTree<SVInterval> highCoverageSubintervalTree = new SVIntervalTree<>();
    highCoverageSubintervals.forEach(i -> highCoverageSubintervalTree.put(i, i));

    return highCoverageSubintervalTree;
}
 
Example #7
Source File: FilterBam.java    From Drop-seq with MIT License 6 votes vote down vote up
private SAMFileHeader editSequenceDictionary(final SAMFileHeader fileHeader) {
 if (DROP_REJECTED_REF || !STRIP_REF_PREFIX.isEmpty()) {
     // Make mutable copy of sequence list
        final ArrayList<SAMSequenceRecord> sequences = new ArrayList<>(fileHeader.getSequenceDictionary().getSequences());
        final ListIterator<SAMSequenceRecord> it = sequences.listIterator();
        while (it.hasNext()) {
            final SAMSequenceRecord sequence = it.next();
            if (DROP_REJECTED_REF && filterReference(sequence.getSequenceName()))
	it.remove();
else {
                final String editedSequenceName = stripReferencePrefix(sequence.getSequenceName());
                if (editedSequenceName != null)
		it.set(cloneWithNewName(sequence, editedSequenceName));
            }
        }
        fileHeader.getSequenceDictionary().setSequences(sequences);
    }
 return fileHeader;
}
 
Example #8
Source File: SamBamUtils.java    From chipster with MIT License 6 votes vote down vote up
public static void sortSamBam(File samBamFile, File sortedBamFile) {
	
	SAMFileReader.setDefaultValidationStringency(ValidationStringency.SILENT);
	SAMFileReader reader = new SAMFileReader(IOUtil.openFileForReading(samBamFile));
	SAMFileWriter writer = null;
	try {
		
		reader.getFileHeader().setSortOrder(SAMFileHeader.SortOrder.coordinate);
		writer = new SAMFileWriterFactory().makeBAMWriter(reader.getFileHeader(), false, sortedBamFile);
		Iterator<SAMRecord> iterator = reader.iterator();
		while (iterator.hasNext()) {
			writer.addAlignment(iterator.next());
		}
		
	} finally {
		closeIfPossible(reader);
		closeIfPossible(writer);
	}
}
 
Example #9
Source File: PreprocessingTools.java    From halvade with GNU General Public License v3.0 6 votes vote down vote up
public int callElPrep(String input, String output, String rg, int threads, 
        SAMRecordIterator SAMit,
        SAMFileHeader header, String dictFile, boolean updateRG, boolean keepDups, String RGID) throws InterruptedException, QualityException {
    
    SAMRecord sam;
    SAMFileWriterFactory factory = new SAMFileWriterFactory();
    SAMFileWriter Swriter = factory.makeSAMWriter(header, true, new File(input));
    
    int reads = 0;
    while(SAMit.hasNext()) {
        sam = SAMit.next();
        if(updateRG)
            sam.setAttribute(SAMTag.RG.name(), RGID);
        Swriter.addAlignment(sam);
        reads++;
    }
    Swriter.close();
    
    String customArgs = HalvadeConf.getCustomArgs(context.getConfiguration(), "elprep", "");  
    String[] command = CommandGenerator.elPrep(bin, input, output, threads, true, rg, null, !keepDups, customArgs);
    long estimatedTime = runProcessAndWait("elPrep", command);
    if(context != null)
        context.getCounter(HalvadeCounters.TIME_ELPREP).increment(estimatedTime);
    
    return reads;
}
 
Example #10
Source File: Feature.java    From abra2 with MIT License 6 votes vote down vote up
public static int findFirstOverlappingRegion(SAMFileHeader samHeader, SAMRecordWrapper read, int readStart, int readEnd, List<Feature> regions, int start) {
	if (start < 0) {
		start = 0;
	}

	for (int idx=start; idx<regions.size(); idx++) {
		Feature region = regions.get(idx);
		if ( (read.getSamRecord().getReferenceIndex() < samHeader.getSequenceDictionary().getSequenceIndex(region.getSeqname())) ||
			 (read.getSamRecord().getReferenceName().equals(region.getSeqname()) && readStart < region.getStart()) ) {
			
			// This read is in between regions
			// TODO: adjust start region here
			return -1;
		} else if (region.overlaps(read.getSamRecord().getReferenceName(), readStart, readEnd)) {
			return idx;
		}
	}
	
	// This read is beyond all regions
	return -1;
}
 
Example #11
Source File: ReadsSparkSource.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Loads ADAM reads stored as Parquet.
 * @param inputPathSpecifier path to the Parquet data
 * @return RDD of (ADAM-backed) GATKReads from the file.
 */
public JavaRDD<GATKRead> getADAMReads(final GATKPath inputPathSpecifier, final TraversalParameters traversalParameters, final SAMFileHeader header) throws IOException {
    Job job = Job.getInstance(ctx.hadoopConfiguration());
    AvroParquetInputFormat.setAvroReadSchema(job, AlignmentRecord.getClassSchema());
    Broadcast<SAMFileHeader> bHeader;
    if (header == null) {
        bHeader= ctx.broadcast(null);
    } else {
        bHeader = ctx.broadcast(header);
    }
    @SuppressWarnings("unchecked")
    JavaRDD<AlignmentRecord> recordsRdd = ctx.newAPIHadoopFile(
            inputPathSpecifier.getRawInputString(), AvroParquetInputFormat.class, Void.class, AlignmentRecord.class, job.getConfiguration())
            .values();
    JavaRDD<GATKRead> readsRdd = recordsRdd.map(record -> new BDGAlignmentRecordToGATKReadAdapter(record, bHeader.getValue()));
    JavaRDD<GATKRead> filteredRdd = readsRdd.filter(record -> samRecordOverlaps(record.convertToSAMRecord(header), traversalParameters));

    return fixPartitionsIfQueryGrouped(ctx, header, filteredRdd);
}
 
Example #12
Source File: ReadPileupUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test
public void testSortedIterator() throws Exception {
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader();
    final GATKRead read1 = ArtificialReadUtils.createArtificialRead(header, "10M");
    final GATKRead read2 = ArtificialReadUtils.createArtificialRead(header, "10M");
    read1.setPosition(new SimpleInterval("22", 200, 210));
    read2.setPosition(new SimpleInterval("22", 208, 218));
    read1.setMatePosition(read2);
    read2.setMatePosition(read1);
    final Locatable loc = new SimpleInterval("22", 208, 208);
    final Map<String, ReadPileup> stratified = new LinkedHashMap<>();
    stratified.put("sample1", new ReadPileup(loc, Arrays.asList(read2), 0));
    stratified.put("sample2", new ReadPileup(loc, Arrays.asList(read1), 9));
    final ReadPileup combined = new ReadPileup(loc, stratified);
    final Iterator<PileupElement> sortedIterator = combined.sortedIterator();
    Assert.assertSame(sortedIterator.next().getRead(), read1);
    Assert.assertSame(sortedIterator.next().getRead(), read2);
}
 
Example #13
Source File: GenomeLoc.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Calculates the distance between two genomeLocs across contigs (if necessary).
 *
 * Returns minDistance(other) if in same contig.
 * Works with intervals!
 * Uses the SAMFileHeader to extract the size of the contigs and follows the order in the dictionary.
 *
 * @param other         the genome loc to compare to
 * @param samFileHeader the contig information
 * @return the sum of all the bases in between the genomeLocs, including entire contigs
 */
public long distanceAcrossContigs(final GenomeLoc other, final SAMFileHeader samFileHeader) {
    if (onSameContig(other)) {
        return minDistance(other);
    }

    // add the distance from the first genomeLoc to the end of it's contig and the distance from the
    // second genomeLoc to the beginning of it's contig.
    long distance = 0;
    if (contigIndex < other.contigIndex) {
        distance += samFileHeader.getSequence(contigIndex).getSequenceLength() - stop;
        distance += other.start;
    } else {
        distance += samFileHeader.getSequence(other.contigIndex).getSequenceLength() - other.stop;
        distance += start;
    }

    // add any contig (in its entirety) in between the two genomeLocs
    for (int i=Math.min(this.contigIndex, other.contigIndex) + 1; i < Math.max(this.contigIndex, other.contigIndex); i++) {
        distance += samFileHeader.getSequence(i).getSequenceLength();
    }
    return distance;
}
 
Example #14
Source File: GatherBamFiles.java    From picard with MIT License 6 votes vote down vote up
/**
 * Simple implementation of a gather operations that uses SAMFileReaders and Writers in order to concatenate
 * multiple BAM files.
 */
private static void gatherNormally(final List<File> inputs, final File output, final boolean createIndex, final boolean createMd5,
                                   final File referenceFasta) {
    final SAMFileHeader header;
    {
        header = SamReaderFactory.makeDefault().referenceSequence(referenceFasta).getFileHeader(inputs.get(0));
    }

    final SAMFileWriter out = new SAMFileWriterFactory().setCreateIndex(createIndex).setCreateMd5File(createMd5).makeSAMOrBAMWriter(header, true, output);

    for (final File f : inputs) {
        log.info("Gathering " + f.getAbsolutePath());
        final SamReader in = SamReaderFactory.makeDefault().referenceSequence(referenceFasta).open(f);
        for (final SAMRecord rec : in) out.addAlignment(rec);
        CloserUtil.close(in);
    }

    out.close();
}
 
Example #15
Source File: SamUtilsTest.java    From rtg-tools with BSD 2-Clause "Simplified" License 6 votes vote down vote up
public void testRunCommentUpdate() {
  final SAMFileHeader sf = new SAMFileHeader();
  sf.addComment("READ-SDF-ID:" + Long.toHexString(123456));
  final UUID uuid = UUID.randomUUID();
  sf.addComment(SamUtils.RUN_ID_ATTRIBUTE + uuid);
  sf.addComment("TEMPLATE-SDF-ID:$$$$$$$");
  sf.addComment("BLAH:KLJERLWJ");

  SamUtils.updateRunId(sf);

  for (final String s : sf.getComments()) {
    if (s.startsWith(SamUtils.RUN_ID_ATTRIBUTE)) {
      assertFalse(s.contains(uuid.toString()));
    }
  }
}
 
Example #16
Source File: FastqToSam.java    From picard with MIT License 6 votes vote down vote up
/** Creates a simple header with the values provided on the command line. */
public SAMFileHeader createSamFileHeader() {
    final SAMReadGroupRecord rgroup = new SAMReadGroupRecord(this.READ_GROUP_NAME);
    rgroup.setSample(this.SAMPLE_NAME);
    if (this.LIBRARY_NAME != null) rgroup.setLibrary(this.LIBRARY_NAME);
    if (this.PLATFORM != null) rgroup.setPlatform(this.PLATFORM);
    if (this.PLATFORM_UNIT != null) rgroup.setPlatformUnit(this.PLATFORM_UNIT);
    if (this.SEQUENCING_CENTER != null) rgroup.setSequencingCenter(SEQUENCING_CENTER);
    if (this.PREDICTED_INSERT_SIZE != null) rgroup.setPredictedMedianInsertSize(PREDICTED_INSERT_SIZE);
    if (this.DESCRIPTION != null) rgroup.setDescription(this.DESCRIPTION);
    if (this.RUN_DATE != null) rgroup.setRunDate(this.RUN_DATE);
    if (this.PLATFORM_MODEL != null) rgroup.setPlatformModel(this.PLATFORM_MODEL);
    if (this.PROGRAM_GROUP != null) rgroup.setProgramGroup(this.PROGRAM_GROUP);

    final SAMFileHeader header = new SAMFileHeader();
    header.addReadGroup(rgroup);

    for (final String comment : COMMENT) {
        header.addComment(comment);
    }

    header.setSortOrder(this.SORT_ORDER);
    return header ;
}
 
Example #17
Source File: Feature.java    From abra2 with MIT License 6 votes vote down vote up
public static List<Integer> findAllOverlappingRegions(SAMFileHeader samHeader, SAMRecordWrapper read, List<Feature> regions, int start) {
	List<Integer> overlappingRegions = new ArrayList<Integer>();
	
	for (Span span : read.getSpanningRegions()) {
	
		int idx = findFirstOverlappingRegion(samHeader, read, span.start, span.end, regions, start);
		if (idx > -1) {
			overlappingRegions.add(idx);
			boolean isOverlap = true;
			idx += 1;
			
			while (isOverlap && idx < regions.size()) {
				Feature region = regions.get(idx);
				if (region.overlaps(read.getSamRecord().getReferenceName(), span.start, span.end)) {
					overlappingRegions.add(idx);
				} else {
					isOverlap = false;
				}
				idx += 1;
			}
		}
	}
	
	return overlappingRegions;
}
 
Example #18
Source File: VarDictLauncher.java    From VarDictJava with MIT License 5 votes vote down vote up
/**
 * Read map of chromosome lengths
 * @param bam BAM file name
 * @return Map of chromosome lengths. Key - chromosome name, value - length
 * @throws IOException if BAM/SAM file can't be opened
 */
public static Map<String, Integer> readChr(String bam) throws IOException {
    try (SamReader reader = SamReaderFactory.makeDefault().open(new File(bam))) {
        SAMFileHeader header = reader.getFileHeader();
        Map<String, Integer> chrs = new HashMap<>();
        for (SAMSequenceRecord record : header.getSequenceDictionary().getSequences()) {
            record.getSequenceLength();
            String sn = record.getSequenceName();
            int ln = record.getSequenceLength();
            chrs.put(sn, ln);
        }
        return chrs;
    }
}
 
Example #19
Source File: GATKReadFilterPluginDescriptorTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private GATKRead simpleGoodRead( final SAMFileHeader header ) {
    final String cigarString = "101M";
    final Cigar cigar = TextCigarCodec.decode(cigarString);
    GATKRead read = createRead(header, cigar, 1, 0, 10);
    read.setMappingQuality(50);
    return read;
}
 
Example #20
Source File: ReadGroupCovariateUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private static void runTest(final SAMReadGroupRecord rg, final String expected, final ReadGroupCovariate covariate) {
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeaderWithReadGroup(rg);

    GATKRead read = ArtificialReadUtils.createRandomRead(header, 10);
    read.setReadGroup(rg.getReadGroupId());

    ReadCovariates readCovariates = new ReadCovariates(read.getLength(), 1, new CovariateKeyCache());
    covariate.recordValues(read, header, readCovariates, true);
    verifyCovariateArray(readCovariates.getMismatchesKeySet(), expected, covariate);

}
 
Example #21
Source File: ExampleCollectSingleMetricsSpark.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Execute the actual metrics collection.
 * @param filteredReads Input reads, already filtered using the filter returned by {@link #getDefaultReadFilters(SAMFileHeader)}
 * @param samHeader SAMFileHeader for the input
 */
@Override
protected void collectMetrics(
        final JavaRDD<GATKRead> filteredReads,
        final SAMFileHeader samHeader) {
    exampleSingleCollector.collectMetrics(filteredReads, samHeader);
}
 
Example #22
Source File: RevertSam.java    From picard with MIT License 5 votes vote down vote up
private SAMFileHeader createOutHeader(
        final SAMFileHeader inHeader,
        final SAMFileHeader.SortOrder sortOrder,
        final boolean removeAlignmentInformation) {

    final SAMFileHeader outHeader = new SAMFileHeader();
    outHeader.setSortOrder(sortOrder);
    if (!removeAlignmentInformation) {
        outHeader.setSequenceDictionary(inHeader.getSequenceDictionary());
        outHeader.setProgramRecords(inHeader.getProgramRecords());
    }
    return outHeader;
}
 
Example #23
Source File: CollectGcBiasMetricsTest.java    From picard with MIT License 5 votes vote down vote up
public void setupTest2(final int ID, final String readGroupId, final SAMReadGroupRecord readGroupRecord, final String sample,
                       final String library, final SAMFileHeader header, final SAMRecordSetBuilder setBuilder)
        throws IOException {

    final String separator = ":";
    final int contig1 = 0;
    final int contig2 = 1;
    final int contig3 = 2;
    readGroupRecord.setSample(sample);
    readGroupRecord.setPlatform(platform);
    readGroupRecord.setLibrary(library);
    readGroupRecord.setPlatformUnit(readGroupId);
    setBuilder.setReadGroup(readGroupRecord);
    setBuilder.setUseNmFlag(true);

    setBuilder.setHeader(header);

    final int max = 800;
    final int min = 1;
    final Random rg = new Random(5);

    //add records that align to all 3 chr in reference file
    for (int i = 0; i < NUM_READS; i++) {
        final int start = rg.nextInt(max) + min;
        final String newReadName = READ_NAME + separator + ID + separator + i;

        if (i<=NUM_READS/3) {
            setBuilder.addPair(newReadName, contig1, start + ID, start + ID + LENGTH);
        } else if (i< (NUM_READS - (NUM_READS/3))) {
            setBuilder.addPair(newReadName, contig2, start + ID, start + ID + LENGTH);
        } else {
            setBuilder.addPair(newReadName, contig3, start + ID, start + ID + LENGTH);
        }
    }
}
 
Example #24
Source File: HaplotypeCallerSpark.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * @return an {@link OverlapDetector} loaded with {@link ShardBoundary}
 * based on the -L intervals
 */
private static OverlapDetector<ShardBoundary> getShardBoundaryOverlapDetector(final SAMFileHeader header, final List<SimpleInterval> intervals, final int readShardSize, final int readShardPadding) {
    final OverlapDetector<ShardBoundary> shardBoundaryOverlapDetector = new OverlapDetector<>(0, 0);
    intervals.stream()
            .flatMap(interval -> Shard.divideIntervalIntoShards(interval, readShardSize, readShardPadding, header.getSequenceDictionary()).stream())
            .forEach(boundary -> shardBoundaryOverlapDetector.addLhs(boundary, boundary.getPaddedInterval()));
    return shardBoundaryOverlapDetector;
}
 
Example #25
Source File: SAMRecordWriter.java    From Hadoop-BAM with MIT License 5 votes vote down vote up
private void init(
		OutputStream output, SAMFileHeader header, boolean writeHeader)
	throws IOException
{
	this.header = header;
	writer = new SAMTextWriter(output);

	writer.setSortOrder(header.getSortOrder(), false);
	if (writeHeader)
		writer.setHeader(header);
}
 
Example #26
Source File: Fragment.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
public Fragment(final GATKRead first, final SAMFileHeader header, int partitionIndex, MarkDuplicatesScoringStrategy scoringStrategy, Map<String, Byte> headerLibraryMap) {
    super(partitionIndex, first.getName());

    this.score = scoringStrategy.score(first);
    this.R1R = first.isReverseStrand();
    this.key = ReadsKey.getKeyForFragment(ReadUtils.getStrandedUnclippedStart(first),
            isRead1ReverseStrand(),
            (short)ReadUtils.getReferenceIndex(first, header),
            headerLibraryMap.get(MarkDuplicatesSparkUtils.getLibraryForRead(first, header, LibraryIdGenerator.UNKNOWN_LIBRARY)));
}
 
Example #27
Source File: SATagBuilderUnitTests.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test
public static void testSetReadsAsSupplementalNo() {
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader();
    GATKRead primarySup = ArtificialReadUtils.createArtificialRead(header, "read1", 1, 10000, new byte[] {}, new byte[] {}, "4M");
    primarySup.setMappingQuality(100);
    primarySup.setAttribute("NM",20);
    primarySup.setIsReverseStrand(true);
    GATKRead secondarySup = ArtificialReadUtils.createArtificialRead(header, "read1", 2, 10001, new byte[] {}, new byte[] {}, "4S");
    secondarySup.setMappingQuality(200);
    List<GATKRead> sups = new ArrayList<>();
    sups.add(secondarySup);

    SATagBuilder.setReadsAsSupplemental(primarySup,sups);
    Assert.assertEquals(primarySup.getAttributeAsString("SA"), "3,10001,+,4S,200,*;");
    Assert.assertEquals(primarySup.isSupplementaryAlignment(), false);
    Assert.assertEquals(secondarySup.getAttributeAsString("SA"), "2,10000,-,4M,100,20;");
    Assert.assertEquals(secondarySup.isSupplementaryAlignment(), true);

    GATKRead tertiarySup = ArtificialReadUtils.createArtificialRead(header, "read1", 3, 10003, new byte[] {}, new byte[] {}, "4D");
    tertiarySup.setMappingQuality(200);
    primarySup.clearAttribute("SA");
    secondarySup.clearAttribute("SA");
    sups.add(tertiarySup);

    SATagBuilder.setReadsAsSupplemental(primarySup,sups);
    Assert.assertEquals(sups.size(), 2);
    Assert.assertEquals(sups.get(0).getAttributeAsString("SA"), "2,10000,-,4M,100,20;4,10003,+,4D,200,*;");
    Assert.assertTrue(sups.get(1).getAttributeAsString("SA").startsWith("2,10000,-,4M,100,20;"));
    Assert.assertTrue(primarySup.getAttributeAsString("SA").contains("4,10003,+,4D,200,*;"));
    Assert.assertTrue(primarySup.getAttributeAsString("SA").contains("3,10001,+,4S,200,*;"));

    GATKRead unmappedSup = ArtificialReadUtils.createArtificialUnmappedRead(header,new byte[] {}, new byte[] {});
    primarySup.clearAttribute("SA");
    sups.clear();
    sups.add(unmappedSup);

    SATagBuilder.setReadsAsSupplemental(primarySup,sups);
    Assert.assertEquals(primarySup.getAttributeAsString("SA"), "*,0,+,*,0,*;");
    Assert.assertEquals(sups.get(0).getAttributeAsString("SA"), "2,10000,-,4M,100,20;");
}
 
Example #28
Source File: GATKProtectedVariantContextUtilsUnitTest.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test
public void testGetPileup() {
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader(1, 1, 1000);
    final Locatable loc = new SimpleInterval("chr1", 10, 10);
    final int readLength = 3;

    //this read doesn't overlap {@code loc}
    final GATKRead read1 = ArtificialReadUtils.createArtificialRead(header, "read1", 0, 1, readLength);
    read1.setBases(Utils.dupBytes((byte) 'A', readLength));
    read1.setBaseQualities(Utils.dupBytes((byte) 30, readLength));
    read1.setCigar("3M");

    //this read overlaps {@code loc} with a Q30 'A'
    final GATKRead read2 = ArtificialReadUtils.createArtificialRead(header, "read2", 0, 10, readLength);
    read2.setBases(Utils.dupBytes((byte) 'A', readLength));
    read2.setBaseQualities(Utils.dupBytes((byte) 30, readLength));
    read2.setCigar("3M");

    //this read overlaps {@code loc} with a Q20 'C' (due to the deletion)
    final GATKRead read3 = ArtificialReadUtils.createArtificialRead(header, "read3", 0, 7, readLength);
    read3.setBases(Utils.dupBytes((byte) 'C', readLength));
    read3.setBaseQualities(Utils.dupBytes((byte) 20, readLength));
    read3.setCigar("1M1D2M");

    //this read doesn't overlap {@code loc} due to the deletion
    final GATKRead read4 = ArtificialReadUtils.createArtificialRead(header, "read4", 0, 7, readLength);
    read4.setBases(Utils.dupBytes((byte) 'C', readLength));
    read4.setBaseQualities(Utils.dupBytes((byte) 20, readLength));
    read4.setCigar("1M5D2M");

    final ReadPileup pileup = GATKProtectedVariantContextUtils.getPileup(loc, Arrays.asList(read1, read2, read3, read4));

    // the pileup should contain a Q30 'A' and a Q20 'C'
    final int[] counts = pileup.getBaseCounts();
    Assert.assertEquals(counts, new int[]{1, 1, 0, 0});

}
 
Example #29
Source File: MarkDuplicatesSpark.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private boolean treatAsReadGroupOrdered(SAMFileHeader header, boolean treatUnsortedAsReadGrouped) {
    final SAMFileHeader.SortOrder sortOrder = header.getSortOrder();
    if( ReadUtils.isReadNameGroupedBam(header) ){
        return true;
    } else if ( treatUnsortedAsReadGrouped && (sortOrder.equals(SAMFileHeader.SortOrder.unknown) || sortOrder.equals(SAMFileHeader.SortOrder.unsorted))) {
        logger.warn("Input bam was marked as " + sortOrder.toString() + " but " + TREAT_UNSORTED_AS_ORDERED + " is specified so it's being treated as read name grouped");
        return true;
    }
    return false;
}
 
Example #30
Source File: AssemblyRegionIterator.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Constructs an AssemblyRegionIterator over a provided read shard
 *  @param readShard MultiIntervalShard containing the reads that will go into the assembly regions.
 *                  Must have a MAPPED filter set on it.
 * @param readHeader header for the reads
 * @param reference source of reference bases (may be null)
 * @param features source of arbitrary features (may be null)
 * @param evaluator evaluator used to determine whether a locus is active
 */
public AssemblyRegionIterator(final MultiIntervalShard<GATKRead> readShard,
                              final SAMFileHeader readHeader,
                              final ReferenceDataSource reference,
                              final FeatureManager features,
                              final AssemblyRegionEvaluator evaluator,
                              final AssemblyRegionArgumentCollection assemblyRegionArgs) {

    Utils.nonNull(readShard);
    Utils.nonNull(readHeader);
    Utils.nonNull(evaluator);
    assemblyRegionArgs.validate();


    this.readShard = readShard;
    this.readHeader = readHeader;
    this.reference = reference;
    this.features = features;
    this.evaluator = evaluator;
    this.assemblyRegionArgs = assemblyRegionArgs;

    this.readyRegion = null;
    this.previousRegionReads = null;
    this.pendingRegions = new ArrayDeque<>();
    this.readCachingIterator = new ReadCachingIterator(readShard.iterator());
    this.readCache = new ArrayDeque<>();
    this.activityProfile = new BandPassActivityProfile(assemblyRegionArgs.maxProbPropagationDistance, assemblyRegionArgs.activeProbThreshold, BandPassActivityProfile.MAX_FILTER_SIZE, BandPassActivityProfile.DEFAULT_SIGMA, readHeader);

    // We wrap our LocusIteratorByState inside an IntervalAlignmentContextIterator so that we get empty loci
    // for uncovered locations. This is critical for reproducing GATK 3.x behavior!
    this.libs = new LocusIteratorByState(readCachingIterator, DownsamplingMethod.NONE, false, ReadUtils.getSamplesFromHeader(readHeader), readHeader, true);
    final IntervalLocusIterator intervalLocusIterator = new IntervalLocusIterator(readShard.getIntervals().iterator());
    this.locusIterator = new IntervalAlignmentContextIterator(libs, intervalLocusIterator, readHeader.getSequenceDictionary());

    readyRegion = loadNextAssemblyRegion();
}