htsjdk.samtools.SAMFileHeader.SortOrder Java Examples
The following examples show how to use
htsjdk.samtools.SAMFileHeader.SortOrder.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: CustomBAMIterators.java From Drop-seq with MIT License | 6 votes |
public static CloseableIterator<SAMRecord> getReadsInTagOrder (SamReader reader, String primaryTag) { // SamReader reader = SamReaderFactory.makeDefault().open(bamFile); SAMSequenceDictionary dict= reader.getFileHeader().getSequenceDictionary(); List<SAMProgramRecord> programs =reader.getFileHeader().getProgramRecords(); final SAMFileHeader writerHeader = new SAMFileHeader(); writerHeader.setSortOrder(SAMFileHeader.SortOrder.queryname); writerHeader.setSequenceDictionary(dict); for (SAMProgramRecord spr : programs) { writerHeader.addProgramRecord(spr); } final ProgressLogger progressLogger = new ProgressLogger(log, 1000000); log.info("Reading in records for TAG name sorting"); final CloseableIterator<SAMRecord> result = SamRecordSortingIteratorFactory.create(writerHeader, reader.iterator(), new StringTagComparator(primaryTag), progressLogger); log.info("Sorting finished."); return (result); }
Example #2
Source File: CustomBAMIterators.java From Drop-seq with MIT License | 6 votes |
/** * If the file is sorter in query name order, return an iterator over * the file. Otherwise, sort records in queryname order and return an iterator over those records. * @return An iterator over the records in the file, in queryname order. */ public static CloseableIterator<SAMRecord> getQuerynameSortedRecords(SamReader reader) { if (reader.getFileHeader().getSortOrder().equals(SortOrder.queryname)) { return reader.iterator(); } log.info("Input SAM/BAM not in queryname order, sorting..."); SAMSequenceDictionary dict= reader.getFileHeader().getSequenceDictionary(); List<SAMProgramRecord> programs =reader.getFileHeader().getProgramRecords(); final SAMFileHeader writerHeader = new SAMFileHeader(); writerHeader.setSortOrder(SAMFileHeader.SortOrder.queryname); writerHeader.setSequenceDictionary(dict); for (SAMProgramRecord spr : programs) { writerHeader.addProgramRecord(spr); } log.info("Reading in records for query name sorting"); final ProgressLogger progressLogger = new ProgressLogger(log, 1000000, "Sorting reads in query name order"); final CloseableIterator<SAMRecord> result = SamRecordSortingIteratorFactory.create(writerHeader, reader.iterator(), new SAMRecordQueryNameComparator(), progressLogger); log.info("Sorting finished."); return result; }
Example #3
Source File: CRAMContainerStreamWriter.java From cramtools with Apache License 2.0 | 6 votes |
/** * Decide if the current container should be completed and flushed. The * decision is based on a) number of records and b) if the reference * sequence id has changed. * * @param nextRecord * the record to be added into the current or next container * @return true if the current container should be flushed and the following * records should go into a new container; false otherwise. */ protected boolean shouldFlushContainer(final SAMRecord nextRecord) { if (refIdSet.isEmpty()) { return false; } if (samFileHeader.getSortOrder() != SAMFileHeader.SortOrder.coordinate) { return samRecords.size() >= containerSize; } boolean newRef = !refIdSet.contains(nextRecord.getReferenceIndex()); int seenRefs = refIdSet.size(); if (newRef && nextRecord.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) { // separate unsorted reads return true; } if (newRef && seenRefs == 1) { return samRecords.size() >= minSingeRefRecords; } return samRecords.size() >= containerSize; }
Example #4
Source File: AnalyzeSaturationMutagenesis.java From gatk with BSD 3-Clause "New" or "Revised" License | 6 votes |
@Override public void onTraversalStart() { super.onTraversalStart(); staticLogger = logger; final SAMFileHeader header = getHeaderForReads(); if ( pairedMode && header != null && header.getSortOrder() == SortOrder.coordinate ) { throw new UserException("In paired mode the BAM cannot be coordinate sorted. Mates must be adjacent."); } if ( codonTranslation.length() != CodonTracker.N_REGULAR_CODONS ) { throw new UserException("codon-translation string must contain exactly 64 characters"); } reference = new Reference(ReferenceDataSource.of(referenceArguments.getReferencePath())); codonTracker = new CodonTracker(orfCoords, reference.getRefSeq(), logger); if ( writeRejectedReads ) { rejectedReadsBAMWriter = createSAMWriter(new GATKPath(outputFilePrefix + ".rejected.bam"), false); } }
Example #5
Source File: RevertSam.java From picard with MIT License | 5 votes |
private SAMFileHeader createOutHeader( final SAMFileHeader inHeader, final SAMFileHeader.SortOrder sortOrder, final boolean removeAlignmentInformation) { final SAMFileHeader outHeader = new SAMFileHeader(); outHeader.setSortOrder(sortOrder); if (!removeAlignmentInformation) { outHeader.setSequenceDictionary(inHeader.getSequenceDictionary()); outHeader.setProgramRecords(inHeader.getProgramRecords()); } return outHeader; }
Example #6
Source File: AbstractAlignmentMerger.java From picard with MIT License | 5 votes |
/** constructor with a default setting for unmappingReadsStrategy. * * see full constructor for parameters * * */ public AbstractAlignmentMerger(final File unmappedBamFile, final File targetBamFile, final File referenceFasta, final boolean clipAdapters, final boolean bisulfiteSequence, final boolean alignedReadsOnly, final SAMProgramRecord programRecord, final List<String> attributesToRetain, final List<String> attributesToRemove, final Integer read1BasesTrimmed, final Integer read2BasesTrimmed, final List<SamPairUtil.PairOrientation> expectedOrientations, final SortOrder sortOrder, final PrimaryAlignmentSelectionStrategy primaryAlignmentSelectionStrategy, final boolean addMateCigar, final boolean unmapContaminantReads) { this(unmappedBamFile, targetBamFile, referenceFasta, clipAdapters, bisulfiteSequence, alignedReadsOnly, programRecord, attributesToRetain, attributesToRemove, read1BasesTrimmed, read2BasesTrimmed, expectedOrientations, sortOrder, primaryAlignmentSelectionStrategy, addMateCigar, unmapContaminantReads, UnmappingReadStrategy.DO_NOT_CHANGE); }
Example #7
Source File: CompareDropSeqAlignments.java From Drop-seq with MIT License | 5 votes |
private Iterator<SAMRecord> getQueryNameSortedData (final List<File> bamFiles) { SamReaderFactory factory= SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.EAGERLY_DECODE); SamHeaderAndIterator headerIterator= SamFileMergeUtil.mergeInputs(bamFiles, false, factory); if (headerIterator.header.getSortOrder().equals(SortOrder.queryname)) return headerIterator.iterator; log.info("Input SAM/BAM not in queryname order, sorting..."); final ProgressLogger progressLogger = new ProgressLogger(log, 1000000, "Sorting reads in query name order"); final CloseableIterator<SAMRecord> result = SamRecordSortingIteratorFactory.create(headerIterator.header, headerIterator.iterator, READ_NAME_COMPARATOR, progressLogger); log.info("Sorting finished."); return result; }
Example #8
Source File: RevertSam.java From picard with MIT License | 5 votes |
private Map<String, SAMFileHeader> createHeaderMap( final SAMFileHeader inHeader, final SortOrder sortOrder, final boolean removeAlignmentInformation) { final Map<String, SAMFileHeader> headerMap = new HashMap<>(); for (final SAMReadGroupRecord readGroup : inHeader.getReadGroups()) { final SAMFileHeader header = createOutHeader(inHeader, sortOrder, removeAlignmentInformation); header.addReadGroup(readGroup); headerMap.put(readGroup.getId(), header); } return headerMap; }
Example #9
Source File: SamAlignmentMerger.java From picard with MIT License | 5 votes |
/** * Constructor with a default value for unmappingReadStrategy * */ public SamAlignmentMerger(final File unmappedBamFile, final File targetBamFile, final File referenceFasta, final SAMProgramRecord programRecord, final boolean clipAdapters, final boolean bisulfiteSequence, final boolean alignedReadsOnly, final List<File> alignedSamFile, final int maxGaps, final List<String> attributesToRetain, final List<String> attributesToRemove, final Integer read1BasesTrimmed, final Integer read2BasesTrimmed, final List<File> read1AlignedSamFile, final List<File> read2AlignedSamFile, final List<SamPairUtil.PairOrientation> expectedOrientations, final SortOrder sortOrder, final PrimaryAlignmentSelectionStrategy primaryAlignmentSelectionStrategy, final boolean addMateCigar, final boolean unmapContaminantReads, final int minUnclippedBases) { this(unmappedBamFile, targetBamFile, referenceFasta, programRecord, clipAdapters, bisulfiteSequence, alignedReadsOnly, alignedSamFile, maxGaps, attributesToRetain, attributesToRemove, read1BasesTrimmed, read2BasesTrimmed, read1AlignedSamFile, read2AlignedSamFile, expectedOrientations, sortOrder, primaryAlignmentSelectionStrategy, addMateCigar, unmapContaminantReads, minUnclippedBases, UnmappingReadStrategy.DO_NOT_CHANGE, SAMSequenceDictionary.DEFAULT_DICTIONARY_EQUAL_TAG ); }
Example #10
Source File: SortedSAMWriter.java From abra2 with MIT License | 5 votes |
public void outputFinal(int sampleIdx, String inputBam) throws IOException { Logger.info("Finishing: " + outputFiles[sampleIdx]); SAMRecord[] readsByNameArray = null; SAMRecord[] readsByCoordArray = null; if (shouldSort) { // Initialize internal read buffers used by SortingCollection2 // These are initialized once per thread and re-used each time // a SortingCollection2 is initialized. readsByNameArray = new SAMRecord[maxRecordsInRam]; readsByCoordArray = new SAMRecord[maxRecordsInRam]; // Only allow buffering if sorting writerFactory.setUseAsyncIo(true); writerFactory.setAsyncOutputBufferSize(ASYNC_READ_CACHE_SIZE); writerFactory.setCreateIndex(shouldCreateIndex); } else { writerFactory.setUseAsyncIo(false); } writerFactory.setCompressionLevel(finalCompressionLevel); if (shouldSort) { samHeaders[sampleIdx].setSortOrder(SortOrder.coordinate); } else { samHeaders[sampleIdx].setSortOrder(SortOrder.unsorted); } SAMFileWriter output = writerFactory.makeBAMWriter(samHeaders[sampleIdx], true, new File(outputFiles[sampleIdx]), finalCompressionLevel); for (String chromosome : chromosomeChunker.getChromosomes()) { processChromosome(output, sampleIdx, chromosome, readsByNameArray, readsByCoordArray); } processUnmapped(output, inputBam); output.close(); }
Example #11
Source File: FixMateInformation.java From picard with MIT License | 4 votes |
protected void createSamFileWriter(final SAMFileHeader header) { out = new SAMFileWriterFactory().makeSAMOrBAMWriter(header, header.getSortOrder() == SortOrder.queryname, OUTPUT); }
Example #12
Source File: Transcode.java From cramtools with Apache License 2.0 | 4 votes |
public static void main(String[] args) throws IOException { Params params = new Params(); JCommander jc = new JCommander(params); jc.parse(args); Log.setGlobalLogLevel(params.logLevel); if (args.length == 0 || params.help) { usage(jc); System.exit(1); } if (params.reference == null) { System.out.println("Reference file not found, will try downloading..."); } ReferenceSource referenceSource = null; if (params.reference != null) { System.setProperty("reference", params.reference.getAbsolutePath()); referenceSource = new ReferenceSource(params.reference); } else { String prop = System.getProperty("reference"); if (prop != null) { referenceSource = new ReferenceSource(new File(prop)); } } SamReaderFactory factory = SamReaderFactory.make().validationStringency(params.validationLevel); SamInputResource r; if ("file".equalsIgnoreCase(params.url.getProtocol())) r = SamInputResource.of(params.url.getPath()); else r = SamInputResource.of(params.url); SamReader reader = factory.open(r); SAMRecordIterator iterator = reader.iterator(); SAMFileWriterFactory writerFactory = new SAMFileWriterFactory(); SAMFileWriter writer = null; OutputStream os = new BufferedOutputStream(new FileOutputStream(params.outputFile)); switch (params.outputFormat) { case BAM: writer = writerFactory.makeBAMWriter(reader.getFileHeader(), reader.getFileHeader().getSortOrder() == SortOrder.coordinate, os); break; case CRAM: writer = writerFactory.makeCRAMWriter(reader.getFileHeader(), os, params.reference); break; default: System.out.println("Unknown output format: " + params.outputFormat); System.exit(1); } while (iterator.hasNext()) { writer.addAlignment(iterator.next()); } writer.close(); reader.close(); }
Example #13
Source File: TagBamWithReadSequenceExtended.java From Drop-seq with MIT License | 4 votes |
@Override protected int doWork() { if (this.TAG_BARCODED_READ && this.DISCARD_READ) { log.error("If TAG_BARCODED_READ=true and DISCARD_READ=true, you're throwing away the tag with the read. Stopping"); return 1; } if (HARD_CLIP_BASES && DISCARD_READ) { log.error("It doesn't make sense for HARD_CLIP_BASES and DISCARD_READ both to be true."); return 1; } IOUtil.assertFileIsReadable(INPUT); IOUtil.assertFileIsWritable(OUTPUT); // get the header. SamReader inputSam = SamReaderFactory.makeDefault().open(INPUT); SAMFileHeader h= inputSam.getFileHeader(); PeekableIterator<SAMRecord> iter = new PeekableIterator<>(CustomBAMIterators.getQuerynameSortedRecords(inputSam)); SamHeaderUtil.addPgRecord(h, this); // only assume reads are correctly sorted for output if the input BAM is queryname sorted. boolean assumeSorted = h.getSortOrder().equals(SortOrder.queryname); SAMFileWriter writer= new SAMFileWriterFactory().makeSAMOrBAMWriter(h, assumeSorted, OUTPUT); List<BaseRange> baseRanges = BaseRange.parseBaseRange(this.BASE_RANGE); BaseQualityFilter filter = new BaseQualityFilter(baseRanges, this.BASE_QUALITY); // this.metric = new FailedBaseMetric(BaseRange.getTotalRangeSize(this.BASE_RANGE)); ProgressLogger progress = new ProgressLogger(this.log); while (iter.hasNext()) { //log.info(count); SAMRecord r1 = iter.next(); SAMRecord r2 = iter.peek(); // if the 2nd read is null, then you're just about done, so don't test the non-existent read name boolean sameName=false; if (r2!=null) sameName=r1.getReadName().equals(r2.getReadName()); if (!sameName) { processSingleRead(r1, filter, writer, this.HARD_CLIP_BASES); continue; } // check to see if the two reads are properly paired if they have the same name ReadPair p = new ReadPair(r1, r2); if (p.testProperlyPaired()==false) { log.error(("Reads not properly paired! R1: " + r1.getReadName() + " R2: " + r2.getReadName())); System.exit(1); } // since you're in paired end land, make the 2nd read a real read and not a peeked read. r2=iter.next(); r1=p.getRead1(); r2=p.getRead2(); if (BARCODED_READ==1) processReadPair(r1, r2, filter, writer, this.DISCARD_READ, this.HARD_CLIP_BASES); if (BARCODED_READ==2) processReadPair(r2, r1, filter, writer, this.DISCARD_READ, this.HARD_CLIP_BASES); progress.record(r1); progress.record(r2); } log.info("Total of " + progress.getCount() + " reads processed."); writer.close(); if (this.SUMMARY!=null) writeOutput (filter.getMetric(), this.SUMMARY); CloserUtil.close(inputSam); CloserUtil.close(iter); return (0); }
Example #14
Source File: CollapseTagWithContext.java From Drop-seq with MIT License | 4 votes |
@Override protected int doWork() { int vc = validateCommands(); if (vc>0) return vc; if (this.COUNT_TAGS_EDIT_DISTANCE>0) this.medUMI = new MapBarcodesByEditDistance(false); med = new MapBarcodesByEditDistance(false, this.NUM_THREADS, 0); PrintStream outMetrics = null; if (this.ADAPTIVE_ED_METRICS_FILE!=null) { outMetrics = new ErrorCheckingPrintStream(IOUtil.openFileForWriting(this.ADAPTIVE_ED_METRICS_FILE)); writeAdaptiveEditDistanceMetricsHeader(this.ADAPTIVE_ED_METRICS_ED_LIST, outMetrics); } if (this.MUTATIONAL_COLLAPSE_METRICS_FILE!=null) { med = new MapBarcodesByEditDistance(true, this.NUM_THREADS, 1000); outMetrics = new ErrorCheckingPrintStream(IOUtil.openFileForWriting(this.MUTATIONAL_COLLAPSE_METRICS_FILE)); writeMutationalCollapseMetricsHeader(this.ADAPTIVE_ED_METRICS_ED_LIST, outMetrics); } IOUtil.assertFileIsReadable(INPUT); IOUtil.assertFileIsWritable(OUTPUT); SamReader reader = SamReaderFactory.makeDefault().open(INPUT); SAMFileHeader header = reader.getFileHeader(); SortOrder sortOrder= header.getSortOrder(); SAMFileWriter writer = getWriter (reader); final ObjectSink<SAMRecord> recSink = new SamWriterSink(writer); PeekableGroupingIterator<SAMRecord> groupingIter = orderReadsByTagsPeekable(reader, this.COLLAPSE_TAG, this.CONTEXT_TAGS, this.READ_MQ, this.OUT_TAG, recSink); log.info("Collapsing tag and writing results"); if (!LOW_MEMORY_MODE) fasterIteration(groupingIter, writer, outMetrics); else lowMemoryIteration(groupingIter, writer, outMetrics, header); log.info("Re-sorting output BAM in "+ sortOrder.toString()+ " if neccesary"); CloserUtil.close(groupingIter); CloserUtil.close(reader); writer.close(); if (outMetrics!=null) CloserUtil.close(outMetrics); log.info("DONE"); return 0; }
Example #15
Source File: CollectInsertSizeMetricsSpark.java From gatk with BSD 3-Clause "New" or "Revised" License | 4 votes |
@Override protected SortOrder getExpectedSortOrder() { return insertSizeCollector.getExpectedSortOrder(); }
Example #16
Source File: CollectQualityYieldMetricsSpark.java From gatk with BSD 3-Clause "New" or "Revised" License | 4 votes |
@Override protected SortOrder getExpectedSortOrder() { return qualityYieldCollector.getExpectedSortOrder(); }
Example #17
Source File: CollectHsMetricsTest.java From picard with MIT License | 4 votes |
@Test public void testHsMetricsHandlesIndelsAppropriately() throws IOException { final SAMRecordSetBuilder withDeletions = new SAMRecordSetBuilder(true, SortOrder.coordinate); final SAMRecordSetBuilder withInsertions = new SAMRecordSetBuilder(true, SortOrder.coordinate); final IntervalList targets = new IntervalList(withDeletions.getHeader()); final IntervalList baits = new IntervalList(withDeletions.getHeader()); targets.add(new Interval("chr1", 1000, 1199, false, "t1")); baits.add(new Interval("chr1", 950, 1049, false, "b1")); baits.add(new Interval("chr1", 1050, 1149, false, "b2")); baits.add(new Interval("chr1", 1150, 1249, false, "b3")); // Generate 100 reads that fully cover the the target in each BAM for (int i=0; i<100; ++i) { withDeletions.addFrag( "d" + i, 0, 1000, false, false, "100M20D80M", null, 30); withInsertions.addFrag("i" + i, 0, 1000, false, false, "100M50I100M", null, 30); } // Write things out to file final File dir = IOUtil.createTempDir("hsmetrics.", ".test"); final File bs = new File(dir, "baits.interval_list").getAbsoluteFile(); final File ts = new File(dir, "targets.interval_list").getAbsoluteFile(); baits.write(bs); targets.write(ts); final File withDelBam = writeBam(withDeletions, new File(dir, "with_del.bam")); final File withInsBam = writeBam(withInsertions, new File(dir, "with_ins.bam")); // Now run CollectHsMetrics four times final File out = Files.createTempFile("hsmetrics.", ".txt").toFile(); runPicardCommandLine(Arrays.asList("INCLUDE_INDELS=false", "SAMPLE_SIZE=0", "TI="+ts.getPath(), "BI="+bs.getPath(), "O="+out.getPath(), "I="+withDelBam.getAbsolutePath())); final HsMetrics delsWithoutIndelHandling = readMetrics(out); runPicardCommandLine(Arrays.asList("INCLUDE_INDELS=true", "SAMPLE_SIZE=0", "TI="+ts.getPath(), "BI="+bs.getPath(), "O="+out.getPath(), "I="+withDelBam.getAbsolutePath())); final HsMetrics delsWithIndelHandling = readMetrics(out); runPicardCommandLine(Arrays.asList("INCLUDE_INDELS=false", "SAMPLE_SIZE=0", "TI="+ts.getPath(), "BI="+bs.getPath(), "O="+out.getPath(), "I="+withInsBam.getAbsolutePath())); final HsMetrics insWithoutIndelHandling = readMetrics(out); runPicardCommandLine(Arrays.asList("INCLUDE_INDELS=true", "SAMPLE_SIZE=0", "TI="+ts.getPath(), "BI="+bs.getPath(), "O="+out.getPath(), "I="+withInsBam.getAbsolutePath())); final HsMetrics insWithIndelHandling = readMetrics(out); IOUtil.deleteDirectoryTree(dir); Assert.assertEquals(delsWithoutIndelHandling.MEAN_TARGET_COVERAGE, 90.0); // 100X over 180/200 bases due to deletion Assert.assertEquals(delsWithIndelHandling.MEAN_TARGET_COVERAGE, 100.0); // 100X with counting the deletion Assert.assertEquals(insWithoutIndelHandling.PCT_USABLE_BASES_ON_TARGET, 200/250d); // 50/250 inserted bases are not counted as on target Assert.assertEquals(insWithIndelHandling.PCT_USABLE_BASES_ON_TARGET, 1.0d); // inserted bases are counted as on target }
Example #18
Source File: SamBamSequenceDataSource.java From rtg-tools with BSD 2-Clause "Simplified" License | 4 votes |
protected void checkSortOrder() { if (mSamReader.getFileHeader().getSortOrder() != SortOrder.queryname) { throw new NoTalkbackSlimException("SAM or BAM input must be sorted by queryname."); } }
Example #19
Source File: AbstractAlignmentMerger.java From picard with MIT License | 4 votes |
/** * Constructor * * @param unmappedBamFile The BAM file that was used as the input to the aligner, which will * include info on all the reads that did not map. Required. * @param targetBamFile The file to which to write the merged SAM records. Required. * @param referenceFasta The reference sequence for the map files. Required. * @param clipAdapters Whether adapters marked in unmapped BAM file should be marked as * soft clipped in the merged bam. Required. * @param bisulfiteSequence Whether the reads are bisulfite sequence (used when calculating the * NM and UQ tags). Required. * @param alignedReadsOnly Whether to output only those reads that have alignment data * @param programRecord Program record for target file SAMRecords created. * @param attributesToRetain private attributes from the alignment record that should be * included when merging. This overrides the exclusion of * attributes whose tags start with the reserved characters * of X, Y, and Z * @param attributesToRemove attributes from the alignment record that should be * removed when merging. This overrides attributesToRetain if they share * common tags. * @param read1BasesTrimmed The number of bases trimmed from start of read 1 prior to alignment. Optional. * @param read2BasesTrimmed The number of bases trimmed from start of read 2 prior to alignment. Optional. * @param expectedOrientations A List of SamPairUtil.PairOrientations that are expected for * aligned pairs. Used to determine the properPair flag. * @param sortOrder The order in which the merged records should be output. If null, * output will be coordinate-sorted * @param primaryAlignmentSelectionStrategy What to do when there are multiple primary alignments, or multiple * alignments but none primary, for a read or read pair. * @param addMateCigar True if we are to add or maintain the mate CIGAR (MC) tag, false if we are to remove or not include. * @param unmapContaminantReads If true, identify reads having the signature of cross-species contamination (i.e. mostly clipped bases), * and mark them as unmapped. * @param unmappingReadsStrategy An enum describing how to deal with reads whose mapping information are being removed (currently this happens due to cross-species * contamination). Ignored unless unmapContaminantReads is true. */ public AbstractAlignmentMerger(final File unmappedBamFile, final File targetBamFile, final File referenceFasta, final boolean clipAdapters, final boolean bisulfiteSequence, final boolean alignedReadsOnly, final SAMProgramRecord programRecord, final List<String> attributesToRetain, final List<String> attributesToRemove, final Integer read1BasesTrimmed, final Integer read2BasesTrimmed, final List<SamPairUtil.PairOrientation> expectedOrientations, final SortOrder sortOrder, final PrimaryAlignmentSelectionStrategy primaryAlignmentSelectionStrategy, final boolean addMateCigar, final boolean unmapContaminantReads, final UnmappingReadStrategy unmappingReadsStrategy) { IOUtil.assertFileIsReadable(unmappedBamFile); IOUtil.assertFileIsWritable(targetBamFile); IOUtil.assertFileIsReadable(referenceFasta); this.unmappedBamFile = unmappedBamFile; this.targetBamFile = targetBamFile; this.referenceFasta = referenceFasta; this.refSeq = new ReferenceSequenceFileWalker(referenceFasta); this.clipAdapters = clipAdapters; this.bisulfiteSequence = bisulfiteSequence; this.alignedReadsOnly = alignedReadsOnly; this.header = new SAMFileHeader(); this.sortOrder = sortOrder != null ? sortOrder : SortOrder.coordinate; header.setSortOrder(SortOrder.coordinate); if (programRecord != null) { setProgramRecord(programRecord); } if (attributesToRetain != null) { this.attributesToRetain.addAll(attributesToRetain); } if (attributesToRemove != null) { this.attributesToRemove.addAll(attributesToRemove); // attributesToRemove overrides attributesToRetain if (!this.attributesToRetain.isEmpty()) { this.attributesToRemove.stream() .filter(this.attributesToRetain::contains) .peek(a->log.info("Overriding retaining the " + a + " tag since 'remove' overrides 'retain'.")) .forEach(this.attributesToRetain::remove); } } this.read1BasesTrimmed = read1BasesTrimmed; this.read2BasesTrimmed = read2BasesTrimmed; this.expectedOrientations = expectedOrientations; this.primaryAlignmentSelectionStrategy = primaryAlignmentSelectionStrategy; this.addMateCigar = addMateCigar; this.unmapContaminantReads = unmapContaminantReads; this.unmappingReadsStrategy = unmappingReadsStrategy; }
Example #20
Source File: SortedSAMWriter.java From abra2 with MIT License | 4 votes |
private void initChromosomeChunk(int sampleIdx, int chromosomeChunkIdx) { Logger.debug("Writer init: %d, %d", sampleIdx, chromosomeChunkIdx); samHeaders[sampleIdx].setSortOrder(SortOrder.unsorted); writers[sampleIdx][chromosomeChunkIdx] = writerFactory.makeBAMWriter(samHeaders[sampleIdx], false, new File(getTempFilename(sampleIdx, chromosomeChunkIdx)), TEMP_COMPRESSION_LEVEL); }
Example #21
Source File: RevertSam.java From picard with MIT License | 4 votes |
static void validateSanitizeSortOrder(final boolean sanitize, final SAMFileHeader.SortOrder sortOrder, final List<String> errors) { if (sanitize && sortOrder != SAMFileHeader.SortOrder.queryname) { errors.add("SORT_ORDER must be queryname when sanitization is enabled with SANITIZE=true."); } }
Example #22
Source File: RevertSam.java From picard with MIT License | 4 votes |
private boolean isPresorted(final SAMFileHeader inHeader, final SortOrder sortOrder, final boolean sanitizing) { return (inHeader.getSortOrder() == sortOrder) || (sortOrder == SortOrder.queryname && sanitizing); }
Example #23
Source File: SamAlignmentMerger.java From picard with MIT License | 4 votes |
/** * Reads the aligned SAM records into a SortingCollection and returns an iterator over that collection */ protected CloseableIterator<SAMRecord> getQuerynameSortedAlignedRecords() { final CloseableIterator<SAMRecord> mergingIterator; final SAMFileHeader header; // When the alignment records, including both ends of a pair, are in SAM files if (alignedSamFile != null && !alignedSamFile.isEmpty()) { final List<SAMFileHeader> headers = new ArrayList<>(alignedSamFile.size()); final List<SamReader> readers = new ArrayList<>(alignedSamFile.size()); for (final File f : this.alignedSamFile) { final SamReader r = SamReaderFactory.makeDefault().referenceSequence(referenceFasta).open(f); headers.add(r.getFileHeader()); readers.add(r); // As we're going through and opening the aligned files, if we don't have a @PG yet // and there is only a single one in the input file, use that! if (getProgramRecord() == null && r.getFileHeader().getProgramRecords().size() == 1) { setProgramRecord(r.getFileHeader().getProgramRecords().iterator().next()); } } // assert that all the dictionaries are the same before merging the headers. alignedSamDictionary = headers.get(0).getSequenceDictionary(); headers.stream() .map(SAMFileHeader::getSequenceDictionary) .forEach(alignedSamDictionary::assertSameDictionary); final SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(SortOrder.queryname, headers, false); mergingIterator = new MergingSamRecordIterator(headerMerger, readers, true); header = headerMerger.getMergedHeader(); } // When the ends are aligned separately and don't have firstOfPair information correctly // set we use this branch. else { // this merging iterator already asserts that all the headers are the same mergingIterator = new SeparateEndAlignmentIterator(this.read1AlignedSamFile, this.read2AlignedSamFile, referenceFasta); header = ((SeparateEndAlignmentIterator) mergingIterator).getHeader(); alignedSamDictionary = header.getSequenceDictionary(); // As we're going through and opening the aligned files, if we don't have a @PG yet // and there is only a single one in the input file, use that! if (getProgramRecord() == null && header.getProgramRecords().size() == 1) { setProgramRecord(header.getProgramRecords().iterator().next()); } } if (!forceSort) { return mergingIterator; } final SortingCollection<SAMRecord> alignmentSorter = SortingCollection.newInstance(SAMRecord.class, new BAMRecordCodec(header), new SAMRecordQueryNameComparator(), MAX_RECORDS_IN_RAM); int count = 0; while (mergingIterator.hasNext()) { alignmentSorter.add(mergingIterator.next()); count++; if (count > 0 && count % 1000000 == 0) { log.info("Read " + count + " records from alignment SAM/BAM."); } } log.info("Finished reading " + count + " total records from alignment SAM/BAM."); mergingIterator.close(); return new DelegatingIterator<SAMRecord>(alignmentSorter.iterator()) { @Override public void close() { super.close(); alignmentSorter.cleanup(); } }; }
Example #24
Source File: SortedSAMWriter.java From abra2 with MIT License | 3 votes |
public static void main(String[] args) throws IOException { // Logger.LEVEL = Logger.Level.TRACE; String ref = "/home/lmose/dev/reference/hg38b/hg38.fa"; CompareToReference2 c2r = new CompareToReference2(); c2r.init(ref); ChromosomeChunker cc = new ChromosomeChunker(c2r); cc.init(); // SamReader reader = SAMRecordUtils.getSamReader("/home/lmose/dev/abra2_dev/sort_issue3/0.5.bam"); // SamReader reader = SAMRecordUtils.getSamReader("/home/lmose/dev/abra2_dev/sort_issue4/1.6.bam"); SamReader reader = SAMRecordUtils.getSamReader("/home/lmose/dev/abra2_dev/mate_fix/0.87.bam"); SAMFileHeader header = reader.getFileHeader(); header.setSortOrder(SortOrder.coordinate); int maxRecordsInRam = 1000000; SAMRecord[] readsByNameArray = new SAMRecord[maxRecordsInRam]; SAMRecord[] readsByCoordArray = new SAMRecord[maxRecordsInRam]; SortedSAMWriter writer = new SortedSAMWriter(new String[] { "/home/lmose/dev/abra2_dev/mate_fix" }, "/home/lmose/dev/abra2_dev/mate_fix", new SAMFileHeader[] { reader.getFileHeader() }, true, cc, 1,true,1000,false, false, false, maxRecordsInRam); SAMFileWriterFactory writerFactory = new SAMFileWriterFactory(); SAMFileWriter out = writerFactory.makeBAMWriter(reader.getFileHeader(), true, new File("/home/lmose/dev/abra2_dev/mate_fix/test.bam"),1); long start = System.currentTimeMillis(); writer.processChromosome(out, 0, "chr12", readsByNameArray, readsByCoordArray); out.close(); long stop = System.currentTimeMillis(); System.out.println("Elapsed msecs: " + (stop-start)); }
Example #25
Source File: ExampleCollectMultiMetricsSpark.java From gatk with BSD 3-Clause "New" or "Revised" License | 2 votes |
/** * Return the sort order required/expected by this collector. * @return SAMFileHeader.SortOrder for this collector */ @Override protected SortOrder getExpectedSortOrder() { return exampleMultiCollector.getExpectedSortOrder(); }
Example #26
Source File: MetricsCollectorSpark.java From gatk with BSD 3-Clause "New" or "Revised" License | 2 votes |
/** * Return the sort order this collect requires. Collector consumers will validate * the expected sort order. If no specific sort order is required, return * <code>SortOrder.unsorted</code>. Default implementation returns * <code>SortOrder.coordinate</code> * @return SortOrder required for this collector or <code>SortOrder.unsorted</code> * to indicate any sort order is acceptable. */ default SortOrder getExpectedSortOrder() { return SortOrder.coordinate; }
Example #27
Source File: SamAlignmentMerger.java From picard with MIT License | 2 votes |
/** * Constructor * * @param unmappedBamFile The BAM file that was used as the input to the aligner, which will * include info on all the reads that did not map. Required. * @param targetBamFile The file to which to write the merged SAM records. Required. * @param referenceFasta The reference sequence for the map files. Required. * @param programRecord Program record for taget file SAMRecords created. * @param clipAdapters Whether adapters marked in unmapped BAM file should be marked as * soft clipped in the merged bam. Required. * @param bisulfiteSequence Whether the reads are bisulfite sequence (used when calculating the * NM and UQ tags). Required. * @param alignedReadsOnly Whether to output only those reads that have alignment data * @param alignedSamFile The SAM file(s) with alignment information. Optional. If this is * not provided, then read1AlignedSamFile and read2AlignedSamFile must be. * @param maxGaps The maximum number of insertions or deletions permitted in an * alignment. Alignments with more than this many gaps will be ignored. * -1 means to allow any number of gaps. * @param attributesToRetain attributes from the alignment record that should be * retained when merging, overridden by attributesToRemove if they share * common tags. * @param attributesToRemove attributes from the alignment record that should be * removed when merging. This overrides attributesToRetain if they share * common tags. * @param read1BasesTrimmed The number of bases trimmed from start of read 1 prior to alignment. Optional. * @param read2BasesTrimmed The number of bases trimmed from start of read 2 prior to alignment. Optional. * @param read1AlignedSamFile The alignment records for read1. Used when the two ends of a read are * aligned separately. This is optional, but must be specified if * alignedSamFile is not. * @param read2AlignedSamFile The alignment records for read1. Used when the two ends of a read are * aligned separately. This is optional, but must be specified if * alignedSamFile is not. * @param expectedOrientations A List of SamPairUtil.PairOrientations that are expected for * aligned pairs. Used to determine the properPair flag. * @param sortOrder The order in which the merged records should be output. If null, * output will be coordinate-sorted * @param primaryAlignmentSelectionStrategy How to handle multiple alignments for a fragment or read pair, * in which none are primary, or more than one is marked primary * @param addMateCigar True if we are to add or maintain the mate CIGAR (MC) tag, false if we are to remove or not include. * @param unmapContaminantReads If true, identify reads having the signature of cross-species contamination (i.e. mostly clipped bases), * and mark them as unmapped. * @param minUnclippedBases If unmapContaminantReads is set, require this many unclipped bases or else the read will be marked as contaminant. * @param unmappingReadStrategy An enum describing how to deal with reads whose mapping information are being removed (currently this happens due to cross-species * contamination). Ignored unless unmapContaminantReads is true. * @param requiredMatchingDictionaryTags A list of SAMSequenceRecord tags that must be equal (if present) in the aligned bam and the reference dictionary. * Program will issue a warning about other tags, if present in both files and are different. */ public SamAlignmentMerger(final File unmappedBamFile, final File targetBamFile, final File referenceFasta, final SAMProgramRecord programRecord, final boolean clipAdapters, final boolean bisulfiteSequence, final boolean alignedReadsOnly, final List<File> alignedSamFile, final int maxGaps, final List<String> attributesToRetain, final List<String> attributesToRemove, final Integer read1BasesTrimmed, final Integer read2BasesTrimmed, final List<File> read1AlignedSamFile, final List<File> read2AlignedSamFile, final List<SamPairUtil.PairOrientation> expectedOrientations, final SortOrder sortOrder, final PrimaryAlignmentSelectionStrategy primaryAlignmentSelectionStrategy, final boolean addMateCigar, final boolean unmapContaminantReads, final int minUnclippedBases, final UnmappingReadStrategy unmappingReadStrategy, final List<String> requiredMatchingDictionaryTags) { super(unmappedBamFile, targetBamFile, referenceFasta, clipAdapters, bisulfiteSequence, alignedReadsOnly, programRecord, attributesToRetain, attributesToRemove, read1BasesTrimmed, read2BasesTrimmed, expectedOrientations, sortOrder, primaryAlignmentSelectionStrategy, addMateCigar, unmapContaminantReads, unmappingReadStrategy); if ((alignedSamFile == null || alignedSamFile.isEmpty()) && (read1AlignedSamFile == null || read1AlignedSamFile.isEmpty() || read2AlignedSamFile == null || read2AlignedSamFile.isEmpty())) { throw new IllegalArgumentException("Either alignedSamFile or BOTH of read1AlignedSamFile and " + "read2AlignedSamFile must be specified."); } if (alignedSamFile != null) { alignedSamFile.forEach(IOUtil::assertFileIsReadable); } else { read1AlignedSamFile.forEach(IOUtil::assertFileIsReadable); read2AlignedSamFile.forEach(IOUtil::assertFileIsReadable); } this.alignedSamFile = alignedSamFile; this.read1AlignedSamFile = read1AlignedSamFile; this.read2AlignedSamFile = read2AlignedSamFile; this.maxGaps = maxGaps; this.minUnclippedBases = minUnclippedBases; this.contaminationFilter = new OverclippedReadFilter(minUnclippedBases, false); this.requiredMatchingDictionaryTags = requiredMatchingDictionaryTags; log.info("Processing SAM file(s): " + ((alignedSamFile != null) ? alignedSamFile : (read1AlignedSamFile + "," + read2AlignedSamFile))); }
Example #28
Source File: MetricsCollectorSparkTool.java From gatk with BSD 3-Clause "New" or "Revised" License | votes |
abstract protected SortOrder getExpectedSortOrder();