Java Code Examples for htsjdk.samtools.SAMFileHeader#SortOrder

The following examples show how to use htsjdk.samtools.SAMFileHeader#SortOrder . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: SortSamSparkIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test(enabled = false, dataProvider="sortbams", groups="spark")
public void testSortBAMsSharded(
        final String inputFileName,
        final String unused,
        final String referenceFileName,
        final String outputExtension,
        final SAMFileHeader.SortOrder sortOrder) {
    final File inputFile = getTestFile(inputFileName);
    final File actualOutputFile = createTempFile("sort_sam", outputExtension);
    File referenceFile = null == referenceFileName ? null : getTestFile(referenceFileName);
    ArgumentsBuilder args = new ArgumentsBuilder();
    args.addInput(inputFile);
    args.addOutput(actualOutputFile);
    if (null != referenceFile) {
        args.addReference(referenceFile);
    }
    args.add(StandardArgumentDefinitions.SORT_ORDER_LONG_NAME, sortOrder.name());
    args.add(GATKSparkTool.SHARDED_OUTPUT_LONG_NAME,true);
    args.add(GATKSparkTool.NUM_REDUCERS_LONG_NAME, "2");

    this.runCommandLine(args);

    final ReadsSparkSource source = new ReadsSparkSource(SparkContextFactory.getTestSparkContext());
    final JavaRDD<GATKRead> reads = source.getParallelReads(new GATKPath(actualOutputFile.getAbsolutePath()), referenceFile == null ? null : new GATKPath(referenceFile.getAbsolutePath()));

    final SAMFileHeader header = source.getHeader(new GATKPath(actualOutputFile.getAbsolutePath()),
            referenceFileName == null ? null : new GATKPath(referenceFile.getAbsolutePath()));

    final List<SAMRecord> reloadedReads = reads.collect().stream().map(read -> read.convertToSAMRecord(header)).collect(Collectors.toList());
    BaseTest.assertSorted(reloadedReads.iterator(), sortOrder.getComparatorInstance(),   reloadedReads.stream().map(SAMRecord::getSAMString).collect(Collectors.joining("\n")));
}
 
Example 2
Source File: RevertSam.java    From picard with MIT License 5 votes vote down vote up
private SAMFileHeader createOutHeader(
        final SAMFileHeader inHeader,
        final SAMFileHeader.SortOrder sortOrder,
        final boolean removeAlignmentInformation) {

    final SAMFileHeader outHeader = new SAMFileHeader();
    outHeader.setSortOrder(sortOrder);
    if (!removeAlignmentInformation) {
        outHeader.setSequenceDictionary(inHeader.getSequenceDictionary());
        outHeader.setProgramRecords(inHeader.getProgramRecords());
    }
    return outHeader;
}
 
Example 3
Source File: SortSamSparkIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test(expectedExceptions = CommandLineException.BadArgumentValue.class, dataProvider = "getInvalidSortOrders")
public void testBadSortOrders(SAMFileHeader.SortOrder badOrder){
    final File unsortedBam = new File(getTestDataDir(), QUERY_NAME_BAM);
    ArgumentsBuilder args = new ArgumentsBuilder();
    args.addInput(unsortedBam);
    args.addOutput(createTempFile("sort_bam_spark", BAM));
    args.add(StandardArgumentDefinitions.SORT_ORDER_LONG_NAME, badOrder.toString());

    this.runCommandLine(args);
}
 
Example 4
Source File: SortSamSparkIntegrationTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test(dataProvider="sortbams", groups="spark")
public void testSortBAMs(
        final String inputFileName,
        final String expectedOutputFileName,
        final String referenceFileName,
        final String outputExtension,
        final SAMFileHeader.SortOrder sortOrder) throws Exception {
    final File inputFile =  getTestFile(inputFileName);
    final File expectedOutputFile =  getTestFile(expectedOutputFileName);
    final File actualOutputFile = createTempFile("sort_sam", outputExtension);
    File referenceFile = null == referenceFileName ? null : getTestFile(referenceFileName);

    final SamReaderFactory factory = SamReaderFactory.makeDefault();

    ArgumentsBuilder args = new ArgumentsBuilder();
    args.addInput(inputFile);
    args.addOutput(actualOutputFile);
    if (null != referenceFile) {
        args.addReference(referenceFile);
        factory.referenceSequence(referenceFile);
    }
    args.add(StandardArgumentDefinitions.SORT_ORDER_LONG_NAME, sortOrder.name());

    this.runCommandLine(args);

    //test files are exactly equal
    SamAssertionUtils.assertSamsEqual(actualOutputFile, expectedOutputFile, ValidationStringency.DEFAULT_STRINGENCY, referenceFile);

    //test sorting matches htsjdk
    try(ReadsDataSource in = new ReadsPathDataSource(actualOutputFile.toPath(), factory )) {
        BaseTest.assertSorted(Utils.stream(in).map(read -> read.convertToSAMRecord(in.getHeader())).iterator(), sortOrder.getComparatorInstance());
    }
}
 
Example 5
Source File: GATKSparkTool.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
static SAMFileHeader.SortOrder identifySortOrder(final Collection<SAMFileHeader> headers){
    if (headers.size() > 1){
        return SAMFileHeader.SortOrder.unsorted;
    } else {
        return headers.iterator().next().getSortOrder();
    }
}
 
Example 6
Source File: ReadsPathDataSource.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@VisibleForTesting
static SAMFileHeader.SortOrder identifySortOrder( final List<SAMFileHeader> headers ){
    final Set<SAMFileHeader.SortOrder> sortOrders = headers.stream().map(SAMFileHeader::getSortOrder).collect(Collectors.toSet());
    final SAMFileHeader.SortOrder order;
    if (sortOrders.size() == 1) {
        order = sortOrders.iterator().next();
    } else {
        order = SAMFileHeader.SortOrder.unsorted;
        logger.warn("Inputs have different sort orders. Assuming {} sorted reads for all of them.", order);
    }
    return order;
}
 
Example 7
Source File: AsIsMarkDuplicatesTest.java    From picard with MIT License 4 votes vote down vote up
AbstractMarkDuplicatesCommandLineProgramTester getTester(SAMFileHeader.SortOrder so) {
    return new BySumOfBaseQAndInOriginalOrderMDTester();
}
 
Example 8
Source File: SortSamSpark.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
public SAMFileHeader.SortOrder getSamOrder() {
     return order;
}
 
Example 9
Source File: SortSamSpark.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
SparkSortOrder(SAMFileHeader.SortOrder order) {
    this.order = order;
}
 
Example 10
Source File: SVUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
@Test(groups = "sv", dataProvider = "forGetSamRecordComparator")
void testGetSamRecordComparator(final SAMFileHeader.SortOrder sortOrder, final Class<SAMRecordComparator> expectedClass) {
    Assert.assertEquals(SVUtils.getSamRecordComparator(sortOrder).getClass(), expectedClass);
}
 
Example 11
Source File: BoundSAMRecordIterator.java    From cramtools with Apache License 2.0 4 votes vote down vote up
@Override
public SAMRecordIterator assertSorted(SAMFileHeader.SortOrder sortOrder) {
	return delegate.assertSorted(sortOrder);
}
 
Example 12
Source File: SamClosedFileReader.java    From rtg-tools with BSD 2-Clause "Simplified" License 4 votes vote down vote up
@Override
public SAMRecordIterator assertSorted(SAMFileHeader.SortOrder sortOrder) {
  return this;
}
 
Example 13
Source File: CollectDuplicateMetricsTester.java    From picard with MIT License 4 votes vote down vote up
public CollectDuplicateMetricsTester( final SAMFileHeader.SortOrder sortOrder) {
    super(DuplicateScoringStrategy.ScoringStrategy.SUM_OF_BASE_QUALITIES, sortOrder, false);
}
 
Example 14
Source File: Merge.java    From cramtools with Apache License 2.0 4 votes vote down vote up
@Override
public SAMRecordIterator assertSorted(SAMFileHeader.SortOrder sortOrder) {
	// TODO Auto-generated method stub
	return null;
}
 
Example 15
Source File: AbstractAlignmentMergerTest.java    From picard with MIT License 4 votes vote down vote up
@Test
    public void testUnmapBacterialContamination() throws IOException {
        final SAMRecordSetBuilder builder = new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.queryname);
        final SAMFileHeader header = builder.getHeader();
        final SAMFileHeader.SortOrder sortOrder = header.getSortOrder();
        final SAMFileHeader newHeader = SAMRecordSetBuilder.makeDefaultHeader(sortOrder, 100000,true);
        builder.setHeader(newHeader);

        final File reference = File.createTempFile("reference",".fasta");
        reference.deleteOnExit();

        builder.writeRandomReference(reference.toPath());

        builder.addPair("overlappingpair", 0,500,500, false,false,"20S20M60S","20S20M60M",true,false,45);
        builder.addPair("overlappingpairFirstAligned", 0,500,500, false,true,"20S20M60S",null,true,false,45);
        builder.addPair("overlappingpairSecondAligned", 0,500,500, true,false,null,"20S20M60S",true,false,45);
        builder.addPair("overlappingpairFirstAlignedB", 0,500,500, false,true,"20S20M60S",null,false,true,45);
        builder.addPair("overlappingpairSecondAlignedB", 0,500,500, true,false,null,"20S20M60S",false,true,45);

//        builder.addFrag("frag",1,500,false,false,"20S20M60S",null, 45);
//        builder.addFrag("frag2",1,500,true,false,"20S20M60S",null, 45);
//        builder.addFrag("frag3",1,500,false,false,"20S20M60S",null, 45);
//        builder.addFrag("frag4",1,500,true,false,"20S20M60S",null, 45);

        final File file = newTempSamFile("aligned");

        try (SAMFileWriter writer = new SAMFileWriterFactory().makeWriter(builder.getHeader(), true, file, null)) {
            builder.getRecords().forEach(writer::addAlignment);
        }

        final RevertSam revertSam = new RevertSam();

        revertSam.INPUT = file;
        final File fileUnaligned = newTempSamFile("unaligned");
        revertSam.OUTPUT = fileUnaligned;

        revertSam.SANITIZE = false;
        revertSam.REMOVE_ALIGNMENT_INFORMATION=true;
        revertSam.REMOVE_DUPLICATE_INFORMATION=true;

        revertSam.SORT_ORDER = SAMFileHeader.SortOrder.queryname;

        Assert.assertEquals(revertSam.doWork(),0);

        MergeBamAlignment mergeBamAlignment = new MergeBamAlignment();

        mergeBamAlignment.ALIGNED_BAM = Collections.singletonList(file);
        mergeBamAlignment.UNMAPPED_BAM = fileUnaligned;
        mergeBamAlignment.UNMAP_CONTAMINANT_READS = true;

        //yuck!
        final RequiredReferenceArgumentCollection requiredReferenceArgumentCollection = new RequiredReferenceArgumentCollection();
        requiredReferenceArgumentCollection.REFERENCE_SEQUENCE = reference;
        mergeBamAlignment.referenceSequence = requiredReferenceArgumentCollection;

        final File fileMerged = newTempSamFile("merged");

        mergeBamAlignment.OUTPUT = fileMerged;

        // merge file with itself.
        Assert.assertEquals(mergeBamAlignment.doWork(),0);

        // check that all reads have been unmapped due to bacterial contamination as needed.
        try (SamReader mergedReader = SamReaderFactory.makeDefault().open(fileMerged)) {
            for (SAMRecord mergedRecord : mergedReader) {
                Assert.assertTrue(mergedRecord.getReadUnmappedFlag(), mergedRecord.toString());
                Assert.assertTrue(!mergedRecord.getReadPairedFlag() || mergedRecord.getMateUnmappedFlag(), mergedRecord.toString());
            }
        }
    }
 
Example 16
Source File: SortSam.java    From picard with MIT License 4 votes vote down vote up
public SAMFileHeader.SortOrder getSortOrder(){
    return SAMFileHeader.SortOrder.valueOf(this.name());
}
 
Example 17
Source File: MergeSamFiles.java    From picard with MIT License 4 votes vote down vote up
/** Combines multiple SAM/BAM files into one. */
@Override
protected int doWork() {
    boolean matchedSortOrders = true;
    
    // read interval list if it is defined
    final List<Interval> intervalList = (INTERVALS == null ? null : IntervalList.fromFile(INTERVALS).uniqued().getIntervals() );
    // map reader->iterator used if INTERVALS is defined
    final Map<SamReader, CloseableIterator<SAMRecord> > samReaderToIterator = new HashMap<>(INPUT.size());

    final List<Path> inputPaths = IOUtil.getPaths(INPUT);

    // Open the files for reading and writing
    final List<SamReader> readers = new ArrayList<>();
    final List<SAMFileHeader> headers = new ArrayList<>();
    {
        SAMSequenceDictionary dict = null; // Used to try and reduce redundant SDs in memory

        for (final Path inFile : inputPaths) {
            IOUtil.assertFileIsReadable(inFile);
            final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(inFile);
            if (INTERVALS != null) {
                if (!in.hasIndex()) {
                    throw new PicardException("Merging with interval but BAM file is not indexed: " + inFile);
                }
                final CloseableIterator<SAMRecord> samIterator = new SamRecordIntervalIteratorFactory().makeSamRecordIntervalIterator(in, intervalList, true);
                samReaderToIterator.put(in, samIterator);
            }

            readers.add(in);
            headers.add(in.getFileHeader());

            // A slightly hackish attempt to keep memory consumption down when merging multiple files with
            // large sequence dictionaries (10,000s of sequences). If the dictionaries are identical, then
            // replace the duplicate copies with a single dictionary to reduce the memory footprint. 
            if (dict == null) {
                dict = in.getFileHeader().getSequenceDictionary();
            } else if (dict.equals(in.getFileHeader().getSequenceDictionary())) {
                in.getFileHeader().setSequenceDictionary(dict);
            }

            matchedSortOrders = matchedSortOrders && in.getFileHeader().getSortOrder() == SORT_ORDER;
        }
    }

    // If all the input sort orders match the output sort order then just merge them and
    // write on the fly, otherwise setup to merge and sort before writing out the final file
    IOUtil.assertFileIsWritable(OUTPUT);
    final boolean presorted;
    final SAMFileHeader.SortOrder headerMergerSortOrder;
    final boolean mergingSamRecordIteratorAssumeSorted;

    if (matchedSortOrders || SORT_ORDER == SAMFileHeader.SortOrder.unsorted || ASSUME_SORTED || INTERVALS != null ) {
        log.info("Input files are in same order as output so sorting to temp directory is not needed.");
        headerMergerSortOrder = SORT_ORDER;
        mergingSamRecordIteratorAssumeSorted = ASSUME_SORTED;
        presorted = true;
    } else {
        log.info("Sorting input files using temp directory " + TMP_DIR);
        headerMergerSortOrder = SAMFileHeader.SortOrder.unsorted;
        mergingSamRecordIteratorAssumeSorted = false;
        presorted = false;
    }
    final SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(headerMergerSortOrder, headers, MERGE_SEQUENCE_DICTIONARIES);
    final MergingSamRecordIterator iterator;
    // no interval defined, get an iterator for the whole bam
    if (intervalList == null) {
        iterator = new MergingSamRecordIterator(headerMerger, readers, mergingSamRecordIteratorAssumeSorted);
    } else {
        // show warning related to https://github.com/broadinstitute/picard/pull/314/files
        log.info("Warning: merged bams from different interval lists may contain the same read in both files");
        iterator = new MergingSamRecordIterator(headerMerger, samReaderToIterator, true);
    }
    final SAMFileHeader header = headerMerger.getMergedHeader();
    for (final String comment : COMMENT) {
        header.addComment(comment);
    }
    header.setSortOrder(SORT_ORDER);
    final SAMFileWriterFactory samFileWriterFactory = new SAMFileWriterFactory();
    if (USE_THREADING) {
        samFileWriterFactory.setUseAsyncIo(true);
    }
    final SAMFileWriter out = samFileWriterFactory.makeSAMOrBAMWriter(header, presorted, OUTPUT);

    // Lastly loop through and write out the records
    final ProgressLogger progress = new ProgressLogger(log, PROGRESS_INTERVAL);
    while (iterator.hasNext()) {
        final SAMRecord record = iterator.next();
        out.addAlignment(record);
        progress.record(record);
    }

    log.info("Finished reading inputs.");
    for(final CloseableIterator<SAMRecord> iter : samReaderToIterator.values())  CloserUtil.close(iter);
    CloserUtil.close(readers);
    out.close();
    return 0;
}
 
Example 18
Source File: RevertSam.java    From picard with MIT License 4 votes vote down vote up
static void validateSanitizeSortOrder(final boolean sanitize, final SAMFileHeader.SortOrder sortOrder, final List<String> errors) {
    if (sanitize && sortOrder != SAMFileHeader.SortOrder.queryname) {
        errors.add("SORT_ORDER must be queryname when sanitization is enabled with SANITIZE=true.");
    }
}
 
Example 19
Source File: MarkIlluminaAdapters.java    From picard with MIT License 4 votes vote down vote up
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(METRICS);

    final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
    final SAMFileHeader.SortOrder order = in.getFileHeader().getSortOrder();
    SAMFileWriter out = null;
    if (OUTPUT != null) {
        IOUtil.assertFileIsWritable(OUTPUT);
        out = new SAMFileWriterFactory().makeSAMOrBAMWriter(in.getFileHeader(), true, OUTPUT);
    }

    final Histogram<Integer> histo = new Histogram<Integer>("clipped_bases", "read_count");

    // Combine any adapters and custom adapter pairs from the command line into an array for use in clipping
    final AdapterPair[] adapters;
    {
        final List<AdapterPair> tmp = new ArrayList<AdapterPair>();
        tmp.addAll(ADAPTERS);
        if (FIVE_PRIME_ADAPTER != null && THREE_PRIME_ADAPTER != null) {
            tmp.add(new CustomAdapterPair(FIVE_PRIME_ADAPTER, THREE_PRIME_ADAPTER));
        }
        adapters = tmp.toArray(new AdapterPair[tmp.size()]);
    }

    ////////////////////////////////////////////////////////////////////////
    // Main loop that consumes reads, clips them and writes them to the output
    ////////////////////////////////////////////////////////////////////////
    final ProgressLogger progress = new ProgressLogger(log, 1000000, "Read");
    final SAMRecordIterator iterator = in.iterator();

    final AdapterMarker adapterMarker = new AdapterMarker(ADAPTER_TRUNCATION_LENGTH, adapters).
            setMaxPairErrorRate(MAX_ERROR_RATE_PE).setMinPairMatchBases(MIN_MATCH_BASES_PE).
            setMaxSingleEndErrorRate(MAX_ERROR_RATE_SE).setMinSingleEndMatchBases(MIN_MATCH_BASES_SE).
            setNumAdaptersToKeep(NUM_ADAPTERS_TO_KEEP).
            setThresholdForSelectingAdaptersToKeep(PRUNE_ADAPTER_LIST_AFTER_THIS_MANY_ADAPTERS_SEEN);

    while (iterator.hasNext()) {
        final SAMRecord rec = iterator.next();
        final SAMRecord rec2 = rec.getReadPairedFlag() && iterator.hasNext() ? iterator.next() : null;
        rec.setAttribute(ReservedTagConstants.XT, null);

        // Do the clipping one way for PE and another for SE reads
        if (rec.getReadPairedFlag()) {
            // Assert that the input file is in query name order only if we see some PE reads
            if (order != SAMFileHeader.SortOrder.queryname) {
                throw new PicardException("Input BAM file must be sorted by queryname");
            }

            if (rec2 == null) throw new PicardException("Missing mate pair for paired read: " + rec.getReadName());
            rec2.setAttribute(ReservedTagConstants.XT, null);

            // Assert that we did in fact just get two mate pairs
            if (!rec.getReadName().equals(rec2.getReadName())) {
                throw new PicardException("Adjacent reads expected to be mate-pairs have different names: " +
                        rec.getReadName() + ", " + rec2.getReadName());
            }

            // establish which of pair is first and which second
            final SAMRecord first, second;

            if (rec.getFirstOfPairFlag() && rec2.getSecondOfPairFlag()) {
                first = rec;
                second = rec2;
            } else if (rec.getSecondOfPairFlag() && rec2.getFirstOfPairFlag()) {
                first = rec2;
                second = rec;
            } else {
                throw new PicardException("Two reads with same name but not correctly marked as 1st/2nd of pair: " + rec.getReadName());
            }

            adapterMarker.adapterTrimIlluminaPairedReads(first, second);
        } else {
            adapterMarker.adapterTrimIlluminaSingleRead(rec);
        }

        // Then output the records, update progress and metrics
        for (final SAMRecord r : new SAMRecord[]{rec, rec2}) {
            if (r != null) {
                progress.record(r);
                if (out != null) out.addAlignment(r);

                final Integer clip = r.getIntegerAttribute(ReservedTagConstants.XT);
                if (clip != null) histo.increment(r.getReadLength() - clip + 1);
            }
        }
    }

    if (out != null) out.close();

    // Lastly output the metrics to file
    final MetricsFile<?, Integer> metricsFile = getMetricsFile();
    metricsFile.setHistogram(histo);
    metricsFile.write(METRICS);

    CloserUtil.close(in);
    return 0;
}
 
Example 20
Source File: ExampleCollectSingleMetricsSpark.java    From gatk with BSD 3-Clause "New" or "Revised" License 2 votes vote down vote up
/**
 * Return the sort order required/expected by this collector.
 * @return SAMFileHeader.SortOrder for this collector
 */
@Override
protected SAMFileHeader.SortOrder getExpectedSortOrder() { return exampleSingleCollector.getExpectedSortOrder(); }