Java Code Examples for htsjdk.samtools.SAMFileWriter#close()

The following examples show how to use htsjdk.samtools.SAMFileWriter#close(). Each example notes its source file, originating project, and license above the code.
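
Before the project examples, here is a minimal sketch of the pattern they all share: obtain a writer from a SAMFileWriterFactory, add records, and always call close() so that buffered records are flushed and the output file is finalized (an unclosed writer typically leaves a truncated BAM). The class name and file paths below are illustrative placeholders, not taken from any of the projects that follow.

import htsjdk.samtools.*;

import java.io.File;
import java.io.IOException;

public class SamFileWriterCloseSketch {
    public static void main(String[] args) throws IOException {
        // Illustrative paths; substitute real files.
        final File input = new File("input.bam");
        final File output = new File("output.bam");

        try (SamReader reader = SamReaderFactory.makeDefault().open(input)) {
            final SAMFileHeader header = reader.getFileHeader();
            // presorted = true: records are assumed to already match the header's sort order
            final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(header, true, output);
            try {
                for (final SAMRecord rec : reader) {
                    writer.addAlignment(rec);
                }
            } finally {
                // close() flushes buffered records and finalizes the output file.
                writer.close();
            }
        }
    }
}
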
Example 1
Source File: PreprocessingTools.java    From halvade with GNU General Public License v3.0
public int callElPrep(String input, String output, String rg, int threads, 
        SAMRecordIterator SAMit,
        SAMFileHeader header, String dictFile, boolean updateRG, boolean keepDups, String RGID) throws InterruptedException, QualityException {
    
    SAMRecord sam;
    SAMFileWriterFactory factory = new SAMFileWriterFactory();
    SAMFileWriter Swriter = factory.makeSAMWriter(header, true, new File(input));
    
    int reads = 0;
    while(SAMit.hasNext()) {
        sam = SAMit.next();
        if(updateRG)
            sam.setAttribute(SAMTag.RG.name(), RGID);
        Swriter.addAlignment(sam);
        reads++;
    }
    Swriter.close();
    
    String customArgs = HalvadeConf.getCustomArgs(context.getConfiguration(), "elprep", "");  
    String[] command = CommandGenerator.elPrep(bin, input, output, threads, true, rg, null, !keepDups, customArgs);
    long estimatedTime = runProcessAndWait("elPrep", command);
    if(context != null)
        context.getCounter(HalvadeCounters.TIME_ELPREP).increment(estimatedTime);
    
    return reads;
}
 
Example 2
Source File: SortSam.java    From picard with MIT License
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
    reader.getFileHeader().setSortOrder(SORT_ORDER.getSortOrder());
    final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(reader.getFileHeader(), false, OUTPUT);
    writer.setProgressLogger(
            new ProgressLogger(log, (int) 1e7, "Wrote", "records from a sorting collection"));

    final ProgressLogger progress = new ProgressLogger(log, (int) 1e7, "Read");
    for (final SAMRecord rec : reader) {
        writer.addAlignment(rec);
        progress.record(rec);
    }

    log.info("Finished reading inputs, merging and writing to output now.");

    CloserUtil.close(reader);
    writer.close();
    return 0;
}
 
Example 3
Source File: GatherBamFiles.java    From picard with MIT License
/**
 * Simple implementation of a gather operation that uses SAMFileReaders and Writers in order to concatenate
 * multiple BAM files.
 */
private static void gatherNormally(final List<File> inputs, final File output, final boolean createIndex, final boolean createMd5,
                                   final File referenceFasta) {
    final SAMFileHeader header;
    {
        header = SamReaderFactory.makeDefault().referenceSequence(referenceFasta).getFileHeader(inputs.get(0));
    }

    final SAMFileWriter out = new SAMFileWriterFactory().setCreateIndex(createIndex).setCreateMd5File(createMd5).makeSAMOrBAMWriter(header, true, output);

    for (final File f : inputs) {
        log.info("Gathering " + f.getAbsolutePath());
        final SamReader in = SamReaderFactory.makeDefault().referenceSequence(referenceFasta).open(f);
        for (final SAMRecord rec : in) out.addAlignment(rec);
        CloserUtil.close(in);
    }

    out.close();
}
 
Example 4
Source File: BamSlicerApplication.java    From hmftools with GNU General Public License v3.0
private static void sliceFromVCF(@NotNull CommandLine cmd) throws IOException {
    String inputPath = cmd.getOptionValue(INPUT);
    String vcfPath = cmd.getOptionValue(VCF);
    int proximity = Integer.parseInt(cmd.getOptionValue(PROXIMITY, "500"));

    SamReaderFactory readerFactory = createFromCommandLine(cmd);
    SamReader reader = readerFactory.open(new File(inputPath));

    QueryInterval[] intervals = getIntervalsFromVCF(vcfPath, reader.getFileHeader(), proximity);
    CloseableIterator<SAMRecord> iterator = reader.queryOverlapping(intervals);
    SAMFileWriter writer = new SAMFileWriterFactory().setCreateIndex(true)
            .makeBAMWriter(reader.getFileHeader(), true, new File(cmd.getOptionValue(OUTPUT)));

    writeToSlice(writer, iterator);

    writer.close();
    reader.close();
}
 
Example 5
Source File: CollectGcBiasMetricsTest.java    From picard with MIT License
public File build (final List<SAMRecordSetBuilder> setBuilder, final File unsortedSam, final SAMFileHeader header) throws IOException {
    final File sortedSam = VcfTestUtils.createTemporaryIndexedFile("CollectGcBias", ".bam");

    final SAMFileWriter writer = new SAMFileWriterFactory()
            .setCreateIndex(true).makeBAMWriter(header, false, unsortedSam);

    for (final SAMRecordSetBuilder subSetBuilder : setBuilder) {
        for (final SAMRecord record : subSetBuilder) {
            writer.addAlignment(record);
        }
    }
    writer.close();

    final SortSam sorter = new SortSam();
    final String[] args = new String[] {
            "INPUT=" + unsortedSam.getAbsolutePath(),
            "OUTPUT=" + sortedSam.getAbsolutePath(),
            "SORT_ORDER=coordinate"
    };

    sorter.instanceMain(args);

    return sortedSam;
}
 
Example 6
Source File: TagReadWithGeneExonFunction.java    From Drop-seq with MIT License
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(this.INPUT);
    IOUtil.assertFileIsReadable(this.ANNOTATIONS_FILE);
    if (this.SUMMARY!=null) IOUtil.assertFileIsWritable(this.SUMMARY);
    IOUtil.assertFileIsWritable(this.OUTPUT);

    SamReader inputSam = SamReaderFactory.makeDefault().open(INPUT);

    SAMFileHeader header = inputSam.getFileHeader();
    SamHeaderUtil.addPgRecord(header, this);
    SAMSequenceDictionary bamDict = header.getSequenceDictionary();

    final OverlapDetector<Gene> geneOverlapDetector = GeneAnnotationReader.loadAnnotationsFile(ANNOTATIONS_FILE, bamDict);
    SAMFileWriter writer= new SAMFileWriterFactory().makeSAMOrBAMWriter(header, true, OUTPUT);

    for (SAMRecord r: inputSam) {
        pl.record(r);

        if (!r.getReadUnmappedFlag())
            r = setAnnotations(r, geneOverlapDetector);
        writer.addAlignment(r);
    }

    CloserUtil.close(inputSam);
    writer.close();
    if (this.USE_STRAND_INFO) log.info(this.metrics.toString());
    if (SUMMARY==null) return 0;

    //process summary
    MetricsFile<ReadTaggingMetric, Integer> outFile = new MetricsFile<>();
    outFile.addMetric(this.metrics);
    outFile.write(this.SUMMARY);
    return 0;

}
 
Example 7
Source File: CleanSam.java    From picard with MIT License
/**
 * Do the work after command line has been parsed.
 * RuntimeException may be thrown by this method, and are reported appropriately.
 *
 * @return program exit status.
 */
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    final SamReaderFactory factory = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE);
    if (VALIDATION_STRINGENCY == ValidationStringency.STRICT) {
        factory.validationStringency(ValidationStringency.LENIENT);
    }
    final SamReader reader = factory.open(INPUT);
    final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(reader.getFileHeader(), true, OUTPUT);
    final CloseableIterator<SAMRecord> it = reader.iterator();
    final ProgressLogger progress = new ProgressLogger(Log.getInstance(CleanSam.class));

    while (it.hasNext()) {
        final SAMRecord rec = it.next();

        // If the read (or its mate) maps off the end of the alignment, clip it
        AbstractAlignmentMerger.createNewCigarsIfMapsOffEndOfReference(rec);

        // if the read is unmapped but still carries a non-zero mapping quality, reset it to zero
        if (rec.getReadUnmappedFlag() && 0 != rec.getMappingQuality()) {
            rec.setMappingQuality(0);
        }

        writer.addAlignment(rec);
        progress.record(rec);
    }

    writer.close();
    it.close();
    CloserUtil.close(reader);
    return 0;
}
 
Example 8
Source File: BAMTestUtil.java    From Hadoop-BAM with MIT License
public static File writeBamFileWithLargeHeader() throws IOException {
  SAMRecordSetBuilder samRecordSetBuilder =
      new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.queryname);
  for (int i = 0; i < 1000; i++) {
    int chr = 20;
    int start1 = (i + 1) * 1000;
    int start2 = start1 + 100;
    samRecordSetBuilder.addPair(String.format("test-read-%03d", i), chr, start1,
        start2);
  }

  final File bamFile = File.createTempFile("test", ".bam");
  bamFile.deleteOnExit();
  SAMFileHeader samHeader = samRecordSetBuilder.getHeader();
  StringBuffer sb = new StringBuffer();
  for (int i = 0; i < 1000000; i++) {
    sb.append("0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789");
  }
  samHeader.addComment(sb.toString());
  final SAMFileWriter bamWriter = new SAMFileWriterFactory()
      .makeSAMOrBAMWriter(samHeader, true, bamFile);
  for (final SAMRecord rec : samRecordSetBuilder.getRecords()) {
    bamWriter.addAlignment(rec);
  }
  bamWriter.close();

  return bamFile;
}
 
Example 9
Source File: SortedSAMWriter.java    From abra2 with MIT License
public void outputFinal(int sampleIdx, String inputBam) throws IOException {
	
	Logger.info("Finishing: " + outputFiles[sampleIdx]);
	
	SAMRecord[] readsByNameArray = null;
	SAMRecord[] readsByCoordArray = null;

	if (shouldSort) {
		
		// Initialize internal read buffers used by SortingCollection2
		// These are initialized once per thread and re-used each time
		// a SortingCollection2 is initialized.
		readsByNameArray = new SAMRecord[maxRecordsInRam];
		readsByCoordArray = new SAMRecord[maxRecordsInRam];
		
		// Only allow buffering if sorting
		writerFactory.setUseAsyncIo(true);
		writerFactory.setAsyncOutputBufferSize(ASYNC_READ_CACHE_SIZE);
		writerFactory.setCreateIndex(shouldCreateIndex);
	} else {
		writerFactory.setUseAsyncIo(false);
	}
	
	writerFactory.setCompressionLevel(finalCompressionLevel);
	if (shouldSort) {
		samHeaders[sampleIdx].setSortOrder(SortOrder.coordinate);
	} else {
		samHeaders[sampleIdx].setSortOrder(SortOrder.unsorted);
	}
	SAMFileWriter output = writerFactory.makeBAMWriter(samHeaders[sampleIdx], true, new File(outputFiles[sampleIdx]), finalCompressionLevel);
	
	for (String chromosome : chromosomeChunker.getChromosomes()) {
		processChromosome(output, sampleIdx, chromosome, readsByNameArray, readsByCoordArray);
	}
	
	processUnmapped(output, inputBam);
	
	output.close();
}
 
Example 10
Source File: SamBamUtils.java    From chipster with MIT License
private static void closeIfPossible(SAMFileWriter writer) {
	if (writer != null) {
		try {
			writer.close();
		} catch (Exception e) {
			// Ignore
		}
	}
}
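
As a side note (not part of the chipster source above), SAMFileWriter extends java.io.Closeable in current htsjdk releases, so when the writer's lifetime fits a single scope a try-with-resources block can stand in for a helper like closeIfPossible. Below is a minimal sketch under that assumption; the helper name is hypothetical and the header, records, and output path are supplied by the caller.

import htsjdk.samtools.*;

import java.io.File;
import java.util.List;

class TryWithResourcesCloseSketch {
    // Hypothetical helper: header, records and output come from the caller.
    static void writeAll(SAMFileHeader header, List<SAMRecord> records, File output) {
        try (SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(header, true, output)) {
            for (SAMRecord rec : records) {
                writer.addAlignment(rec);
            }
        } // close() runs automatically here, even if addAlignment throws
    }
}
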
 
Example 11
Source File: FilterBamByTag.java    From Drop-seq with MIT License
/**
 * Filters reads in paired mode: mates are kept or rejected together, based on the configured tag filter.
 * @param in reader supplying the reads (iterated in query-name order)
 * @param out writer that accepted reads are written to
 * @param values set of tag values to filter on
 */
void processPairedMode (final SamReader in, final SAMFileWriter out, final Set<String> values, final File summaryFile, Integer mapQuality, boolean allowPartial) {
	ProgressLogger progLog = new ProgressLogger(log);
	FilteredReadsMetric m = new FilteredReadsMetric();
	
	PeekableIterator<SAMRecord> iter = new PeekableIterator<>(CustomBAMIterators.getQuerynameSortedRecords(in));
	while (iter.hasNext()) {
		SAMRecord r1 = iter.next();
		progLog.record(r1);
		boolean filterFlag1 = filterRead(r1, this.TAG, values, this.ACCEPT_TAG, mapQuality, allowPartial);
		
		SAMRecord r2 = null;
		if (iter.hasNext()) r2 = iter.peek();
		// check for r2 being null in case the last read is unpaired.
		if (r2!=null && r1.getReadName().equals(r2.getReadName())) {
			// paired read found.
			progLog.record(r2);
			r2=iter.next();
			boolean filterFlag2 = filterRead(r2, this.TAG, values, this.ACCEPT_TAG, mapQuality, allowPartial);
			// in accept-tag mode, accept the pair if either read is not filtered out;
			// in reject mode, accept the pair only if neither read is filtered out.
			if ((!filterFlag1 || !filterFlag2 & this.ACCEPT_TAG) || (!filterFlag1 && !filterFlag2 & !this.ACCEPT_TAG)) {
				out.addAlignment(r1);
				out.addAlignment(r2);
				m.READS_ACCEPTED+=2;
			} else 
				m.READS_REJECTED+=2;
		} else if (!filterFlag1) {
			out.addAlignment(r1);
			m.READS_ACCEPTED++;
		} else {
			m.READS_REJECTED++;
		}
	}
	CloserUtil.close(in);
	writeSummary(summaryFile, m);
	out.close();
	reportAndCheckFilterResults(m);
}
 
Example 12
Source File: Evidence2SAM.java    From cramtools with Apache License 2.0
public static void main(String[] args) throws IOException, ParseException {
	EvidenceRecordFileIterator iterator = new EvidenceRecordFileIterator(new File(args[0]));
	Read context = new Read();
	SAMFileHeader header = new SAMFileHeader();
	header.setSortOrder(SAMFileHeader.SortOrder.unsorted);
	SAMSequenceRecord samSequenceRecord = new SAMSequenceRecord("chr10", 135534747);
	samSequenceRecord.setAttribute(SAMSequenceRecord.ASSEMBLY_TAG, iterator.assembly_ID);
	String readGroup = String.format("%s-%s", iterator.assembly_ID, iterator.chromosome);
	SAMReadGroupRecord readGroupRecord = new SAMReadGroupRecord(readGroup);
	readGroupRecord.setAttribute(SAMReadGroupRecord.READ_GROUP_SAMPLE_TAG, iterator.sample);
	readGroupRecord.setAttribute(SAMReadGroupRecord.PLATFORM_UNIT_TAG, readGroup);
	readGroupRecord.setAttribute(SAMReadGroupRecord.SEQUENCING_CENTER_TAG, "\"Complete Genomics\"");
	Date date = new SimpleDateFormat("yyyy-MMM-dd hh:mm:ss.S").parse(iterator.generatedAt);
	readGroupRecord.setAttribute(SAMReadGroupRecord.DATE_RUN_PRODUCED_TAG,
			new SimpleDateFormat("yyyy-MM-dd").format(date));
	readGroupRecord.setAttribute(SAMReadGroupRecord.PLATFORM_TAG, "\"Complete Genomics\"");
	header.addReadGroup(readGroupRecord);

	header.addSequence(samSequenceRecord);

	SAMFileWriterFactory f = new SAMFileWriterFactory();
	SAMFileWriter samWriter;
	if (args.length > 1)
		samWriter = f.makeBAMWriter(header, false, new File(args[1]));
	else
		samWriter = f.makeSAMWriter(header, false, System.out);
	int i = 0;
	long time = System.currentTimeMillis();
	DedupIterator dedupIt = new DedupIterator(iterator);

	while (dedupIt.hasNext()) {
		EvidenceRecord evidenceRecord = dedupIt.next();
		if (evidenceRecord == null)
			throw new RuntimeException();
		try {
			context.reset(evidenceRecord);
			context.parse();
		} catch (Exception e) {
			System.err.println("Failed on line:");
			System.err.println(evidenceRecord.line);
			throw new RuntimeException(e);
		}

		SAMRecord[] samRecords = context.toSAMRecord(header);
		for (SAMRecord samRecord : samRecords) {
			samRecord.setAttribute(SAMTag.RG.name(), readGroup);
			samWriter.addAlignment(samRecord);
		}

		i++;
		if (i % 1000 == 0) {
			if (System.currentTimeMillis() - time > 10 * 1000) {
				time = System.currentTimeMillis();
				System.err.println(i);
			}
		}

		if (i > 10000)
			break;
	}
	samWriter.close();
}
 
Example 13
Source File: CollapseTagWithContext.java    From Drop-seq with MIT License
@Override
protected int doWork() {
	int vc = validateCommands();
	if (vc>0) return vc;

	if (this.COUNT_TAGS_EDIT_DISTANCE>0) this.medUMI = new MapBarcodesByEditDistance(false);

	med = new MapBarcodesByEditDistance(false, this.NUM_THREADS, 0);
	
	PrintStream outMetrics = null;
	if (this.ADAPTIVE_ED_METRICS_FILE!=null) {
		outMetrics = new ErrorCheckingPrintStream(IOUtil.openFileForWriting(this.ADAPTIVE_ED_METRICS_FILE));
		writeAdaptiveEditDistanceMetricsHeader(this.ADAPTIVE_ED_METRICS_ED_LIST, outMetrics);
	}
	
	if (this.MUTATIONAL_COLLAPSE_METRICS_FILE!=null) {
		med = new MapBarcodesByEditDistance(true, this.NUM_THREADS, 1000);
		outMetrics = new ErrorCheckingPrintStream(IOUtil.openFileForWriting(this.MUTATIONAL_COLLAPSE_METRICS_FILE));
		writeMutationalCollapseMetricsHeader(this.ADAPTIVE_ED_METRICS_ED_LIST, outMetrics);
	}

	IOUtil.assertFileIsReadable(INPUT);
	IOUtil.assertFileIsWritable(OUTPUT);

	SamReader reader = SamReaderFactory.makeDefault().open(INPUT);
	SAMFileHeader header = reader.getFileHeader();
	SortOrder sortOrder = header.getSortOrder();

	SAMFileWriter writer = getWriter(reader);
	final ObjectSink<SAMRecord> recSink = new SamWriterSink(writer);

	PeekableGroupingIterator<SAMRecord> groupingIter = orderReadsByTagsPeekable(reader, this.COLLAPSE_TAG, this.CONTEXT_TAGS, this.READ_MQ, this.OUT_TAG, recSink);

	log.info("Collapsing tag and writing results");

	if (!LOW_MEMORY_MODE)
		fasterIteration(groupingIter, writer, outMetrics);
	else
		lowMemoryIteration(groupingIter, writer, outMetrics, header);

	log.info("Re-sorting output BAM in " + sortOrder.toString() + " if necessary");
	CloserUtil.close(groupingIter);
	CloserUtil.close(reader);
	writer.close();
	if (outMetrics != null) CloserUtil.close(outMetrics);
	log.info("DONE");
	return 0;
}
 
Example 14
Source File: Transcode.java    From cramtools with Apache License 2.0
public static void main(String[] args) throws IOException {
	Params params = new Params();
	JCommander jc = new JCommander(params);
	jc.parse(args);

	Log.setGlobalLogLevel(params.logLevel);

	if (args.length == 0 || params.help) {
		usage(jc);
		System.exit(1);
	}

	if (params.reference == null) {
		System.out.println("Reference file not found, will try downloading...");
	}

	ReferenceSource referenceSource = null;
	if (params.reference != null) {
		System.setProperty("reference", params.reference.getAbsolutePath());
		referenceSource = new ReferenceSource(params.reference);
	} else {
		String prop = System.getProperty("reference");
		if (prop != null) {
			referenceSource = new ReferenceSource(new File(prop));
		}
	}

	SamReaderFactory factory = SamReaderFactory.make().validationStringency(params.validationLevel);
	SamInputResource r;
	if ("file".equalsIgnoreCase(params.url.getProtocol()))
		r = SamInputResource.of(params.url.getPath());
	else
		r = SamInputResource.of(params.url);
	SamReader reader = factory.open(r);
	SAMRecordIterator iterator = reader.iterator();

	SAMFileWriterFactory writerFactory = new SAMFileWriterFactory();
	SAMFileWriter writer = null;
	OutputStream os = new BufferedOutputStream(new FileOutputStream(params.outputFile));
	switch (params.outputFormat) {
	case BAM:
		writer = writerFactory.makeBAMWriter(reader.getFileHeader(),
				reader.getFileHeader().getSortOrder() == SortOrder.coordinate, os);
		break;
	case CRAM:
		writer = writerFactory.makeCRAMWriter(reader.getFileHeader(), os, params.reference);
		break;

	default:
		System.out.println("Unknown output format: " + params.outputFormat);
		System.exit(1);
	}

	while (iterator.hasNext()) {
		writer.addAlignment(iterator.next());
	}
	writer.close();
	reader.close();
}
 
Example 15
Source File: FilterBam.java    From Drop-seq with MIT License
@Override
protected int doWork() {
	IOUtil.assertFileIsReadable(INPUT);
	IOUtil.assertFileIsWritable(OUTPUT);
	buildPatterns();

	SamReader in = SamReaderFactory.makeDefault().open(INPUT);

	SAMFileHeader fileHeader = editSequenceDictionary(in.getFileHeader().clone());
	SamHeaderUtil.addPgRecord(fileHeader, this);
	SAMFileWriter out = new SAMFileWriterFactory().makeSAMOrBAMWriter(fileHeader, true, OUTPUT);
	ProgressLogger progLog=new ProgressLogger(log);

	final boolean sequencesRemoved = fileHeader.getSequenceDictionary().getSequences().size() != in.getFileHeader().getSequenceDictionary().getSequences().size();
	
	FilteredReadsMetric m = new FilteredReadsMetric();
	
	for (final SAMRecord r : in) {
		progLog.record(r);
		if (!filterRead(r)) {
			String sequenceName = stripReferencePrefix(r.getReferenceName());
			String mateSequenceName = null;
			if (r.getMateReferenceIndex() != -1)
				mateSequenceName = stripReferencePrefix(r.getMateReferenceName());
			if (sequencesRemoved || sequenceName != null) {
				if (sequenceName == null)
					sequenceName = r.getReferenceName();
				// Even if sequence name has not been edited, if sequences have been removed, need to set
				// reference name again to invalidate reference index cache.
				r.setReferenceName(sequenceName);
			}
			if (r.getMateReferenceIndex() != -1 && (sequencesRemoved || mateSequenceName != null)) {
				if (mateSequenceName == null)
					mateSequenceName = r.getMateReferenceName();
				// It's possible that the mate was mapped to a reference sequence that has been dropped from
				// the sequence dictionary.  If so, set the mate to be unmapped.
				if (fileHeader.getSequenceDictionary().getSequence(mateSequenceName) != null)
					r.setMateReferenceName(mateSequenceName);
				else {
					r.setMateUnmappedFlag(true);
					r.setMateReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
					r.setMateAlignmentStart(0);
				}
			}
			out.addAlignment(r);
			m.READS_ACCEPTED++;
		} else {
			m.READS_REJECTED++;
		}
	}
	// write the summary if the summary file is not null.
	writeSummary(this.SUMMARY, m);
	CloserUtil.close(in);
	out.close();
	FilterProgramUtils.reportAndCheckFilterResults("reads", m.READS_ACCEPTED, m.READS_REJECTED,
			PASSING_READ_THRESHOLD, log);
	return (0);
}
 
Example 16
Source File: MergeBamAlignmentTest.java    From picard with MIT License
private void testBestFragmentMapqStrategy(final String testName, final int[] firstMapQs, final int[] secondMapQs,
                                          final boolean includeSecondary, final int expectedFirstMapq,
                                          final int expectedSecondMapq) throws Exception {
    final File unmappedSam = File.createTempFile("unmapped.", ".sam");
    unmappedSam.deleteOnExit();
    final SAMFileWriterFactory factory = new SAMFileWriterFactory();
    final SAMFileHeader header = new SAMFileHeader();
    header.setSortOrder(SAMFileHeader.SortOrder.queryname);

    final String readName = "theRead";
    final SAMRecord firstUnmappedRead = new SAMRecord(header);
    firstUnmappedRead.setReadName(readName);
    firstUnmappedRead.setReadString("ACGTACGTACGTACGT");
    firstUnmappedRead.setBaseQualityString("5555555555555555");
    firstUnmappedRead.setReadUnmappedFlag(true);
    firstUnmappedRead.setMateUnmappedFlag(true);
    firstUnmappedRead.setReadPairedFlag(true);
    firstUnmappedRead.setFirstOfPairFlag(true);

    final SAMRecord secondUnmappedRead = new SAMRecord(header);
    secondUnmappedRead.setReadName(readName);
    secondUnmappedRead.setReadString("TCGAACGTTCGAACTG");
    secondUnmappedRead.setBaseQualityString("6666666666666666");
    secondUnmappedRead.setReadUnmappedFlag(true);
    secondUnmappedRead.setMateUnmappedFlag(true);
    secondUnmappedRead.setReadPairedFlag(true);
    secondUnmappedRead.setSecondOfPairFlag(true);

    final SAMFileWriter unmappedWriter = factory.makeSAMWriter(header, false, unmappedSam);
    unmappedWriter.addAlignment(firstUnmappedRead);
    unmappedWriter.addAlignment(secondUnmappedRead);
    unmappedWriter.close();

    final File alignedSam = File.createTempFile("aligned.", ".sam");
    alignedSam.deleteOnExit();

    final String sequence = "chr1";
    // Populate the header with SAMSequenceRecords
    header.setSequenceDictionary(SAMSequenceDictionaryExtractor.extractDictionary(sequenceDict2.toPath()));

    final SAMFileWriter alignedWriter = factory.makeSAMWriter(header, false, alignedSam);

    addAlignmentsForBestFragmentMapqStrategy(alignedWriter, firstUnmappedRead, sequence, firstMapQs);
    addAlignmentsForBestFragmentMapqStrategy(alignedWriter, secondUnmappedRead, sequence, secondMapQs);
    alignedWriter.close();

    final File output = File.createTempFile("testBestFragmentMapqStrategy." + testName, ".sam");
    output.deleteOnExit();
    doMergeAlignment(unmappedSam, Collections.singletonList(alignedSam),
            null, null, null, null,
            false, true, false, 1,
            "0", "1.0", "align!", "myAligner",
            true,
            fasta, output,
            SamPairUtil.PairOrientation.FR,
            MergeBamAlignment.PrimaryAlignmentStrategy.BestEndMapq,
            null, includeSecondary, null, null);
    final SamReader reader = SamReaderFactory.makeDefault().open(output);

    int numFirstRecords = 0;
    int numSecondRecords = 0;
    int firstPrimaryMapq = -1;
    int secondPrimaryMapq = -1;
    for (final SAMRecord rec: reader) {
        Assert.assertTrue(rec.getReadPairedFlag());
        if (rec.getFirstOfPairFlag()) ++numFirstRecords;
        else if (rec.getSecondOfPairFlag()) ++ numSecondRecords;
        else Assert.fail("unpossible!");
        if (!rec.getReadUnmappedFlag() && !rec.getNotPrimaryAlignmentFlag()) {
            if (rec.getFirstOfPairFlag()) {
                Assert.assertEquals(firstPrimaryMapq, -1);
                firstPrimaryMapq = rec.getMappingQuality();
            } else {
                Assert.assertEquals(secondPrimaryMapq, -1);
                secondPrimaryMapq = rec.getMappingQuality();
            }
        } else if (rec.getNotPrimaryAlignmentFlag()) {
            Assert.assertTrue(rec.getMateUnmappedFlag());
        }
    }
    reader.close();
    Assert.assertEquals(firstPrimaryMapq, expectedFirstMapq);
    Assert.assertEquals(secondPrimaryMapq, expectedSecondMapq);
    if (!includeSecondary) {
        Assert.assertEquals(numFirstRecords, 1);
        Assert.assertEquals(numSecondRecords, 1);
    } else {
        // If no alignments for an end, there will be a single unmapped record
        Assert.assertEquals(numFirstRecords, Math.max(1, firstMapQs.length));
        Assert.assertEquals(numSecondRecords, Math.max(1, secondMapQs.length));
    }
}
 
Example 17
Source File: MergeSamFiles.java    From picard with MIT License
/** Combines multiple SAM/BAM files into one. */
@Override
protected int doWork() {
    boolean matchedSortOrders = true;
    
    // read interval list if it is defined
    final List<Interval> intervalList = (INTERVALS == null ? null : IntervalList.fromFile(INTERVALS).uniqued().getIntervals() );
    // map reader->iterator used if INTERVALS is defined
    final Map<SamReader, CloseableIterator<SAMRecord> > samReaderToIterator = new HashMap<>(INPUT.size());

    final List<Path> inputPaths = IOUtil.getPaths(INPUT);

    // Open the files for reading and writing
    final List<SamReader> readers = new ArrayList<>();
    final List<SAMFileHeader> headers = new ArrayList<>();
    {
        SAMSequenceDictionary dict = null; // Used to try and reduce redundant SDs in memory

        for (final Path inFile : inputPaths) {
            IOUtil.assertFileIsReadable(inFile);
            final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(inFile);
            if (INTERVALS != null) {
                if (!in.hasIndex()) {
                    throw new PicardException("Merging with interval but BAM file is not indexed: " + inFile);
                }
                final CloseableIterator<SAMRecord> samIterator = new SamRecordIntervalIteratorFactory().makeSamRecordIntervalIterator(in, intervalList, true);
                samReaderToIterator.put(in, samIterator);
            }

            readers.add(in);
            headers.add(in.getFileHeader());

            // A slightly hackish attempt to keep memory consumption down when merging multiple files with
            // large sequence dictionaries (10,000s of sequences). If the dictionaries are identical, then
            // replace the duplicate copies with a single dictionary to reduce the memory footprint. 
            if (dict == null) {
                dict = in.getFileHeader().getSequenceDictionary();
            } else if (dict.equals(in.getFileHeader().getSequenceDictionary())) {
                in.getFileHeader().setSequenceDictionary(dict);
            }

            matchedSortOrders = matchedSortOrders && in.getFileHeader().getSortOrder() == SORT_ORDER;
        }
    }

    // If all the input sort orders match the output sort order then just merge them and
    // write on the fly, otherwise setup to merge and sort before writing out the final file
    IOUtil.assertFileIsWritable(OUTPUT);
    final boolean presorted;
    final SAMFileHeader.SortOrder headerMergerSortOrder;
    final boolean mergingSamRecordIteratorAssumeSorted;

    if (matchedSortOrders || SORT_ORDER == SAMFileHeader.SortOrder.unsorted || ASSUME_SORTED || INTERVALS != null ) {
        log.info("Input files are in same order as output so sorting to temp directory is not needed.");
        headerMergerSortOrder = SORT_ORDER;
        mergingSamRecordIteratorAssumeSorted = ASSUME_SORTED;
        presorted = true;
    } else {
        log.info("Sorting input files using temp directory " + TMP_DIR);
        headerMergerSortOrder = SAMFileHeader.SortOrder.unsorted;
        mergingSamRecordIteratorAssumeSorted = false;
        presorted = false;
    }
    final SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(headerMergerSortOrder, headers, MERGE_SEQUENCE_DICTIONARIES);
    final MergingSamRecordIterator iterator;
    // no interval defined, get an iterator for the whole bam
    if (intervalList == null) {
        iterator = new MergingSamRecordIterator(headerMerger, readers, mergingSamRecordIteratorAssumeSorted);
    } else {
        // show warning related to https://github.com/broadinstitute/picard/pull/314/files
        log.info("Warning: merged bams from different interval lists may contain the same read in both files");
        iterator = new MergingSamRecordIterator(headerMerger, samReaderToIterator, true);
    }
    final SAMFileHeader header = headerMerger.getMergedHeader();
    for (final String comment : COMMENT) {
        header.addComment(comment);
    }
    header.setSortOrder(SORT_ORDER);
    final SAMFileWriterFactory samFileWriterFactory = new SAMFileWriterFactory();
    if (USE_THREADING) {
        samFileWriterFactory.setUseAsyncIo(true);
    }
    final SAMFileWriter out = samFileWriterFactory.makeSAMOrBAMWriter(header, presorted, OUTPUT);

    // Lastly loop through and write out the records
    final ProgressLogger progress = new ProgressLogger(log, PROGRESS_INTERVAL);
    while (iterator.hasNext()) {
        final SAMRecord record = iterator.next();
        out.addAlignment(record);
        progress.record(record);
    }

    log.info("Finished reading inputs.");
    for(final CloseableIterator<SAMRecord> iter : samReaderToIterator.values())  CloserUtil.close(iter);
    CloserUtil.close(readers);
    out.close();
    return 0;
}
 
Example 18
Source File: CollectInsertSizeMetricsTest.java    From picard with MIT License
@Test
public void testWidthOfMetrics() throws IOException {
    final File testSamFile = File.createTempFile("CollectInsertSizeMetrics", ".bam");
    testSamFile.deleteOnExit();
    final File testSamFileIndex = new File(testSamFile.getParentFile(),testSamFile.getName().replaceAll("bam$","bai"));
    testSamFileIndex.deleteOnExit();


    new File(testSamFile.getAbsolutePath().replace(".bam$",".bai")).deleteOnExit();

    final SAMRecordSetBuilder setBuilder = new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.coordinate, true);
    setBuilder.setReadLength(10);

    final int insertBy = 3; // the # of bases to increase the insert by in the records below.
    int queryIndex = 0;

    // Create records such that we have 10 records in the 10th through the 90th percentiles, and 5 for the 95th and 99th percentiles.
    // WIDTH_OF_10_PERCENT through WIDTH_OF_90_PERCENT (90 pairs total))
    for (int j = 0; j < 9; j++) {
        for (int i = 0; i < 5; i++, queryIndex++) {
            setBuilder.addPair("query:" + queryIndex, 0, 1, 50 + j*insertBy, false, false, "10M", "10M", false, true, 60);
        }
        for (int i = 0; i < 5; i++, queryIndex++) {
            setBuilder.addPair("query:" + queryIndex, 0, 1, 50 - j*insertBy, false, false, "10M", "10M", false, true, 60);
        }
    }
    // WIDTH_OF_95_PERCENT through WIDTH_OF_99_PERCENT (10 pairs total)
    for (int j = 9; j < 11; j++) {
        for (int i = 0; i < 3; i++, queryIndex++) {
            setBuilder.addPair("query:" + queryIndex, 0, 1, 50 + j*insertBy, false, false, "10M", "10M", false, true, 60);
        }
        for (int i = 0; i < 2; i++, queryIndex++) {
            setBuilder.addPair("query:" + queryIndex, 0, 1, 50 - j*insertBy, false, false, "10M", "10M", false, true, 60);
        }
    }

    // Add one to make an odd number of pairs for the median
    setBuilder.addPair("query:" + queryIndex, 0, 1, 50, false, false, "10M", "10M", false, true, 60);

    final SAMFileWriter writer = new SAMFileWriterFactory().setCreateIndex(true).makeBAMWriter(setBuilder.getHeader(), false, testSamFile);
    setBuilder.forEach(writer::addAlignment);
    writer.close();


    final File outfile = File.createTempFile("test", ".insert_size_metrics");
    final File pdf     = File.createTempFile("test", ".pdf");
    outfile.deleteOnExit();
    pdf.deleteOnExit();
    final String[] args = new String[] {
            "INPUT="  + testSamFile.getAbsolutePath(),
            "OUTPUT=" + outfile.getAbsolutePath(),
            "Histogram_FILE=" + pdf.getAbsolutePath()
    };
    Assert.assertEquals(runPicardCommandLine(args), 0);

    final MetricsFile<InsertSizeMetrics, Comparable<?>> output = new MetricsFile<InsertSizeMetrics, Comparable<?>>();
    output.read(new FileReader(outfile));
    
    final List<InsertSizeMetrics> metrics = output.getMetrics();
    
    Assert.assertEquals(metrics.size(), 1);
    
    final InsertSizeMetrics metric = metrics.get(0);

    Assert.assertEquals(metric.PAIR_ORIENTATION.name(), "FR");
    Assert.assertEquals((int) metric.MEDIAN_INSERT_SIZE, 59);
    Assert.assertEquals((int) metric.MODE_INSERT_SIZE, 59);
    Assert.assertEquals(metric.MIN_INSERT_SIZE, 29);
    Assert.assertEquals(metric.MAX_INSERT_SIZE, 89);
    Assert.assertEquals(metric.READ_PAIRS, 101);
    Assert.assertEquals(metric.WIDTH_OF_10_PERCENT, 1);
    Assert.assertEquals(metric.WIDTH_OF_20_PERCENT, 1 + insertBy*2);
    Assert.assertEquals(metric.WIDTH_OF_30_PERCENT, 1 + insertBy*4);
    Assert.assertEquals(metric.WIDTH_OF_40_PERCENT, 1 + insertBy*6);
    Assert.assertEquals(metric.WIDTH_OF_50_PERCENT, 1 + insertBy*8);
    Assert.assertEquals(metric.WIDTH_OF_60_PERCENT, 1 + insertBy*10);
    Assert.assertEquals(metric.WIDTH_OF_70_PERCENT, 1 + insertBy*12);
    Assert.assertEquals(metric.WIDTH_OF_80_PERCENT, 1 + insertBy*14);
    Assert.assertEquals(metric.WIDTH_OF_90_PERCENT, 1 + insertBy*16);
    Assert.assertEquals(metric.WIDTH_OF_95_PERCENT, 1 + insertBy*18);
    Assert.assertEquals(metric.WIDTH_OF_99_PERCENT, 1 + insertBy*20);
}
 
Example 19
Source File: MergeBamAlignmentTest.java    From picard with MIT License
/**
 * @return a 2-element array in which the first element is the unmapped SAM, and the second the mapped SAM.
 */
private File[] createSamFilesToBeMerged(final MultipleAlignmentSpec[] specs) {
    try {
        final File unmappedSam = File.createTempFile("unmapped.", ".sam");
        unmappedSam.deleteOnExit();
        final SAMFileWriterFactory factory = new SAMFileWriterFactory();
        final SAMFileHeader header = new SAMFileHeader();
        header.setSortOrder(SAMFileHeader.SortOrder.queryname);
        final SAMRecord unmappedRecord = new SAMRecord(header);

        unmappedRecord.setReadName("theRead");
        unmappedRecord.setReadString("ACGTACGTACGTACGT");
        unmappedRecord.setBaseQualityString("5555555555555555");
        unmappedRecord.setReadUnmappedFlag(true);

        final SAMFileWriter unmappedWriter = factory.makeSAMWriter(header, false, unmappedSam);
        unmappedWriter.addAlignment(unmappedRecord);
        unmappedWriter.close();

        final File alignedSam = File.createTempFile("aligned.", ".sam");
        alignedSam.deleteOnExit();

        final String sequence = "chr1";
        // Populate the header with SAMSequenceRecords
        header.setSequenceDictionary(SAMSequenceDictionaryExtractor.extractDictionary(sequenceDict2.toPath()));

        final SAMFileWriter alignedWriter = factory.makeSAMWriter(header, false, alignedSam);
        for (final MultipleAlignmentSpec spec : specs) {
            final SAMRecord alignedRecord = new SAMRecord(header);
            alignedRecord.setReadName(unmappedRecord.getReadName());
            alignedRecord.setReadBases(unmappedRecord.getReadBases());
            alignedRecord.setBaseQualities(unmappedRecord.getBaseQualities());
            alignedRecord.setReferenceName(sequence);
            alignedRecord.setAlignmentStart(1);
            alignedRecord.setReadNegativeStrandFlag(spec.reverseStrand);
            alignedRecord.setCigarString(spec.cigar);
            alignedRecord.setMappingQuality(spec.mapQ);
            if (spec.oneOfTheBest) {
                alignedRecord.setAttribute(ONE_OF_THE_BEST_TAG, 1);
            }
            alignedWriter.addAlignment(alignedRecord);
        }
        alignedWriter.close();

        return new File[]{unmappedSam, alignedSam};
    } catch (IOException e) {
        throw new PicardException(e.getMessage(), e);
    }
}
 
Example 20
Source File: SortedSAMWriter.java    From abra2 with MIT License
public static void main(String[] args) throws IOException {
//		Logger.LEVEL = Logger.Level.TRACE;
		String ref = "/home/lmose/dev/reference/hg38b/hg38.fa";
		
		CompareToReference2 c2r = new CompareToReference2();
		c2r.init(ref);
		
		ChromosomeChunker cc = new ChromosomeChunker(c2r);
		
		cc.init();
		
//		SamReader reader = SAMRecordUtils.getSamReader("/home/lmose/dev/abra2_dev/sort_issue3/0.5.bam");
//		SamReader reader = SAMRecordUtils.getSamReader("/home/lmose/dev/abra2_dev/sort_issue4/1.6.bam");
		SamReader reader = SAMRecordUtils.getSamReader("/home/lmose/dev/abra2_dev/mate_fix/0.87.bam");

		SAMFileHeader header = reader.getFileHeader();
		header.setSortOrder(SortOrder.coordinate);
		
		int maxRecordsInRam = 1000000;
		SAMRecord[] readsByNameArray = new SAMRecord[maxRecordsInRam];
		SAMRecord[] readsByCoordArray = new SAMRecord[maxRecordsInRam];
		
		SortedSAMWriter writer = new SortedSAMWriter(new String[] { "/home/lmose/dev/abra2_dev/mate_fix" }, "/home/lmose/dev/abra2_dev/mate_fix", new SAMFileHeader[] { reader.getFileHeader() }, true, cc,
				1,true,1000,false, false, false, maxRecordsInRam);

		SAMFileWriterFactory writerFactory = new SAMFileWriterFactory();
		SAMFileWriter out = writerFactory.makeBAMWriter(reader.getFileHeader(), true, new File("/home/lmose/dev/abra2_dev/mate_fix/test.bam"),1);
		
		long start = System.currentTimeMillis();
		
		writer.processChromosome(out, 0, "chr12", readsByNameArray, readsByCoordArray);
		
		out.close();
		
		long stop = System.currentTimeMillis();
		
		System.out.println("Elapsed msecs: " + (stop-start));
	}