Java Code Examples for htsjdk.samtools.SamReader#getFileHeader()

The following examples show how to use htsjdk.samtools.SamReader#getFileHeader() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: SpliceJunctionCounter.java    From abra2 with MIT License 6 votes vote down vote up
public List<Feature> getJunctions(String[] inputs) {
	SAMFileHeader header = null;
	for (String input : inputs) {
		SamReader reader = SAMRecordUtils.getSamReader(input);
		updateCounts(reader);
		
		if (header == null) {
			header = reader.getFileHeader();
		}
	}
	
	List<SpliceJunction> junctions = new ArrayList<SpliceJunction>(uniqueReads.keySet());
	Collections.sort(junctions, new SpliceJunctionComparator(header));
	
	List<Feature> splices = new ArrayList<Feature>();
	for (SpliceJunction junction : junctions) {
		splices.add(new Feature(junction.chrom, junction.start, junction.stop));
	}
	
	return splices;
}
 
Example 2
Source File: TagReadWithGeneFunctionTest.java    From Drop-seq with MIT License 6 votes vote down vote up
@Test
public void testTagCodingRead () {
	SamReader inputSam = SamReaderFactory.makeDefault().open(testBAMFile);
	SAMFileHeader header = inputSam.getFileHeader();
	SAMSequenceDictionary bamDict = header.getSequenceDictionary();
	final OverlapDetector<Gene> geneOverlapDetector = GeneAnnotationReader.loadAnnotationsFile(annotationsFile, bamDict);

	String recName="H575CBGXY:3:13502:3588:9959";
	SAMRecord r = getRecord(testBAMFile, recName);

	TagReadWithGeneFunction tagger = new TagReadWithGeneFunction();
	r=tagger.setAnnotations(r, geneOverlapDetector, false);

	Assert.assertEquals(r.getStringAttribute("gn"), "Elp2");
	Assert.assertEquals(r.getStringAttribute("gs"), "+");
	Assert.assertEquals(r.getStringAttribute("gf"), "CODING");
	Assert.assertEquals(r.getStringAttribute("XF"), LocusFunction.CODING.name());
}
 
Example 3
Source File: TagReadWithGeneFunctionTest.java    From Drop-seq with MIT License 6 votes vote down vote up
@Test
public void testTagCodingUTR () {
	SamReader inputSam = SamReaderFactory.makeDefault().open(testBAMFile);
	SAMFileHeader header = inputSam.getFileHeader();
	SAMSequenceDictionary bamDict = header.getSequenceDictionary();
	final OverlapDetector<Gene> geneOverlapDetector = GeneAnnotationReader.loadAnnotationsFile(annotationsFile, bamDict);

	String recName="H575CBGXY:2:21206:5655:8103";
	SAMRecord r = getRecord(testBAMFile, recName);

	TagReadWithGeneFunction tagger = new TagReadWithGeneFunction();
	r=tagger.setAnnotations(r, geneOverlapDetector, false);

	String geneName = r.getStringAttribute("gn");
	String geneStrand = r.getStringAttribute("gs");
	String function = r.getStringAttribute("gf");

	Assert.assertEquals(geneName, "Elp2");
	Assert.assertEquals(geneStrand, "+");
	Assert.assertEquals(function, "UTR");
	Assert.assertEquals(r.getStringAttribute("XF"), LocusFunction.UTR.name());
}
 
Example 4
Source File: TagReadWithGeneExonFunctionTest.java    From Drop-seq with MIT License 6 votes vote down vote up
@Test
public void testTagCodingUTR () {
	SamReader inputSam = SamReaderFactory.makeDefault().open(testBAMFile);
	SAMFileHeader header = inputSam.getFileHeader();
	SAMSequenceDictionary bamDict = header.getSequenceDictionary();
	final OverlapDetector<Gene> geneOverlapDetector = GeneAnnotationReader.loadAnnotationsFile(annotationsFile, bamDict);

	String recName="H575CBGXY:2:21206:5655:8103";
	SAMRecord r = getRecord(testBAMFile, recName);

	TagReadWithGeneExonFunction tagger = new TagReadWithGeneExonFunction();
	r=tagger.setAnnotations(r, geneOverlapDetector);

	String geneName = r.getStringAttribute("GE");
	String geneStrand = r.getStringAttribute("GS");

	Assert.assertEquals(geneName, "Elp2");
	Assert.assertEquals(geneStrand, "+");
	Assert.assertEquals(r.getStringAttribute("XF"), LocusFunction.UTR.name());
}
 
Example 5
Source File: SamFileReaderAdaptor.java    From rtg-tools with BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Constructor that adapts a regular <code>SAMFileReader</code>, optionally filtering on regions
 * @param reader the reader
 * @param regions regions to filter from output, may be null
 */
public SamFileReaderAdaptor(SamReader reader, final ReferenceRanges<String> regions) {
  super(reader.getFileHeader());
  mReader = reader;
  SamUtils.logRunId(mReader.getFileHeader());
  if (regions == null || regions.allAvailable()) {
    mIterator = mReader.iterator();
  } else {
    mIterator = new SamRestrictingIterator(mReader.iterator(), regions);
  }
}
 
Example 6
Source File: HeaderInfo.java    From dataflow-java with Apache License 2.0 5 votes vote down vote up
public static HeaderInfo getHeaderFromBAMFile(Storage.Objects storage, String BAMPath, Iterable<Contig> explicitlyRequestedContigs) throws IOException {
  HeaderInfo result = null;

  // Open and read start of BAM
  LOG.info("Reading header from " + BAMPath);
  final SamReader samReader = BAMIO
      .openBAM(storage, BAMPath, ValidationStringency.DEFAULT_STRINGENCY);
  final SAMFileHeader header = samReader.getFileHeader();
  Contig firstContig = getFirstExplicitContigOrNull(header, explicitlyRequestedContigs);
  if (firstContig == null) {
    final SAMSequenceRecord seqRecord = header.getSequence(0);
    firstContig = new Contig(seqRecord.getSequenceName(), -1, -1);
  }

  LOG.info("Reading first chunk of reads from " + BAMPath);
  final SAMRecordIterator recordIterator = samReader.query(
      firstContig.referenceName, (int)firstContig.start + 1, (int)firstContig.end + 1, false);

  Contig firstShard = null;
  while (recordIterator.hasNext() && result == null) {
    SAMRecord record = recordIterator.next();
    final int alignmentStart = record.getAlignmentStart();
    if (firstShard == null && alignmentStart > firstContig.start &&
        (alignmentStart < firstContig.end || firstContig.end == -1)) {
      firstShard = new Contig(firstContig.referenceName, alignmentStart, alignmentStart);
      LOG.info("Determined first shard to be " + firstShard);
      result = new HeaderInfo(header, firstShard);
    }
  }
  recordIterator.close();
  samReader.close();

  if (result == null) {
    throw new IOException("Did not find reads for the first contig " + firstContig.toString());
  }
  LOG.info("Finished header reading from " + BAMPath);
  return result;
}
 
Example 7
Source File: SamClosedFileReaderTest.java    From rtg-tools with BSD 2-Clause "Simplified" License 5 votes vote down vote up
private void checkRecords(File bam, SamReader normalReader, Iterator<SAMRecord> norm, ReferenceRanges<String> ranges) throws IOException {
  try (final RecordIterator<SAMRecord> closed = new SamClosedFileReader(bam, ranges, null, normalReader.getFileHeader())) {
    while (norm.hasNext() && closed.hasNext()) {
      final SAMRecord a = norm.next();
      final SAMRecord b = closed.next();
      assertEquals(a.getSAMString(), b.getSAMString());
    }
    assertEquals(norm.hasNext(), closed.hasNext());
  }
}
 
Example 8
Source File: CollapseTagWithContext.java    From Drop-seq with MIT License 5 votes vote down vote up
private SAMFileWriter getWriter (final SamReader reader) {
	SAMFileHeader header = reader.getFileHeader();
	SamHeaderUtil.addPgRecord(header, this);
	String context = StringUtil.join(" ", this.CONTEXT_TAGS);
	header.addComment("Edit distance collapsed tag " +  this.COLLAPSE_TAG + " to new tag " + this.OUT_TAG+ " with edit distance "+ this.EDIT_DISTANCE + "using indels=" + this.FIND_INDELS + " in the context of tags [" + context + "]");
       SAMFileWriter writer= new SAMFileWriterFactory().makeSAMOrBAMWriter(header, false, this.OUTPUT);
       return writer;
}
 
Example 9
Source File: IntervalTagComparatorTest.java    From Drop-seq with MIT License 5 votes vote down vote up
private List<SAMRecord> createManyIntervalTaggedSAMRecords (final int desiredNumRecords) {
	List<SAMRecord> data = new ArrayList<>();

	SamReader inputSam = SamReaderFactory.makeDefault().open(this.dictFile);
	SAMRecord samRecordTemplate = new SAMRecord (inputSam.getFileHeader());

	SAMSequenceDictionary dict= inputSam.getFileHeader().getSequenceDictionary();
	List<SAMSequenceRecord> recs = dict.getSequences();
	int numRecs = recs.size();

	Random randomGenerator = new Random();
	for (int i=0; i<desiredNumRecords; i++) {
		SAMSequenceRecord r = recs.get(randomGenerator.nextInt(numRecs+1));
		String chr = r.getSequenceName();
		int seqLen = r.getSequenceLength();
		int s1 = randomGenerator.nextInt(seqLen);
		int s2 = randomGenerator.nextInt(seqLen);
		int s = Math.min(s1, s2);
		int e = Math.max(s1, s2);
		Interval interval = new Interval (chr, s1,s2);
		try {
			SAMRecord r1 = (SAMRecord) samRecordTemplate.clone();
			// I realize that using encoding the full interval can be a bit heavy handed.
			r1.setAttribute(this.intervalTag, interval.toString());
			data.add(r1);
		} catch (CloneNotSupportedException e1) {
			// this should never happen, sigh.
		}
	}
	return data;
}
 
Example 10
Source File: BamOverlapChecker.java    From systemsgenetics with GNU General Public License v3.0 5 votes vote down vote up
public BamOverlapChecker(SamReader bam_file){
    
    SAMFileHeader  header = bam_file.getFileHeader();
    SAMSequenceDictionary dict = header.getSequenceDictionary();
    List<SAMSequenceRecord> sequences = dict.getSequences();
   
    
    booleanMap = new HashMap<String, boolean[]>();
    
    for(SAMSequenceRecord sequence : sequences){
        int sequenceEnd = sequence.getSequenceLength();
        int arrayLength = (int) Math.ceil( (float) sequenceEnd / (float)stepSize );
        boolean[] tempArray;
        tempArray = new boolean[arrayLength];
        
        for(int i=0;i<arrayLength;i++){
            SAMRecordIterator bamQuery = bam_file.queryOverlapping(sequence.getSequenceName(), i*stepSize, (i+1)*stepSize);
            if(bamQuery.hasNext()){
                tempArray[i] = true;
            }else{
                tempArray[i] = false;
            }
            bamQuery.close();
        }
        booleanMap.put(sequence.getSequenceName(),tempArray );
        //System.out.println("Finished checking the bam for chromosome " + sequence.getSequenceName());
    }
    
}
 
Example 11
Source File: WriteBAIFn.java    From dataflow-java with Apache License 2.0 4 votes vote down vote up
@ProcessElement
public void processElement(DoFn<String, String>.ProcessContext c) throws Exception {
  Metrics.counter(WriteBAIFn.class, "Initialized Indexing Shard Count").inc();
  Stopwatch stopWatch = Stopwatch.createStarted();

  final HeaderInfo header = c.sideInput(headerView);
  final String bamFilePath = c.sideInput(writtenBAMFilerView);
  final Iterable<KV<Integer, Long>> sequenceShardSizes = c.sideInput(sequenceShardSizesView);

  final String sequenceName = c.element();
  final int sequenceIndex = header.header.getSequence(sequenceName).getSequenceIndex();
  final String baiFilePath = bamFilePath + "-" +
      String.format("%02d",sequenceIndex) + "-" + sequenceName + ".bai";

  long offset = 0;
  int skippedReferences  = 0;
  long bytesToProcess = 0;

  for (KV<Integer, Long> sequenceShardSize : sequenceShardSizes) {
    if (sequenceShardSize.getKey() < sequenceIndex) {
      offset += sequenceShardSize.getValue();
      skippedReferences++;
    } else if (sequenceShardSize.getKey() == sequenceIndex) {
      bytesToProcess = sequenceShardSize.getValue();
    }
  }
  LOG.info("Generating BAI index: " + baiFilePath);
  LOG.info("Reading BAM file: " + bamFilePath + " for reference " + sequenceName +
      ", skipping " + skippedReferences + " references at offset " + offset +
      ", expecting to process " + bytesToProcess + " bytes");

  Options options = c.getPipelineOptions().as(Options.class);
  final Storage.Objects storage = Transport.newStorageClient(
      options
        .as(GCSOptions.class))
        .build()
        .objects();
  final SamReader reader = BAMIO.openBAM(storage, bamFilePath, ValidationStringency.SILENT, true,
      offset);
  final OutputStream outputStream =
      Channels.newOutputStream(
          new GcsUtil.GcsUtilFactory().create(options)
            .create(GcsPath.fromUri(baiFilePath),
                BAMIO.BAM_INDEX_FILE_MIME_TYPE));
  final BAMShardIndexer indexer = new BAMShardIndexer(outputStream, reader.getFileHeader(), sequenceIndex);

  long processedReads = 0;
  long skippedReads = 0;

  // create and write the content
  if (bytesToProcess > 0) {
    SAMRecordIterator it = reader.iterator();
    boolean foundRecords = false;
    while (it.hasNext()) {
      SAMRecord r = it.next();
      if (!r.getReferenceName().equals(sequenceName)) {
        if (foundRecords) {
          LOG.info("Finishing index building for " + sequenceName + " after processing " + processedReads);
          break;
        }
        skippedReads++;
        continue;
      } else if (!foundRecords) {
        LOG.info("Found records for refrence " + sequenceName + " after skipping " + skippedReads);
        foundRecords = true;
      }
      indexer.processAlignment(r);
      processedReads++;
    }
    it.close();
  } else {
    LOG.info("No records for refrence " + sequenceName + ": writing empty index ");
  }
  long noCoordinateReads = indexer.finish();
  c.output(baiFilePath);
  c.output(NO_COORD_READS_COUNT_TAG, noCoordinateReads);
  LOG.info("Generated " + baiFilePath + ", " + processedReads + " reads, " +
      noCoordinateReads + " no coordinate reads, " + skippedReads + ", skipped reads");
  stopWatch.stop();
  Metrics.distribution(WriteBAIFn.class, "Indexing Shard Processing Time (sec)").update(
      stopWatch.elapsed(TimeUnit.SECONDS));
  Metrics.counter(WriteBAIFn.class, "Finished Indexing Shard Count").inc();
  Metrics.counter(WriteBAIFn.class, "Indexed reads").inc(processedReads);
  Metrics.counter(WriteBAIFn.class, "Indexed no coordinate reads").inc(noCoordinateReads);
  Metrics.distribution(WriteBAIFn.class, "Reads Per Indexing Shard").update(processedReads);
}
 
Example 12
Source File: GenotypeSperm.java    From Drop-seq with MIT License 4 votes vote down vote up
private SNPUMIBasePileupIterator getIter (SamReader reader, IntervalList snpIntervals, List <String> cellBarcodes) {
	
	Iterator<SAMRecord> iter = reader.iterator();
	
	// pre-process iterator to add the "gene" tag and the gene strand.
	TagAddingIterator taggedIter = new TagAddingIterator(iter);						
	SamHeaderAndIterator headerAndIter = new SamHeaderAndIterator(reader.getFileHeader(), taggedIter);			
	
	// some hard coded params.	
	
	SNPUMIBasePileupIterator sbpi = new SNPUMIBasePileupIterator(
			headerAndIter, snpIntervals, GeneFunctionCommandLineBase.DEFAULT_GENE_NAME_TAG, GeneFunctionCommandLineBase.DEFAULT_GENE_STRAND_TAG, GeneFunctionCommandLineBase.DEFAULT_GENE_FUNCTION_TAG,
			GeneFunctionCommandLineBase.DEFAULT_LOCUS_FUNCTION_LIST, StrandStrategy.BOTH, this.CELL_BARCODE_TAG,
			this.MOLECULAR_BARCODE_TAG, this.SNP_TAG, null, this.READ_MQ, true, cellBarcodes, SortOrder.SNP_GENE);
	
	return sbpi;
	
}
 
Example 13
Source File: AddOrReplaceReadGroups.java    From picard with MIT License 4 votes vote down vote up
protected int doWork() {
    IOUtil.assertInputIsValid(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    final SamReader in = SamReaderFactory.makeDefault()
        .referenceSequence(REFERENCE_SEQUENCE)
        .open(SamInputResource.of(INPUT));

    // create the read-group we'll be using
    final SAMReadGroupRecord rg = new SAMReadGroupRecord(RGID);
    rg.setLibrary(RGLB);
    rg.setPlatform(RGPL);
    rg.setSample(RGSM);
    rg.setPlatformUnit(RGPU);
    if (RGCN != null) rg.setSequencingCenter(RGCN);
    if (RGDS != null) rg.setDescription(RGDS);
    if (RGDT != null) rg.setRunDate(RGDT);
    if (RGPI != null) rg.setPredictedMedianInsertSize(RGPI);
    if (RGPG != null) rg.setProgramGroup(RGPG);
    if (RGPM != null) rg.setPlatformModel(RGPM);
    if (RGKS != null) rg.setKeySequence(RGKS);
    if (RGFO != null) rg.setFlowOrder(RGFO);

    log.info(String.format("Created read-group ID=%s PL=%s LB=%s SM=%s%n", rg.getId(), rg.getPlatform(), rg.getLibrary(), rg.getSample()));

    // create the new header and output file
    final SAMFileHeader inHeader = in.getFileHeader();
    final SAMFileHeader outHeader = inHeader.clone();
    outHeader.setReadGroups(Collections.singletonList(rg));
    if (SORT_ORDER != null) outHeader.setSortOrder(SORT_ORDER);

    final SAMFileWriter outWriter = new SAMFileWriterFactory().makeSAMOrBAMWriter(outHeader,
            outHeader.getSortOrder() == inHeader.getSortOrder(),
            OUTPUT);

    final ProgressLogger progress = new ProgressLogger(log);
    for (final SAMRecord read : in) {
        read.setAttribute(SAMTag.RG.name(), RGID);
        outWriter.addAlignment(read);
        progress.record(read);
    }

    // cleanup
    CloserUtil.close(in);
    outWriter.close();
    return 0;
}
 
Example 14
Source File: MarkDuplicatesTest.java    From picard with MIT License 4 votes vote down vote up
/**
 * Test that PG header records are created & chained appropriately (or not created), and that the PG record chains
 * are as expected.  MarkDuplicates is used both to merge and to mark dupes in this case.
 * @param suppressPg If true, do not create PG header record.
 * @param expectedPnVnByReadName For each read, info about the expect chain of PG records.
 */
@Test(dataProvider = "pgRecordChainingTest")
public void pgRecordChainingTest(final boolean suppressPg,
                                 final Map<String, List<ExpectedPnAndVn>> expectedPnVnByReadName) {
    final File outputDir = IOUtil.createTempDir(TEST_BASE_NAME + ".", ".tmp");
    outputDir.deleteOnExit();
    try {
        // Run MarkDuplicates, merging the 3 input files, and either enabling or suppressing PG header
        // record creation according to suppressPg.
        final MarkDuplicates markDuplicates = new MarkDuplicates();
        final ArrayList<String> args = new ArrayList<>();
        for (int i = 1; i <= 3; ++i) {
            args.add("INPUT=" + new File(TEST_DATA_DIR, "merge" + i + ".sam").getAbsolutePath());
        }
        final File outputSam = new File(outputDir, TEST_BASE_NAME + ".sam");
        args.add("OUTPUT=" + outputSam.getAbsolutePath());
        args.add("METRICS_FILE=" + new File(outputDir, TEST_BASE_NAME + ".duplicate_metrics").getAbsolutePath());
        args.add("ADD_PG_TAG_TO_READS=true");
        if (suppressPg) args.add("PROGRAM_RECORD_ID=null");

        // I generally prefer to call doWork rather than invoking the argument parser, but it is necessary
        // in this case to initialize the command line.
        // Note that for the unit test, version won't come through because it is obtained through jar
        // manifest, and unit test doesn't run code from a jar.
        Assert.assertEquals(markDuplicates.instanceMain(args.toArray(new String[args.size()])), 0);

        // Read the MarkDuplicates output file, and get the PG ID for each read.  In this particular test,
        // the PG ID should be the same for both ends of a pair.
        final SamReader reader = SamReaderFactory.makeDefault().open(outputSam);

        final Map<String, String> pgIdForReadName = new HashMap<>();
        for (final SAMRecord rec : reader) {
            final String existingPgId = pgIdForReadName.get(rec.getReadName());
            final String thisPgId = rec.getStringAttribute(SAMTag.PG.name());
            if (existingPgId != null) {
                Assert.assertEquals(thisPgId, existingPgId);
            } else {
                pgIdForReadName.put(rec.getReadName(), thisPgId);
            }
        }
        final SAMFileHeader header = reader.getFileHeader();
        CloserUtil.close(reader);

        // Confirm that for each read name, the chain of PG records contains exactly the number that is expected,
        // and that values in the PG chain are as expected.
        for (final Map.Entry<String, List<ExpectedPnAndVn>> entry : expectedPnVnByReadName.entrySet()) {
            final String readName = entry.getKey();
            final List<ExpectedPnAndVn> expectedList = entry.getValue();
            String pgId = pgIdForReadName.get(readName);
            for (final ExpectedPnAndVn expected : expectedList) {
                final SAMProgramRecord programRecord = header.getProgramRecord(pgId);
                if (expected.expectedPn != null) Assert.assertEquals(programRecord.getProgramName(), expected.expectedPn);
                if (expected.expectedVn != null) Assert.assertEquals(programRecord.getProgramVersion(), expected.expectedVn);
                pgId = programRecord.getPreviousProgramGroupId();
            }
            Assert.assertNull(pgId);
        }

    } finally {
        IOUtil.recursiveDelete(outputDir.toPath());
    }
}
 
Example 15
Source File: BAMRecordReader.java    From Hadoop-BAM with MIT License 4 votes vote down vote up
@Override public void initialize(InputSplit spl, TaskAttemptContext ctx)
           throws IOException
{
	// This method should only be called once (see Hadoop API). However,
	// there seems to be disagreement between implementations that call
	// initialize() and Hadoop-BAM's own code that relies on
	// {@link BAMInputFormat} to call initialize() when the reader is
	// created. Therefore we add this check for the time being. 
	if(isInitialized)
		close();
	isInitialized = true;
	reachedEnd = false;

	final Configuration conf = ctx.getConfiguration();

	final FileVirtualSplit split = (FileVirtualSplit)spl;
	final Path             file  = split.getPath();
	final FileSystem       fs    = file.getFileSystem(conf);

	ValidationStringency stringency = SAMHeaderReader.getValidationStringency(conf);
	boolean useIntelInflater = BAMInputFormat.useIntelInflater(conf);

	java.nio.file.Path index = SamFiles.findIndex(NIOFileUtil.asPath(fs.makeQualified(file).toUri()));
	Path fileIndex = index == null ? null : new Path(index.toUri());
	SeekableStream indexStream = fileIndex == null ? null : WrapSeekable.openPath(fs, fileIndex);
	in = WrapSeekable.openPath(fs, file);
	SamReader samReader = createSamReader(in, indexStream, stringency, useIntelInflater);
	final SAMFileHeader header = samReader.getFileHeader();

	long virtualStart = split.getStartVirtualOffset();

	fileStart  = virtualStart >>> 16;
	virtualEnd = split.getEndVirtualOffset();

	SamReader.PrimitiveSamReader primitiveSamReader =
			((SamReader.PrimitiveSamReaderToSamReaderAdapter) samReader).underlyingReader();
	bamFileReader = (BAMFileReader) primitiveSamReader;

	if (logger.isDebugEnabled()) {
		final long recordStart = virtualStart & 0xffff;
		logger.debug("Initialized BAMRecordReader; byte offset: {}, record offset: {}",
			fileStart, recordStart);
	}

	if (conf.getBoolean("hadoopbam.bam.keep-paired-reads-together", false)) {
		throw new IllegalArgumentException("Property hadoopbam.bam.keep-paired-reads-together is no longer honored.");
	}

	boolean boundedTraversal = BAMInputFormat.isBoundedTraversal(conf);
	if (boundedTraversal && split.getIntervalFilePointers() != null) {
		// return reads for intervals
		List<Interval> intervals = BAMInputFormat.getIntervals(conf);
		QueryInterval[] queryIntervals = BAMInputFormat.prepareQueryIntervals(intervals, header.getSequenceDictionary());
		iterator = bamFileReader.createIndexIterator(queryIntervals, false, split.getIntervalFilePointers());
	} else if (boundedTraversal && split.getIntervalFilePointers() == null) {
		// return unmapped reads
		iterator = bamFileReader.queryUnmapped();
	} else {
		// return everything
		BAMFileSpan splitSpan = new BAMFileSpan(new Chunk(virtualStart, virtualEnd));
		iterator = bamFileReader.getIterator(splitSpan);
	}
}
 
Example 16
Source File: AnnotationUtilsTest.java    From Drop-seq with MIT License 4 votes vote down vote up
private OverlapDetector<Gene> getGeneOverlapDetector (final File bam, final File gtf) {
	SamReader inputSam = SamReaderFactory.makeDefault().open(bam);
	SAMFileHeader header = inputSam.getFileHeader();
	SAMSequenceDictionary bamDict = header.getSequenceDictionary();
	return GeneAnnotationReader.loadAnnotationsFile(gtf, bamDict);
}
 
Example 17
Source File: CollectWgsMetrics.java    From picard with MIT License 4 votes vote down vote up
/** Gets the SamReader from which records will be examined.  This will also set the header so that it is available in
 *  */
protected SamReader getSamReader() {
    final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
    this.header        = in.getFileHeader();
    return in;
}
 
Example 18
Source File: RevertSam.java    From picard with MIT License 4 votes vote down vote up
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    ValidationUtil.assertWritable(OUTPUT, OUTPUT_BY_READGROUP);

    final boolean sanitizing = SANITIZE;
    final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).validationStringency(VALIDATION_STRINGENCY).open(INPUT);
    final SAMFileHeader inHeader = in.getFileHeader();
    ValidationUtil.validateHeaderOverrides(inHeader, SAMPLE_ALIAS, LIBRARY_NAME);

    ////////////////////////////////////////////////////////////////////////////
    // Build the output writer with an appropriate header based on the options
    ////////////////////////////////////////////////////////////////////////////
    final boolean presorted = isPresorted(inHeader, SORT_ORDER, sanitizing);
    if (SAMPLE_ALIAS != null) overwriteSample(inHeader.getReadGroups(), SAMPLE_ALIAS);
    if (LIBRARY_NAME != null) overwriteLibrary(inHeader.getReadGroups(), LIBRARY_NAME);
    final SAMFileHeader singleOutHeader = createOutHeader(inHeader, SORT_ORDER, REMOVE_ALIGNMENT_INFORMATION);
    inHeader.getReadGroups().forEach(readGroup -> singleOutHeader.addReadGroup(readGroup));

    final Map<String, File> outputMap;
    final Map<String, SAMFileHeader> headerMap;
    if (OUTPUT_BY_READGROUP) {
        if (inHeader.getReadGroups().isEmpty()) {
            throw new PicardException(INPUT + " does not contain Read Groups");
        }

        final String defaultExtension;
        if (OUTPUT_BY_READGROUP_FILE_FORMAT == FileType.dynamic) {
            defaultExtension = getDefaultExtension(INPUT.toString());
        } else {
            defaultExtension = "." + OUTPUT_BY_READGROUP_FILE_FORMAT.toString();
        }

        outputMap = createOutputMap(OUTPUT_MAP, OUTPUT, defaultExtension, inHeader.getReadGroups());
        ValidationUtil.assertAllReadGroupsMapped(outputMap, inHeader.getReadGroups());
        headerMap = createHeaderMap(inHeader, SORT_ORDER, REMOVE_ALIGNMENT_INFORMATION);
    } else {
        outputMap = null;
        headerMap = null;
    }

    if (RESTORE_HARDCLIPS && !REMOVE_ALIGNMENT_INFORMATION) {
        throw new PicardException("Cannot revert sam file when RESTORE_HARDCLIPS is true and REMOVE_ALIGNMENT_INFORMATION is false.");
    }

    final SAMFileWriterFactory factory = new SAMFileWriterFactory();
    final RevertSamWriter out = new RevertSamWriter(OUTPUT_BY_READGROUP, headerMap, outputMap, singleOutHeader, OUTPUT, presorted, factory, REFERENCE_SEQUENCE);

    ////////////////////////////////////////////////////////////////////////////
    // Build a sorting collection to use if we are sanitizing
    ////////////////////////////////////////////////////////////////////////////
    final RevertSamSorter sorter;
    if (sanitizing)
        sorter = new RevertSamSorter(OUTPUT_BY_READGROUP, headerMap, singleOutHeader, MAX_RECORDS_IN_RAM);
    else sorter = null;

    final ProgressLogger progress = new ProgressLogger(log, 1000000, "Reverted");
    for (final SAMRecord rec : in) {
        // Weed out non-primary and supplemental read as we don't want duplicates in the reverted file!
        if (rec.isSecondaryOrSupplementary()) continue;

        // log the progress before you revert because otherwise the "last read position" might not be accurate
        progress.record(rec);

        // Actually do the reverting of the remaining records
        revertSamRecord(rec);

        if (sanitizing) sorter.add(rec);
        else out.addAlignment(rec);
    }
    CloserUtil.close(in);

    ////////////////////////////////////////////////////////////////////////////
    // Now if we're sanitizing, clean up the records and write them to the output
    ////////////////////////////////////////////////////////////////////////////
    if (!sanitizing) {
        out.close();
    } else {
        final Map<SAMReadGroupRecord, FastqQualityFormat> readGroupToFormat;
        try {
            readGroupToFormat = createReadGroupFormatMap(inHeader, REFERENCE_SEQUENCE, VALIDATION_STRINGENCY, INPUT, RESTORE_ORIGINAL_QUALITIES);
        } catch (final PicardException e) {
            log.error(e.getMessage());
            return -1;
        }

        final long[] sanitizeResults = sanitize(readGroupToFormat, sorter, out);
        final long discarded = sanitizeResults[0];
        final long total = sanitizeResults[1];
        out.close();

        final double discardRate = discarded / (double) total;
        final NumberFormat fmt = new DecimalFormat("0.000%");
        log.info("Discarded " + discarded + " out of " + total + " (" + fmt.format(discardRate) + ") reads in order to sanitize output.");

        if (discardRate > MAX_DISCARD_FRACTION) {
            throw new PicardException("Discarded " + fmt.format(discardRate) + " which is above MAX_DISCARD_FRACTION of " + fmt.format(MAX_DISCARD_FRACTION));
        }
    }

    return 0;
}
 
Example 19
Source File: SortedSAMWriter.java    From abra2 with MIT License 3 votes vote down vote up
public static void main(String[] args) throws IOException {
//		Logger.LEVEL = Logger.Level.TRACE;
		String ref = "/home/lmose/dev/reference/hg38b/hg38.fa";
		
		CompareToReference2 c2r = new CompareToReference2();
		c2r.init(ref);
		
		ChromosomeChunker cc = new ChromosomeChunker(c2r);
		
		cc.init();
		
//		SamReader reader = SAMRecordUtils.getSamReader("/home/lmose/dev/abra2_dev/sort_issue3/0.5.bam");
//		SamReader reader = SAMRecordUtils.getSamReader("/home/lmose/dev/abra2_dev/sort_issue4/1.6.bam");
		SamReader reader = SAMRecordUtils.getSamReader("/home/lmose/dev/abra2_dev/mate_fix/0.87.bam");

		SAMFileHeader header = reader.getFileHeader();
		header.setSortOrder(SortOrder.coordinate);
		
		int maxRecordsInRam = 1000000;
		SAMRecord[] readsByNameArray = new SAMRecord[maxRecordsInRam];
		SAMRecord[] readsByCoordArray = new SAMRecord[maxRecordsInRam];
		
		SortedSAMWriter writer = new SortedSAMWriter(new String[] { "/home/lmose/dev/abra2_dev/mate_fix" }, "/home/lmose/dev/abra2_dev/mate_fix", new SAMFileHeader[] { reader.getFileHeader() }, true, cc,
				1,true,1000,false, false, false, maxRecordsInRam);

		SAMFileWriterFactory writerFactory = new SAMFileWriterFactory();
		SAMFileWriter out = writerFactory.makeBAMWriter(reader.getFileHeader(), true, new File("/home/lmose/dev/abra2_dev/mate_fix/test.bam"),1);
		
		long start = System.currentTimeMillis();
		
		writer.processChromosome(out, 0, "chr12", readsByNameArray, readsByCoordArray);
		
		out.close();
		
		long stop = System.currentTimeMillis();
		
		System.out.println("Elapsed msecs: " + (stop-start));
	}