htsjdk.samtools.BAMRecordCodec Java Examples

The following examples show how to use htsjdk.samtools.BAMRecordCodec. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: SamRecordSortingIteratorFactory.java    From Drop-seq with MIT License 6 votes vote down vote up
/**
   * Hands the given records to {@code SortingIteratorFactory.create}, using a {@code BAMRecordCodec}
   * built from {@code header} and the default max-records-in-RAM value of {@code SAMFileWriterImpl}.
   *
   * @param header header used to construct the BAMRecordCodec.
   * @param underlyingIterator source of the records.
   * @param comparator defines the order of the returned records.
   * @param progressLogger pass null if not interested in progress.
   * @return An iterator with all the records from underlyingIterator, in order defined by comparator.
   */
  public static CloseableIterator<SAMRecord> create(final SAMFileHeader header,
                                         final Iterator<SAMRecord> underlyingIterator,
                                         final Comparator<SAMRecord> comparator,
                                         final ProgressLogger progressLogger) {
      // No logger means no callback; otherwise forward every record to the logger.
      final SortingIteratorFactory.ProgressCallback<SAMRecord> progressCallback =
              (progressLogger == null)
                      ? null
                      : new SortingIteratorFactory.ProgressCallback<SAMRecord>() {
                            @Override
                            public void logProgress(final SAMRecord record) {
                                progressLogger.record(record);
                            }
                        };

      final BAMRecordCodec codec = new BAMRecordCodec(header);
      return SortingIteratorFactory.create(
              SAMRecord.class,
              underlyingIterator,
              comparator,
              codec,
              SAMFileWriterImpl.getDefaultMaxRecordsInRam(),
              progressCallback);
  }
 
Example #2
Source File: CollapseTagWithContext.java    From Drop-seq with MIT License 6 votes vote down vote up
/**
 * Processes the input one group at a time, buffering each group in a SortingCollection.
 * If the number of records exceeds the number of records allowed in memory, spill to disk.
 * @param groupingIter iterator that yields the records group by group
 * @param writer passed through to processContext
 * @param outMetrics passed through to processContext
 * @param header used to build the BAMRecordCodec of each per-group collection
 */
private void lowMemoryIteration (PeekableGroupingIterator<SAMRecord> groupingIter,
								 SAMFileWriter writer, PrintStream outMetrics, SAMFileHeader header) {
	log.info("Running (slower) memory efficient mode");
	while (groupingIter.hasNext()) {
		// The SortingCollection is not used for sorting here; it only serves as an unsorted
		// container for a group that might hold more objects than can fit in RAM.
		final SortingCollection<SAMRecord> groupRecords = SortingCollection.newInstance(
				SAMRecord.class, new BAMRecordCodec(header), NO_OP_COMPARATOR, this.MAX_RECORDS_IN_RAM);

		// Always take one record first (it may be the first of this group without being the first
		// group overall), then keep pulling while the iterator reports more records in the group.
		// The group is spooled because the downstream code makes several passes over it.
		do {
			groupRecords.add(groupingIter.next());
		} while (groupingIter.hasNextInGroup());

		// Finish the collection and keep it re-iterable before handing it over.
		groupRecords.doneAdding();
		groupRecords.setDestructiveIteration(false);

		processContext(groupRecords, writer, false, outMetrics);
	}
}
 
Example #3
Source File: RevertSam.java    From picard with MIT License 6 votes vote down vote up
/**
 * Sets up either one query-name sorter per read group or a single sorter for all records.
 *
 * @param outputByReadGroup when true, a sorter is registered in {@code sorterMap} for every entry
 *                          of {@code headerMap} and {@code singleSorter} is left null
 * @param headerMap         read group id to output header; only read when outputByReadGroup is true
 * @param singleOutHeader   header for the single sorter; only read when outputByReadGroup is false
 * @param maxRecordsInRam   passed to every SortingCollection that is created
 */
RevertSamSorter(
        final boolean outputByReadGroup,
        final Map<String, SAMFileHeader> headerMap,
        final SAMFileHeader singleOutHeader,
        final int maxRecordsInRam) {

    this.outputByReadGroup = outputByReadGroup;
    if (!outputByReadGroup) {
        // One sorter shared by all records.
        singleSorter = SortingCollection.newInstance(
                SAMRecord.class,
                new BAMRecordCodec(singleOutHeader),
                new SAMRecordQueryNameComparator(),
                maxRecordsInRam);
    } else {
        // One sorter per read group, keyed by read group id.
        singleSorter = null;
        for (final Map.Entry<String, SAMFileHeader> headerEntry : headerMap.entrySet()) {
            final String groupId = headerEntry.getKey();
            final SortingCollection<SAMRecord> groupSorter = SortingCollection.newInstance(
                    SAMRecord.class,
                    new BAMRecordCodec(headerEntry.getValue()),
                    new SAMRecordQueryNameComparator(),
                    maxRecordsInRam);
            sorterMap.put(groupId, groupSorter);
        }
    }
}
 
Example #4
Source File: BAMSplitGuesser.java    From Hadoop-BAM with MIT License 5 votes vote down vote up
public BAMSplitGuesser(
		SeekableStream ss, InputStream headerStream, Configuration conf)
	throws IOException
{
	inFile = ss;

	header = SAMHeaderReader.readSAMHeaderFrom(headerStream, conf);
	referenceSequenceCount = header.getSequenceDictionary().size();

	bamCodec = new BAMRecordCodec(null, new LazyBAMRecordFactory());
}
 
Example #5
Source File: SAMRecordWritable.java    From Hadoop-BAM with MIT License 5 votes vote down vote up
/**
 * Encodes the wrapped record to {@code out} through a freshly created BAMRecordCodec.
 *
 * @param out destination of the encoded record
 * @throws IOException declared by the overridden method
 */
@Override public void write(DataOutput out) throws IOException {
	// Whether the codec receives a header should not matter in theory: the BAM
	// representation of an alignment does not depend on header data, only its
	// interpretation does, and a plain read/write codec should have nothing to
	// say about that. (In practice it already matters for decode(), which is
	// why LazyBAMRecordFactory exists.)
	final BAMRecordCodec recordCodec = new BAMRecordCodec(record.getHeader());
	final DataOutputWrapper sink = new DataOutputWrapper(out);
	recordCodec.setOutputStream(sink);
	recordCodec.encode(record);
}
 
Example #6
Source File: CramToBam_OBA_Function.java    From cramtools with Apache License 2.0 5 votes vote down vote up
/**
 * Stores the CRAM header and builds the helpers of this converter from its SAM file header:
 * a container parser, a record factory, a BAM record codec and a normalizer.
 *
 * @param header          CRAM header whose SAM file header configures all helpers
 * @param referenceSource passed to the CramNormalizer
 */
CramToBam_OBA_Function(CramHeader header, ReferenceSource referenceSource) {
	this.header = header;

	// Every helper is configured from the same SAM file header.
	this.parser = new ContainerParser(header.getSamFileHeader());
	this.f = new Cram2SamRecordFactory(header.getSamFileHeader());
	this.codec = new BAMRecordCodec(header.getSamFileHeader());
	this.n = new CramNormalizer(header.getSamFileHeader(), referenceSource);

	log.info("converter created");
}
 
Example #7
Source File: BAMRecordViewTest.java    From cramtools with Apache License 2.0 5 votes vote down vote up
/**
 * Decodes all records found in the first {@code view.start} bytes of {@code view.buf}.
 *
 * @param view      record view whose buffer is decoded
 * @param samHeader header given to the BAMRecordCodec
 * @return the decoded records, in decoding order; empty if the first decode returns null
 */
private List<SAMRecord> toSAMRecord(BAMRecordView view, SAMFileHeader samHeader) {
	final List<SAMRecord> decoded = new ArrayList<SAMRecord>();

	final BAMRecordCodec codec = new BAMRecordCodec(samHeader);
	codec.setInputStream(new ByteArrayInputStream(view.buf, 0, view.start));

	// decode() returning null ends the loop.
	for (SAMRecord next = codec.decode(); next != null; next = codec.decode()) {
		decoded.add(next);
	}
	return decoded;
}
 
Example #8
Source File: BAMRecordViewTest.java    From cramtools with Apache License 2.0 5 votes vote down vote up
/**
 * Round-trip check: builds a single unmapped record through BAMRecordView, decodes the
 * resulting buffer with BAMRecordCodec and verifies every field that was set.
 */
@Test
public void test1() {
	// Write one record into a 1 KiB scratch buffer.
	BAMRecordView view = new BAMRecordView(new byte[1024]);
	view.setReadName("readName");
	view.setFlags(4);
	view.setRefID(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
	view.setAlignmentStart(SAMRecord.NO_ALIGNMENT_START);
	view.setMappingScore(SAMRecord.NO_MAPPING_QUALITY);
	view.setCigar(new Cigar());
	view.setMateRefID(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
	view.setMateAlStart(SAMRecord.NO_ALIGNMENT_START);
	view.setInsertSize(0);
	// Explicit byte literal instead of "A".getBytes(): the latter encodes with the platform
	// default charset, which would make the fixture platform-dependent.
	view.setBases(new byte[] { 'A' });
	view.setQualityScores(new byte[] { 0 });
	view.addTag(SAMTagUtil.getSingleton().AM, new byte[] { 'c', 0 }, 0, 1);
	view.finish();

	SAMFileHeader samHeader = new SAMFileHeader();

	// Decode the buffer back into a SAMRecord.
	BAMRecordCodec bc = new BAMRecordCodec(samHeader);
	bc.setInputStream(new ByteArrayInputStream(view.buf));
	SAMRecord record = bc.decode();

	// Every field must come back exactly as it was written.
	assertThat(record.getReadName(), is("readName"));
	assertThat(record.getFlags(), is(4));
	assertThat(record.getReferenceIndex(), is(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX));
	assertThat(record.getAlignmentStart(), is(SAMRecord.NO_ALIGNMENT_START));
	assertThat(record.getMappingQuality(), is(SAMRecord.NO_MAPPING_QUALITY));
	assertThat(record.getCigar().getCigarElements().size(), is(0));
	assertThat(record.getMateReferenceIndex(), is(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX));
	assertThat(record.getMateAlignmentStart(), is(SAMRecord.NO_ALIGNMENT_START));
	assertThat(record.getInferredInsertSize(), is(0));
	assertThat(record.getReadString(), is("A"));
	assertThat(record.getBaseQualityString(), is("!"));

	// The AM tag was written with the bytes { 'c', 0 } and is expected back as a Byte of value 0.
	Object amTag = record.getAttribute("AM");
	assertTrue(amTag instanceof Byte);
	Byte amValue = (Byte) amTag;
	assertThat(amValue, equalTo((byte) 0));
}
 
Example #9
Source File: ComputeUMISharing.java    From Drop-seq with MIT License 4 votes vote down vote up
/**
 * Sorts the input records with PARENT_CHILD_COMPARATOR, groups them with GROUPING_COMPARATOR,
 * and for each child subgroup that follows a parent subgroup with the same COLLAPSE_TAG value
 * emits one UmiSharingMetrics row to OUTPUT.
 *
 * <p>The sorting iterator and the reader are released in a finally block, and the output
 * writer is closed even when writing the metrics fails.
 *
 * @return 0 on success
 * @throws RuntimeIOException if closing the output writer fails
 */
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    // Make sure this is modifiable
    EDIT_DISTANCE = new ArrayList<>(EDIT_DISTANCE);
    // Pad with zeros so that there is one edit distance per count tag.
    while (EDIT_DISTANCE.size() < COUNT_TAG.size()) {
        EDIT_DISTANCE.add(0);
    }
    parentEditDistanceMatcher = new ParentEditDistanceMatcher(this.COUNT_TAG, this.EDIT_DISTANCE, this.FIND_INDELS, this.NUM_THREADS);

    final MetricsFile<UmiSharingMetrics, Integer> outFile = getMetricsFile();

    final SamReader reader = SamReaderFactory.makeDefault().open(INPUT);
    CloseableIterator<SAMRecord> sortedIter = null;
    try {
        final ProgressLogger progressLogger = new ProgressLogger(log, 1000000, "Sorting");
        Iterator<SAMRecord> iter = reader.iterator();
        if (LOCUS_FUNCTION_LIST.size() > 0) {
            iter = new GeneFunctionIteratorWrapper(iter, this.GENE_NAME_TAG,
                    this.GENE_STRAND_TAG, this.GENE_FUNCTION_TAG, false, this.STRAND_STRATEGY,
                    this.LOCUS_FUNCTION_LIST);
        }

        sortedIter = SortingIteratorFactory.create(SAMRecord.class, iter,
                PARENT_CHILD_COMPARATOR, new BAMRecordCodec(reader.getFileHeader()), MAX_RECORDS_IN_RAM,
                (SortingIteratorFactory.ProgressCallback<SAMRecord>) progressLogger::record);

        final PeekableIterator<List<SAMRecord>> subgroupIterator =
                new PeekableIterator<List<SAMRecord>>(new GroupingIterator<SAMRecord>(
                        new ProgressLoggingIterator(sortedIter, new ProgressLogger(log, 1000000, "Grouping")),
                        GROUPING_COMPARATOR));

        List<SAMRecord> parentSubgroup = null;
        Set<TagValues> parentTuples = new HashSet<>();

        while (subgroupIterator.hasNext()) {
            // A subgroup whose COLLAPSE_TAG differs from the current parent's starts a new parent;
            // otherwise it is a child of the current parent and yields one metrics row.
            if (parentSubgroup == null ||
            !parentSubgroup.get(0).getAttribute(COLLAPSE_TAG).equals(subgroupIterator.peek().get(0).getAttribute(COLLAPSE_TAG))) {
                parentSubgroup = subgroupIterator.next();
                parentTuples = parentEditDistanceMatcher.getValues(parentSubgroup);
            } else {
                final List<SAMRecord> childSubgroup = subgroupIterator.next();
                final Set<TagValues> childTuples = parentEditDistanceMatcher.getValues(childSubgroup);
                final UmiSharingMetrics metrics = new UmiSharingMetrics();
                metrics.PARENT = parentSubgroup.get(0).getAttribute(COLLAPSE_TAG).toString();
                metrics.CHILD = childSubgroup.get(0).getAttribute(UNCOLLAPSED_TAG).toString();
                metrics.NUM_PARENT = parentTuples.size();
                metrics.NUM_CHILD = childTuples.size();
                metrics.NUM_SHARED = parentEditDistanceMatcher.computeNumShared(parentTuples, childTuples);
                metrics.FRAC_SHARED = metrics.NUM_SHARED/(double)metrics.NUM_CHILD;
                outFile.addMetric(metrics);
            }
        }
    } finally {
        // Release the sorting iterator (NOTE(review): presumably this also frees any temporary
        // spill files - confirm against SortingIteratorFactory) and the reader, on success and
        // on failure alike.
        if (sortedIter != null) {
            CloserUtil.close(sortedIter);
        }
        CloserUtil.close(reader);
    }

    // try-with-resources closes the writer even if writing the metrics throws.
    try (BufferedWriter w = IOUtil.openFileForBufferedWriting(OUTPUT)) {
        outFile.write(w);
    } catch (IOException e) {
        throw new RuntimeIOException("Problem writing " + OUTPUT.getAbsolutePath(), e);
    }
    return 0;
}
 
Example #10
Source File: SortingSAMRecordCollection.java    From abra2 with MIT License 4 votes vote down vote up
private SortingSAMRecordCollection(SAMRecord[] recordArray, SAMFileHeader header, java.util.Comparator<SAMRecord> comparator, int maxRecordsInRAM, String tempDir) {
	reads = SortingCollection2.newInstance(recordArray, SAMRecord.class, new BAMRecordCodec(header), comparator, maxRecordsInRAM, new File(tempDir));
}