Java Code Examples for htsjdk.samtools.SAMFileHeader#addComment()

The following examples show how to use htsjdk.samtools.SAMFileHeader#addComment() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: CollapseBarcodesInPlace.java    From Drop-seq with MIT License 6 votes vote down vote up
public void processOnlyPrimary () {
       final SamHeaderAndIterator inputs = openInputs();
	CloseableIterator<SAMRecord> inputSam = inputs.iterator;
	SAMFileHeader header = inputs.header;
	header.addComment("Edit distance collapsed tag " +  this.PRIMARY_BARCODE + " to new tag " + this.OUT_BARCODE+ " with edit distance "+ this.EDIT_DISTANCE);
       SAMFileWriter writer= new SAMFileWriterFactory().makeSAMOrBAMWriter(header, true, this.OUTPUT);

	// gather up the barcodes that exist in the BAM
       final SamHeaderAndIterator inputs2 = openInputs();
	ObjectCounter<String> barcodes = new BamTagHistogram().getBamTagCounts(inputs2.iterator, this.PRIMARY_BARCODE,this.MINIMUM_MAPPING_QUALITY, this.FILTER_PCR_DUPLICATES);
       CloserUtil.close(inputs2.iterator);

	// filter barcodes by #reds in each barcode.
	barcodes=filterBarcodesByNumReads(barcodes, this.MIN_NUM_READS_NONCORE);

	// collapse them
	Map<String, String> childParentBarcodes=collapseBarcodes(this.MIN_NUM_READS_CORE, this.NUM_CORE_BARCODES, barcodes, this.FIND_INDELS, this.EDIT_DISTANCE);
	// iterate through the reads and retag with the proper reads.
	// log.info("STUFF");
	retagReads(inputSam, writer, childParentBarcodes, this.PRIMARY_BARCODE, this.OUT_BARCODE);
	// collapsed.size();

	CloserUtil.close(inputSam);
	writer.close();
}
 
Example 2
Source File: SdfStatistics.java    From rtg-tools with BSD 2-Clause "Simplified" License 6 votes vote down vote up
static void printSAMHeader(SequencesReader reader, final Appendable out, boolean specified) throws IOException {
  final ReferenceGenome rg = new ReferenceGenome(reader, ReferenceGenome.SEX_ALL, ReferenceGenome.ReferencePloidy.AUTO);
  final SAMFileHeader header = new SAMFileHeader();
  header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
  SamUtils.addProgramRecord(header);
  final int[] lengths = reader.sequenceLengths(0, reader.numberSequences());
  for (int i = 0; i < lengths.length; ++i) {
    final String name = reader.hasNames() ? reader.name(i) : ("sequence_" + i);
    final ReferenceSequence s = specified ? rg.sequence(name) : null;
    if (s == null || s.isSpecified()) {
      final SAMSequenceRecord record = new SAMSequenceRecord(name, lengths[i]);
      header.addSequence(record);
    }
  }
  if (reader.getSdfId().available()) {
    header.addComment(SamUtils.TEMPLATE_SDF_ATTRIBUTE + reader.getSdfId());
  }
  final ByteArrayOutputStream bo = new ByteArrayOutputStream();
  new SAMFileWriterFactory().makeSAMWriter(header, true, bo).close();
  out.append(bo.toString());
}
 
Example 3
Source File: SamUtilsTest.java    From rtg-tools with BSD 2-Clause "Simplified" License 6 votes vote down vote up
public void testRunCommentUpdate() {
  final SAMFileHeader sf = new SAMFileHeader();
  sf.addComment("READ-SDF-ID:" + Long.toHexString(123456));
  final UUID uuid = UUID.randomUUID();
  sf.addComment(SamUtils.RUN_ID_ATTRIBUTE + uuid);
  sf.addComment("TEMPLATE-SDF-ID:$$$$$$$");
  sf.addComment("BLAH:KLJERLWJ");

  SamUtils.updateRunId(sf);

  for (final String s : sf.getComments()) {
    if (s.startsWith(SamUtils.RUN_ID_ATTRIBUTE)) {
      assertFalse(s.contains(uuid.toString()));
    }
  }
}
 
Example 4
Source File: AddCommentsToBam.java    From picard with MIT License 6 votes vote down vote up
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    if (INPUT.getAbsolutePath().endsWith(".sam")) {
        throw new PicardException("SAM files are not supported");
    }

    final SAMFileHeader samFileHeader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).getFileHeader(INPUT);
    for (final String comment : COMMENT) {
        if (comment.contains("\n")) {
            throw new PicardException("Comments can not contain a new line");
        }
        samFileHeader.addComment(comment);
    }

    BamFileIoUtils.reheaderBamFile(samFileHeader, INPUT, OUTPUT, CREATE_MD5_FILE, CREATE_INDEX);

    return 0;
}
 
Example 5
Source File: FastqToSam.java    From picard with MIT License 6 votes vote down vote up
/** Creates a simple header with the values provided on the command line. */
public SAMFileHeader createSamFileHeader() {
    final SAMReadGroupRecord rgroup = new SAMReadGroupRecord(this.READ_GROUP_NAME);
    rgroup.setSample(this.SAMPLE_NAME);
    if (this.LIBRARY_NAME != null) rgroup.setLibrary(this.LIBRARY_NAME);
    if (this.PLATFORM != null) rgroup.setPlatform(this.PLATFORM);
    if (this.PLATFORM_UNIT != null) rgroup.setPlatformUnit(this.PLATFORM_UNIT);
    if (this.SEQUENCING_CENTER != null) rgroup.setSequencingCenter(SEQUENCING_CENTER);
    if (this.PREDICTED_INSERT_SIZE != null) rgroup.setPredictedMedianInsertSize(PREDICTED_INSERT_SIZE);
    if (this.DESCRIPTION != null) rgroup.setDescription(this.DESCRIPTION);
    if (this.RUN_DATE != null) rgroup.setRunDate(this.RUN_DATE);
    if (this.PLATFORM_MODEL != null) rgroup.setPlatformModel(this.PLATFORM_MODEL);
    if (this.PROGRAM_GROUP != null) rgroup.setProgramGroup(this.PROGRAM_GROUP);

    final SAMFileHeader header = new SAMFileHeader();
    header.addReadGroup(rgroup);

    for (final String comment : COMMENT) {
        header.addComment(comment);
    }

    header.setSortOrder(this.SORT_ORDER);
    return header ;
}
 
Example 6
Source File: Merge.java    From cramtools with Apache License 2.0 6 votes vote down vote up
private static SAMFileHeader mergeHeaders(List<RecordSource> sources) {
	SAMFileHeader header = new SAMFileHeader();
	for (RecordSource source : sources) {
		SAMFileHeader h = source.reader.getFileHeader();

		for (SAMSequenceRecord seq : h.getSequenceDictionary().getSequences()) {
			if (header.getSequenceDictionary().getSequence(seq.getSequenceName()) == null)
				header.addSequence(seq);
		}

		for (SAMProgramRecord pro : h.getProgramRecords()) {
			if (header.getProgramRecord(pro.getProgramGroupId()) == null)
				header.addProgramRecord(pro);
		}

		for (String comment : h.getComments())
			header.addComment(comment);

		for (SAMReadGroupRecord rg : h.getReadGroups()) {
			if (header.getReadGroup(rg.getReadGroupId()) == null)
				header.addReadGroup(rg);
		}
	}

	return header;
}
 
Example 7
Source File: CollapseTagWithContext.java    From Drop-seq with MIT License 5 votes vote down vote up
private SAMFileWriter getWriter (final SamReader reader) {
	SAMFileHeader header = reader.getFileHeader();
	SamHeaderUtil.addPgRecord(header, this);
	String context = StringUtil.join(" ", this.CONTEXT_TAGS);
	header.addComment("Edit distance collapsed tag " +  this.COLLAPSE_TAG + " to new tag " + this.OUT_TAG+ " with edit distance "+ this.EDIT_DISTANCE + "using indels=" + this.FIND_INDELS + " in the context of tags [" + context + "]");
       SAMFileWriter writer= new SAMFileWriterFactory().makeSAMOrBAMWriter(header, false, this.OUTPUT);
       return writer;
}
 
Example 8
Source File: SamUtilsTest.java    From rtg-tools with BSD 2-Clause "Simplified" License 5 votes vote down vote up
public void testSamRunId() {
  final MemoryPrintStream mps = new MemoryPrintStream();
  Diagnostic.setLogStream(mps.printStream());
  final SAMFileHeader header = new SAMFileHeader();
  header.addComment(SamUtils.RUN_ID_ATTRIBUTE + "booyahhhhh");
  SamUtils.logRunId(header);
  assertTrue(mps.toString().contains("Referenced SAM file with RUN-ID: booyahhhhh"));
  mps.reset();
  assertEquals("", mps.toString().trim());
  SamUtils.logRunId(header);
  assertEquals("", mps.toString().trim());
}
 
Example 9
Source File: BAMTestUtil.java    From Hadoop-BAM with MIT License 5 votes vote down vote up
public static File writeBamFileWithLargeHeader() throws IOException {
  SAMRecordSetBuilder samRecordSetBuilder =
      new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.queryname);
  for (int i = 0; i < 1000; i++) {
    int chr = 20;
    int start1 = (i + 1) * 1000;
    int start2 = start1 + 100;
    samRecordSetBuilder.addPair(String.format("test-read-%03d", i), chr, start1,
        start2);
  }

  final File bamFile = File.createTempFile("test", ".bam");
  bamFile.deleteOnExit();
  SAMFileHeader samHeader = samRecordSetBuilder.getHeader();
  StringBuffer sb = new StringBuffer();
  for (int i = 0; i < 1000000; i++) {
    sb.append("0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789");
  }
  samHeader.addComment(sb.toString());
  final SAMFileWriter bamWriter = new SAMFileWriterFactory()
      .makeSAMOrBAMWriter(samHeader, true, bamFile);
  for (final SAMRecord rec : samRecordSetBuilder.getRecords()) {
    bamWriter.addAlignment(rec);
  }
  bamWriter.close();

  return bamFile;
}
 
Example 10
Source File: SamUtils.java    From rtg-tools with BSD 2-Clause "Simplified" License 4 votes vote down vote up
/**
 * creates a header with the contents from the first file except the read group information is merged from all headers. Does some checking that headers are compatible
 *
 * @param reference The reference (required if any input files are CRAM)
 * @param files SAM files
 * @param ignoreHeaderIncompatibility true if should not care about incompatible header
 * @param expectedSamples if non-null, check that headers contain sample information that overlaps the supplied names   @return the combined header
 * @return the combined header
 * @throws IOException if an I/O error occurs
 */
public static SAMFileHeader getUberHeader(SequencesReader reference, Collection<File> files, boolean ignoreHeaderIncompatibility, String[] expectedSamples) throws IOException {
  if (files.size() == 0) {
    throw new IllegalArgumentException("File list is empty!");
  }
  final HashMap<String, SAMReadGroupRecord> readGroups = new HashMap<>();
  final HashMap<String, String> readGroupsSampleMap = new HashMap<>();
  SAMFileHeader first = null;
  File firstFile = null;
  final StringBuilder errorMessage = new StringBuilder();
  SdfId currentGuid = null;
  final HashSet<String> expectedSamplesSet = expectedSamples == null ? null : new HashSet<>(Arrays.asList(expectedSamples));
  boolean guidMismatch = false;
  for (final File file : files) {
    if (!file.isFile()) {
      errorMessage.append("Input file \"").append(file.getPath()).append("\" is not an ordinary file").append(StringUtils.LS);
      continue;
    }
    try (SamReader sfr = SamUtils.makeSamReader(file, reference)) {
      if (first == null) {
        first = sfr.getFileHeader();
        firstFile = file;
        currentGuid = getReferenceGuid(first);
      } else if (!checkHeaderDictionary(first, sfr.getFileHeader())) {
        Diagnostic.warning(WarningType.SAM_INCOMPATIBLE_HEADERS, firstFile.getPath(), file.getPath());
        if (!ignoreHeaderIncompatibility) {
          throw new NoTalkbackSlimException(ErrorType.SAM_INCOMPATIBLE_HEADER_ERROR, "1");
        }
      }
      final SdfId fileGuid = getReferenceGuid(sfr.getFileHeader());
      if (!currentGuid.check(fileGuid)) {
        guidMismatch = true;
      } else if (!currentGuid.available()) {
        currentGuid = fileGuid;
      }
      if (!sfr.getFileHeader().getReadGroups().isEmpty()) {
        for (final SAMReadGroupRecord r : sfr.getFileHeader().getReadGroups()) {
          final String sample = r.getSample();
          if (!readGroups.containsKey(r.getReadGroupId())) {
            readGroups.put(r.getReadGroupId(), r);
            readGroupsSampleMap.put(r.getReadGroupId(), sample);
          } else {
            //check that the sample isn't different for the same read group id
            if (sample != null && !r.getSample().equals(readGroupsSampleMap.get(r.getReadGroupId()))) {
              Diagnostic.warning(file.getPath() + " contained read group with ID \"" + r.getId() + "\" and sample \"" + sample + "\" but this read group has already been associated with sample \"" + readGroupsSampleMap.get(r.getReadGroupId()) + "\"");
              if (!ignoreHeaderIncompatibility) {
                throw new NoTalkbackSlimException(ErrorType.SAM_INCOMPATIBLE_HEADER_ERROR, "1");
              }
            }
          }
          if (expectedSamplesSet != null) {
            if (sample != null) {
              if (!expectedSamplesSet.contains(sample)) {
                errorMessage.append("Unexpected read group sample name: ").append(sample).append('.').append(StringUtils.LS);
              }
            } else {
              errorMessage.append("Input file \"").append(file.getPath()).append("\" contains a read group with no sample tag: ").append(r.getId()).append('.').append(StringUtils.LS);
            }
          }
        }
      } else if (expectedSamples != null) {
        errorMessage.append("Input file \"").append(file.getPath()).append("\" does not contain read group information.").append(StringUtils.LS);
      }
    }
  }
  if (guidMismatch) {
    Diagnostic.warning("Input SAM files contain mismatching template GUIDs");
  }
  if (errorMessage.length() > 0) {
    throw new NoTalkbackSlimException(errorMessage.toString().trim());
  }

  final SAMFileHeader header = first;
  if (readGroups.size() > 0) {
    final List<SAMReadGroupRecord> recList = new ArrayList<>(readGroups.values());
    header.setReadGroups(recList);
  }
  if (currentGuid.available() && !getReferenceGuid(header).available()) {
    header.addComment(TEMPLATE_SDF_ATTRIBUTE + currentGuid);
  }
  return header;
}
 
Example 11
Source File: MergeSamFiles.java    From picard with MIT License 4 votes vote down vote up
/** Combines multiple SAM/BAM files into one. */
@Override
protected int doWork() {
    boolean matchedSortOrders = true;
    
    // read interval list if it is defined
    final List<Interval> intervalList = (INTERVALS == null ? null : IntervalList.fromFile(INTERVALS).uniqued().getIntervals() );
    // map reader->iterator used if INTERVALS is defined
    final Map<SamReader, CloseableIterator<SAMRecord> > samReaderToIterator = new HashMap<>(INPUT.size());

    final List<Path> inputPaths = IOUtil.getPaths(INPUT);

    // Open the files for reading and writing
    final List<SamReader> readers = new ArrayList<>();
    final List<SAMFileHeader> headers = new ArrayList<>();
    {
        SAMSequenceDictionary dict = null; // Used to try and reduce redundant SDs in memory

        for (final Path inFile : inputPaths) {
            IOUtil.assertFileIsReadable(inFile);
            final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(inFile);
            if (INTERVALS != null) {
                if (!in.hasIndex()) {
                    throw new PicardException("Merging with interval but BAM file is not indexed: " + inFile);
                }
                final CloseableIterator<SAMRecord> samIterator = new SamRecordIntervalIteratorFactory().makeSamRecordIntervalIterator(in, intervalList, true);
                samReaderToIterator.put(in, samIterator);
            }

            readers.add(in);
            headers.add(in.getFileHeader());

            // A slightly hackish attempt to keep memory consumption down when merging multiple files with
            // large sequence dictionaries (10,000s of sequences). If the dictionaries are identical, then
            // replace the duplicate copies with a single dictionary to reduce the memory footprint. 
            if (dict == null) {
                dict = in.getFileHeader().getSequenceDictionary();
            } else if (dict.equals(in.getFileHeader().getSequenceDictionary())) {
                in.getFileHeader().setSequenceDictionary(dict);
            }

            matchedSortOrders = matchedSortOrders && in.getFileHeader().getSortOrder() == SORT_ORDER;
        }
    }

    // If all the input sort orders match the output sort order then just merge them and
    // write on the fly, otherwise setup to merge and sort before writing out the final file
    IOUtil.assertFileIsWritable(OUTPUT);
    final boolean presorted;
    final SAMFileHeader.SortOrder headerMergerSortOrder;
    final boolean mergingSamRecordIteratorAssumeSorted;

    if (matchedSortOrders || SORT_ORDER == SAMFileHeader.SortOrder.unsorted || ASSUME_SORTED || INTERVALS != null ) {
        log.info("Input files are in same order as output so sorting to temp directory is not needed.");
        headerMergerSortOrder = SORT_ORDER;
        mergingSamRecordIteratorAssumeSorted = ASSUME_SORTED;
        presorted = true;
    } else {
        log.info("Sorting input files using temp directory " + TMP_DIR);
        headerMergerSortOrder = SAMFileHeader.SortOrder.unsorted;
        mergingSamRecordIteratorAssumeSorted = false;
        presorted = false;
    }
    final SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(headerMergerSortOrder, headers, MERGE_SEQUENCE_DICTIONARIES);
    final MergingSamRecordIterator iterator;
    // no interval defined, get an iterator for the whole bam
    if (intervalList == null) {
        iterator = new MergingSamRecordIterator(headerMerger, readers, mergingSamRecordIteratorAssumeSorted);
    } else {
        // show warning related to https://github.com/broadinstitute/picard/pull/314/files
        log.info("Warning: merged bams from different interval lists may contain the same read in both files");
        iterator = new MergingSamRecordIterator(headerMerger, samReaderToIterator, true);
    }
    final SAMFileHeader header = headerMerger.getMergedHeader();
    for (final String comment : COMMENT) {
        header.addComment(comment);
    }
    header.setSortOrder(SORT_ORDER);
    final SAMFileWriterFactory samFileWriterFactory = new SAMFileWriterFactory();
    if (USE_THREADING) {
        samFileWriterFactory.setUseAsyncIo(true);
    }
    final SAMFileWriter out = samFileWriterFactory.makeSAMOrBAMWriter(header, presorted, OUTPUT);

    // Lastly loop through and write out the records
    final ProgressLogger progress = new ProgressLogger(log, PROGRESS_INTERVAL);
    while (iterator.hasNext()) {
        final SAMRecord record = iterator.next();
        out.addAlignment(record);
        progress.record(record);
    }

    log.info("Finished reading inputs.");
    for(final CloseableIterator<SAMRecord> iter : samReaderToIterator.values())  CloserUtil.close(iter);
    CloserUtil.close(readers);
    out.close();
    return 0;
}