htsjdk.samtools.SAMProgramRecord Java Examples

The following examples show how to use htsjdk.samtools.SAMProgramRecord. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: BAMDiff.java    From dataflow-java with Apache License 2.0 6 votes vote down vote up
private boolean compareProgramRecords(final SAMFileHeader h1, final SAMFileHeader h2) throws Exception {
  final List<SAMProgramRecord> l1 = h1.getProgramRecords();
  final List<SAMProgramRecord> l2 = h2.getProgramRecords();
  if (!compareValues(l1.size(), l2.size(), "Number of program records")) {
      return false;
  }
  boolean ret = true;
  for (SAMProgramRecord pr1 : l1) {
    for (SAMProgramRecord pr2 : l2) {
      if (pr1.getId().equals(pr2.getId())) {
        ret = compareProgramRecord(pr1, pr2) && ret;
      }
    }
  }

  return ret;
}
 
Example #2
Source File: BAMDiff.java    From dataflow-java with Apache License 2.0 6 votes vote down vote up
private boolean compareProgramRecord(final SAMProgramRecord programRecord1, final SAMProgramRecord programRecord2) throws Exception {
  if (programRecord1 == null && programRecord2 == null) {
      return true;
  }
  if (programRecord1 == null) {
      reportDifference("null", programRecord2.getProgramGroupId(), "Program Record");
      return false;
  }
  if (programRecord2 == null) {
      reportDifference(programRecord1.getProgramGroupId(), "null", "Program Record");
      return false;
  }
  boolean ret = compareValues(programRecord1.getProgramGroupId(), programRecord2.getProgramGroupId(),
          "Program Name");
  final String[] attributes = {"VN", "CL"};
  for (final String attribute : attributes) {
      ret = compareValues(programRecord1.getAttribute(attribute), programRecord2.getAttribute(attribute),
              attribute + " Program Record attribute") && ret;
  }
  return ret;
}
 
Example #3
Source File: HaplotypeBAMDestination.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Create a new HaplotypeBAMDestination
 *
 * @param sourceHeader SAMFileHeader used to seed the output SAMFileHeader for this destination.
 * @param haplotypeReadGroupID read group ID used when writing haplotypes as reads
 */
protected HaplotypeBAMDestination(SAMFileHeader sourceHeader, final String haplotypeReadGroupID) {
    Utils.nonNull(sourceHeader, "sourceHeader cannot be null");
    Utils.nonNull(haplotypeReadGroupID, "haplotypeReadGroupID cannot be null");
    this.haplotypeReadGroupID = haplotypeReadGroupID;

    bamOutputHeader = new SAMFileHeader();
    bamOutputHeader.setSequenceDictionary(sourceHeader.getSequenceDictionary());
    bamOutputHeader.setSortOrder(SAMFileHeader.SortOrder.coordinate);

    final List<SAMReadGroupRecord> readGroups = new ArrayList<>();
    readGroups.addAll(sourceHeader.getReadGroups()); // include the original read groups

    // plus an artificial read group for the haplotypes
    final SAMReadGroupRecord rgRec = new SAMReadGroupRecord(getHaplotypeReadGroupID());
    rgRec.setSample(haplotypeSampleTag);
    rgRec.setSequencingCenter("BI");
    readGroups.add(rgRec);
    bamOutputHeader.setReadGroups(readGroups);
    final List<SAMProgramRecord> programRecords = new ArrayList<>(sourceHeader.getProgramRecords());
    programRecords.add(new SAMProgramRecord("HaplotypeBAMWriter"));
    bamOutputHeader.setProgramRecords(programRecords);
}
 
Example #4
Source File: Merge.java    From cramtools with Apache License 2.0 6 votes vote down vote up
private static SAMFileHeader mergeHeaders(List<RecordSource> sources) {
	SAMFileHeader header = new SAMFileHeader();
	for (RecordSource source : sources) {
		SAMFileHeader h = source.reader.getFileHeader();

		for (SAMSequenceRecord seq : h.getSequenceDictionary().getSequences()) {
			if (header.getSequenceDictionary().getSequence(seq.getSequenceName()) == null)
				header.addSequence(seq);
		}

		for (SAMProgramRecord pro : h.getProgramRecords()) {
			if (header.getProgramRecord(pro.getProgramGroupId()) == null)
				header.addProgramRecord(pro);
		}

		for (String comment : h.getComments())
			header.addComment(comment);

		for (SAMReadGroupRecord rg : h.getReadGroups()) {
			if (header.getReadGroup(rg.getReadGroupId()) == null)
				header.addReadGroup(rg);
		}
	}

	return header;
}
 
Example #5
Source File: SamUtils.java    From rtg-tools with BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Creates a suitable program record from command line arguments and adds it to the header,
 * chaining to other program records as required.
 * @param header header to apply to
 */
public static void addProgramRecord(SAMFileHeader header) {
  final SAMProgramRecord pg = new SAMProgramRecord(Constants.APPLICATION_NAME);
  if (CommandLine.getCommandLine() != null) {
    pg.setCommandLine(CommandLine.getCommandLine());
  } else {
    pg.setCommandLine("Internal");
  }
  pg.setProgramVersion(Environment.getVersion());
  addProgramRecord(header, pg);
}
 
Example #6
Source File: AbstractMarkDuplicatesCommandLineProgram.java    From picard with MIT License 5 votes vote down vote up
/**
 * We have to re-chain the program groups based on this algorithm.  This returns the map from existing program group ID
 * to new program group ID.
 */
protected Map<String, String> getChainedPgIds(final SAMFileHeader outputHeader) {
    final Map<String, String> chainedPgIds;
    // Generate new PG record(s)
    if (PROGRAM_RECORD_ID != null) {
        final SAMFileHeader.PgIdGenerator pgIdGenerator = new SAMFileHeader.PgIdGenerator(outputHeader);
        if (PROGRAM_GROUP_VERSION == null) {
            PROGRAM_GROUP_VERSION = this.getVersion();
        }
        if (PROGRAM_GROUP_COMMAND_LINE == null) {
            PROGRAM_GROUP_COMMAND_LINE = this.getCommandLine();
        }
        chainedPgIds = new HashMap<>();
        for (final String existingId : this.pgIdsSeen) {
            final String newPgId = pgIdGenerator.getNonCollidingId(PROGRAM_RECORD_ID);
            chainedPgIds.put(existingId, newPgId);
            final SAMProgramRecord programRecord = new SAMProgramRecord(newPgId);
            programRecord.setProgramVersion(PROGRAM_GROUP_VERSION);
            programRecord.setCommandLine(PROGRAM_GROUP_COMMAND_LINE);
            programRecord.setProgramName(PROGRAM_GROUP_NAME);
            programRecord.setPreviousProgramGroupId(existingId);
            outputHeader.addProgramRecord(programRecord);
        }
    } else {
        chainedPgIds = null;
    }
    return chainedPgIds;
}
 
Example #7
Source File: GATKTool.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Returns the SAM header suitable for writing SAM/BAM/CRAM files produced by this tool.
 *
 * The default implementation calls {@link #getHeaderForReads} (and makes an empty header if that call returns null)
 * and optionally adds program tag to the header with a program version {@link #getVersion()}, program name {@link #getToolName()}
 * and command line {@link #getCommandLine()}.
 *
 * Subclasses may override.
 *
 * @return SAM header for the SAM writer with (optionally, if {@link #addOutputSAMProgramRecord} is true) program record appropriately.
 */
protected SAMFileHeader getHeaderForSAMWriter(){
    final SAMFileHeader header = getHeaderForReads() == null ? new SAMFileHeader(): getHeaderForReads();
    if (addOutputSAMProgramRecord) {
        final SAMProgramRecord programRecord = new SAMProgramRecord(createProgramGroupID(header));
        programRecord.setProgramVersion(getVersion());
        programRecord.setCommandLine(getCommandLine());
        programRecord.setProgramName(getToolName());
        header.addProgramRecord(programRecord);
    }
    return header;
}
 
Example #8
Source File: GATKTool.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Returns the program group ID that will be used in the SAM writer.
 * Starts with {@link #getToolName} and looks for the first available ID by appending consecutive integers.
 */
private String createProgramGroupID(final SAMFileHeader header) {
    final String toolName = getToolName();

    String pgID = toolName;
    SAMProgramRecord record = header.getProgramRecord(pgID);
    int count = 1;
    while (record != null){
        pgID = toolName + "." + String.valueOf(count++);
        record = header.getProgramRecord(pgID);
    }
    return pgID;
}
 
Example #9
Source File: SingleCellRnaSeqMetricsCollector.java    From Drop-seq with MIT License 4 votes vote down vote up
/**
    * Sets up the reads in cell barcode order.
    * Only adds reads that pass the map quality and are in the set of cell barcodes requested.
    *
    * I've tried adapting this to the TagOrderIterator API, but it seems like I need to add the read groups to the header of the temporary BAM that gets
    * iterated on or this doesn't work.
    */
   private CloseableIterator<SAMRecord> getReadsInTagOrder (final File bamFile, final String primaryTag,
                                                            final List<SAMReadGroupRecord> rg,
                                                            final List<String> allCellBarcodes, final int mapQuality) {

	SamReader reader = SamReaderFactory.makeDefault().open(bamFile);
	SAMSequenceDictionary dict= reader.getFileHeader().getSequenceDictionary();
	List<SAMProgramRecord> programs =reader.getFileHeader().getProgramRecords();

	final Set<String> cellBarcodeSet = new HashSet<> (allCellBarcodes);

	final SAMFileHeader writerHeader = new SAMFileHeader();
	// reader.getFileHeader().setReadGroups(rg);
	for (SAMReadGroupRecord z: rg) {
		reader.getFileHeader().addReadGroup(z);
		writerHeader.addReadGroup(z);
	}
       writerHeader.setSortOrder(SAMFileHeader.SortOrder.queryname);
       writerHeader.setSequenceDictionary(dict);
       for (SAMProgramRecord spr : programs)
		writerHeader.addProgramRecord(spr);

       // This not only filters, but sets the RG attribute on reads it allows through.
       final FilteredIterator<SAMRecord> rgAddingFilter = new FilteredIterator<SAMRecord>(reader.iterator()) {
           @Override
           public boolean filterOut(final SAMRecord r) {
               String cellBarcode = r.getStringAttribute(primaryTag);
               if (cellBarcodeSet.contains(cellBarcode) & r.getMappingQuality() >= mapQuality) {
                   r.setAttribute("RG", cellBarcode);
                   return false;
               } else
				return true;
           }
       };

       ProgressLogger p = new ProgressLogger(log, 1000000, "Preparing reads in core barcodes");
       CloseableIterator<SAMRecord> sortedIterator = SamRecordSortingIteratorFactory.create(writerHeader, rgAddingFilter, new StringTagComparator(primaryTag), p);

	log.info("Sorting finished.");
	return (sortedIterator);
}
 
Example #10
Source File: MergeBamAlignmentTest.java    From picard with MIT License 4 votes vote down vote up
@Test
public void testMerger() throws Exception {
    final File output = File.createTempFile("mergeTest", ".sam");
    output.deleteOnExit();

    doMergeAlignment(unmappedBam, Collections.singletonList(alignedBam),
            null, null, null, null,
            false, true, false, 1,
            "0", "1.0", "align!", "myAligner",
            true, fasta, output,
            SamPairUtil.PairOrientation.FR, null, null, null, null, null);

    SamReader result = SamReaderFactory.makeDefault().open(output);
    Assert.assertEquals(result.getFileHeader().getSequenceDictionary().getSequences().size(), 8,
            "Number of sequences did not match");
    SAMProgramRecord pg = result.getFileHeader().getProgramRecords().get(0);
    Assert.assertEquals(pg.getProgramGroupId(), "0");
    Assert.assertEquals(pg.getProgramVersion(), "1.0");
    Assert.assertEquals(pg.getCommandLine(), "align!");
    Assert.assertEquals(pg.getProgramName(), "myAligner");

    final SAMReadGroupRecord rg = result.getFileHeader().getReadGroups().get(0);
    Assert.assertEquals(rg.getReadGroupId(), "0");
    Assert.assertEquals(rg.getSample(), "Hi,Mom!");

    Assert.assertEquals(result.getFileHeader().getSortOrder(), SAMFileHeader.SortOrder.coordinate);

    for (final SAMRecord sam : result) {
        // This tests that we clip both (a) when the adapter is marked in the unmapped BAM file and
        // (b) when the insert size is less than the read length
        if (sam.getReadName().equals("both_reads_align_clip_adapter") ||
                sam.getReadName().equals("both_reads_align_clip_marked")) {
            Assert.assertEquals(sam.getReferenceName(), "chr7");
            if (sam.getReadNegativeStrandFlag()) {
                Assert.assertEquals(sam.getCigarString(), "5S96M", "Incorrect CIGAR string for " +
                        sam.getReadName());
            } else {
                Assert.assertEquals(sam.getCigarString(), "96M5S", "Incorrect CIGAR string for " +
                        sam.getReadName());
            }
        }
        // This tests that we DON'T clip when we run off the end if there are equal to or more than
        // MIN_ADAPTER_BASES hanging off the end
        else if (sam.getReadName().equals("both_reads_align_min_adapter_bases_exceeded")) {
            Assert.assertEquals(sam.getReferenceName(), "chr7");
            Assert.assertTrue(sam.getCigarString().indexOf("S") == -1,
                    "Read was clipped when it should not be.");
        } else if (sam.getReadName().equals("neither_read_aligns_or_present")) {
            Assert.assertTrue(sam.getReadUnmappedFlag(), "Read should be unmapped but isn't");
        }
        // Two pairs in which only the first read should align
        else if (sam.getReadName().equals("both_reads_present_only_first_aligns") ||
                sam.getReadName().equals("read_2_too_many_gaps")) {
            if (sam.getFirstOfPairFlag()) {
                Assert.assertEquals(sam.getReferenceName(), "chr7", "Read should be mapped but isn't");
            } else {
                Assert.assertTrue(sam.getReadUnmappedFlag(), "Read should not be mapped but is");
            }
        } else {
            throw new Exception("Unexpected read name: " + sam.getReadName());
        }

        final boolean pos = !sam.getReadNegativeStrandFlag();
        Assert.assertEquals(sam.getAttribute("aa"), pos ? "Hello" : "olleH");
        Assert.assertEquals(sam.getAttribute("ab"), pos ? "ATTCGG" : "CCGAAT");
        Assert.assertEquals(sam.getAttribute("ac"), pos ? new byte[] {1,2,3} : new byte[] {3,2,1});
        Assert.assertEquals(sam.getAttribute("as"), pos ? new short[]{1,2,3} : new short[]{3,2,1});
        Assert.assertEquals(sam.getAttribute("ai"), pos ? new int[]  {1,2,3} : new int[]  {3,2,1});
        Assert.assertEquals(sam.getAttribute("af"), pos ? new float[]{1,2,3} : new float[]{3,2,1});
    }

    // Quick test to make sure the program record gets picked up from the file if not specified
    // on the command line.
    doMergeAlignment(unmappedBam, Collections.singletonList(alignedBam),
            null, null, null, null,
            false, true, false, 1,
            null, null, null, null,
            true, fasta, output,
            SamPairUtil.PairOrientation.FR, null, null, null, null, null);

    CloserUtil.close(result);

    result = SamReaderFactory.makeDefault().open(output);
    pg = result.getFileHeader().getProgramRecords().get(0);
    Assert.assertEquals(pg.getProgramGroupId(), "1",
            "Program group ID not picked up correctly from aligned BAM");
    Assert.assertEquals(pg.getProgramVersion(), "2.0",
            "Program version not picked up correctly from aligned BAM");
    Assert.assertNull(pg.getCommandLine(),
            "Program command line not picked up correctly from aligned BAM");
    Assert.assertEquals(pg.getProgramName(), "Hey!",
            "Program name not picked up correctly from aligned BAM");

    CloserUtil.close(result);
}
 
Example #11
Source File: MarkDuplicatesTest.java    From picard with MIT License 4 votes vote down vote up
/**
 * Test that PG header records are created & chained appropriately (or not created), and that the PG record chains
 * are as expected.  MarkDuplicates is used both to merge and to mark dupes in this case.
 * @param suppressPg If true, do not create PG header record.
 * @param expectedPnVnByReadName For each read, info about the expect chain of PG records.
 */
@Test(dataProvider = "pgRecordChainingTest")
public void pgRecordChainingTest(final boolean suppressPg,
                                 final Map<String, List<ExpectedPnAndVn>> expectedPnVnByReadName) {
    final File outputDir = IOUtil.createTempDir(TEST_BASE_NAME + ".", ".tmp");
    outputDir.deleteOnExit();
    try {
        // Run MarkDuplicates, merging the 3 input files, and either enabling or suppressing PG header
        // record creation according to suppressPg.
        final MarkDuplicates markDuplicates = new MarkDuplicates();
        final ArrayList<String> args = new ArrayList<>();
        for (int i = 1; i <= 3; ++i) {
            args.add("INPUT=" + new File(TEST_DATA_DIR, "merge" + i + ".sam").getAbsolutePath());
        }
        final File outputSam = new File(outputDir, TEST_BASE_NAME + ".sam");
        args.add("OUTPUT=" + outputSam.getAbsolutePath());
        args.add("METRICS_FILE=" + new File(outputDir, TEST_BASE_NAME + ".duplicate_metrics").getAbsolutePath());
        args.add("ADD_PG_TAG_TO_READS=true");
        if (suppressPg) args.add("PROGRAM_RECORD_ID=null");

        // I generally prefer to call doWork rather than invoking the argument parser, but it is necessary
        // in this case to initialize the command line.
        // Note that for the unit test, version won't come through because it is obtained through jar
        // manifest, and unit test doesn't run code from a jar.
        Assert.assertEquals(markDuplicates.instanceMain(args.toArray(new String[args.size()])), 0);

        // Read the MarkDuplicates output file, and get the PG ID for each read.  In this particular test,
        // the PG ID should be the same for both ends of a pair.
        final SamReader reader = SamReaderFactory.makeDefault().open(outputSam);

        final Map<String, String> pgIdForReadName = new HashMap<>();
        for (final SAMRecord rec : reader) {
            final String existingPgId = pgIdForReadName.get(rec.getReadName());
            final String thisPgId = rec.getStringAttribute(SAMTag.PG.name());
            if (existingPgId != null) {
                Assert.assertEquals(thisPgId, existingPgId);
            } else {
                pgIdForReadName.put(rec.getReadName(), thisPgId);
            }
        }
        final SAMFileHeader header = reader.getFileHeader();
        CloserUtil.close(reader);

        // Confirm that for each read name, the chain of PG records contains exactly the number that is expected,
        // and that values in the PG chain are as expected.
        for (final Map.Entry<String, List<ExpectedPnAndVn>> entry : expectedPnVnByReadName.entrySet()) {
            final String readName = entry.getKey();
            final List<ExpectedPnAndVn> expectedList = entry.getValue();
            String pgId = pgIdForReadName.get(readName);
            for (final ExpectedPnAndVn expected : expectedList) {
                final SAMProgramRecord programRecord = header.getProgramRecord(pgId);
                if (expected.expectedPn != null) Assert.assertEquals(programRecord.getProgramName(), expected.expectedPn);
                if (expected.expectedVn != null) Assert.assertEquals(programRecord.getProgramVersion(), expected.expectedVn);
                pgId = programRecord.getPreviousProgramGroupId();
            }
            Assert.assertNull(pgId);
        }

    } finally {
        IOUtil.recursiveDelete(outputDir.toPath());
    }
}
 
Example #12
Source File: FixBAMFileHeader.java    From cramtools with Apache License 2.0 4 votes vote down vote up
public void addPG(SAMFileHeader header, String program, String cmd, String version) {
	SAMProgramRecord programRecord = header.createProgramRecord();
	programRecord.setCommandLine(cmd);
	programRecord.setProgramName(program);
	programRecord.setProgramVersion(version);
}