Java Code Examples for htsjdk.samtools.SAMFileHeader#addReadGroup()

The following examples show how to use htsjdk.samtools.SAMFileHeader#addReadGroup() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: DefaultSamFilterTest.java    From rtg-tools with BSD 2-Clause "Simplified" License 6 votes vote down vote up
public void testFilterReadGroup() {
  final SamFilterParams.SamFilterParamsBuilder builder = SamFilterParams.builder().selectReadGroups(Collections.singleton("rg1"));
  final DefaultSamFilter f = new DefaultSamFilter(builder.create());
  final DefaultSamFilter notf = new DefaultSamFilter(builder.invertFilters(true).create());
  final SAMFileHeader h = new SAMFileHeader();
  final SAMReadGroupRecord r1 = new SAMReadGroupRecord("rg1");
  final SAMReadGroupRecord r2 = new SAMReadGroupRecord("rg2");
  h.addReadGroup(r1);
  h.addReadGroup(r2);
  final SAMRecord rec = new SAMRecord(h); // Not unmapped but alignment position == 0
  assertFalse(f.acceptRecord(rec));
  assertTrue(notf.acceptRecord(rec));
  rec.setAttribute(ReadGroupUtils.RG_ATTRIBUTE, r1.getReadGroupId());
  assertTrue(f.acceptRecord(rec));
  assertFalse(notf.acceptRecord(rec));
  rec.setAttribute(ReadGroupUtils.RG_ATTRIBUTE, r2.getReadGroupId());
  assertFalse(f.acceptRecord(rec));
  assertTrue(notf.acceptRecord(rec));
}
 
Example 2
Source File: Merge.java    From cramtools with Apache License 2.0 6 votes vote down vote up
private static SAMFileHeader mergeHeaders(List<RecordSource> sources) {
	SAMFileHeader header = new SAMFileHeader();
	for (RecordSource source : sources) {
		SAMFileHeader h = source.reader.getFileHeader();

		for (SAMSequenceRecord seq : h.getSequenceDictionary().getSequences()) {
			if (header.getSequenceDictionary().getSequence(seq.getSequenceName()) == null)
				header.addSequence(seq);
		}

		for (SAMProgramRecord pro : h.getProgramRecords()) {
			if (header.getProgramRecord(pro.getProgramGroupId()) == null)
				header.addProgramRecord(pro);
		}

		for (String comment : h.getComments())
			header.addComment(comment);

		for (SAMReadGroupRecord rg : h.getReadGroups()) {
			if (header.getReadGroup(rg.getReadGroupId()) == null)
				header.addReadGroup(rg);
		}
	}

	return header;
}
 
Example 3
Source File: FragmentUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
private GATKRead makeOverlappingRead(final String leftFlank, final int leftQual, final String overlapBases,
                                          final byte[] overlapQuals, final String rightFlank, final int rightQual,
                                          final int alignmentStart) {
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader();
    header.addReadGroup(new SAMReadGroupRecord("RG1"));
    final String bases = leftFlank + overlapBases + rightFlank;
    final int readLength = bases.length();
    final GATKRead read = ArtificialReadUtils.createArtificialRead(header, "myRead", 0, alignmentStart, readLength);
    final byte[] leftQuals = Utils.dupBytes((byte) leftQual, leftFlank.length());
    final byte[] rightQuals = Utils.dupBytes((byte) rightQual, rightFlank.length());
    final byte[] quals = Utils.concat(leftQuals, overlapQuals, rightQuals);
    read.setCigar(readLength + "M");
    read.setBases(bases.getBytes());
    read.setBaseQualities(quals);
    read.setReadGroup("RG1");
    read.setMappingQuality(60);
    return read;
}
 
Example 4
Source File: AlignmentContextUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test(expectedExceptions = UserException.ReadMissingReadGroup.class)
public void testNoSample() throws Exception {
    final SAMReadGroupRecord readGroupOne = new SAMReadGroupRecord("rg1");

    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader(1, 1, 1000);
    header.addReadGroup(readGroupOne);

    final Locatable loc = new SimpleInterval("chr1", 1, 1);
    final GATKRead read1 = ArtificialReadUtils.createArtificialRead(header,"read1",0,1,10);
    read1.setReadGroup(readGroupOne.getId());

    final ReadPileup pileup = new ReadPileup(loc, Arrays.asList(read1), 1);
    final AlignmentContext ac = new AlignmentContext(loc, pileup);
    ac.splitContextBySampleName(header);

}
 
Example 5
Source File: FastqToSam.java    From picard with MIT License 6 votes vote down vote up
/** Creates a simple header with the values provided on the command line. */
public SAMFileHeader createSamFileHeader() {
    final SAMReadGroupRecord rgroup = new SAMReadGroupRecord(this.READ_GROUP_NAME);
    rgroup.setSample(this.SAMPLE_NAME);
    if (this.LIBRARY_NAME != null) rgroup.setLibrary(this.LIBRARY_NAME);
    if (this.PLATFORM != null) rgroup.setPlatform(this.PLATFORM);
    if (this.PLATFORM_UNIT != null) rgroup.setPlatformUnit(this.PLATFORM_UNIT);
    if (this.SEQUENCING_CENTER != null) rgroup.setSequencingCenter(SEQUENCING_CENTER);
    if (this.PREDICTED_INSERT_SIZE != null) rgroup.setPredictedMedianInsertSize(PREDICTED_INSERT_SIZE);
    if (this.DESCRIPTION != null) rgroup.setDescription(this.DESCRIPTION);
    if (this.RUN_DATE != null) rgroup.setRunDate(this.RUN_DATE);
    if (this.PLATFORM_MODEL != null) rgroup.setPlatformModel(this.PLATFORM_MODEL);
    if (this.PROGRAM_GROUP != null) rgroup.setProgramGroup(this.PROGRAM_GROUP);

    final SAMFileHeader header = new SAMFileHeader();
    header.addReadGroup(rgroup);

    for (final String comment : COMMENT) {
        header.addComment(comment);
    }

    header.setSortOrder(this.SORT_ORDER);
    return header ;
}
 
Example 6
Source File: CollectGcBiasMetricsTest.java    From picard with MIT License 5 votes vote down vote up
public void setupTest1(final int ID, final String readGroupId, final SAMReadGroupRecord readGroupRecord, final String sample,
                  final String library, final SAMFileHeader header, final SAMRecordSetBuilder setBuilder)
        throws IOException {

    final String separator = ":";
    final int contig1 = 0;
    final int contig2 = 1;
    readGroupRecord.setSample(sample);
    readGroupRecord.setPlatform(platform);
    readGroupRecord.setLibrary(library);
    readGroupRecord.setPlatformUnit(readGroupId);
    header.addReadGroup(readGroupRecord);
    setBuilder.setReadGroup(readGroupRecord);
    setBuilder.setUseNmFlag(true);

    setBuilder.setHeader(header);

    final int max = 800;
    final int min = 1;
    final Random rg = new Random(5);

    //add records that align to chrM and O but not N
    for (int i = 0; i < NUM_READS; i++) {
        final int start = rg.nextInt(max) + min;
        final String newReadName = READ_NAME + separator + ID + separator + i;

        if (i != NUM_READS - 1) {
            setBuilder.addPair(newReadName, contig1, start + ID, start + ID + LENGTH);
        } else {
            setBuilder.addPair(newReadName, contig2, start + ID, start + ID + LENGTH);
        }
    }
}
 
Example 7
Source File: CollectMultipleMetricsTest.java    From picard with MIT License 5 votes vote down vote up
void setup(final int numReads,
           final String readName,
           final int ID,
           final String readGroupId,
           final SAMReadGroupRecord readGroupRecord,
           final String sample,
           final String library,
           final SAMFileHeader header,
           final SAMRecordSetBuilder setBuilder) throws IOException {
    final String separator = ":";
    readGroupRecord.setSample(sample);
    readGroupRecord.setPlatform(platform);
    readGroupRecord.setLibrary(library);
    readGroupRecord.setPlatformUnit(readGroupId);
    header.addReadGroup(readGroupRecord);
    setBuilder.setReadGroup(readGroupRecord);
    setBuilder.setUseNmFlag(true);
    setBuilder.setHeader(header);

    final int max = 15000;
    final int min = 1;
    final Random rg = new Random(5);

    for (int i = 0; i < numReads; i++) {
        final int start = rg.nextInt(max) + min;
        final String newReadName = readName + separator + ID + separator + i;
        setBuilder.addPair(newReadName, 0, start + ID, start + ID + 99);
    }
}
 
Example 8
Source File: SimpleSampleMetadata.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Override
public SAMFileHeader toHeader() {
    final SAMReadGroupRecord readGroupRecord = new SAMReadGroupRecord(MetadataUtils.GATK_CNV_READ_GROUP_ID);
    readGroupRecord.setAttribute(SAMReadGroupRecord.READ_GROUP_SAMPLE_TAG, sampleName);
    final SAMFileHeader header = new SAMFileHeader();
    header.addReadGroup(readGroupRecord);
    return header;
}
 
Example 9
Source File: RevertSam.java    From picard with MIT License 5 votes vote down vote up
private Map<String, SAMFileHeader> createHeaderMap(
        final SAMFileHeader inHeader,
        final SortOrder sortOrder,
        final boolean removeAlignmentInformation) {

    final Map<String, SAMFileHeader> headerMap = new HashMap<>();
    for (final SAMReadGroupRecord readGroup : inHeader.getReadGroups()) {
        final SAMFileHeader header = createOutHeader(inHeader, sortOrder, removeAlignmentInformation);
        header.addReadGroup(readGroup);
        headerMap.put(readGroup.getId(), header);
    }
    return headerMap;
}
 
Example 10
Source File: LocusIteratorByStateUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test(enabled = true, dataProvider = "LIBS_NotHoldingTooManyReads")
public void testLIBS_NotHoldingTooManyReads(final int nReadsPerLocus, final int downsampleTo, final int payloadInBytes) {
    logger.warn(String.format("testLIBS_NotHoldingTooManyReads %d %d %d", nReadsPerLocus, downsampleTo, payloadInBytes));
    final int readLength = 10;

    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader(1, 1, 100000);
    final int nSamples = 1;
    final List<String> samples = new ArrayList<>(nSamples);
    for ( int i = 0; i < nSamples; i++ ) {
        final SAMReadGroupRecord rg = new SAMReadGroupRecord("rg" + i);
        final String sample = "sample" + i;
        samples.add(sample);
        rg.setSample(sample);
        rg.setPlatform(NGSPlatform.ILLUMINA.getDefaultPlatform());
        header.addReadGroup(rg);
    }

    final boolean downsample = downsampleTo != -1;
    final DownsamplingMethod downsampler = downsample
            ? new DownsamplingMethod(DownsampleType.BY_SAMPLE, downsampleTo, null)
            : new DownsamplingMethod(DownsampleType.NONE, null, null);

    final WeakReadTrackingIterator iterator = new WeakReadTrackingIterator(nReadsPerLocus, readLength, payloadInBytes, header);

    final LocusIteratorByState li;
    li = new LocusIteratorByState(
            iterator,
            downsampler,
            false,
            samples,
            header,
            true
    );

    while ( li.hasNext() ) {
        final AlignmentContext next = li.next();
        Assert.assertTrue(next.getBasePileup().size() <= downsampleTo, "Too many elements in pileup " + next);
        // TODO -- assert that there are <= X reads in memory after GC for some X
    }
}
 
Example 11
Source File: NGSPlatformUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * A unit test that creates an artificial read for testing some code that uses reads
 */
@Test(dataProvider = "TestMappings")
public void testPLFromReadWithRG(final String plField, final NGSPlatform expected) {
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader(sequenceDictionary);
    final String rgID = "ID";
    final SAMReadGroupRecord rg = new SAMReadGroupRecord(rgID);
    if ( plField != null )
        rg.setPlatform(plField);
    header.addReadGroup(rg);
    final GATKRead read = ArtificialReadUtils.createArtificialRead(header, "myRead", 0, 1, 10);
    read.setAttribute("RG", rgID);
    Assert.assertEquals(NGSPlatform.fromRead(read, header), expected);
}
 
Example 12
Source File: XGBoostEvidenceFilterUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private static SAMFileHeader initSAMFileHeader() {
    final SAMFileHeader samHeader = createArtificialSamHeader();
    SAMReadGroupRecord readGroup = new SAMReadGroupRecord(readGroupName);
    readGroup.setSample(DEFAULT_SAMPLE_NAME);
    samHeader.addReadGroup(readGroup);
    return samHeader;
}
 
Example 13
Source File: AlignmentContextUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
@Test
public void test1Sample2Readgroups() throws Exception {
    final SAMReadGroupRecord readGroupOne = new SAMReadGroupRecord("rg1");
    readGroupOne.setSample("sample1");
    final SAMReadGroupRecord readGroupTwo = new SAMReadGroupRecord("rg2");
    readGroupTwo.setSample("sample1");

    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader(1, 1, 1000);
    header.addReadGroup(readGroupOne);
    header.addReadGroup(readGroupTwo);

    final Locatable loc = new SimpleInterval("chr1", 1, 1);
    final GATKRead read1 = ArtificialReadUtils.createArtificialRead(header,"read1",0,1,10);
    read1.setReadGroup(readGroupOne.getId());
    final GATKRead read2 = ArtificialReadUtils.createArtificialRead(header,"read2",0,1,10);
    read2.setReadGroup(readGroupTwo.getId());
    final GATKRead read3 = ArtificialReadUtils.createArtificialRead(header,"read3",0,1,10);
    read3.setReadGroup(readGroupOne.getId());
    final GATKRead read4 = ArtificialReadUtils.createArtificialRead(header,"read4",0,1,10);
    read4.setReadGroup(readGroupTwo.getId());
    final GATKRead read5 = ArtificialReadUtils.createArtificialRead(header,"read5",0,1,10);
    read5.setReadGroup(readGroupTwo.getId());
    final GATKRead read6 = ArtificialReadUtils.createArtificialRead(header,"read6",0,1,10);
    read6.setReadGroup(readGroupOne.getId());
    final GATKRead read7 = ArtificialReadUtils.createArtificialRead(header,"read7",0,1,10);
    read7.setReadGroup(readGroupOne.getId());

    final ReadPileup pileup = new ReadPileup(loc, Arrays.asList(read1, read2, read3, read4, read5, read6, read7), 1);

    final AlignmentContext ac = new AlignmentContext(loc, pileup);
    Assert.assertSame(ac.getBasePileup(), pileup);
    Assert.assertEquals(ac.getContig(), loc.getContig());
    Assert.assertEquals(ac.getEnd(), loc.getEnd());
    Assert.assertEquals(ac.getLocation(), loc);
    Assert.assertEquals(ac.getPosition(), loc.getStart());
    Assert.assertEquals(ac.getStart(), loc.getStart());
    Assert.assertEquals(ac.hasPileupBeenDownsampled(), false);
    Assert.assertEquals(ac.size(), pileup.size());

    Assert.assertSame(ac.stratify(AlignmentContext.ReadOrientation.COMPLETE), ac, "Complete");

    final AlignmentContext acFWD = ac.stratify(AlignmentContext.ReadOrientation.FORWARD);
    Assert.assertEquals(acFWD.getLocation(), loc, "Forward Loc");
    Assert.assertEquals((Iterable<?>) acFWD.getBasePileup(), (Iterable<?>)pileup, "Forward Pileup");

    final AlignmentContext acREV = ac.stratify(AlignmentContext.ReadOrientation.REVERSE);
    AlignmentContext emptyAC= new AlignmentContext(loc, new ReadPileup(loc));
    Assert.assertEquals(acREV.getLocation(), loc, "Reverse Loc");
    Assert.assertEquals((Iterable<?>)acREV.getBasePileup(), (Iterable<?>)emptyAC.getBasePileup(), "Reverse pileup");
    Assert.assertNotNull(ac.toString());

    final Map<String, AlignmentContext> bySample = ac.splitContextBySampleName(header);
    Assert.assertEquals(bySample.size(), 1);
    Assert.assertEquals(bySample.keySet(), ReadUtils.getSamplesFromHeader(header));
    final AlignmentContext firstAC = bySample.values().iterator().next();
    Assert.assertEquals(firstAC.getLocation(), ac.getLocation());
    Assert.assertEquals(firstAC.getBasePileup(), ac.getBasePileup());

    final Map<String, AlignmentContext> bySampleAssume1 = ac.splitContextBySampleName("sample1", header);
    Assert.assertEquals(bySampleAssume1.size(), 1);
    Assert.assertEquals(bySampleAssume1.keySet(), ReadUtils.getSamplesFromHeader(header));
    final AlignmentContext firstACAssume1 = bySampleAssume1.values().iterator().next();
    Assert.assertEquals(firstACAssume1.getLocation(), ac.getLocation());
    Assert.assertEquals(firstACAssume1.getBasePileup(), ac.getBasePileup());

    final Map<String, AlignmentContext> stringAlignmentContextMap = AlignmentContext.splitContextBySampleName(pileup, header);
    Assert.assertEquals(stringAlignmentContextMap.keySet(), Collections.singleton("sample1"));
    Assert.assertEquals(stringAlignmentContextMap.get("sample1").getLocation(), loc);
    Assert.assertEquals(stringAlignmentContextMap.get("sample1").getBasePileup(), pileup);
}
 
Example 14
Source File: OrientationTest.java    From rtg-tools with BSD 2-Clause "Simplified" License 4 votes vote down vote up
SAMRecord orientRec(int flag) {
  final SAMFileHeader sfh = new SAMFileHeader();
  sfh.addReadGroup(new SAMReadGroupRecord("RG1"));
  return  makeTestRecord(sfh, flag, "s1", "s2", 10, 85, "RG1");
}
 
Example 15
Source File: ReadPileupUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 * Ensure that basic read group splitting works.
 */
@Test
public void testSplitByReadGroup() {
    final SAMReadGroupRecord readGroupOne = new SAMReadGroupRecord("rg1");
    final SAMReadGroupRecord readGroupTwo = new SAMReadGroupRecord("rg2");

    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader(1, 1, 1000);
    header.addReadGroup(readGroupOne);
    header.addReadGroup(readGroupTwo);

    final GATKRead read1 = ArtificialReadUtils.createArtificialRead(header,"read1",0,1,10);
    read1.setReadGroup(readGroupOne.getId());
    final GATKRead read2 = ArtificialReadUtils.createArtificialRead(header,"read2",0,1,10);
    read2.setReadGroup(readGroupTwo.getId());
    final GATKRead read3 = ArtificialReadUtils.createArtificialRead(header,"read3",0,1,10);
    read3.setReadGroup(readGroupOne.getId());
    final GATKRead read4 = ArtificialReadUtils.createArtificialRead(header,"read4",0,1,10);
    read4.setReadGroup(readGroupTwo.getId());
    final GATKRead read5 = ArtificialReadUtils.createArtificialRead(header,"read5",0,1,10);
    read5.setReadGroup(readGroupTwo.getId());
    final GATKRead read6 = ArtificialReadUtils.createArtificialRead(header,"read6",0,1,10);
    read6.setReadGroup(readGroupOne.getId());
    final GATKRead read7 = ArtificialReadUtils.createArtificialRead(header,"read7",0,1,10);
    read7.setReadGroup(readGroupOne.getId());

    final ReadPileup pileup = new ReadPileup(loc, Arrays.asList(read1, read2, read3, read4, read5, read6, read7), 1);

    final ReadPileup rg1Pileup = pileup.makeFilteredPileup(pe -> "rg1".equals(pe.getRead().getReadGroup()));
    final List<GATKRead> rg1Reads = rg1Pileup.getReads();
    Assert.assertEquals(rg1Reads.size(), 4, "Wrong number of reads in read group rg1");
    Assert.assertEquals(rg1Reads.get(0), read1, "Read " + read1.getName() + " should be in rg1 but isn't");
    Assert.assertEquals(rg1Reads.get(1), read3, "Read " + read3.getName() + " should be in rg1 but isn't");
    Assert.assertEquals(rg1Reads.get(2), read6, "Read " + read6.getName() + " should be in rg1 but isn't");
    Assert.assertEquals(rg1Reads.get(3), read7, "Read " + read7.getName() + " should be in rg1 but isn't");

    final ReadPileup rg2Pileup = pileup.makeFilteredPileup(pe -> "rg2".equals(pe.getRead().getReadGroup()));
    final List<GATKRead> rg2Reads = rg2Pileup.getReads();
    Assert.assertEquals(rg2Reads.size(), 3, "Wrong number of reads in read group rg2");
    Assert.assertEquals(rg2Reads.get(0), read2, "Read " + read2.getName() + " should be in rg2 but isn't");
    Assert.assertEquals(rg2Reads.get(1), read4, "Read " + read4.getName() + " should be in rg2 but isn't");
    Assert.assertEquals(rg2Reads.get(2), read5, "Read " + read5.getName() + " should be in rg2 but isn't");
}
 
Example 16
Source File: ReadPileupUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
@Test
public void testGetPileupForSample() {
    // read groups and header
    final SAMReadGroupRecord[] readGroups = new SAMReadGroupRecord[5];
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader(1, 1, 1000);
    int s = 0;
    // readGroups[4] is left null intentionally
    for(final String sample: Arrays.asList("sample1", "sample1", "sample2", null)) {
        readGroups[s] = new SAMReadGroupRecord("rg"+s);
        readGroups[s].setSample(sample);
        header.addReadGroup(readGroups[s++]);
    }

    // reads
    final int rgCoverage = 4;
    final List<GATKRead> reads = new ArrayList<>(rgCoverage*readGroups.length);
    for(int i = 0; i < rgCoverage; i++) {
        for(final SAMReadGroupRecord rg: readGroups) {
            final GATKRead r = ArtificialReadUtils.createArtificialRead(header, (rg == null) ? "null" : rg.getReadGroupId() + "_" + rg.getSample() + "_" + i, 0, 1, 10);
            if(rg != null) {
                r.setReadGroup(rg.getId());
            }
            reads.add(r);
        }
    }

    // pileup
    final ReadPileup pileup = new ReadPileup(loc, reads, 1);
    // sample1
    final ReadPileup pileupSample1 = pileup.getPileupForSample("sample1", header);
    Assert.assertEquals(pileupSample1.size(), rgCoverage*2, "Wrong number of elements for sample1");
    Assert.assertTrue( pileupSample1.getReadGroupIDs().contains("rg0"), "Pileup for sample1 should contain rg0");
    Assert.assertTrue( pileupSample1.getReadGroupIDs().contains("rg1"), "Pileup for sample1 should contain rg1");
    Assert.assertFalse(pileupSample1.getReadGroupIDs().contains("rg2"), "Pileup for sample1 shouldn't contain rg2");
    Assert.assertFalse(pileupSample1.getReadGroupIDs().contains("rg3"), "Pileup for sample1 shouldn't contain rg3");
    Assert.assertFalse(pileupSample1.getReadGroupIDs().contains(null),  "Pileup for sample1 shouldn't contain null read group");

    // sample2
    final ReadPileup pileupSample2 = pileup.getPileupForSample("sample2", header);
    Assert.assertEquals(pileupSample2.size(), rgCoverage, "Wrong number of elements for sample2");
    Assert.assertFalse(pileupSample2.getReadGroupIDs().contains("rg0"), "Pileup for sample2 shouldn't contain rg0");
    Assert.assertFalse(pileupSample2.getReadGroupIDs().contains("rg1"), "Pileup for sample2 shouldn't contain rg1");
    Assert.assertTrue( pileupSample2.getReadGroupIDs().contains("rg2"), "Pileup for sample2 should contain rg2");
    Assert.assertFalse(pileupSample2.getReadGroupIDs().contains("rg3"), "Pileup for sample2 shouldn't contain rg3");
    Assert.assertFalse(pileupSample2.getReadGroupIDs().contains(null),  "Pileup for sample2 shouldn't contain null read group");

    // null sample
    final ReadPileup pileupNull = pileup.getPileupForSample(null, header);
    Assert.assertEquals(pileupNull.size(), rgCoverage*2, "Wrong number of elements for null sample");
    Assert.assertFalse(pileupNull.getReadGroupIDs().contains("rg0"), "Pileup for null sample shouldn't contain rg0");
    Assert.assertFalse(pileupNull.getReadGroupIDs().contains("rg1"), "Pileup for null sample shouldn't contain rg1");
    Assert.assertFalse(pileupNull.getReadGroupIDs().contains("rg2"), "Pileup for null sample shouldn't contain rg2");
    Assert.assertTrue( pileupNull.getReadGroupIDs().contains("rg3"), "Pileup for null sample should contain rg3");
    Assert.assertTrue( pileupNull.getReadGroupIDs().contains(null),  "Pileup for null sample should contain null read group");

}
 
Example 17
Source File: Evidence2SAM.java    From cramtools with Apache License 2.0 4 votes vote down vote up
public static void main(String[] args) throws IOException, ParseException {
	EvidenceRecordFileIterator iterator = new EvidenceRecordFileIterator(new File(args[0]));
	Read context = new Read();
	SAMFileHeader header = new SAMFileHeader();
	header.setSortOrder(SAMFileHeader.SortOrder.unsorted);
	SAMSequenceRecord samSequenceRecord = new SAMSequenceRecord("chr10", 135534747);
	samSequenceRecord.setAttribute(SAMSequenceRecord.ASSEMBLY_TAG, iterator.assembly_ID);
	String readGroup = String.format("%s-%s", iterator.assembly_ID, iterator.chromosome);
	SAMReadGroupRecord readGroupRecord = new SAMReadGroupRecord(readGroup);
	readGroupRecord.setAttribute(SAMReadGroupRecord.READ_GROUP_SAMPLE_TAG, iterator.sample);
	readGroupRecord.setAttribute(SAMReadGroupRecord.PLATFORM_UNIT_TAG, readGroup);
	readGroupRecord.setAttribute(SAMReadGroupRecord.SEQUENCING_CENTER_TAG, "\"Complete Genomics\"");
	Date date = new SimpleDateFormat("yyyy-MMM-dd hh:mm:ss.S").parse(iterator.generatedAt);
	readGroupRecord.setAttribute(SAMReadGroupRecord.DATE_RUN_PRODUCED_TAG,
			new SimpleDateFormat("yyyy-MM-dd").format(date));
	readGroupRecord.setAttribute(SAMReadGroupRecord.PLATFORM_TAG, "\"Complete Genomics\"");
	header.addReadGroup(readGroupRecord);

	header.addSequence(samSequenceRecord);

	SAMFileWriterFactory f = new SAMFileWriterFactory();
	SAMFileWriter samWriter;
	if (args.length > 1)
		samWriter = f.makeBAMWriter(header, false, new File(args[1]));
	else
		samWriter = f.makeSAMWriter(header, false, System.out);
	int i = 0;
	long time = System.currentTimeMillis();
	DedupIterator dedupIt = new DedupIterator(iterator);

	while (dedupIt.hasNext()) {
		EvidenceRecord evidenceRecord = dedupIt.next();
		if (evidenceRecord == null)
			throw new RuntimeException();
		try {
			context.reset(evidenceRecord);
			context.parse();
		} catch (Exception e) {
			System.err.println("Failed on line:");
			System.err.println(evidenceRecord.line);
			throw new RuntimeException(e);
		}

		SAMRecord[] samRecords = context.toSAMRecord(header);
		for (SAMRecord samRecord : samRecords) {
			samRecord.setAttribute(SAMTag.RG.name(), readGroup);
			samWriter.addAlignment(samRecord);
		}

		i++;
		if (i % 1000 == 0) {
			if (System.currentTimeMillis() - time > 10 * 1000) {
				time = System.currentTimeMillis();
				System.err.println(i);
			}
		}

		if (i > 10000)
			break;
	}
	samWriter.close();
}
 
Example 18
Source File: ReadsKeyUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
@DataProvider
public Object[][] artificalReadsForKeys() {
    SAMReadGroupRecord library1 = new SAMReadGroupRecord("rg1");
    library1.setLibrary("library1");
    SAMReadGroupRecord library2 = new SAMReadGroupRecord("rg2");
    library2.setLibrary("library2");

    SAMFileHeader header = hg19Header.clone();
    header.addReadGroup(library1);
    header.addReadGroup(library2);

    return new Object[][]{
            // Test of two groups with different start positions
            {header, createTestRead("name1", "1", 1000, "100M", library1.getReadGroupId(), false),
                    createTestRead("name1", "1", 1200, "100M", library1.getReadGroupId(), true),
                    false,
                    createTestRead("name2", "1", 1010, "100M", library1.getReadGroupId(), false),
                    createTestRead("name2", "1", 1200, "100M", library1.getReadGroupId(), true),},

            // Test of two equivalent groups
            {header, createTestRead("name1", "1", 1000, "100M", library1.getReadGroupId(), false),
                    createTestRead("name1", "1", 1200, "100M", library1.getReadGroupId(), true),
                    true,
                    createTestRead("name2", "1", 1000, "100M", library1.getReadGroupId(), false),
                    createTestRead("name2", "1", 1200, "100M", library1.getReadGroupId(), true),},

            // Test of two equivalent group, different contig
            {header, createTestRead("name1", "1", 1000, "100M", library1.getReadGroupId(), false),
                    createTestRead("name1", "1", 1200, "100M", library1.getReadGroupId(), true),
                    false,
                    createTestRead("name2", "2", 1000, "100M", library1.getReadGroupId(), false),
                    createTestRead("name2", "2", 1200, "100M", library1.getReadGroupId(), true),},

            // Test of two equivalent groups, different orientation
            {header, createTestRead("name1", "1", 1000, "100M", library1.getReadGroupId(), false),
                    createTestRead("name1", "1", 1200, "100M", library1.getReadGroupId(), true),
                    false,
                    createTestRead("name2", "1", 1000, "100M", library1.getReadGroupId(), true),
                    createTestRead("name2", "1", 1200, "100M", library1.getReadGroupId(), true),},

            // Test of two equivalent grops, but different libraries
            {header, createTestRead("name1", "1", 1000, "100M", library1.getReadGroupId(), false),
                    createTestRead("name1", "1", 1200, "100M", library1.getReadGroupId(), true),
                    false,
                    createTestRead("name2", "1", 1000, "100M", library2.getReadGroupId(), false),
                    createTestRead("name2", "1", 1200, "100M", library2.getReadGroupId(), true),},

            // Test of two equivalent groups, but one was soft-clipped to a different start
            {header, createTestRead("name1", "1", 1010, "10S90M", library1.getReadGroupId(), false),
                    createTestRead("name1", "1", 1200, "100M", library1.getReadGroupId(), true),
                    true,
                    createTestRead("name2", "1", 1000, "100M", library1.getReadGroupId(), false),
                    createTestRead("name2", "1", 1200, "100M", library1.getReadGroupId(), true),},

            // Test of two equivalent groups, but one read is on a different contig
            {header, createTestRead("name1", "1", 1000, "100M", library1.getReadGroupId(), false),
                    createTestRead("name1", "1", 1200, "100M", library1.getReadGroupId(), true),
                    false,
                    createTestRead("name2", "1", 1000, "100M", library1.getReadGroupId(), false),
                    createTestRead("name2", "2", 1200, "100M", library1.getReadGroupId(), true),},

    };
}
 
Example 19
Source File: SingleCellRnaSeqMetricsCollector.java    From Drop-seq with MIT License 4 votes vote down vote up
/**
    * Sets up the reads in cell barcode order.
    * Only adds reads that pass the map quality and are in the set of cell barcodes requested.
    *
    * I've tried adapting this to the TagOrderIterator API, but it seems like I need to add the read groups to the header of the temporary BAM that gets
    * iterated on or this doesn't work.
    */
   private CloseableIterator<SAMRecord> getReadsInTagOrder (final File bamFile, final String primaryTag,
                                                            final List<SAMReadGroupRecord> rg,
                                                            final List<String> allCellBarcodes, final int mapQuality) {

	SamReader reader = SamReaderFactory.makeDefault().open(bamFile);
	SAMSequenceDictionary dict= reader.getFileHeader().getSequenceDictionary();
	List<SAMProgramRecord> programs =reader.getFileHeader().getProgramRecords();

	final Set<String> cellBarcodeSet = new HashSet<> (allCellBarcodes);

	final SAMFileHeader writerHeader = new SAMFileHeader();
	// reader.getFileHeader().setReadGroups(rg);
	for (SAMReadGroupRecord z: rg) {
		reader.getFileHeader().addReadGroup(z);
		writerHeader.addReadGroup(z);
	}
       writerHeader.setSortOrder(SAMFileHeader.SortOrder.queryname);
       writerHeader.setSequenceDictionary(dict);
       for (SAMProgramRecord spr : programs)
		writerHeader.addProgramRecord(spr);

       // This not only filters, but sets the RG attribute on reads it allows through.
       final FilteredIterator<SAMRecord> rgAddingFilter = new FilteredIterator<SAMRecord>(reader.iterator()) {
           @Override
           public boolean filterOut(final SAMRecord r) {
               String cellBarcode = r.getStringAttribute(primaryTag);
               if (cellBarcodeSet.contains(cellBarcode) & r.getMappingQuality() >= mapQuality) {
                   r.setAttribute("RG", cellBarcode);
                   return false;
               } else
				return true;
           }
       };

       ProgressLogger p = new ProgressLogger(log, 1000000, "Preparing reads in core barcodes");
       CloseableIterator<SAMRecord> sortedIterator = SamRecordSortingIteratorFactory.create(writerHeader, rgAddingFilter, new StringTagComparator(primaryTag), p);

	log.info("Sorting finished.");
	return (sortedIterator);
}