Java Code Examples for htsjdk.samtools.SAMFileHeader#addSequence()

The following examples show how to use htsjdk.samtools.SAMFileHeader#addSequence() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: SdfStatistics.java    From rtg-tools with BSD 2-Clause "Simplified" License 6 votes vote down vote up
static void printSAMHeader(SequencesReader reader, final Appendable out, boolean specified) throws IOException {
  final ReferenceGenome rg = new ReferenceGenome(reader, ReferenceGenome.SEX_ALL, ReferenceGenome.ReferencePloidy.AUTO);
  final SAMFileHeader header = new SAMFileHeader();
  header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
  SamUtils.addProgramRecord(header);
  final int[] lengths = reader.sequenceLengths(0, reader.numberSequences());
  for (int i = 0; i < lengths.length; ++i) {
    final String name = reader.hasNames() ? reader.name(i) : ("sequence_" + i);
    final ReferenceSequence s = specified ? rg.sequence(name) : null;
    if (s == null || s.isSpecified()) {
      final SAMSequenceRecord record = new SAMSequenceRecord(name, lengths[i]);
      header.addSequence(record);
    }
  }
  if (reader.getSdfId().available()) {
    header.addComment(SamUtils.TEMPLATE_SDF_ATTRIBUTE + reader.getSdfId());
  }
  final ByteArrayOutputStream bo = new ByteArrayOutputStream();
  new SAMFileWriterFactory().makeSAMWriter(header, true, bo).close();
  out.append(bo.toString());
}
 
Example 2
Source File: IntervalUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test(expectedExceptions={UserException.MalformedFile.class, UserException.MalformedGenomeLoc.class}, dataProvider="invalidIntervalTestData")
public void testLoadIntervalsInvalidPicardIntervalHandling(GenomeLocParser genomeLocParser,
                                              String contig, int intervalStart, int intervalEnd ) throws Exception {

    SAMFileHeader picardFileHeader = new SAMFileHeader();
    picardFileHeader.addSequence(genomeLocParser.getContigInfo("1"));
    // Intentionally add an extra contig not in our genomeLocParser's sequence dictionary
    // so that we can add intervals to the Picard interval file for contigs not in our dictionary
    picardFileHeader.addSequence(new SAMSequenceRecord("2", 100000));

    IntervalList picardIntervals = new IntervalList(picardFileHeader);
    picardIntervals.add(new Interval(contig, intervalStart, intervalEnd, true, "dummyname"));

    File picardIntervalFile = createTempFile("testLoadIntervalsInvalidPicardIntervalHandling", ".intervals");
    picardIntervals.write(picardIntervalFile);

    List<String> intervalArgs = new ArrayList<>(1);
    intervalArgs.add(picardIntervalFile.getAbsolutePath());

    // loadIntervals() will validate all intervals against the sequence dictionary in our genomeLocParser,
    // and should throw for all intervals in our invalidIntervalTestData set
    IntervalUtils.loadIntervals(intervalArgs, IntervalSetRule.UNION, IntervalMergingRule.ALL, 0, genomeLocParser);
}
 
Example 3
Source File: IntervalUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test(expectedExceptions={UserException.CouldNotReadInputFile.class, UserException.MalformedGenomeLoc.class, UserException.MalformedFile.class}, dataProvider="invalidIntervalTestData")
public void testIntervalFileToListInvalidPicardIntervalHandling(GenomeLocParser genomeLocParser,
                                   String contig, int intervalStart, int intervalEnd ) throws Exception {

    final SAMFileHeader picardFileHeader = new SAMFileHeader();
    picardFileHeader.addSequence(genomeLocParser.getContigInfo("1"));
    picardFileHeader.addSequence(new SAMSequenceRecord("2", 100000));

    final IntervalList picardIntervals = new IntervalList(picardFileHeader);
    picardIntervals.add(new Interval(contig, intervalStart, intervalEnd, true, "dummyname"));

    final File picardIntervalFile = createTempFile("testIntervalFileToListInvalidPicardIntervalHandling", ".interval_list");
    picardIntervals.write(picardIntervalFile);

    // loadIntervals() will validate all intervals against the sequence dictionary in our genomeLocParser,
    // and should throw for all intervals in our invalidIntervalTestData set
    IntervalUtils.parseIntervalArguments(genomeLocParser, picardIntervalFile.getAbsolutePath());
}
 
Example 4
Source File: Merge.java    From cramtools with Apache License 2.0 6 votes vote down vote up
private static SAMFileHeader mergeHeaders(List<RecordSource> sources) {
	SAMFileHeader header = new SAMFileHeader();
	for (RecordSource source : sources) {
		SAMFileHeader h = source.reader.getFileHeader();

		for (SAMSequenceRecord seq : h.getSequenceDictionary().getSequences()) {
			if (header.getSequenceDictionary().getSequence(seq.getSequenceName()) == null)
				header.addSequence(seq);
		}

		for (SAMProgramRecord pro : h.getProgramRecords()) {
			if (header.getProgramRecord(pro.getProgramGroupId()) == null)
				header.addProgramRecord(pro);
		}

		for (String comment : h.getComments())
			header.addComment(comment);

		for (SAMReadGroupRecord rg : h.getReadGroups()) {
			if (header.getReadGroup(rg.getReadGroupId()) == null)
				header.addReadGroup(rg);
		}
	}

	return header;
}
 
Example 5
Source File: BaseErrorAggregationTest.java    From picard with MIT License 5 votes vote down vote up
@Test
public void testBaseErrorAggregation() {
    final SAMSequenceRecord samSequenceRecord = new SAMSequenceRecord("chr1", 200);
    final SAMFileHeader samFileHeader = new SAMFileHeader();
    samFileHeader.addSequence(samSequenceRecord);

    final SAMRecordSetBuilder builder = new SAMRecordSetBuilder();

    final List<SAMRecord> samRecords = builder.addPair("Read1234", 0, 1, 1,
            false, false, "16M", "16M", true, false, 20);
    final SAMRecord samRecord1 = samRecords.get(0);
    final SAMRecord samRecord2 = samRecords.get(1);

    samRecord1.setReadBases("CgTGtGGAcAAAgAAA".getBytes());
    samRecord2.setReadBases("CcTGGtGAcAAAgAAA".getBytes());
    final byte[] refBases = "CATGGGGAAAAAAAAA".getBytes();

    BaseErrorAggregation<?> baseErrorAggregation = new BaseErrorAggregation<>(OverlappingReadsErrorCalculator::new, ReadBaseStratification.readOrdinalityStratifier);

    final int length = getLengthAndAddBases(samSequenceRecord, samRecord1, samRecord2, refBases, baseErrorAggregation);

    final ErrorMetric[] metrics = baseErrorAggregation.getMetrics();
    final OverlappingErrorMetric metric1 = (OverlappingErrorMetric) metrics[0];
    metric1.calculateDerivedFields();
    Assert.assertEquals(metric1.COVARIATE, ReadBaseStratification.ReadOrdinality.FIRST.name());
    Assert.assertEquals(metric1.TOTAL_BASES, length);
    Assert.assertEquals(metric1.NUM_BASES_WITH_OVERLAPPING_READS, length);
    Assert.assertEquals(metric1.NUM_DISAGREES_WITH_REFERENCE_ONLY, 2L);
    Assert.assertEquals(metric1.NUM_DISAGREES_WITH_REF_AND_MATE, 1L);
    Assert.assertEquals(metric1.NUM_THREE_WAYS_DISAGREEMENT, 1L);

    final OverlappingErrorMetric metric2 = (OverlappingErrorMetric) metrics[1];
    metric2.calculateDerivedFields();
    Assert.assertEquals(metric2.COVARIATE, ReadBaseStratification.ReadOrdinality.SECOND.name());
    Assert.assertEquals(metric2.TOTAL_BASES, length);
    Assert.assertEquals(metric2.NUM_BASES_WITH_OVERLAPPING_READS, length);
    Assert.assertEquals(metric2.NUM_DISAGREES_WITH_REFERENCE_ONLY, 2L);
    Assert.assertEquals(metric2.NUM_DISAGREES_WITH_REF_AND_MATE, 1L);
    Assert.assertEquals(metric2.NUM_THREE_WAYS_DISAGREEMENT, 1L);
}
 
Example 6
Source File: BaseErrorAggregationTest.java    From picard with MIT License 5 votes vote down vote up
@Test
public void testBaseErrorAggregation2() {
    final SAMSequenceRecord samSequenceRecord = new SAMSequenceRecord("chr1", 200);
    final SAMFileHeader samFileHeader = new SAMFileHeader();
    samFileHeader.addSequence(samSequenceRecord);

    final SAMRecordSetBuilder builder = new SAMRecordSetBuilder();

    final List<SAMRecord> samRecords = builder.addPair("Read1234", 0, 1, 1,
            false, false, "16M", "16M", true, false, 20);
    final SAMRecord samRecord1 = samRecords.get(0);
    final SAMRecord samRecord2 = samRecords.get(1);

    samRecord1.setReadBases("CgTGtGGAcAAAgAAA".getBytes());
    samRecord2.setReadBases("CcTGGtGAcAAAgAAA".getBytes());
    final byte[] refBases = "CATGGGGAAAAAAAAA".getBytes();

    BaseErrorAggregation<?> baseErrorAggregation = new BaseErrorAggregation<>(OverlappingReadsErrorCalculator::new, ReadBaseStratification.readDirectionStratifier);

    final int length = getLengthAndAddBases(samSequenceRecord, samRecord1, samRecord2, refBases, baseErrorAggregation);

    final ErrorMetric[] metrics = baseErrorAggregation.getMetrics();
    final OverlappingErrorMetric metric1 = (OverlappingErrorMetric) metrics[0];
    metric1.calculateDerivedFields();
    Assert.assertEquals(metric1.COVARIATE, ReadBaseStratification.ReadDirection.POSITIVE.toString());
    Assert.assertEquals(metric1.TOTAL_BASES, length);
    Assert.assertEquals(metric1.NUM_BASES_WITH_OVERLAPPING_READS, length);
    Assert.assertEquals(metric1.NUM_DISAGREES_WITH_REFERENCE_ONLY, 2L);
    Assert.assertEquals(metric1.NUM_DISAGREES_WITH_REF_AND_MATE, 1L);
    Assert.assertEquals(metric1.NUM_THREE_WAYS_DISAGREEMENT, 1L);

    final OverlappingErrorMetric metric2 = (OverlappingErrorMetric) metrics[1];
    metric2.calculateDerivedFields();
    Assert.assertEquals(metric2.COVARIATE, ReadBaseStratification.ReadDirection.NEGATIVE.toString());
    Assert.assertEquals(metric2.TOTAL_BASES, length);
    Assert.assertEquals(metric2.NUM_BASES_WITH_OVERLAPPING_READS, length);
    Assert.assertEquals(metric2.NUM_DISAGREES_WITH_REFERENCE_ONLY, 2L);
    Assert.assertEquals(metric2.NUM_DISAGREES_WITH_REF_AND_MATE, 1L);
    Assert.assertEquals(metric2.NUM_THREE_WAYS_DISAGREEMENT, 1L);
}
 
Example 7
Source File: WgsMetricsTest.java    From picard with MIT License 5 votes vote down vote up
private IntervalList buildIntervalList(final int start, final int end) {
    final SAMFileHeader header = new SAMFileHeader();
    header.addSequence(new SAMSequenceRecord("CONTIG", 100000000));
    final IntervalList intervals = new IntervalList(header);
    if (0 < start) intervals.add(new Interval("CONTIG", start, end));
    return intervals;
}
 
Example 8
Source File: PSBwaUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
static void addReferenceSequencesToHeader(final SAMFileHeader header,
                                          final String referenceDictionaryPath) {
    final List<SAMSequenceRecord> refSeqs = getReferenceSequences(referenceDictionaryPath);
    for (final SAMSequenceRecord rec : refSeqs) {
        if (header.getSequence(rec.getSequenceName()) == null) {
            header.addSequence(rec);
        }
    }
}
 
Example 9
Source File: BaseErrorAggregationTest.java    From picard with MIT License 4 votes vote down vote up
@Test
public void testBaseErrorAggregation3() {
    final SAMSequenceRecord samSequenceRecord = new SAMSequenceRecord("chr1", 200);
    final SAMFileHeader samFileHeader = new SAMFileHeader();
    samFileHeader.addSequence(samSequenceRecord);

    final SAMRecordSetBuilder builder = new SAMRecordSetBuilder();

    final List<SAMRecord> samRecords = builder.addPair("Read1234", 0, 1, 1,
            false, false, "16M", "16M", true, false, 20);
    final SAMRecord samRecord1 = samRecords.get(0);
    final SAMRecord samRecord2 = samRecords.get(1);

    samRecord1.setReadBases("CgTGtGGAcAAAgAAA".getBytes());
    samRecord2.setReadBases("CcTGGtGAcAAAgAAA".getBytes());
    final byte[] refBases = "CATGGGGAAAAAAAAA".getBytes();

    BaseErrorAggregation<?> baseErrorAggregation = new BaseErrorAggregation<>(SimpleErrorCalculator::new, ReadBaseStratification.referenceBaseStratifier);

    final int length = getLengthAndAddBases(samSequenceRecord, samRecord1, samRecord2, refBases, baseErrorAggregation);

    final ErrorMetric[] metrics = baseErrorAggregation.getMetrics();
    final BaseErrorMetric metricA = Arrays.stream(metrics).map(a -> (BaseErrorMetric) a)
            .filter(a -> a.COVARIATE.equals("A")).findFirst().get();
    metricA.calculateDerivedFields();
    Assert.assertEquals(metricA.COVARIATE, "A");
    Assert.assertEquals(metricA.TOTAL_BASES, 11);
    Assert.assertEquals(metricA.ERROR_BASES, 3);

    final BaseErrorMetric metricC = Arrays.stream(metrics).map(a -> (BaseErrorMetric) a)
            .filter(a -> a.COVARIATE.equals("C")).findFirst().get();
    metricA.calculateDerivedFields();
    Assert.assertEquals(metricC.COVARIATE, "C");
    Assert.assertEquals(metricC.TOTAL_BASES, 5);
    Assert.assertEquals(metricC.ERROR_BASES, 1);

    final BaseErrorMetric metricG = Arrays.stream(metrics).map(a -> (BaseErrorMetric) a)
            .filter(a -> a.COVARIATE.equals("G")).findFirst().get();
    metricA.calculateDerivedFields();
    Assert.assertEquals(metricG.COVARIATE, "G");
    Assert.assertEquals(metricG.TOTAL_BASES, 5);
    Assert.assertEquals(metricG.ERROR_BASES, 1);

    final BaseErrorMetric metricT = Arrays.stream(metrics).map(a -> (BaseErrorMetric) a)
            .filter(a -> a.COVARIATE.equals("T")).findFirst().get();
    metricA.calculateDerivedFields();
    Assert.assertEquals(metricT.COVARIATE, "T");
    Assert.assertEquals(metricT.TOTAL_BASES, 11);
    Assert.assertEquals(metricT.ERROR_BASES, 3);

}
 
Example 10
Source File: Evidence2SAM.java    From cramtools with Apache License 2.0 4 votes vote down vote up
public static void main(String[] args) throws IOException, ParseException {
	EvidenceRecordFileIterator iterator = new EvidenceRecordFileIterator(new File(args[0]));
	Read context = new Read();
	SAMFileHeader header = new SAMFileHeader();
	header.setSortOrder(SAMFileHeader.SortOrder.unsorted);
	SAMSequenceRecord samSequenceRecord = new SAMSequenceRecord("chr10", 135534747);
	samSequenceRecord.setAttribute(SAMSequenceRecord.ASSEMBLY_TAG, iterator.assembly_ID);
	String readGroup = String.format("%s-%s", iterator.assembly_ID, iterator.chromosome);
	SAMReadGroupRecord readGroupRecord = new SAMReadGroupRecord(readGroup);
	readGroupRecord.setAttribute(SAMReadGroupRecord.READ_GROUP_SAMPLE_TAG, iterator.sample);
	readGroupRecord.setAttribute(SAMReadGroupRecord.PLATFORM_UNIT_TAG, readGroup);
	readGroupRecord.setAttribute(SAMReadGroupRecord.SEQUENCING_CENTER_TAG, "\"Complete Genomics\"");
	Date date = new SimpleDateFormat("yyyy-MMM-dd hh:mm:ss.S").parse(iterator.generatedAt);
	readGroupRecord.setAttribute(SAMReadGroupRecord.DATE_RUN_PRODUCED_TAG,
			new SimpleDateFormat("yyyy-MM-dd").format(date));
	readGroupRecord.setAttribute(SAMReadGroupRecord.PLATFORM_TAG, "\"Complete Genomics\"");
	header.addReadGroup(readGroupRecord);

	header.addSequence(samSequenceRecord);

	SAMFileWriterFactory f = new SAMFileWriterFactory();
	SAMFileWriter samWriter;
	if (args.length > 1)
		samWriter = f.makeBAMWriter(header, false, new File(args[1]));
	else
		samWriter = f.makeSAMWriter(header, false, System.out);
	int i = 0;
	long time = System.currentTimeMillis();
	DedupIterator dedupIt = new DedupIterator(iterator);

	while (dedupIt.hasNext()) {
		EvidenceRecord evidenceRecord = dedupIt.next();
		if (evidenceRecord == null)
			throw new RuntimeException();
		try {
			context.reset(evidenceRecord);
			context.parse();
		} catch (Exception e) {
			System.err.println("Failed on line:");
			System.err.println(evidenceRecord.line);
			throw new RuntimeException(e);
		}

		SAMRecord[] samRecords = context.toSAMRecord(header);
		for (SAMRecord samRecord : samRecords) {
			samRecord.setAttribute(SAMTag.RG.name(), readGroup);
			samWriter.addAlignment(samRecord);
		}

		i++;
		if (i % 1000 == 0) {
			if (System.currentTimeMillis() - time > 10 * 1000) {
				time = System.currentTimeMillis();
				System.err.println(i);
			}
		}

		if (i > 10000)
			break;
	}
	samWriter.close();
}
 
Example 11
Source File: TestBAMRecordView.java    From cramtools with Apache License 2.0 4 votes vote down vote up
@Test
public void test() throws IOException {
	byte[] buf = new byte[1024];
	BAMRecordView view = new BAMRecordView(buf);
	view.setRefID(0);
	view.setAlignmentStart(77);
	view.setMappingScore(44);
	view.setIndexBin(99);
	view.setFlags(555);
	view.setMateRefID(0);
	view.setMateAlStart(78);
	view.setInsertSize(133);

	view.setReadName("name1");
	view.setCigar(TextCigarCodec.decode("10M"));
	view.setBases("AAAAAAAAAA".getBytes());
	view.setQualityScores("BBBBBBBBBB".getBytes());

	int id = 'A' << 16 | 'M' << 8 | 'A';
	view.addTag(id, "Q".getBytes(), 0, 1);

	int len = view.finish();

	System.out.println(Arrays.toString(Arrays.copyOf(buf, len)));

	ByteArrayOutputStream baos = new ByteArrayOutputStream();

	SAMFileHeader header = new SAMFileHeader();
	header.addSequence(new SAMSequenceRecord("14", 14));

	ByteArrayOutputStream baos2 = new ByteArrayOutputStream();
	SAMFileWriter writer = new SAMFileWriterFactory().makeBAMWriter(header, true, baos2);
	SAMRecord record = new SAMRecord(header);
	record.setReferenceIndex(0);
	record.setAlignmentStart(1);
	record.setCigarString("10M");
	record.setFlags(555);
	record.setMappingQuality(44);
	record.setMateReferenceIndex(0);
	record.setMateAlignmentStart(0);
	record.setInferredInsertSize(133);
	record.setReadName("name1");
	record.setReadBases("AAAAAAAAAA".getBytes());
	record.setBaseQualities("BBBBBBBBBB".getBytes());
	record.setAttribute("AM", 'Q');

	System.out.println("BAMFileWriter.addAlignment():");
	writer.addAlignment(record);
	System.out.println(".");
	writer.close();

	System.out.println("------------------------------------------");
	System.out.println();
	System.out.println(new String(baos2.toByteArray()));
	System.out.println();

	SAMFileReader.setDefaultValidationStringency(ValidationStringency.SILENT);
	SAMFileReader reader2 = new SAMFileReader(new ByteArrayInputStream(baos2.toByteArray()));
	SAMRecordIterator iterator = reader2.iterator();
	while (iterator.hasNext()) {
		record = iterator.next();
		System.out.println(record.getSAMString());
	}
	System.out.println("------------------------------------------");

	BlockCompressedOutputStream bcos = new BlockCompressedOutputStream(baos, null);
	bcos.write("BAM\1".getBytes());
	bcos.write(toByteArray(header));
	CramInt.writeInt32(header.getSequenceDictionary().size(), bcos);
	for (final SAMSequenceRecord sequenceRecord : header.getSequenceDictionary().getSequences()) {
		byte[] bytes = sequenceRecord.getSequenceName().getBytes();
		CramInt.writeInt32(bytes.length + 1, bcos);
		bcos.write(sequenceRecord.getSequenceName().getBytes());
		bcos.write(0);
		CramInt.writeInt32(sequenceRecord.getSequenceLength(), bcos);
	}
	bcos.write(buf, 0, len);
	bcos.close();

	System.out.println(new String(baos.toByteArray()));

	SAMFileReader reader = new SAMFileReader(new ByteArrayInputStream(baos.toByteArray()));
	iterator = reader.iterator();
	while (iterator.hasNext()) {
		record = iterator.next();
		System.out.println(record.getSAMString());
	}
	reader.close();

}