Java Code Examples for htsjdk.samtools.SAMFileHeader#setSequenceDictionary()

The following examples show how to use htsjdk.samtools.SAMFileHeader#setSequenceDictionary() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: BwaSparkEngine.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * @param ctx           the Spark context
 * @param referenceFile the path to the reference file named <i>_prefix_.fa</i>, which is used to find the image file with name <i>_prefix_.fa.img</i>.
 *                      Can be <code>null</code> if the indexFileName is provided.
 * @param indexFileName the index image file name that already exists, or <code>null</code> to have the image file automatically distributed.
 * @param inputHeader   the SAM file header to use for reads
 * @param refDictionary the sequence dictionary to use for reads if the SAM file header doesn't have one (or it's empty)
 */
public BwaSparkEngine(final JavaSparkContext ctx,
                      final String referenceFile,
                      final String indexFileName,
                      SAMFileHeader inputHeader,
                      final SAMSequenceDictionary refDictionary) {
    Utils.nonNull(referenceFile);
    Utils.nonNull(inputHeader);
    this.ctx = ctx;
    if (indexFileName != null) {
        this.indexFileName = indexFileName;
        this.resolveIndexFileName = false;
    } else {
        String indexFile = referenceFile + REFERENCE_INDEX_IMAGE_FILE_SUFFIX;
        ctx.addFile(indexFile); // distribute index file to all executors
        this.indexFileName = IOUtils.getPath(indexFile).getFileName().toString();
        this.resolveIndexFileName = true;
    }

    if (inputHeader.getSequenceDictionary() == null || inputHeader.getSequenceDictionary().isEmpty()) {
        Utils.nonNull(refDictionary);
        inputHeader = inputHeader.clone();
        inputHeader.setSequenceDictionary(refDictionary);
    }
    broadcastHeader = ctx.broadcast(inputHeader);
}
 
Example 2
Source File: CombineSegmentBreakpoints.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Do all the processing to create a SAMFileHeader for the given AnnotatedIntervalCollections.
 * Throws an exception if unable to generate a header with a non-empty sequence dictionary.
 *
 * @param annotatedIntervalCollection1 Never {@code null}
 * @param annotatedIntervalCollection2 Never {@code null}
 * @return Merged SAMFileHeader for the two annotated interval collections.  Never {@code null}.
 */
private SAMFileHeader createOutputSamFileHeader(final AnnotatedIntervalCollection annotatedIntervalCollection1, final AnnotatedIntervalCollection annotatedIntervalCollection2) {
    Utils.nonNull(annotatedIntervalCollection1);
    Utils.nonNull(annotatedIntervalCollection2);

    final SAMFileHeader samFileHeader1 = annotatedIntervalCollection1.getSamFileHeader();
    final SAMFileHeader samFileHeader2 = annotatedIntervalCollection2.getSamFileHeader();

    assertSequenceDictionaryCompatibility(samFileHeader1, samFileHeader2);
    warnIfOnlyOneSequenceDictionarySpecified(samFileHeader1, samFileHeader2);

    // Since this tool will sort the segments by coordinate, this will be coordinate sorted, regardless of the input.
    final SamFileHeaderMerger samFileHeaderMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate,
            Arrays.asList(samFileHeader1, samFileHeader2),true);

    final SAMFileHeader outputSamFileHeader = samFileHeaderMerger.getMergedHeader();
    if ((outputSamFileHeader.getSequenceDictionary().getReferenceLength() == 0) && (getBestAvailableSequenceDictionary() == null)) {
        throw new UserException.BadInput("Cannot assemble a reference dictionary.  In order to use this tool, one " +
                "of the following conditions must be satisfied:  1)  One or both input files have a SAM File header " +
                "... 2)  A reference is provided (-" + StandardArgumentDefinitions.REFERENCE_SHORT_NAME + ")");
    }
    if (outputSamFileHeader.getSequenceDictionary().getReferenceLength() == 0) {
        outputSamFileHeader.setSequenceDictionary(getBestAvailableSequenceDictionary());
    }
    return outputSamFileHeader;
}
 
Example 3
Source File: HaplotypeMapTest.java    From picard with MIT License 6 votes vote down vote up
@DataProvider(name="haplotypeMapForWriting")
public Object[][] haplotypeMapForWriting() {

    SAMFileHeader header = new SAMFileHeader();
    header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
    SAMSequenceDictionary sd = new SAMSequenceDictionary();
    sd.addSequence(new SAMSequenceRecord("chr1", 101));
    sd.addSequence(new SAMSequenceRecord("chr2", 101));
    sd.addSequence(new SAMSequenceRecord("chr3", 101));
    header.setSequenceDictionary(sd);


    HaplotypeMap newMap = new HaplotypeMap(header);
    HaplotypeBlock t1 = new HaplotypeBlock(0.151560926);
    t1.addSnp(new Snp("snp2", "chr1", 83, (byte)'A', (byte)'G', .16, null));
    t1.addSnp(new Snp("snp1", "chr1", 51, (byte)'T', (byte)'C', 0.15, Collections.singletonList("SQNM_1CHIP_FingerprintAssays")));
    newMap.addHaplotype(t1);
    HaplotypeBlock t2 = new HaplotypeBlock(.02d);
    t2.addSnp(new Snp("snp3", "chr2", 24, (byte)'C', (byte)'A', .20, null));
    newMap.addHaplotype(t2);

    return new Object[][]{{newMap}};
}
 
Example 4
Source File: GatherGeneGCLength.java    From Drop-seq with MIT License 6 votes vote down vote up
/**
 * For a GC record and a fasta sequence, calculate the GC content.
 * Builds intervals of the unique sequences overlapped by exons, calculates the GC content for each, and aggregates results.
 * @param fastaRef
 * @return
 */
private GCResult calculateGCContentUnionExons(final Gene gene, final ReferenceSequence fastaRef, final SAMSequenceDictionary dict) {
	// make an interval list.
	SAMFileHeader h = new SAMFileHeader();
	h.setSequenceDictionary(dict);
	h.setSortOrder(SAMFileHeader.SortOrder.unsorted);
	IntervalList intervalList  = new IntervalList(h);

	for (Transcript t : gene)
		for (Exon e: t.exons)
			intervalList.add(new Interval (gene.getContig(), e.start, e.end, gene.isNegativeStrand(), gene.getName()));

	List<Interval> uniqueIntervals = IntervalList.getUniqueIntervals(intervalList, false);

	// track aggregated GC.
	GCResult result = new GCResult(0, 0, 0);

	for (Interval i: uniqueIntervals) {
		GCResult gcResultInterval = calculateGCContentExon(i, fastaRef);
		result.increment(gcResultInterval);
	}
	return result;
}
 
Example 5
Source File: PSBwaUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Returns header with sequences that were aligned to at least once in reads
 */
static SAMFileHeader removeUnmappedHeaderSequences(final SAMFileHeader header,
                                                   final JavaRDD<GATKRead> reads,
                                                   final Logger logger) {
    final List<String> usedSequences = PSBwaUtils.getAlignedSequenceNames(reads);
    final List<SAMSequenceRecord> usedSequenceRecords = usedSequences.stream()
            .map(seqName -> header.getSequence(seqName))
            .filter(seq -> {
                if (seq != null) return true;
                logger.warn("One or more reads are aligned to sequence " + seq + " but it is not in the header");
                return false;
            })
            .collect(Collectors.toList());
    header.setSequenceDictionary(new SAMSequenceDictionary(usedSequenceRecords));
    return header;
}
 
Example 6
Source File: SamCompareUtilsTest.java    From rtg-tools with BSD 2-Clause "Simplified" License 5 votes vote down vote up
public void test() {
  final SAMFileHeader header = new SAMFileHeader();
  header.setSequenceDictionary(new SAMSequenceDictionary(Arrays.asList(new SAMSequenceRecord("raga", 100), new SAMSequenceRecord("yaga", 100), new SAMSequenceRecord("zaga", 100))));
  final SAMRecord rec1 = new SAMRecord(header);
  rec1.setReferenceIndex(1);
  final SAMRecord rec2 = new SAMRecord(header);
  rec2.setReferenceIndex(2);
  assertEquals(-1, SamCompareUtils.compareSamRecords(rec1, rec2));
  assertEquals(1, SamCompareUtils.compareSamRecords(rec2, rec1));
  rec1.setReferenceIndex(2);
  rec1.setAlignmentStart(50);
  rec2.setAlignmentStart(25);
  assertEquals(1, SamCompareUtils.compareSamRecords(rec1, rec2));
  assertEquals(-1, SamCompareUtils.compareSamRecords(rec2, rec1));
  rec1.setReadPairedFlag(true);
  rec2.setReadPairedFlag(true);
  rec1.setProperPairFlag(true);
  rec2.setProperPairFlag(false);
  rec1.setAlignmentStart(25);
  assertEquals(-1, SamCompareUtils.compareSamRecords(rec1, rec2));
  assertEquals(1, SamCompareUtils.compareSamRecords(rec2, rec1));
  rec2.setProperPairFlag(true);
  rec1.setReadUnmappedFlag(true);
  assertEquals(1, SamCompareUtils.compareSamRecords(rec1, rec2));
  assertEquals(-1, SamCompareUtils.compareSamRecords(rec2, rec1));
  rec2.setReadUnmappedFlag(true);
  assertEquals(0, SamCompareUtils.compareSamRecords(rec1, rec2));
  assertEquals(0, SamCompareUtils.compareSamRecords(rec2, rec1));
  rec1.setReferenceIndex(-1);
  assertEquals(1, SamCompareUtils.compareSamRecords(rec1, rec2));
  assertEquals(-1, SamCompareUtils.compareSamRecords(rec2, rec1));
  rec2.setReferenceIndex(-1);
  assertEquals(0, SamCompareUtils.compareSamRecords(rec2, rec1));
}
 
Example 7
Source File: XGBoostEvidenceFilterUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Create synthetic SAM Header comptible with genome tracts (e.g. has all the primary contigs)
 */
public static SAMFileHeader createArtificialSamHeader() {
    final SAMFileHeader header = new SAMFileHeader();
    header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
    header.setSequenceDictionary(TestUtilsForAssemblyBasedSVDiscovery.bareBoneHg38SAMSeqDict);
    return header;
}
 
Example 8
Source File: GATKBaseTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@BeforeClass
public void initializeHG19Reference() {
    hg19ReferenceReader = new CachingIndexedFastaSequenceFile(IOUtils.getPath(hg19MiniReference));
    hg19Header = new SAMFileHeader();
    hg19Header.setSequenceDictionary(hg19ReferenceReader.getSequenceDictionary());
    hg19GenomeLocParser = new GenomeLocParser(hg19ReferenceReader);
}
 
Example 9
Source File: ReadClipperUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test
public void testSoftClippingOpEdgeCase() {
    final SAMFileHeader header = new SAMFileHeader();
    header.setSequenceDictionary(hg19GenomeLocParser.getSequenceDictionary());

    final GATKRead read = ReadClipperTestUtils.makeReadFromCigar("8M");
    final ReadClipper clipper = new ReadClipper(read);
    final ClippingOp op = new ClippingOp(0, 7);
    clipper.addOp(op);
    final GATKRead softResult = clipper.clipRead(ClippingRepresentation.SOFTCLIP_BASES);
    Assert.assertEquals(softResult.getCigar().toString(), "7S1M");
}
 
Example 10
Source File: ByIntervalListVariantContextIteratorTest.java    From picard with MIT License 5 votes vote down vote up
private SAMFileHeader getSAMFileHeader() {
    final VCFFileReader reader = getReader(CEU_TRIOS_SNPS_VCF);
    final SAMSequenceDictionary dict = reader.getFileHeader().getSequenceDictionary();
    reader.close();
    final SAMFileHeader header = new SAMFileHeader();
    header.setSequenceDictionary(dict);
    return header;
}
 
Example 11
Source File: MergeBamAlignmentTest.java    From picard with MIT License 4 votes vote down vote up
/**
 * @return a 2-element array in which the first element is the unmapped SAM, and the second the mapped SAM.
 */
private File[] createSamFilesToBeMerged(final MultipleAlignmentSpec[] specs) {
    try {
        final File unmappedSam = File.createTempFile("unmapped.", ".sam");
        unmappedSam.deleteOnExit();
        final SAMFileWriterFactory factory = new SAMFileWriterFactory();
        final SAMFileHeader header = new SAMFileHeader();
        header.setSortOrder(SAMFileHeader.SortOrder.queryname);
        final SAMRecord unmappedRecord = new SAMRecord(header);

        unmappedRecord.setReadName("theRead");
        unmappedRecord.setReadString("ACGTACGTACGTACGT");
        unmappedRecord.setBaseQualityString("5555555555555555");
        unmappedRecord.setReadUnmappedFlag(true);

        final SAMFileWriter unmappedWriter = factory.makeSAMWriter(header, false, unmappedSam);
        unmappedWriter.addAlignment(unmappedRecord);
        unmappedWriter.close();

        final File alignedSam = File.createTempFile("aligned.", ".sam");
        alignedSam.deleteOnExit();

        final String sequence = "chr1";
        // Populate the header with SAMSequenceRecords
        header.setSequenceDictionary(SAMSequenceDictionaryExtractor.extractDictionary(sequenceDict2.toPath()));

        final SAMFileWriter alignedWriter = factory.makeSAMWriter(header, false, alignedSam);
        for (final MultipleAlignmentSpec spec : specs) {
            final SAMRecord alignedRecord = new SAMRecord(header);
            alignedRecord.setReadName(unmappedRecord.getReadName());
            alignedRecord.setReadBases(unmappedRecord.getReadBases());
            alignedRecord.setBaseQualities(unmappedRecord.getBaseQualities());
            alignedRecord.setReferenceName(sequence);
            alignedRecord.setAlignmentStart(1);
            alignedRecord.setReadNegativeStrandFlag(spec.reverseStrand);
            alignedRecord.setCigarString(spec.cigar);
            alignedRecord.setMappingQuality(spec.mapQ);
            if (spec.oneOfTheBest) {
                alignedRecord.setAttribute(ONE_OF_THE_BEST_TAG, 1);
            }
            alignedWriter.addAlignment(alignedRecord);
        }
        alignedWriter.close();

        return new File[]{unmappedSam, alignedSam};
    } catch (IOException e) {
        throw new PicardException(e.getMessage(), e);
    }
}
 
Example 12
Source File: SplitNCigarReadsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
@Test
public void splitReadAtN() {
    final int maxCigarElements = 9;
    final List<Cigar> cigarList = ReadClipperTestUtils.generateCigarList(maxCigarElements, true);

    // For Debugging use those lines (instead of above cigarList) to create specific read:
    //------------------------------------------------------------------------------------
    // final SAMRecord tmpRead = SAMRecord.createRandomRead(6);
    // tmpRead.setCigarString("1M1N1M");

    // final List<Cigar> cigarList = new ArrayList<>();
    // cigarList.add(tmpRead.getCigar());

    for(Cigar cigar: cigarList){

        final int numOfSplits = numOfNElements(cigar.getCigarElements());

        if(numOfSplits != 0 && isCigarDoesNotHaveEmptyRegionsBetweenNs(cigar)){
            final SAMFileHeader header = new SAMFileHeader();
            header.setSequenceDictionary(hg19GenomeLocParser.getSequenceDictionary());
            final TestManager manager = new TestManager(header, new DummyTestWriter());
            manager.activateWriting();

            GATKRead read = ReadClipperTestUtils.makeReadFromCigar(cigar);
            SplitNCigarReads.splitNCigarRead(read, manager, true, header, true);
            List<List<OverhangFixingManager.SplitRead>> splitReadGroups = manager.getReadsInQueueForTesting();
            final int expectedReads = numOfSplits+1;
            Assert.assertEquals(manager.getNReadsInQueue(), expectedReads, "wrong number of reads after split read with cigar: " + cigar + " at Ns [expected]: " + expectedReads + " [actual value]: " + manager.getNReadsInQueue());
            final int readLengths = consecutiveNonNElements(read.getCigar().getCigarElements());
            int index = 0;
            for(final OverhangFixingManager.SplitRead splitRead: splitReadGroups.get(0)){
                Assert.assertTrue(splitRead.read.getLength() == readLengths,
                        "the " + index + " (starting with 0) split read has a wrong length.\n" +
                                "cigar of original read: " + cigar + "\n" +
                                "expected length: " + readLengths + "\n" +
                                "actual length: " + splitRead.read.getLength() + "\n");
                assertBases(splitRead.read.getBases(), read.getBases());
                index++;
            }
        }
    }
}
 
Example 13
Source File: SAMFileHeader_Utils.java    From cramtools with Apache License 2.0 4 votes vote down vote up
static SAMFileHeader readHeader(final BinaryCodec stream, final ValidationStringency validationStringency,
		final String source) throws IOException {

	final byte[] buffer = new byte[4];
	stream.readBytes(buffer);
	if (!Arrays.equals(buffer, "BAM\1".getBytes())) {
		throw new IOException("Invalid BAM file header");
	}

	final int headerTextLength = stream.readInt();
	final String textHeader = stream.readString(headerTextLength);
	final SAMTextHeaderCodec headerCodec = new SAMTextHeaderCodec();
	headerCodec.setValidationStringency(validationStringency);
	final SAMFileHeader samFileHeader = headerCodec.decode(new StringLineReader(textHeader), source);

	final int sequenceCount = stream.readInt();
	if (samFileHeader.getSequenceDictionary().size() > 0) {
		// It is allowed to have binary sequences but no text sequences, so
		// only validate if both are present
		if (sequenceCount != samFileHeader.getSequenceDictionary().size()) {
			throw new SAMFormatException("Number of sequences in text header ("
					+ samFileHeader.getSequenceDictionary().size() + ") != number of sequences in binary header ("
					+ sequenceCount + ") for file " + source);
		}
		for (int i = 0; i < sequenceCount; i++) {
			final SAMSequenceRecord binarySequenceRecord = readSequenceRecord(stream, source);
			final SAMSequenceRecord sequenceRecord = samFileHeader.getSequence(i);
			if (!sequenceRecord.getSequenceName().equals(binarySequenceRecord.getSequenceName())) {
				throw new SAMFormatException("For sequence " + i
						+ ", text and binary have different names in file " + source);
			}
			if (sequenceRecord.getSequenceLength() != binarySequenceRecord.getSequenceLength()) {
				throw new SAMFormatException("For sequence " + i
						+ ", text and binary have different lengths in file " + source);
			}
		}
	} else {
		// If only binary sequences are present, copy them into
		// samFileHeader
		final List<SAMSequenceRecord> sequences = new ArrayList<SAMSequenceRecord>(sequenceCount);
		for (int i = 0; i < sequenceCount; i++) {
			sequences.add(readSequenceRecord(stream, source));
		}
		samFileHeader.setSequenceDictionary(new SAMSequenceDictionary(sequences));
	}

	return samFileHeader;
}
 
Example 14
Source File: MergeBamAlignmentTest.java    From picard with MIT License 4 votes vote down vote up
/**
 * Confirm that paired reads are rejected by PrimaryAlignmentStrategy.EarliestFragment.
 */
@Test(expectedExceptions = UnsupportedOperationException.class)
public void testEarliestFragmentStrategyPaired() throws Exception {
    final File output = File.createTempFile("mergeTest", ".sam");
    output.deleteOnExit();

    final File unmappedSam = File.createTempFile("unmapped.", ".sam");
    unmappedSam.deleteOnExit();
    final SAMFileWriterFactory factory = new SAMFileWriterFactory();
    final SAMFileHeader header = new SAMFileHeader();
    header.setSortOrder(SAMFileHeader.SortOrder.queryname);
    final String cigar = "16M";

    final SAMRecord firstOfPair = new SAMRecord(header);
    firstOfPair.setReadName("theRead");
    firstOfPair.setReadString("ACGTACGTACGTACGT");
    firstOfPair.setBaseQualityString("5555555555555555");
    firstOfPair.setReadUnmappedFlag(true);
    firstOfPair.setReadPairedFlag(true);
    firstOfPair.setFirstOfPairFlag(true);

    final SAMRecord secondOfPair = new SAMRecord(header);
    secondOfPair.setReadName("theRead");
    secondOfPair.setReadString("ACGTACGTACGTACGT");
    secondOfPair.setBaseQualityString("5555555555555555");
    secondOfPair.setReadUnmappedFlag(true);
    secondOfPair.setReadPairedFlag(true);
    secondOfPair.setSecondOfPairFlag(true);
    SamPairUtil.setMateInfo(firstOfPair, secondOfPair);

    final SAMFileWriter unmappedWriter = factory.makeSAMWriter(header, false, unmappedSam);
    unmappedWriter.addAlignment(firstOfPair);
    unmappedWriter.addAlignment(secondOfPair);
    unmappedWriter.close();

    final File alignedSam = File.createTempFile("aligned.", ".sam");
    alignedSam.deleteOnExit();

    // Populate the header with SAMSequenceRecords
    header.setSequenceDictionary(SAMSequenceDictionaryExtractor.extractDictionary(sequenceDict2.toPath()));

    // Create 2 alignments for each end of pair
    final SAMFileWriter alignedWriter = factory.makeSAMWriter(header, false, alignedSam);
    for (int i = 1; i <= 2; ++i) {
        final SAMRecord firstOfPairAligned = new SAMRecord(header);
        firstOfPairAligned.setReadName(firstOfPair.getReadName());
        firstOfPairAligned.setReadBases(firstOfPair.getReadBases());
        firstOfPairAligned.setBaseQualities(firstOfPair.getBaseQualities());
        firstOfPairAligned.setReferenceName("chr1");
        firstOfPairAligned.setAlignmentStart(i);
        firstOfPairAligned.setCigarString(cigar);
        firstOfPairAligned.setMappingQuality(100);
        firstOfPairAligned.setReadPairedFlag(true);
        firstOfPairAligned.setFirstOfPairFlag(true);
        firstOfPairAligned.setAttribute(SAMTag.HI.name(), i);

        final SAMRecord secondOfPairAligned = new SAMRecord(header);
        secondOfPairAligned.setReadName(secondOfPair.getReadName());
        secondOfPairAligned.setReadBases(secondOfPair.getReadBases());
        secondOfPairAligned.setBaseQualities(secondOfPair.getBaseQualities());
        secondOfPairAligned.setReferenceName("chr1");
        secondOfPairAligned.setAlignmentStart(i + 10);
        secondOfPairAligned.setCigarString(cigar);
        secondOfPairAligned.setMappingQuality(100);
        secondOfPairAligned.setReadPairedFlag(true);
        secondOfPairAligned.setSecondOfPairFlag(true);
        secondOfPairAligned.setAttribute(SAMTag.HI.name(), i);

        SamPairUtil.setMateInfo(firstOfPairAligned, secondOfPairAligned);

        alignedWriter.addAlignment(firstOfPairAligned);
        alignedWriter.addAlignment(secondOfPairAligned);
    }
    alignedWriter.close();

    doMergeAlignment(unmappedSam, Collections.singletonList(alignedSam),
            null, null, null, null,
            false, true, false, 1,
            "0", "1.0", "align!", "myAligner",
            true,
            fasta, output,
            SamPairUtil.PairOrientation.FR,
            MergeBamAlignment.PrimaryAlignmentStrategy.EarliestFragment,
            null, null, null, null);

    Assert.fail("Exception was not thrown");
}
 
Example 15
Source File: MergeBamAlignmentTest.java    From picard with MIT License 4 votes vote down vote up
/**
 * Read a single fragment read from a file, and create one or more aligned records for the read pair based on
 * the lists, merge with the original read, and assert expected results.
 * @param description
 * @param hitSpecs List that describes the aligned SAMRecords to create.
 * @param expectedPrimaryHitIndex Expected value for the HI tag in the primary alignment in the merged output.
 * @param expectedNumReads Expected number of SAMRecords in the merged output.
 * @param expectedPrimaryMapq Mapq of both ends of primary alignment in the merged output.
 * @throws Exception
 */
@Test(dataProvider = "testFragmentMultiHitWithFilteringTestCases")
public void testFragmentMultiHitWithFiltering(final String description, final List<HitSpec> hitSpecs,
                                            final Integer expectedPrimaryHitIndex, final int expectedNumReads,
                                            final int expectedPrimaryMapq) throws Exception {

    // Create the aligned file by copying bases, quals, readname from the unmapped read, and conforming to each HitSpec.
    final File unmappedSam = new File(TEST_DATA_DIR, "multihit.filter.fragment.unmapped.sam");
    final SAMRecordIterator unmappedSamFileIterator = SamReaderFactory.makeDefault().open(unmappedSam).iterator();
    final SAMRecord unmappedRec = unmappedSamFileIterator.next();
    unmappedSamFileIterator.close();
    final File alignedSam = File.createTempFile("aligned.", ".sam");
    alignedSam.deleteOnExit();
    final SAMFileHeader alignedHeader = new SAMFileHeader();
    alignedHeader.setSortOrder(SAMFileHeader.SortOrder.queryname);
    alignedHeader.setSequenceDictionary(SamReaderFactory.makeDefault().getFileHeader(sequenceDict).getSequenceDictionary());
    final SAMFileWriter alignedWriter = new SAMFileWriterFactory().makeSAMWriter(alignedHeader, true, alignedSam);
    for (int i = 0; i < hitSpecs.size(); ++i) {
        final HitSpec hitSpec = hitSpecs.get(i);
        final SAMRecord mappedRec = makeRead(alignedHeader, unmappedRec, hitSpec, i);
        if (mappedRec != null) {
            alignedWriter.addAlignment(mappedRec);
        }
    }
    alignedWriter.close();

    // Merge aligned file with original unmapped file.
    final File mergedSam = File.createTempFile("merged.", ".sam");
    mergedSam.deleteOnExit();

    doMergeAlignment(unmappedSam, Collections.singletonList(alignedSam),
            null, null, null, null,
            false, true, false, 1,
            "0", "1.0", "align!", "myAligner",
            false, fasta, mergedSam,
            null, null, null, null, null, null);

    assertSamValid(mergedSam);

    // Tally metrics and check for agreement with expected.
    final SamReader mergedReader = SamReaderFactory.makeDefault().open(mergedSam);
    int numReads = 0;
    Integer primaryHitIndex = null;
    int primaryMapq = 0;
    for (final SAMRecord rec : mergedReader) {
        ++numReads;
        if (!rec.getNotPrimaryAlignmentFlag()  && !rec.getReadUnmappedFlag()) {
            final Integer hitIndex = rec.getIntegerAttribute(SAMTag.HI.name());
            final int newHitIndex = (hitIndex == null? -1: hitIndex);
            Assert.assertNull(primaryHitIndex);
            primaryHitIndex = newHitIndex;
            primaryMapq = rec.getMappingQuality();
        }
    }
    Assert.assertEquals(numReads, expectedNumReads);
    Assert.assertEquals(primaryHitIndex, expectedPrimaryHitIndex);
    Assert.assertEquals(primaryMapq, expectedPrimaryMapq);
}
 
Example 16
Source File: MergeBamAlignmentTest.java    From picard with MIT License 4 votes vote down vote up
/**
 * Read a single paired-end read from a file, and create one or more aligned records for the read pair based on
 * the lists, merge with the original paired-end read, and assert expected results.
 * @param description
 * @param firstOfPair List that describes the aligned SAMRecords to create for the first end.
 * @param secondOfPair List that describes the aligned SAMRecords to create for the second end.
 * @param expectedPrimaryHitIndex Expected value for the HI tag in the primary alignment in the merged output.
 * @param expectedNumFirst Expected number of first-of-pair SAMRecords in the merged output.
 * @param expectedNumSecond Expected number of second-of-pair SAMRecords in the merged output.
 * @param expectedPrimaryMapq Sum of mapqs of both ends of primary alignment in the merged output.
 * @throws Exception
 */
@Test(dataProvider = "testPairedMultiHitWithFilteringTestCases")
public void testPairedMultiHitWithFiltering(final String description, final List<HitSpec> firstOfPair, final List<HitSpec> secondOfPair,
                                            final Integer expectedPrimaryHitIndex, final int expectedNumFirst,
                                            final int expectedNumSecond, final int expectedPrimaryMapq) throws Exception {

    // Create the aligned file by copying bases, quals, readname from the unmapped read, and conforming to each HitSpec.
    final File unmappedSam = new File(TEST_DATA_DIR, "multihit.filter.unmapped.sam");
    final SAMRecordIterator unmappedSamFileIterator = SamReaderFactory.makeDefault().open(unmappedSam).iterator();
    final SAMRecord firstUnmappedRec = unmappedSamFileIterator.next();
    final SAMRecord secondUnmappedRec = unmappedSamFileIterator.next();
    unmappedSamFileIterator.close();
    final File alignedSam = File.createTempFile("aligned.", ".sam");
    alignedSam.deleteOnExit();
    final SAMFileHeader alignedHeader = new SAMFileHeader();
    alignedHeader.setSortOrder(SAMFileHeader.SortOrder.queryname);
    alignedHeader.setSequenceDictionary(SamReaderFactory.makeDefault().getFileHeader(sequenceDict).getSequenceDictionary());
    final SAMFileWriter alignedWriter = new SAMFileWriterFactory().makeSAMWriter(alignedHeader, true, alignedSam);
    for (int i = 0; i < Math.max(firstOfPair.size(), secondOfPair.size()); ++i) {
        final HitSpec firstHitSpec = firstOfPair.isEmpty()? null: firstOfPair.get(i);
        final HitSpec secondHitSpec = secondOfPair.isEmpty()? null: secondOfPair.get(i);
        final SAMRecord first = makeRead(alignedHeader, firstUnmappedRec, firstHitSpec, true, i);
        final SAMRecord second = makeRead(alignedHeader, secondUnmappedRec, secondHitSpec, false, i);
        if (first != null && second != null) SamPairUtil.setMateInfo(first, second);
        if (first != null) {
            if (second == null) first.setMateUnmappedFlag(true);
            alignedWriter.addAlignment(first);
        }
        if (second != null) {
            if (first == null) second.setMateUnmappedFlag(true);
            alignedWriter.addAlignment(second);
        }
    }
    alignedWriter.close();

    // Merge aligned file with original unmapped file.
    final File mergedSam = File.createTempFile("merged.", ".sam");
    mergedSam.deleteOnExit();

    doMergeAlignment(unmappedSam, Collections.singletonList(alignedSam),
            null, null, null, null,
            false, true, false, 1,
            "0", "1.0", "align!", "myAligner",
            true, fasta, mergedSam,
            null, null, null, null, null, null);

    assertSamValid(mergedSam);

    // Tally metrics and check for agreement with expected.
    final SamReader mergedReader = SamReaderFactory.makeDefault().open(mergedSam);
    int numFirst = 0;
    int numSecond = 0;
    Integer primaryHitIndex = null;
    int primaryMapq = 0;
    for (final SAMRecord rec : mergedReader) {
        if (rec.getFirstOfPairFlag()) ++numFirst;
        if (rec.getSecondOfPairFlag()) ++numSecond;
        if (!rec.getNotPrimaryAlignmentFlag()  && !rec.getReadUnmappedFlag()) {
            final Integer hitIndex = rec.getIntegerAttribute(SAMTag.HI.name());
            final int newHitIndex = (hitIndex == null? -1: hitIndex);
            if (primaryHitIndex == null) primaryHitIndex = newHitIndex;
            else Assert.assertEquals(newHitIndex, primaryHitIndex.intValue());
            primaryMapq += rec.getMappingQuality();
        }
    }
    Assert.assertEquals(numFirst, expectedNumFirst);
    Assert.assertEquals(numSecond, expectedNumSecond);
    Assert.assertEquals(primaryHitIndex, expectedPrimaryHitIndex);
    Assert.assertEquals(primaryMapq, expectedPrimaryMapq);
}
 
Example 17
Source File: BedToIntervalList.java    From picard with MIT License 4 votes vote down vote up
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsReadable(SEQUENCE_DICTIONARY);
    IOUtil.assertFileIsWritable(OUTPUT);
    try {
        // create a new header that we will assign the dictionary provided by the SAMSequenceDictionaryExtractor to.
        final SAMFileHeader header = new SAMFileHeader();
        final SAMSequenceDictionary samSequenceDictionary = SAMSequenceDictionaryExtractor.extractDictionary(SEQUENCE_DICTIONARY.toPath());
        header.setSequenceDictionary(samSequenceDictionary);
        // set the sort order to be sorted by coordinate, which is actually done below
        // by getting the .uniqued() intervals list before we write out the file
        header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
        final IntervalList intervalList = new IntervalList(header);

        final FeatureReader<BEDFeature> bedReader = AbstractFeatureReader.getFeatureReader(INPUT.getAbsolutePath(), new BEDCodec(), false);
        final CloseableTribbleIterator<BEDFeature> iterator = bedReader.iterator();
        final ProgressLogger progressLogger = new ProgressLogger(LOG, (int) 1e6);

        while (iterator.hasNext()) {
            final BEDFeature bedFeature = iterator.next();
            final String sequenceName = bedFeature.getContig();
            final int start = bedFeature.getStart();
            final int end = bedFeature.getEnd();
            // NB: do not use an empty name within an interval
            final String name;
            if (bedFeature.getName().isEmpty()) {
                name = null;
            } else {
                name = bedFeature.getName();
            }

            final SAMSequenceRecord sequenceRecord = header.getSequenceDictionary().getSequence(sequenceName);

            // Do some validation
            if (null == sequenceRecord) {
                if (DROP_MISSING_CONTIGS) {
                    LOG.info(String.format("Dropping interval with missing contig: %s:%d-%d", sequenceName, bedFeature.getStart(), bedFeature.getEnd()));
                    missingIntervals++;
                    missingRegion += bedFeature.getEnd() - bedFeature.getStart();
                    continue;
                }
                throw new PicardException(String.format("Sequence '%s' was not found in the sequence dictionary", sequenceName));
            } else if (start < 1) {
                throw new PicardException(String.format("Start on sequence '%s' was less than one: %d", sequenceName, start));
            } else if (sequenceRecord.getSequenceLength() < start) {
                throw new PicardException(String.format("Start on sequence '%s' was past the end: %d < %d", sequenceName, sequenceRecord.getSequenceLength(), start));
            } else if ((end == 0 && start != 1 ) //special case for 0-length interval at the start of a contig
                    || end < 0 ) {
                throw new PicardException(String.format("End on sequence '%s' was less than one: %d", sequenceName, end));
            } else if (sequenceRecord.getSequenceLength() < end) {
                throw new PicardException(String.format("End on sequence '%s' was past the end: %d < %d", sequenceName, sequenceRecord.getSequenceLength(), end));
            } else if (end < start - 1) {
                throw new PicardException(String.format("On sequence '%s', end < start-1: %d <= %d", sequenceName, end, start));
            }

            final boolean isNegativeStrand = bedFeature.getStrand() == Strand.NEGATIVE;
            final Interval interval = new Interval(sequenceName, start, end, isNegativeStrand, name);
            intervalList.add(interval);

            progressLogger.record(sequenceName, start);
        }
        CloserUtil.close(bedReader);

        if (DROP_MISSING_CONTIGS) {
            if (missingRegion == 0) {
                LOG.info("There were no missing regions.");
            } else {
                LOG.warn(String.format("There were %d missing regions with a total of %d bases", missingIntervals, missingRegion));
            }
        }
        // Sort and write the output
        IntervalList out = intervalList;
        if (SORT) {
            out = out.sorted();
        }
        if (UNIQUE) {
            out = out.uniqued();
        }
        out.write(OUTPUT);
        LOG.info(String.format("Wrote %d intervals spanning a total of %d bases", out.getIntervals().size(),out.getBaseCount()));

    } catch (final IOException e) {
        throw new RuntimeException(e);
    }

    return 0;
}
 
Example 18
Source File: AlignmentsTagsTest.java    From cramtools with Apache License 2.0 4 votes vote down vote up
private void doTest(byte[] ref, int alignmentStart, byte[] readBases) throws IOException,
		CloneNotSupportedException {
	SAMSequenceRecord sequenceRecord = new SAMSequenceRecord("1", ref.length);
	SAMSequenceDictionary sequenceDictionary = new SAMSequenceDictionary();
	sequenceDictionary.addSequence(sequenceRecord);

	SAMFileHeader header = new SAMFileHeader();
	header.setSequenceDictionary(sequenceDictionary);
	SAMRecord samRecord = new SAMRecord(header);
	samRecord.setReadUnmappedFlag(false);
	samRecord.setAlignmentStart(alignmentStart);
	samRecord.setReferenceIndex(0);
	samRecord.setReadBases(readBases);
	samRecord.setCigarString(samRecord.getReadLength() + "M");

	ReferenceSource referenceSource = new ReferenceSource() {
		@Override
		public synchronized ReferenceRegion getRegion(SAMSequenceRecord record, int start_1based,
				int endInclusive_1based) throws IOException {
			int zbInclusiveStart = start_1based - 1;
			int zbExlcusiveEnd = endInclusive_1based;
			return new ReferenceRegion(Arrays.copyOfRange(ref, zbInclusiveStart, zbExlcusiveEnd),
					sequenceRecord.getSequenceIndex(), sequenceRecord.getSequenceName(), start_1based);
		}
	};

	AlignmentsTags.calculateMdAndNmTags(samRecord, referenceSource, sequenceDictionary, true, true);

	SAMRecord checkRecord = (SAMRecord) samRecord.clone();
	SequenceUtil.calculateMdAndNmTags(checkRecord, ref, true, true);
	// System.out.printf("TEST: ref %s, start %d, read bases %s\n", new
	// String(ref), alignmentStart, new String(
	// readBases));
	// System.out
	// .println(referenceSource.getRegion(sequenceRecord, alignmentStart,
	// alignmentStart + readBases.length));
	// System.out.printf("NM:  %s x %s\n", samRecord.getAttribute("NM"),
	// checkRecord.getAttribute("NM"));
	// System.out.printf("MD: %s x %s\n", samRecord.getAttribute("MD"),
	// checkRecord.getAttribute("MD"));

	Assert.assertEquals(checkRecord.getAttribute("NM"), samRecord.getAttribute("NM"));
	Assert.assertEquals(checkRecord.getAttribute("MD"), samRecord.getAttribute("MD"));
}
 
Example 19
Source File: CreateSnpIntervalFromVcf.java    From Drop-seq with MIT License 4 votes vote down vote up
public IntervalList processData(final File vcfFile, final File sdFile, final Set<String> sample, int GQThreshold, final boolean hetSNPsOnly) {

		final VCFFileReader reader = new VCFFileReader(vcfFile, false);
		if (!VCFUtils.GQInHeader(reader)) {
			GQThreshold=-1;
			log.info("Genotype Quality [GQ] not found in header.  Disabling GQ_THRESHOLD parameter");
		}
		
		final VCFHeader inputVcfHeader = new VCFHeader(reader.getFileHeader().getMetaDataInInputOrder());
		SAMSequenceDictionary sequenceDictionary = inputVcfHeader.getSequenceDictionary();
		Set<String> sampleListFinal = sample;
		if (sample==null || sample.isEmpty()) {
			ArrayList<String> s = reader.getFileHeader().getSampleNamesInOrder();
			sampleListFinal=new TreeSet<String>(s);
		}

		if (sdFile != null)
			sequenceDictionary = getSequenceDictionary(sdFile);

		final ProgressLogger progress = new ProgressLogger(this.log, 500000);

		final SAMFileHeader samHeader = new SAMFileHeader();
		samHeader.setSequenceDictionary(sequenceDictionary);
		IntervalList result = new IntervalList(samHeader);

		// Go through the input, find sites we want to keep.
		final PeekableIterator<VariantContext> iterator = new PeekableIterator<>(reader.iterator());

		validateRequestedSamples (iterator, sampleListFinal);

		while (iterator.hasNext()) {
			final VariantContext site = iterator.next();
			progress.record(site.getContig(), site.getStart());
			// for now drop any filtered site.
			if (site.isFiltered())
				continue;
			// move onto the next record if the site is not a SNP or the samples aren't all heterozygous.
			if (!site.isSNP())
				continue;
			if (!sitePassesFilters(site, sampleListFinal, GQThreshold, hetSNPsOnly))
				continue;
			Interval varInt = new Interval(site.getContig(), site.getStart(),
					site.getEnd(), true, site.getID());

			// final Interval site = findHeterozygousSites(full, SAMPLE);
			result.add(varInt);

		}

		CloserUtil.close(iterator);
		CloserUtil.close(reader);
		return (result);
	}
 
Example 20
Source File: SingleCellRnaSeqMetricsCollector.java    From Drop-seq with MIT License 4 votes vote down vote up
/**
    * Sets up the reads in cell barcode order.
    * Only adds reads that pass the map quality and are in the set of cell barcodes requested.
    *
    * I've tried adapting this to the TagOrderIterator API, but it seems like I need to add the read groups to the header of the temporary BAM that gets
    * iterated on or this doesn't work.
    */
   private CloseableIterator<SAMRecord> getReadsInTagOrder (final File bamFile, final String primaryTag,
                                                            final List<SAMReadGroupRecord> rg,
                                                            final List<String> allCellBarcodes, final int mapQuality) {

	SamReader reader = SamReaderFactory.makeDefault().open(bamFile);
	SAMSequenceDictionary dict= reader.getFileHeader().getSequenceDictionary();
	List<SAMProgramRecord> programs =reader.getFileHeader().getProgramRecords();

	final Set<String> cellBarcodeSet = new HashSet<> (allCellBarcodes);

	final SAMFileHeader writerHeader = new SAMFileHeader();
	// reader.getFileHeader().setReadGroups(rg);
	for (SAMReadGroupRecord z: rg) {
		reader.getFileHeader().addReadGroup(z);
		writerHeader.addReadGroup(z);
	}
       writerHeader.setSortOrder(SAMFileHeader.SortOrder.queryname);
       writerHeader.setSequenceDictionary(dict);
       for (SAMProgramRecord spr : programs)
		writerHeader.addProgramRecord(spr);

       // This not only filters, but sets the RG attribute on reads it allows through.
       final FilteredIterator<SAMRecord> rgAddingFilter = new FilteredIterator<SAMRecord>(reader.iterator()) {
           @Override
           public boolean filterOut(final SAMRecord r) {
               String cellBarcode = r.getStringAttribute(primaryTag);
               if (cellBarcodeSet.contains(cellBarcode) & r.getMappingQuality() >= mapQuality) {
                   r.setAttribute("RG", cellBarcode);
                   return false;
               } else
				return true;
           }
       };

       ProgressLogger p = new ProgressLogger(log, 1000000, "Preparing reads in core barcodes");
       CloseableIterator<SAMRecord> sortedIterator = SamRecordSortingIteratorFactory.create(writerHeader, rgAddingFilter, new StringTagComparator(primaryTag), p);

	log.info("Sorting finished.");
	return (sortedIterator);
}