Java Code Examples for htsjdk.samtools.SAMSequenceRecord#getSequenceIndex()

The following examples show how to use htsjdk.samtools.SAMSequenceRecord#getSequenceIndex() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: GenomeLocParser.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Creates a loc to the left (starting at the loc start + 1) of maxBasePairs size.
 * @param loc The original loc
 * @param maxBasePairs The maximum number of basePairs
 * @return The contiguous loc of up to maxBasePairs length or null if the loc is already at the start of the contig.
 */
public GenomeLoc createGenomeLocAtStart(final GenomeLoc loc, final int maxBasePairs) {
    if (GenomeLoc.isUnmapped(loc))
        return null;
    final String contigName = loc.getContig();
    final SAMSequenceRecord contig = contigInfo.getSequence(contigName);
    final int contigIndex = contig.getSequenceIndex();

    int start = loc.getStart() - maxBasePairs;
    int stop = loc.getStart() - 1;

    if (start < 1)
        start = 1;
    if (stop < 1)
        return null;

    return createGenomeLoc(contigName, contigIndex, start, stop, true);
}
 
Example 2
Source File: GenomeLocParser.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Creates a loc to the right (starting at the loc stop + 1) of maxBasePairs size.
 * @param loc The original loc
 * @param maxBasePairs The maximum number of basePairs
 * @return The contiguous loc of up to maxBasePairs length or null if the loc is already at the end of the contig.
 */
public GenomeLoc createGenomeLocAtStop(final GenomeLoc loc, final int maxBasePairs) {
    if (GenomeLoc.isUnmapped(loc))
        return null;
    String contigName = loc.getContig();
    SAMSequenceRecord contig = contigInfo.getSequence(contigName);
    int contigIndex = contig.getSequenceIndex();
    int contigLength = contig.getSequenceLength();

    int start = loc.getStop() + 1;
    int stop = loc.getStop() + maxBasePairs;

    if (start > contigLength)
        return null;
    if (stop > contigLength)
        stop = contigLength;

    return createGenomeLoc(contigName, contigIndex, start, stop, true);
}
 
Example 3
Source File: VcfUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private static VCFContigHeaderLine makeContigHeaderLine(final SAMSequenceRecord contig, final String assembly) {
    final Map<String, String> map = new LinkedHashMap<>(3);
    map.put(GATKVCFConstants.CONTIG_ID_KEY, contig.getSequenceName());
    map.put(GATKVCFConstants.CONTIG_LENGTH_KEY, String.valueOf(contig.getSequenceLength()));
    if (assembly != null) {
        map.put(GATKVCFConstants.ASSEMBLY_NAME_KEY, assembly);
    }
    return new VCFContigHeaderLine(map, contig.getSequenceIndex());
}
 
Example 4
Source File: SequenceDictionaryUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Checks whether the common contigs in the given sequence dictionaries occur at the same indices
 * in both dictionaries
 *
 * @param commonContigs Set of names of the contigs that occur in both dictionaries
 * @param dict1 first sequence dictionary
 * @param dict2 second sequence dictionary
 * @return true if the contigs common to dict1 and dict2 occur at the same indices in both dictionaries,
 *         otherwise false
 */
private static boolean commonContigsAreAtSameIndices( final Set<String> commonContigs, final SAMSequenceDictionary dict1, final SAMSequenceDictionary dict2 ) {
    for ( String commonContig : commonContigs ) {
        SAMSequenceRecord dict1Record = dict1.getSequence(commonContig);
        SAMSequenceRecord dict2Record = dict2.getSequence(commonContig);

        // Each common contig must have the same index in both dictionaries
        if ( dict1Record.getSequenceIndex() != dict2Record.getSequenceIndex() ) {
            return false;
        }
    }

    return true;
}
 
Example 5
Source File: GenomeLocParser.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * validate a position or interval on the genome as valid
 *
 * Requires that contig exist in the master sequence dictionary, and that contig index be valid as well.  Requires
 * that start <= stop.
 *
 * if mustBeOnReference is true,
 * performs boundary validation for genome loc INTERVALS:
 * start and stop are on contig and start <= stop
 *
 * @param contig the contig name
 * @param start  the start position
 * @param stop   the stop position
 *
 * @return the interned contig name, an optimization that ensures that contig == the string in the sequence dictionary
 */
protected String validateGenomeLoc(final String contig, final int contigIndex, final int start, final int stop, final boolean mustBeOnReference) {
    if ( validationLevel == ValidationLevel.NONE )
        return contig;
    else {
        if (stop < start)
            vglHelper(String.format("The stop position %d is less than start %d in contig %s", stop, start, contig));

        final SAMSequenceRecord contigInfo = this.contigInfo.getSequence(contig);
        if ( contigInfo.getSequenceIndex() != contigIndex )
            vglHelper(String.format("The contig index %d is bad, doesn't equal the contig index %d of the contig from a string %s",
                    contigIndex, contigInfo.getSequenceIndex(), contig));

        if ( mustBeOnReference ) {
            if (start < 1)
                vglHelper(String.format("The start position %d is less than 1", start));

            if (stop < 1)
                vglHelper(String.format("The stop position %d is less than 1", stop));

            final int contigSize = contigInfo.getSequenceLength();
            if (contigSize == SAMSequenceRecord.UNKNOWN_SEQUENCE_LENGTH) {
                logger.warn(String.format("The available sequence dictionary does not contain a sequence length for contig (%s). " +
                        "Skipping validation of the genome loc end coordinate (%d).",
                        contig, stop));
            }
            else if (start > contigSize || stop > contigSize) {
                vglHelper(String.format("The genome loc coordinates %d-%d exceed the contig size (%d)", start, stop, contigSize));
            }
        }

        return contigInfo.getSequenceName();
    }
}
 
Example 6
Source File: MRUCachingSAMSequenceDictionary.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * The key algorithm.  Given a new record, update the last used record, contig
 * name, and index.
 *
 * @param contig the contig we want to look up.  If null, index is used instead
 * @param index the contig index we want to look up.  Only used if contig is null
 * @throws GATKException if index isn't present in the dictionary
 * @return the SAMSequenceRecord for contig / index
 */
private SAMSequenceRecord updateCache(final String contig, int index ) {
    SAMSequenceRecord rec = contig == null ? dict.getSequence(index) : dict.getSequence(contig);
    if ( rec == null ) {
        throw new GATKException("BUG: requested unknown contig=" + contig + " index=" + index);
    } else {
        lastSSR = rec;
        lastContig = rec.getSequenceName();
        lastIndex = rec.getSequenceIndex();
        return rec;
    }
}
 
Example 7
Source File: AlignmentsTagsTest.java    From cramtools with Apache License 2.0 4 votes vote down vote up
private void doTest(byte[] ref, int alignmentStart, byte[] readBases) throws IOException,
		CloneNotSupportedException {
	SAMSequenceRecord sequenceRecord = new SAMSequenceRecord("1", ref.length);
	SAMSequenceDictionary sequenceDictionary = new SAMSequenceDictionary();
	sequenceDictionary.addSequence(sequenceRecord);

	SAMFileHeader header = new SAMFileHeader();
	header.setSequenceDictionary(sequenceDictionary);
	SAMRecord samRecord = new SAMRecord(header);
	samRecord.setReadUnmappedFlag(false);
	samRecord.setAlignmentStart(alignmentStart);
	samRecord.setReferenceIndex(0);
	samRecord.setReadBases(readBases);
	samRecord.setCigarString(samRecord.getReadLength() + "M");

	ReferenceSource referenceSource = new ReferenceSource() {
		@Override
		public synchronized ReferenceRegion getRegion(SAMSequenceRecord record, int start_1based,
				int endInclusive_1based) throws IOException {
			int zbInclusiveStart = start_1based - 1;
			int zbExlcusiveEnd = endInclusive_1based;
			return new ReferenceRegion(Arrays.copyOfRange(ref, zbInclusiveStart, zbExlcusiveEnd),
					sequenceRecord.getSequenceIndex(), sequenceRecord.getSequenceName(), start_1based);
		}
	};

	AlignmentsTags.calculateMdAndNmTags(samRecord, referenceSource, sequenceDictionary, true, true);

	SAMRecord checkRecord = (SAMRecord) samRecord.clone();
	SequenceUtil.calculateMdAndNmTags(checkRecord, ref, true, true);
	// System.out.printf("TEST: ref %s, start %d, read bases %s\n", new
	// String(ref), alignmentStart, new String(
	// readBases));
	// System.out
	// .println(referenceSource.getRegion(sequenceRecord, alignmentStart,
	// alignmentStart + readBases.length));
	// System.out.printf("NM:  %s x %s\n", samRecord.getAttribute("NM"),
	// checkRecord.getAttribute("NM"));
	// System.out.printf("MD: %s x %s\n", samRecord.getAttribute("MD"),
	// checkRecord.getAttribute("MD"));

	Assert.assertEquals(checkRecord.getAttribute("NM"), samRecord.getAttribute("NM"));
	Assert.assertEquals(checkRecord.getAttribute("MD"), samRecord.getAttribute("MD"));
}