Java Code Examples for htsjdk.samtools.SAMSequenceDictionary#getSequenceIndex()

The following examples show how to use htsjdk.samtools.SAMSequenceDictionary#getSequenceIndex() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: IntervalTagComparator.java    From Drop-seq with MIT License 6 votes vote down vote up
public static int compare (final Interval i1, final Interval i2, final SAMSequenceDictionary dict) {
 	int result = 0;
 	// if there's a sequence dictionary, compare on the index of the sequence instead of the contig name.
 	if (dict!=null) {
 		int seqIdx1 = dict.getSequenceIndex(i1.getContig());
 		int seqIdx2 = dict.getSequenceIndex(i2.getContig());
 		result = seqIdx1 - seqIdx2;
 	} else
result = i1.getContig().compareTo(i2.getContig());
 	// same as Interval.compareTo
 	if (result == 0) {
         if (i1.getStart() == i2.getStart())
	result = i1.getEnd() - i2.getEnd();
else
	result = i1.getStart() - i2.getStart();
         // added bonus, sort on interval names to tie break, if both intervals have names.
         if (result==0) {
         	String n1 = i1.getName();
         	String n2 = i2.getName();
         	if (n1!=null && n2!=null)
		result = n1.compareTo(n2);
         }
     }
 	return result;
 }
 
Example 2
Source File: CollectSamErrorMetrics.java    From picard with MIT License 6 votes vote down vote up
/**
 * Compares a VariantContext to a Locus providing information regarding possible overlap, or relative location
 *
 * @param dictionary     The {@link SAMSequenceDictionary} to use for ordering the sequences
 * @param variantContext the {@link VariantContext} to compare
 * @param locus          the {@link Locus} to compare
 * @return negative if variantContext comes before locus (with no overlap)
 * zero if variantContext and locus overlap
 * positive if variantContext comes after locus (with no overlap)
 * <p/>
 * if variantContext and locus are in the same contig the return value will be the number of bases apart they are,
 * otherwise it will be MIN_INT/MAX_INT
 */
public static int CompareVariantContextToLocus(final SAMSequenceDictionary dictionary, final VariantContext variantContext, final Locus locus) {

    final int indexDiff = dictionary.getSequenceIndex(variantContext.getContig()) - locus.getSequenceIndex();
    if (indexDiff != 0) {
        return indexDiff < 0 ? Integer.MIN_VALUE : Integer.MAX_VALUE;
    }

    //same SequenceIndex, can compare by genomic position
    if (locus.getPosition() < variantContext.getStart()) {
        return variantContext.getStart() - locus.getPosition();
    }
    if (locus.getPosition() > variantContext.getEnd()) {
        return variantContext.getEnd() - locus.getPosition();
    }
    return 0;
}
 
Example 3
Source File: BAMInputFormat.java    From Hadoop-BAM with MIT License 6 votes vote down vote up
/**
 * Converts an interval in SimpleInterval format into an htsjdk QueryInterval.
 *
 * In doing so, a header lookup is performed to convert from contig name to index
 *
 * @param interval interval to convert
 * @param sequenceDictionary sequence dictionary used to perform the conversion
 * @return an equivalent interval in QueryInterval format
 */
private static QueryInterval convertSimpleIntervalToQueryInterval( final Interval interval,	final SAMSequenceDictionary sequenceDictionary ) {
	if (interval == null) {
		throw new IllegalArgumentException("interval may not be null");
	}
	if (sequenceDictionary == null) {
		throw new IllegalArgumentException("sequence dictionary may not be null");
	}

	final int contigIndex = sequenceDictionary.getSequenceIndex(interval.getContig());
	if ( contigIndex == -1 ) {
		throw new IllegalArgumentException("Contig " + interval.getContig() + " not present in reads sequence " +
				"dictionary");
	}

	return new QueryInterval(contigIndex, interval.getStart(), interval.getEnd());
}
 
Example 4
Source File: PairedEndAndSplitReadEvidenceCollection.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@VisibleForTesting
public DiscordantRead getReportableDiscordantReadPair(final GATKRead read, final Set<String> observedDiscordantNamesAtThisLocus,
                                                      final SAMSequenceDictionary samSequenceDictionary) {
    final int readSeqId = samSequenceDictionary.getSequenceIndex(read.getContig());
    final int mateSeqId = samSequenceDictionary.getSequenceIndex(read.getMateContig());
    if (readSeqId < mateSeqId) {
        return new DiscordantRead(read);
    } else if (readSeqId == mateSeqId) {
        if (read.getStart() < read.getMateStart()) {
            return new DiscordantRead(read);
        } else if (read.getStart() == read.getMateStart()) {
            final boolean seenBefore = observedDiscordantNamesAtThisLocus.remove(read.getName());
            if (! seenBefore) {
                final DiscordantRead discordantRead = new DiscordantRead(read);
                observedDiscordantNamesAtThisLocus.add(read.getName());
                return discordantRead;
            }
        }
    }
    return null;
}
 
Example 5
Source File: FindBreakpointEvidenceSpark.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/** Read a file of contig names that will be ignored when checking for inter-contig pairs. */
private static Set<Integer> readCrossContigsToIgnoreFile( final String crossContigsToIgnoreFile,
                                                          final SAMSequenceDictionary dictionary ) {
    final Set<Integer> ignoreSet = new HashSet<>();
    try ( final BufferedReader rdr =
                  new BufferedReader(
                          new InputStreamReader(BucketUtils.openFile(crossContigsToIgnoreFile))) ) {
        String line;
        while ( (line = rdr.readLine()) != null ) {
            final int tigId = dictionary.getSequenceIndex(line);
            if ( tigId == -1 ) {
                throw new UserException("crossContigToIgnoreFile contains an unrecognized contig name: "+line);
            }
            ignoreSet.add(tigId);
        }
    }
    catch ( final IOException ioe ) {
        throw new UserException("Can't read crossContigToIgnore file "+crossContigsToIgnoreFile, ioe);
    }
    return ignoreSet;
}
 
Example 6
Source File: ReadsAtLocus.java    From abra2 with MIT License 5 votes vote down vote up
public int compareLoci(ReadsAtLocus that, SAMSequenceDictionary dict) {
	int compare = dict.getSequenceIndex(this.getChromosome()) - dict.getSequenceIndex(that.getChromosome());
	if (compare == 0) {
		compare = this.getPosition() - that.getPosition();
	}
	
	return compare;
}
 
Example 7
Source File: FingerprintUtils.java    From picard with MIT License 5 votes vote down vote up
VariantContextSet(final SAMSequenceDictionary dict) {
    super((lhs, rhs) -> {
        final int lhsContig = dict.getSequenceIndex(lhs.getContig());
        final int rhsContig = dict.getSequenceIndex(rhs.getContig());

        final int retval = lhsContig - rhsContig;
        if (retval != 0) return retval;

        return lhs.getStart() - rhs.getStart();
    });
}
 
Example 8
Source File: SparkSharder.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * @return <code>true</code> if the locatable is to the right of the given interval
 */
private static <I extends Locatable, L extends Locatable> boolean toRightOf(I interval, L locatable, SAMSequenceDictionary sequenceDictionary) {
    int intervalContigIndex = sequenceDictionary.getSequenceIndex(interval.getContig());
    int locatableContigIndex = sequenceDictionary.getSequenceIndex(locatable.getContig());
    return (intervalContigIndex == locatableContigIndex && interval.getEnd() < locatable.getStart()) // locatable on same contig, to the right
            || intervalContigIndex < locatableContigIndex; // locatable on subsequent contig
}
 
Example 9
Source File: IntervalUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Determines the relative contig ordering of first and second using the provided sequence dictionary
 *
 * @param first first Locatable
 * @param second second Locatable
 * @param dictionary sequence dictionary used to determine contig ordering
 * @return 0 if the two contigs are the same, a negative value if first's contig comes before second's contig,
 *         or a positive value if first's contig comes after second's contig
 */
public static int compareContigs(final Locatable first, final Locatable second, final SAMSequenceDictionary dictionary) {
    Utils.nonNull(first);
    Utils.nonNull(second);
    Utils.nonNull(dictionary);

    final int firstRefIndex = dictionary.getSequenceIndex(first.getContig());
    final int secondRefIndex = dictionary.getSequenceIndex(second.getContig());
    if (firstRefIndex == -1 || secondRefIndex == -1) {
        throw new IllegalArgumentException("Can't do comparison because Locatables' contigs not found in sequence dictionary");
    }
    // compare the contigs
    return Integer.compare(firstRefIndex, secondRefIndex);
}
 
Example 10
Source File: IntervalUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Converts an interval in SimpleInterval format into an htsjdk QueryInterval.
 *
 * In doing so, a header lookup is performed to convert from contig name to index
 *
 * @param interval interval to convert
 * @param sequenceDictionary sequence dictionary used to perform the conversion
 * @return an equivalent interval in QueryInterval format
 */
public static QueryInterval convertSimpleIntervalToQueryInterval( final SimpleInterval interval, final SAMSequenceDictionary sequenceDictionary ) {
    Utils.nonNull(interval);
    Utils.nonNull(sequenceDictionary);

    final int contigIndex = sequenceDictionary.getSequenceIndex(interval.getContig());
    if ( contigIndex == -1 ) {
        throw new UserException("Contig " + interval.getContig() + " not present in reads sequence dictionary");
    }

    return new QueryInterval(contigIndex, interval.getStart(), interval.getEnd());
}
 
Example 11
Source File: SVVCFReader.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
public static SVIntervalTree<String> readBreakpointsFromTruthVCF(final String truthVCF,
                                                                 final SAMSequenceDictionary dictionary,
                                                                 final int padding ) {
    SVIntervalTree<String> breakpoints = new SVIntervalTree<>();
    try ( final FeatureDataSource<VariantContext> dataSource =
                  new FeatureDataSource<>(truthVCF, null, 0, VariantContext.class) ) {
        for ( final VariantContext vc : dataSource ) {
            final StructuralVariantType svType = vc.getStructuralVariantType();
            if ( svType == null ) continue;
            final String eventName = vc.getID();
            final int contigID = dictionary.getSequenceIndex(vc.getContig());
            if ( contigID < 0 ) {
                throw new UserException("VCF contig " + vc.getContig() + " does not appear in dictionary.");
            }
            final int start = vc.getStart();
            switch ( svType ) {
                case DEL:
                case INV:
                case CNV:
                    final int end = vc.getEnd();
                    breakpoints.put(new SVInterval(contigID,start-padding, end+padding), eventName);
                    break;
                case INS:
                case DUP:
                case BND:
                    breakpoints.put(new SVInterval(contigID,start-padding, start+padding), eventName);
                    break;
            }
        }
    }
    return breakpoints;
}
 
Example 12
Source File: GatherGeneGCLength.java    From Drop-seq with MIT License 4 votes vote down vote up
@Override
   protected int doWork() {

       IOUtil.assertFileIsReadable(ANNOTATIONS_FILE);
       IOUtil.assertFileIsWritable(this.OUTPUT);
       PrintStream out = new ErrorCheckingPrintStream(IOUtil.openFileForWriting(OUTPUT));
       writeHeader(out);

       PrintStream outTranscript = null;
       if (this.OUTPUT_TRANSCRIPT_LEVEL!=null) {
		outTranscript = new ErrorCheckingPrintStream(IOUtil.openFileForWriting(OUTPUT_TRANSCRIPT_LEVEL));
		writeHeaderTranscript(outTranscript);
       }

       FastaSequenceFileWriter  outSequence = null;

       if (this.OUTPUT_TRANSCRIPT_SEQUENCES!=null) {
       	IOUtil.assertFileIsWritable(this.OUTPUT_TRANSCRIPT_SEQUENCES);
		outSequence = new FastaSequenceFileWriter (this.OUTPUT_TRANSCRIPT_SEQUENCES);
       }
       ReferenceSequenceFileWalker refFileWalker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE);

       SAMSequenceDictionary dict= refFileWalker.getSequenceDictionary();
       if (dict==null) {
       	CloserUtil.close(refFileWalker);
       	throw new IllegalArgumentException("Reference file" + this.REFERENCE_SEQUENCE.getAbsolutePath()+" is missing a dictionary file [.dict].  Please make one!");
       }

       OverlapDetector<Gene> geneOverlapDetector= GeneAnnotationReader.loadAnnotationsFile(ANNOTATIONS_FILE, dict);

       List<SAMSequenceRecord> records = dict.getSequences();

	for (SAMSequenceRecord record: records) {
		String seqName = record.getSequenceName();
		int seqIndex=dict.getSequenceIndex(seqName);
		ReferenceSequence fastaRef=refFileWalker.get(seqIndex);

		// get the genes for this contig.
		Interval i = new Interval(seqName, 1, record.getSequenceLength());
		Collection< Gene> genes = geneOverlapDetector.getOverlaps(i);
		for (Gene g: genes) {
			List<GCResult> gcList = calculateGCContentGene(g, fastaRef, dict);
			if (this.OUTPUT_TRANSCRIPT_LEVEL!=null)
				writeResultTranscript(gcList, outTranscript);
			GCIsoformSummary summary = new GCIsoformSummary(g, gcList);
			if (this.OUTPUT_TRANSCRIPT_SEQUENCES!=null)
				writeTranscriptSequence(g, fastaRef, dict, outSequence);

			GCResult gc = calculateGCContentUnionExons(g, fastaRef, dict);

			writeResult(gc, summary, out);
		}
	}
	CloserUtil.close(refFileWalker);
	CloserUtil.close(out);
	if (this.OUTPUT_TRANSCRIPT_LEVEL!=null) CloserUtil.close(outTranscript);
	if (this.OUTPUT_TRANSCRIPT_SEQUENCES!=null) CloserUtil.close(outSequence);
       return 0;
}
 
Example 13
Source File: GenomeSJ.java    From halvade with GNU General Public License v3.0 4 votes vote down vote up
public void parseSJString(String sjString, SAMSequenceDictionary dict) {
    String columns[] = sjString.split("\t");
    this.type = -1;
    this.secondary_key = dict.getSequenceIndex(columns[0]);
}