Java Code Examples for htsjdk.samtools.util.Interval

The following examples show how to use htsjdk.samtools.util.Interval. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: Hadoop-BAM   Source File: TestBAMInputFormat.java    License: MIT License 6 votes vote down vote up
@Test
public void testIntervalsAndUnmapped() throws Exception {
  input = BAMTestUtil.writeBamFile(1000, SAMFileHeader.SortOrder.coordinate)
      .getAbsolutePath();

  // Query two regions on chr21; the first covers two unpaired fragments.
  final List<Interval> intervals = new ArrayList<>();
  intervals.add(new Interval("chr21", 5000, 9999));
  intervals.add(new Interval("chr21", 20000, 22999));

  completeSetupWithBoundedTraversal(intervals, true);

  // Cap the split size so the input is broken into multiple splits.
  jobContext.getConfiguration().setInt(FileInputFormat.SPLIT_MAXSIZE, 40000);
  final BAMInputFormat inputFormat = new BAMInputFormat();
  final List<InputSplit> splits = inputFormat.getSplits(jobContext);
  assertEquals(2, splits.size());

  // Verify the per-split record counts.
  final List<SAMRecord> firstSplitRecords = getSAMRecordsFromSplit(inputFormat, splits.get(0));
  final List<SAMRecord> secondSplitRecords = getSAMRecordsFromSplit(inputFormat, splits.get(1));
  assertEquals(16, firstSplitRecords.size());
  assertEquals(2, secondSplitRecords.size());
}
 
Example 2
Source Project: Drop-seq   Source File: SNPUMICellReadIteratorWrapper.java    License: MIT License 6 votes vote down vote up
/**
 * Filters and copies reads for generating SNPCellUMIBasePileups.
 * Filters out reads in which the read cell barcode does not match a barcode in the list (if list is not null)
 * Reads that are marked as secondary or supplementary are filtered out
 * Filters reads based on read map quality, removing reads below that quality
 * Optionally filters reads where the annotated gene and the strand of the read don't match, or can clone a read and return it multiple times
 * if the read maps to more than one gene and <assignReadsToAllGenes> is true.
 * @param underlyingIterator Source of reads
 * @param snpIntervals Intervals of SNP sites; loaded into an OverlapDetector for later read/SNP overlap queries
 * @param cellBarcodeTag The cell barcode BAM tag
 * @param cellBarcodeList A list of cell barcodes, or null to ignore.  If populated, reads where the cellBarcodeTag matches one of these Strings will be retained
 * @param geneTag The gene/exon tag.
 * @param snpTag The BAM tag holding the SNP interval for a read
 * @param readMQ The minimum map quality of a read to be retained.
 *               NOTE(review): this parameter is not stored or used in the visible
 *               constructor body — confirm MQ filtering happens elsewhere.
 */
public SNPUMICellReadIteratorWrapper(final Iterator<SAMRecord> underlyingIterator,
                                        final IntervalList snpIntervals,
                                        final String cellBarcodeTag,
                                        final Collection<String> cellBarcodeList,
                                        final String geneTag,
                                        final String snpTag,
                                        final int readMQ) {
       super(underlyingIterator);
	this.cellBarcodeTag = cellBarcodeTag;
	this.cellBarcodeList = new HashSet<String>(cellBarcodeList);
	this.geneTag=geneTag;
	this.snpTag=snpTag;


	// construct OverlapDetector
	OverlapDetector<Interval> od = new OverlapDetector<>(0, 0);
	od.addAll(snpIntervals.getIntervals(), snpIntervals.getIntervals());
	this.snpIntervals=od;
}
 
Example 3
Source Project: Drop-seq   Source File: MultiCellDigitalAlleleCountsIterator.java    License: MIT License 6 votes vote down vote up
@Override
public MultiCellDigitalAlleleCounts next() {
	// NOTE(review): returns null when exhausted rather than throwing
	// NoSuchElementException; callers appear to rely on the null sentinel.
	if (!iter.hasNext())
		return null;

	// Seed the multi-cell DAC with the first pileup.
	final DigitalAlleleCounts first = iter.next();
	final String gene = first.getGene();
	final Interval snp = first.getSnpInterval();
	final MultiCellDigitalAlleleCounts multiDAC =
			new MultiCellDigitalAlleleCounts(snp, gene);
	multiDAC.add(first);

	// Accumulate subsequent pileups while they share the same gene and SNP interval.
	while (this.iter.hasNext()) {
		final DigitalAlleleCounts candidate = iter.peek();
		if (!candidate.getGene().equals(gene) || !candidate.getSnpInterval().equals(snp))
			break; // next pileup belongs to a different DAC; stop here
		multiDAC.add(iter.next()); // consume the peeked element to advance the iterator
	}
	return multiDAC;
}
 
Example 4
Source Project: gatk   Source File: FilterIntervals.java    License: BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Override
public Object doWork() {
    // Validate arguments and intervals up front so failures happen before any output is written.
    validateArguments();
    resolveAndValidateIntervals();

    final SimpleIntervalCollection filtered = filterIntervals();

    logger.info(String.format("Writing filtered intervals to %s...", outputFilteredIntervalsFile.getAbsolutePath()));
    final IntervalList outputList =
            new IntervalList(filtered.getMetadata().getSequenceDictionary());
    // Convert each filtered interval into an htsjdk Interval for the output list.
    filtered.getIntervals().forEach(interval -> outputList.add(new Interval(interval)));
    outputList.write(outputFilteredIntervalsFile);

    logger.info(String.format("%s complete.", getClass().getSimpleName()));
    return null;
}
 
Example 5
Source Project: Drop-seq   Source File: IntervalTagComparator.java    License: MIT License 6 votes vote down vote up
/**
 * Compares two intervals for ordering.
 *
 * Contigs are ordered by their index in {@code dict} when a sequence dictionary is
 * supplied, otherwise lexicographically by contig name.  Within a contig the order
 * matches {@link Interval#compareTo}: start, then end, then (as a tie break) the
 * interval name when both intervals are named.
 *
 * @param i1   first interval (must not be null)
 * @param i2   second interval (must not be null)
 * @param dict sequence dictionary used to order contigs, or null to compare contig names
 * @return a negative, zero, or positive int per the {@link java.util.Comparator} contract
 */
public static int compare (final Interval i1, final Interval i2, final SAMSequenceDictionary dict) {
	int result;
	// Use Integer.compare rather than subtraction: subtraction can overflow and
	// violates the comparator contract for extreme values.
	if (dict != null) {
		result = Integer.compare(dict.getSequenceIndex(i1.getContig()),
				dict.getSequenceIndex(i2.getContig()));
	} else {
		result = i1.getContig().compareTo(i2.getContig());
	}
	if (result != 0)
		return result;

	// Same contig: order by start, then end (same as Interval.compareTo).
	result = Integer.compare(i1.getStart(), i2.getStart());
	if (result == 0)
		result = Integer.compare(i1.getEnd(), i2.getEnd());

	// Added bonus: sort on interval names to tie break, if both intervals have names.
	if (result == 0) {
		final String n1 = i1.getName();
		final String n2 = i2.getName();
		if (n1 != null && n2 != null)
			result = n1.compareTo(n2);
	}
	return result;
}
 
Example 6
Source Project: Hadoop-BAM   Source File: TestVCFInputFormat.java    License: MIT License 6 votes vote down vote up
@Parameterized.Parameters
    public static Collection<Object> data() {
        // Each row is: { VCF resource name,
        //                expected number-of-splits constraint,
        //                optional Interval restricting the traversal (null = whole file) }.
        return Arrays.asList(new Object[][] {
            {"test.vcf", NUM_SPLITS.ANY, null},
            {"test.vcf.gz", NUM_SPLITS.EXACTLY_ONE, null},
            {"test.vcf.bgzf.gz", NUM_SPLITS.ANY, null},
            // BCF tests currently fail due to https://github.com/samtools/htsjdk/issues/507
//            {"test.uncompressed.bcf", NUM_SPLITS.ANY, null},
//            {"test.bgzf.bcf", NUM_SPLITS.ANY, null},
            {"HiSeq.10000.vcf", NUM_SPLITS.MORE_THAN_ONE, null},
            {"HiSeq.10000.vcf.gz", NUM_SPLITS.EXACTLY_ONE, null},
            {"HiSeq.10000.vcf.bgzf.gz", NUM_SPLITS.MORE_THAN_ONE, null},
            {"HiSeq.10000.vcf.bgzf.gz", NUM_SPLITS.EXACTLY_ONE,
                new Interval("chr1", 2700000, 2800000)}, // chosen to fall in one split
            {"HiSeq.10000.vcf.bgz", NUM_SPLITS.MORE_THAN_ONE, null},
            {"HiSeq.10000.vcf.bgz", NUM_SPLITS.EXACTLY_ONE,
                new Interval("chr1", 2700000, 2800000)} // chosen to fall in one split
        });
    }
 
Example 7
Source Project: Drop-seq   Source File: SNPUMIBasePileupTest.java    License: MIT License 6 votes vote down vote up
@Test(enabled=true)
public void testAllReadsPileup() {
	// Pile every read of the test BAM onto a single SNP site and verify base counts.
	final int snpPos = 76227022;
	final Interval snpInterval = new Interval("HUMAN_1", snpPos, snpPos, true, "test");
	final SNPUMIBasePileup p = new SNPUMIBasePileup(snpInterval, "ACADM", "fake_cell", "AAAAA");

	// try-with-resources: the original never closed the SamReader (resource leak).
	try (SamReader reader = SamReaderFactory.makeDefault().open(smallBAMFile)) {
		final Iterator<SAMRecord> iter = reader.iterator();
		while (iter.hasNext())
			p.addRead(iter.next());
	} catch (IOException e) {
		throw new RuntimeException("Failed reading " + smallBAMFile, e);
	}

	// Tally the observed bases at the SNP position.
	final List<Character> bases = p.getBasesAsCharacters();
	final ObjectCounter<Character> baseCounter = new ObjectCounter<>();
	for (final Character c : bases)
		baseCounter.increment(c);
	Assert.assertEquals(6, baseCounter.getCountForKey('A'));
	Assert.assertEquals(7, baseCounter.getCountForKey('G'));

	Assert.assertNotNull(p.toString());
}
 
Example 8
Source Project: Drop-seq   Source File: SNPUMIBasePileupTest.java    License: MIT License 6 votes vote down vote up
@Test(enabled=true)
public void testAddBaseQuals () {
	// Build a pileup at a single SNP site.
	final int snpPos = 76227022;
	final Interval snpInterval = new Interval("HUMAN_1", snpPos, snpPos, true, "test");
	final SNPUMIBasePileup pileup = new SNPUMIBasePileup(snpInterval, "ACADM", "fake_cell", "AAAAA");

	// Convert the char bases into the byte[] form the API expects.
	final char[] baseChars = {'A', 'A'};
	final byte[] quals = {27, 55};
	final byte[] bases = new byte[baseChars.length];
	StringUtil.charsToBytes(baseChars, 0, baseChars.length, bases, 0);

	// Setting equal-length base/quality arrays must not throw.
	boolean accepted = true;
	try {
		pileup.setBasesAndQualities(bases, quals);
	} catch (IllegalArgumentException e) {
		accepted = false;
	}
	Assert.assertTrue(accepted);
}
 
Example 9
@Test(expectedExceptions={UserException.MalformedFile.class, UserException.MalformedGenomeLoc.class}, dataProvider="invalidIntervalTestData")
public void testLoadIntervalsInvalidPicardIntervalHandling(GenomeLocParser genomeLocParser,
                                              String contig, int intervalStart, int intervalEnd ) throws Exception {

    // Build a Picard header containing contig "1" from the parser's dictionary...
    final SAMFileHeader picardFileHeader = new SAMFileHeader();
    picardFileHeader.addSequence(genomeLocParser.getContigInfo("1"));
    // ...plus an extra contig "2" that the parser's dictionary does NOT contain,
    // so the interval file can reference contigs unknown to the parser.
    picardFileHeader.addSequence(new SAMSequenceRecord("2", 100000));

    // Write the (possibly invalid) interval to a temporary Picard interval file.
    final IntervalList picardIntervals = new IntervalList(picardFileHeader);
    picardIntervals.add(new Interval(contig, intervalStart, intervalEnd, true, "dummyname"));
    final File picardIntervalFile = createTempFile("testLoadIntervalsInvalidPicardIntervalHandling", ".intervals");
    picardIntervals.write(picardIntervalFile);

    final List<String> intervalArgs = new ArrayList<>(1);
    intervalArgs.add(picardIntervalFile.getAbsolutePath());

    // loadIntervals() validates against the parser's dictionary and should throw
    // for every entry in the invalidIntervalTestData set.
    IntervalUtils.loadIntervals(intervalArgs, IntervalSetRule.UNION, IntervalMergingRule.ALL, 0, genomeLocParser);
}
 
Example 10
/**
 * Before traversal, fix configuration parameters and initialize
 * GenomicsDB. Hard-coded to handle only VCF files and headers
 */
@Override
public void onTraversalStart() {
    // Short-circuit mode: just dump the intervals of an existing workspace and stop.
    if (getIntervalsFromExistingWorkspace) {
        final IntervalList outputList = new IntervalList(getBestAvailableSequenceDictionary());
        intervals.forEach(i -> outputList.add(new Interval(i.getContig(), i.getStart(), i.getEnd())));
        final Path intervalListOutputPath = IOUtils.getPath(intervalListOutputPathString);
        outputList.write(intervalListOutputPath.toFile());
        return;
    }
    // Resolve the workspace directory and the well-known GenomicsDB metadata file paths.
    String workspaceDir = BucketUtils.makeFilePathAbsolute(overwriteCreateOrCheckWorkspace());
    vidMapJSONFile = IOUtils.appendPathToDir(workspaceDir, GenomicsDBConstants.DEFAULT_VIDMAP_FILE_NAME);
    callsetMapJSONFile = IOUtils.appendPathToDir(workspaceDir, GenomicsDBConstants.DEFAULT_CALLSETMAP_FILE_NAME);
    vcfHeaderFile = IOUtils.appendPathToDir(workspaceDir, GenomicsDBConstants.DEFAULT_VCFHEADER_FILE_NAME);

    // Incremental import rewrites only the callset map; a fresh import writes all metadata files.
    if (doIncrementalImport) {
        logger.info("Callset Map JSON file will be re-written to " + callsetMapJSONFile);
        logger.info("Incrementally importing to workspace - " + workspaceDir);
    } else {
        logger.info("Vid Map JSON file will be written to " + vidMapJSONFile);
        logger.info("Callset Map JSON file will be written to " + callsetMapJSONFile);
        logger.info("Complete VCF Header will be written to " + vcfHeaderFile);
        logger.info("Importing to workspace - " + workspaceDir);
    }
    // Spin up the executor used to preload input VCFs in parallel.
    initializeInputPreloadExecutorService();
}
 
Example 11
Source Project: Drop-seq   Source File: CreateSnpIntervalFromVcfTest.java    License: MIT License 6 votes vote down vote up
@Test
public void testBasic() throws IOException {
    // Run the CLP against the test VCF, writing to a self-deleting temp file.
    final CreateSnpIntervalFromVcf clp = new CreateSnpIntervalFromVcf();
    clp.INPUT = TEST_FILE;
    clp.OUTPUT = File.createTempFile("CreateSnpIntervalFromVcfTest.", ".intervals");
    clp.OUTPUT.deleteOnExit();
    Assert.assertEquals(clp.doWork(), 0);

    final IntervalList intervals = IntervalList.fromFile(clp.OUTPUT);
    // 6 variants in input, but one is an indel and one is filtered
    Assert.assertEquals(intervals.size(), EXPECTED_START_POS.length);

    // Every emitted interval must be a 1-bp site at the expected contig/position, in order.
    final Iterator<Interval> it = intervals.iterator();
    for (final int expectedStart : EXPECTED_START_POS) {
        Assert.assertTrue(it.hasNext());
        final Interval snpSite = it.next();
        Assert.assertEquals(snpSite.getContig(), EXPECTED_CONTIG);
        Assert.assertEquals(snpSite.getStart(), expectedStart);
        Assert.assertEquals(snpSite.length(), 1);
    }
    Assert.assertFalse(it.hasNext());
}
 
Example 12
Source Project: picard   Source File: TargetMetricsCollector.java    License: MIT License 6 votes vote down vote up
/** Emits a file with per base coverage if an output file has been set. */
private void emitPerBaseCoverageIfRequested() {
    if (this.perBaseOutput == null) return;

    // try-with-resources ensures the writer is flushed and closed even if an
    // entry fails mid-iteration; the original leaked it on exception.
    try (PrintWriter out = new PrintWriter(IOUtil.openFileForBufferedWriting(this.perBaseOutput))) {
        out.println("chrom\tpos\ttarget\tcoverage");
        for (final Map.Entry<Interval,Coverage> entry : this.highQualityCoverageByTarget.entrySet()) {
            final Interval interval = entry.getKey();
            final String chrom = interval.getContig();
            final int firstBase = interval.getStart();

            // One row per base of the target: chrom, 1-based position, target name, depth.
            final int[] cov = entry.getValue().getDepths();
            for (int i = 0; i < cov.length; ++i) {
                out.print(chrom);
                out.print('\t');
                out.print(firstBase + i);
                out.print('\t');
                out.print(interval.getName());
                out.print('\t');
                out.print(cov[i]);
                out.println();
            }
        }
    }
}
 
Example 13
Source Project: Hadoop-BAM   Source File: BAMInputFormat.java    License: MIT License 6 votes vote down vote up
/**
 * Converts an htsjdk {@link Interval} into a {@link QueryInterval}.
 *
 * The contig name is resolved to its index via a lookup in the supplied
 * sequence dictionary.
 *
 * @param interval interval to convert; must not be null
 * @param sequenceDictionary dictionary used for the contig-name-to-index lookup; must not be null
 * @return an equivalent interval in QueryInterval format
 * @throws IllegalArgumentException if either argument is null or the contig is unknown
 */
private static QueryInterval convertSimpleIntervalToQueryInterval( final Interval interval,	final SAMSequenceDictionary sequenceDictionary ) {
	if (interval == null)
		throw new IllegalArgumentException("interval may not be null");
	if (sequenceDictionary == null)
		throw new IllegalArgumentException("sequence dictionary may not be null");

	// getSequenceIndex returns -1 for contigs absent from the dictionary.
	final int contigIndex = sequenceDictionary.getSequenceIndex(interval.getContig());
	if (contigIndex == -1)
		throw new IllegalArgumentException("Contig " + interval.getContig() + " not present in reads sequence dictionary");

	return new QueryInterval(contigIndex, interval.getStart(), interval.getEnd());
}
 
Example 14
Source Project: picard   Source File: IntervalListToBed.java    License: MIT License 6 votes vote down vote up
/**
 * Converts the input IntervalList to BED lines, optionally sorting first.
 * BED is 0-based half-open, hence the {@code getStart() - 1} below.
 *
 * @return 0 on success
 * @throws RuntimeIOException if writing the output fails
 */
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);

    IntervalList intervals = IntervalList.fromFile(INPUT);
    if (SORT) intervals = intervals.sorted();

    // try-with-resources: the original leaked the writer if a write threw mid-loop.
    try (BufferedWriter out = IOUtil.openFileForBufferedWriting(OUTPUT)) {
        for (final Interval i : intervals) {
            final String strand = i.isNegativeStrand() ? "-" : "+";
            // chrom, chromStart (0-based), chromEnd, name, score, strand
            final List<?> fields = CollectionUtil.makeList(i.getContig(), i.getStart()-1, i.getEnd(), i.getName(), SCORE, strand);
            out.append(fields.stream().map(String::valueOf).collect(Collectors.joining("\t")));
            out.newLine();
        }
    }
    catch (IOException ioe) {
        throw new RuntimeIOException(ioe);
    }

    return 0;
}
 
Example 15
Source Project: picard   Source File: BaitDesigner.java    License: MIT License 6 votes vote down vote up
/**
 * Tiles baits across a target at fixed offsets, clamping so no bait runs off
 * the end of the reference sequence.
 *
 * NOTE(review): baitCount is computed as if a bait starting exactly at
 * lastPossibleBaitStart were emitted, but the loop condition uses '<' (not '<='),
 * so the final bait may be skipped and fewer than baitCount baits produced —
 * confirm whether '<=' was intended.
 */
@Override
List<Bait> design(final BaitDesigner designer, final Interval target, final ReferenceSequence reference) {
    final List<Bait> baits = new LinkedList<Bait>();
    final int baitSize = designer.BAIT_SIZE;
    final int baitOffset = designer.BAIT_OFFSET;
    // Clamp the last start so a full-size bait still fits on the reference.
    final int lastPossibleBaitStart = Math.min(target.getEnd(), reference.length() - baitSize);
    final int baitCount = 1 + (int) Math.floor((lastPossibleBaitStart - target.getStart()) / (double) baitOffset);

    // Bait names are numbered 1..baitCount via the pre-incremented counter i.
    int i = 0;
    for (int start = target.getStart(); start < lastPossibleBaitStart; start += baitOffset) {
        final Bait bait = new Bait(target.getContig(),
                start,
                CoordMath.getEnd(start, baitSize),
                target.isNegativeStrand(),
                designer.makeBaitName(target.getName(), ++i, baitCount));
        bait.addBases(reference, designer.DESIGN_ON_TARGET_STRAND);
        baits.add(bait);
    }
    return baits;
}
 
Example 16
Source Project: Drop-seq   Source File: AnnotationUtils.java    License: MIT License 5 votes vote down vote up
/**
 * For a read, split into the alignment blocks and determine, per block, the genes
 * the block overlaps and the locus function of those genes.
 *
 * Each alignment block can yield a different functional annotation on the same gene,
 * and all are retained (e.g. block one exonic, block two intronic).
 *
 * Only genes referenced by ALL alignment blocks are kept: if block one refers to
 * genes A,B and block two to gene A only, then only gene A is retained.
 *
 * @param rec the read to annotate
 * @param geneOverlapDetector detector used to find genes overlapping each block
 * @return map from retained gene to the locus functions observed across blocks
 */
public Map<Gene, List<LocusFunction>> getFunctionalDataForRead (final SAMRecord rec, final OverlapDetector<Gene> geneOverlapDetector) {
	// Collect the per-block gene/locus-function annotations.
	final Map<AlignmentBlock, Map<Gene, List<LocusFunction>>> perBlock = new HashMap<>();
	for (final AlignmentBlock block : rec.getAlignmentBlocks()) {
		final Interval blockInterval = getInterval(rec.getReferenceName(), block);
		perBlock.put(block, getFunctionalDataForInterval(blockInterval, geneOverlapDetector));
	}
	// Keep only genes common to every alignment block.
	return simplifyFunctionalDataAcrossAlignmentBlocks(perBlock);
}
 
Example 17
Source Project: picard   Source File: ByIntervalListVariantContextIteratorTest.java    License: MIT License 5 votes vote down vote up
@Test
public void testNoOverlapDifferentContig() {
    // An interval on contig "3" should match no variants in the CEU trios VCF.
    final IntervalList intervalList = new IntervalList(header);
    intervalList.add(new Interval("3", 167166899, 167166899));

    final VCFFileReader reader = getReader(CEU_TRIOS_SNPS_VCF);
    final Iterator<VariantContext> iterator = new ByIntervalListVariantContextIterator(reader, intervalList);
    Assert.assertFalse(iterator.hasNext());
    reader.close();
}
 
Example 18
Source Project: picard   Source File: WgsMetricsTest.java    License: MIT License 5 votes vote down vote up
/**
 * Builds an IntervalList over a single synthetic contig.
 *
 * @param start 1-based start; a non-positive start yields an empty list
 * @param end   1-based inclusive end
 * @return an IntervalList containing [start, end] on "CONTIG", or empty when start <= 0
 */
private IntervalList buildIntervalList(final int start, final int end) {
    final SAMFileHeader header = new SAMFileHeader();
    header.addSequence(new SAMSequenceRecord("CONTIG", 100000000));
    final IntervalList intervals = new IntervalList(header);
    if (start > 0) {
        intervals.add(new Interval("CONTIG", start, end));
    }
    return intervals;
}
 
Example 19
Source Project: Drop-seq   Source File: AnnotationUtils.java    License: MIT License 5 votes vote down vote up
/**
 * Generates the locus function at each base of the interval.
 * @param i the interval to annotate; one LocusFunction is produced per base
 * @param gene the gene whose transcripts assign locus functions over the interval
 * @return an array of length {@code i.length()}, one LocusFunction per base,
 *         defaulting to INTERGENIC where no transcript assigns a function
 */
public LocusFunction[] getLocusFunctionsByInterval (final Interval i, final Gene gene) {
	// Get functional class for each position in the alignment block.
       final LocusFunction[] locusFunctions = new LocusFunction[i.length()];

       // By default, if base does not overlap with rRNA or gene, it is intergenic.
       Arrays.fill(locusFunctions, 0, locusFunctions.length, LocusFunction.INTERGENIC);

	for (final Gene.Transcript transcript : gene)
		transcript.assignLocusFunctionForRange(i.getStart(), locusFunctions);
       return locusFunctions;
}
 
Example 20
Source Project: Drop-seq   Source File: GTFRecord.java    License: MIT License 5 votes vote down vote up
/**
 * Constructs a GTFRecord spanning [start, end] on the given chromosome.
 * The interval is built unnamed; strand is carried by {@code negativeStrand}.
 */
public GTFRecord(final String chromsome, final int start, final int end, final boolean negativeStrand, final String geneID, final String geneName,
                    final String transcriptName, final String transcriptID, final String transcriptType, final String featureType,
                    final Integer geneVersion) {
	this.interval = new Interval(chromsome, start, end, negativeStrand, null);
	this.geneID = geneID;
	this.geneName = geneName;
	this.transcriptName = transcriptName;
	this.transcriptID = transcriptID;
	this.transcriptType = transcriptType;
	this.featureType = featureType;
	this.geneVersion = geneVersion;
}
 
Example 21
Source Project: Drop-seq   Source File: EnhanceGTFRecords.java    License: MIT License 5 votes vote down vote up
/**
 * Converts each intron interval of a transcript into a GTFRecord carrying the
 * parent gene/transcript metadata and the intron feature type.
 */
private List<GTFRecord> getGTFRecordsFromIntronIntervals (List<Interval> introns, GeneFromGTF.TranscriptFromGTF t) {
	final GeneFromGTF gene = t.getGene();
	final List<GTFRecord> records = new ArrayList<>();
	for (final Interval intron : introns) {
		// GTFRecord's constructor is deprecated but is what this code path uses.
		@SuppressWarnings("deprecation")
		final GTFRecord intronRecord = new GTFRecord(gene.getContig(), intron.getStart(), intron.getEnd(), gene.isNegativeStrand(),
				gene.getGeneID(), gene.getName(), t.getTranscriptName(), t.getTranscriptID(), t.getTranscriptType(),
				INTRON_FEATURE_TYPE, gene.getGeneVersion());
		records.add(intronRecord);
	}
	return records;
}
 
Example 22
Source Project: picard   Source File: LiftoverUtils.java    License: MIT License 5 votes vote down vote up
/**
 * Builds a reverse-complemented VariantContextBuilder for a negative-strand liftover.
 *
 * @param source the original variant
 * @param target the lifted-over interval; must be negative strand
 * @param refSeq the reference sequence at the target, used to validate/left-align indels
 * @return a builder positioned at the target with reverse-complemented alleles,
 *         or null when the indel's reverse-complemented reference allele does not
 *         match the new reference
 * @throws IllegalArgumentException if {@code target} is positive strand
 */
protected static VariantContextBuilder reverseComplementVariantContext(final VariantContext source, final Interval target, final ReferenceSequence refSeq) {
    if (target.isPositiveStrand()) {
        throw new IllegalArgumentException("This should only be called for negative strand liftovers");
    }
    final int start = target.getStart();
    final int stop = target.getEnd();

    // Snapshot the original alleles before the builder mutates them.
    final List<Allele> origAlleles = new ArrayList<>(source.getAlleles());
    final VariantContextBuilder vcb = new VariantContextBuilder(source);

    // Reposition onto the target and reverse-complement all alleles;
    // END is dropped since the coordinates change.
    vcb.rmAttribute(VCFConstants.END_KEY)
            .chr(target.getContig())
            .start(start)
            .stop(stop)
            .alleles(reverseComplementAlleles(origAlleles));

    if (isIndelForLiftover(source)) {
        // check that the reverse complemented bases match the new reference
        if (referenceAlleleDiffersFromReferenceForIndel(vcb.getAlleles(), refSeq, start, stop)) {
            return null;
        }
        // Indels must be left-aligned in the new coordinate system.
        leftAlignVariant(vcb, start, stop, vcb.getAlleles(), refSeq);
    }

    // Remap genotypes from the original alleles onto the (possibly realigned) new ones.
    vcb.genotypes(fixGenotypes(source.getGenotypes(), origAlleles, vcb.getAlleles()));

    return vcb;
}
 
Example 23
Source Project: picard   Source File: IntervalListScattererTest.java    License: MIT License 5 votes vote down vote up
/**
 * Returns the first interval in {@code source} that contains the given locus.
 *
 * @param source     intervals to search, in iteration order
 * @param chromosome contig name to match exactly
 * @param startIndex 1-based position that must fall within [start, end] of the interval
 * @return the containing interval
 * @throws NoSuchElementException if no interval contains the locus
 */
private static Interval lookupIntervalContainingLocus(final IntervalList source, final String chromosome, final int startIndex) {
    for (final Interval interval : source) {
        if (interval.getContig().equals(chromosome) && startIndex >= interval.getStart() && startIndex <= interval.getEnd()) {
            return interval;
        }
    }
    // Include the locus in the message so test failures are diagnosable
    // (the original threw with no message at all).
    throw new NoSuchElementException("No interval contains " + chromosome + ":" + startIndex);
}
 
Example 24
Source Project: Drop-seq   Source File: TagReadWithInterval.java    License: MIT License 5 votes vote down vote up
/**
 * Tags each mapped read in INPUT with the names of the intervals it overlaps and
 * writes the result to OUTPUT.  Unmapped reads get the tag cleared.
 *
 * @return 0 on success
 */
@Override
protected int doWork() {
	IOUtil.assertFileIsReadable(INPUT);
	IOUtil.assertFileIsWritable(OUTPUT);

	final SamReader inputSam = SamReaderFactory.makeDefault().open(INPUT);
	final SAMFileHeader header = inputSam.getFileHeader();
	header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
	SamHeaderUtil.addPgRecord(header, this);

	final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(header, true, OUTPUT);

	// Build the interval overlap detector once, up front.
	final IntervalList loci = IntervalList.fromFile(this.INTERVALS);
	final OverlapDetector<Interval> od = getOverlapDetector(loci);
	final ProgressLogger progress = new ProgressLogger(log);

	for (SAMRecord record : inputSam) {
		progress.record(record);

		// Idiomatic boolean test (was `== false`); use alignment blocks rather than
		// start/end to properly handle split reads spanning exon/exon boundaries.
		if (!record.getReadUnmappedFlag()) {
			record = tagRead(record, od);
		} else {
			// Unmapped reads carry no interval; clear any stale tag.
			record.setAttribute(this.TAG, null);
		}
		writer.addAlignment(record);
	}

	CloserUtil.close(inputSam);
	writer.close();

	return 0;
}
 
Example 25
Source Project: Drop-seq   Source File: TagReadWithInterval.java    License: MIT License 5 votes vote down vote up
/**
 * Joins the names of the given intervals into a comma-separated string.
 *
 * @param intervals intervals whose names are joined, in iteration order
 * @return a comma separated list of interval names, or null if the collection is empty
 */
String getIntervalName (final Collection<Interval> intervals) {
	if (intervals.isEmpty())
		return null;
	// StringUtils.join is kept (rather than String.join) to preserve its
	// handling of null interval names.
	final List<String> names = intervals.stream()
			.map(Interval::getName)
			.collect(Collectors.toList());
	return StringUtils.join(names, ",");
}
 
Example 26
Source Project: picard   Source File: LiftoverVcfTest.java    License: MIT License 5 votes vote down vote up
@Test(dataProvider = "indelFlipDataWithOriginalAlleles")
public void testFlipIndelWithOriginalAlleles(final VariantContext source, final ReferenceSequence reference, final VariantContext result) {
    // Lift the source locus over the chain and require a negative-strand target.
    final LiftOver liftOver = new LiftOver(CHAIN_FILE);
    final Interval sourceLocus = new Interval(source.getContig(), source.getStart(), source.getEnd());
    final Interval target = liftOver.liftOver(sourceLocus);
    if (target != null && !target.isNegativeStrand()) {
        throw new RuntimeException("not reversed");
    }

    // Flip the variant onto the target strand and compare to the expected result.
    final VariantContext flipped = LiftoverUtils.liftVariant(source, target, reference, false, true);
    VcfTestUtils.assertEquals(flipped, result);
}
 
Example 27
/**
 * Returns a copy of the bins with any bin consisting entirely of N bases removed.
 * A bin is kept as soon as a single non-N reference base is found in it.
 */
private static IntervalList filterBinsContainingOnlyNs(final IntervalList unfilteredBins, final ReferenceDataSource reference) {
    final IntervalList keptBins = new IntervalList(reference.getSequenceDictionary());
    for (final Interval bin : unfilteredBins) {
        // anyMatch(!= N) is the short-circuiting complement of !allMatch(== N).
        final boolean hasRealBase = Utils.stream(reference.query(new SimpleInterval(bin)))
                .anyMatch(base -> Nucleotide.decode(base) != Nucleotide.N);
        if (hasRealBase) {
            keptBins.add(bin);
        }
    }
    return keptBins;
}
 
Example 28
Source Project: Hadoop-BAM   Source File: VCFRecordReader.java    License: MIT License 5 votes vote down vote up
/**
 * Returns true when the variant overlaps any configured interval,
 * or unconditionally when no interval restriction is set.
 */
private boolean overlaps(VariantContext v) {
	// No interval restriction: every record is in scope.
	if (intervals == null)
		return true;
	final Interval variantSpan = new Interval(v.getContig(), v.getStart(), v.getEnd());
	return overlapDetector.overlapsAny(variantSpan);
}
 
Example 29
Source Project: Drop-seq   Source File: SNPUMIBasePileup.java    License: MIT License 5 votes vote down vote up
/**
 * Constructs a pileup for one SNP site, scoped to a single gene, cell,
 * and molecular (UMI) barcode.  The locus-function set starts empty.
 */
public SNPUMIBasePileup (final Interval snpInterval, final String gene, final String cell, final String molecularBarcode) {
	super(snpInterval);
	this.locusFunctionSet = new HashSet<>();
	this.gene = gene;
	this.cell = cell;
	this.molecularBarcode = molecularBarcode;
}
 
Example 30
Source Project: Drop-seq   Source File: SNPUMIBasePileupIterator.java    License: MIT License 5 votes vote down vote up
/**
 * Builds the initial pileup for a read by decoding the SNP interval, gene,
 * cell barcode, and UMI from the record's BAM tags, then applying the read's
 * locus function.
 */
private SNPUMIBasePileup getInitialPileup (final SAMRecord rec) {
	// Decode the SNP site from its string-encoded tag.
	final Interval snpSite = IntervalTagComparator.fromString(rec.getStringAttribute(this.snpTag));
	final String gene = rec.getStringAttribute(this.geneTag);
	final String cellBarcode = rec.getStringAttribute(this.cellBarcodeTag);
	final String umi = rec.getStringAttribute(this.molecularBarcodeTag);

	final SNPUMIBasePileup pileup = new SNPUMIBasePileup(snpSite, gene, cellBarcode, umi);
	return addLocusFunction(pileup, rec);
}