htsjdk.tribble.bed.BEDFeature Java Examples

The following examples show how to use htsjdk.tribble.bed.BEDFeature. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ConvertBedToTargetFile.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Override
protected Object doWork() {
    final FeatureCodec<? extends Feature, ?> codec = FeatureManager.getCodecForFile(inputBedFile);
    final Class<? extends Feature> featureType = codec.getFeatureType();
    if (BEDFeature.class.isAssignableFrom(featureType)) {
        final FeatureDataSource<? extends BEDFeature> source = new FeatureDataSource<>(inputBedFile);
        try {
            final List<Target> targets = StreamSupport.stream(source.spliterator(), false).map(ConvertBedToTargetFile::createTargetFromBEDFeature)
                    .collect(Collectors.toList());
            TargetWriter.writeTargetsToFile(outFile, targets);
        } catch (final TribbleException e) {
            throw new UserException.BadInput(String.format("'%s' has a .bed extension but does not seem to be a valid BED file.", inputBedFile.getAbsolutePath()));
        }
    } else {
        throw new UserException.BadInput(String.format("'%s' does not seem to be a BED file.", inputBedFile.getAbsolutePath()));
    }
    return "SUCCESS";
}
 
Example #2
Source File: FeatureManagerUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test
public void testHandleRequestForValidFeatureInputIterator() {
    final ValidFeatureArgumentSource toolInstance = new ValidFeatureArgumentSource();

    // Initialize two of the FeatureInput fields as they would be initialized by the argument-parsing
    // system to simulate a run of the tool with two FeatureInputs.
    toolInstance.variantContextFeatureInput = new FeatureInput<>(FEATURE_MANAGER_TEST_DIRECTORY + "feature_data_source_test.vcf");
    toolInstance.bedListFeatureInput.add(new FeatureInput<>(FEATURE_MANAGER_TEST_DIRECTORY + "minimal_bed_file.bed"));

    final FeatureManager manager = new FeatureManager(toolInstance);
    final Iterator<VariantContext> vcIterator = manager.getFeatureIterator(toolInstance.variantContextFeatureInput);
    final Iterator<BEDFeature> bedIterator = manager.getFeatureIterator(toolInstance.bedListFeatureInput.get(0));

    final List<VariantContext> variants = Utils.stream(vcIterator).collect(Collectors.toList());
    final List<BEDFeature> beds = Utils.stream(bedIterator).collect(Collectors.toList());

    Assert.assertEquals(variants.size(), 26);
    Assert.assertEquals(variants.get(4).getStart(),280);
    Assert.assertEquals(beds.size(), 1);
}
 
Example #3
Source File: ExampleFeatureWalker.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Override
public void apply(final BEDFeature feature, final ReadsContext readsContext, final ReferenceContext referenceContext, final FeatureContext featureContext) {
    outputStream.println("Current feature: " + toBedFeatureString(feature));

    if ( referenceContext.hasBackingDataSource() ) {
        printReferenceBases(referenceContext);
    }

    if ( readsContext.hasBackingDataSource() ) {
        printReads(readsContext);
    }

    if ( featureContext.hasBackingDataSource() ) {
        printVariants(featureContext);
    }
}
 
Example #4
Source File: FeatureManagerUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
@Test
public void testHandleRequestForValidFeatureInputs() {
    ValidFeatureArgumentSource toolInstance = new ValidFeatureArgumentSource();

    // Initialize two of the FeatureInput fields as they would be initialized by the argument-parsing
    // system to simulate a run of the tool with two FeatureInputs.
    toolInstance.variantContextFeatureInput = new FeatureInput<>(FEATURE_MANAGER_TEST_DIRECTORY + "feature_data_source_test.vcf");
    toolInstance.bedListFeatureInput.add(new FeatureInput<>(FEATURE_MANAGER_TEST_DIRECTORY + "minimal_bed_file.bed"));

    FeatureManager manager = new FeatureManager(toolInstance);
    List<VariantContext> vcFeatures = manager.getFeatures(toolInstance.variantContextFeatureInput, new SimpleInterval("1", 1, 2000));
    List<BEDFeature> bedFeatures = manager.getFeatures(toolInstance.bedListFeatureInput.get(0), new SimpleInterval("1", 1, 1));

    Assert.assertEquals(vcFeatures.size(), 14, "Wrong number of Features returned from VariantContext test Feature file");
    Assert.assertEquals(bedFeatures.size(), 1, "Wrong number of Features returned from BED test Feature file");
}
 
Example #5
Source File: AnnotateIntervals.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * @param interval  assumed to have non-zero length
 */
@Override
Double apply(final Locatable interval,
             final ReferenceContext referenceContext,
             final FeatureContext featureContext) {
    double lengthWeightedSum = 0.;
    final List<BEDFeature> features = featureContext.getValues(trackPath);
    for (final BEDFeature feature : features) {
        final double scoreOrNaN = (double) feature.getScore();
        final double score = Double.isNaN(scoreOrNaN) ? 1. : scoreOrNaN;    // missing or NaN score -> score = 1
        lengthWeightedSum += score *
                CoordMath.getOverlap(
                        feature.getStart(), feature.getEnd() - 1,       // zero-based
                        interval.getStart(), interval.getEnd());        // one-based
    }
    return lengthWeightedSum / interval.getLengthOnReference();
}
 
Example #6
Source File: XGBoostEvidenceFilter.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Calculate fractional overlap of BreakpointEvidence with genome tract data.
 * Separately sum the number of base pairs in genomeIntervals that overlap evidence (allowing base pairs to
 * count multiple times if there is overlap in genomeIntervals) then divide by length of evidence. returned
 * value will be double >= 0, but may be larger than 1 if any genomeIntervals overlap each other.
 */
private static double getGenomeIntervalsOverlap(final BreakpointEvidence evidence,
                                                final FeatureDataSource<BEDFeature> genomeIntervals,
                                                final ReadMetadata readMetadata) {
    final SVInterval location = evidence.getLocation();
    final SimpleInterval simpleInterval = new SimpleInterval(readMetadata.getContigName(location.getContig()),
            location.getStart(),
            location.getEnd() - 1); // "end - 1" because SimpleIntervals are closed on both ends
    int overlap = 0;
    for(final Iterator<BEDFeature> overlapperItr = genomeIntervals.query(simpleInterval); overlapperItr.hasNext();) {
        final BEDFeature overlapper = overlapperItr.next();
        // " + 1" because genome tract data is semi-closed, but BEDFeature is fully closed
        final int overlapLength = Math.min(simpleInterval.getEnd(), overlapper.getEnd()) + 1
                - Math.max(simpleInterval.getStart(), overlapper.getStart());
        overlap += overlapLength;
    }
    return overlap / (double)simpleInterval.size();
}
 
Example #7
Source File: BEDFileLoader.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
@NotNull
public static SortedSetMultimap<String, GenomeRegion> fromBedFile(@NotNull String bedFile) throws IOException {
    final SortedSetMultimap<String, GenomeRegion> regionMap = TreeMultimap.create();

    String prevChromosome = null;
    GenomeRegion prevRegion = null;
    try (final AbstractFeatureReader<BEDFeature, LineIterator> reader = getFeatureReader(bedFile, new BEDCodec(), false)) {
        for (final BEDFeature bedFeature : reader.iterator()) {
            final String chromosome = bedFeature.getContig();
            final long start = bedFeature.getStart();
            final long end = bedFeature.getEnd();

            if (end < start) {
                LOGGER.warn("Invalid genome region found in chromosome {}: start={}, end={}", chromosome, start, end);
            } else {
                final GenomeRegion region = GenomeRegions.create(chromosome, start, end);
                if (prevRegion != null && chromosome.equals(prevChromosome) && prevRegion.end() >= start) {
                    LOGGER.warn("BED file is not sorted, please fix! Current={}, Previous={}", region, prevRegion);
                } else {
                    regionMap.put(chromosome, region);
                    prevChromosome = chromosome;
                    prevRegion = region;
                }
            }
        }
    }

    return regionMap;
}
 
Example #8
Source File: SimpleReference.java    From varsim with BSD 2-Clause "Simplified" License 5 votes vote down vote up
public long getNumNonNBases(final File regions) throws IOException {
    loadAllSequences();
    long count = 0;

    final FeatureCodec<BEDFeature, LineIterator> bedCodec = new BEDCodec(BEDCodec.StartOffset.ONE);
    final LineIterator lineIterator = new AsciiLineReaderIterator(new AsciiLineReader(new FileInputStream(regions)));

    while (lineIterator.hasNext()) {
        final BEDFeature bedFeature = bedCodec.decode(lineIterator);
        count += data.get(new ChrString(bedFeature.getContig())).getNumNonNBases(bedFeature.getStart(), bedFeature.getEnd());
    }
    return count;
}
 
Example #9
Source File: LongISLNDReadAlignmentMap.java    From varsim with BSD 2-Clause "Simplified" License 5 votes vote down vote up
public LongISLNDReadAlignmentMap(final Collection<String> readAlignmentMapFiles) throws IOException {
    for (final String readAlignmentMapFileName : readAlignmentMapFiles) {
        log.info("Reading in read map from " + readAlignmentMapFileName);
        final AbstractFeatureReader<BEDFeature, LineIterator> featureReader = AbstractFeatureReader.getFeatureReader(readAlignmentMapFileName, new BEDCodec(), false);
        try {
            final CloseableTribbleIterator<BEDFeature> featureIterator = featureReader.iterator();
            while (featureIterator.hasNext()) {
                final BEDFeature feature = featureIterator.next();
                readAlignmentMap.put(feature.getName(), new LongISLNDReadMapRecord(feature).toReadMapRecord());
            }
        } finally {
            featureReader.close();
        }
    }
}
 
Example #10
Source File: LongISLNDReadMapRecord.java    From varsim with BSD 2-Clause "Simplified" License 5 votes vote down vote up
LongISLNDReadMapRecord(final BEDFeature bedFeature) {
    chromosome = new ChrString(bedFeature.getContig());
    start = bedFeature.getStart();
    end = bedFeature.getEnd();
    readName = bedFeature.getName();
    strand = bedFeature.getStrand();
}
 
Example #11
Source File: FeatureManagerUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@DataProvider(name = "DetectFeatureTypeFromFeatureInputFieldTestData")
public Object[][] getDetectFeatureTypeFromFeatureInputFieldTestData() {
    return new Object[][] {
            { "variantContextFeatureInput", VariantContext.class },
            { "variantContextListFeatureInput", VariantContext.class },
            { "bedFeatureInput", BEDFeature.class },
            { "bedListFeatureInput", BEDFeature.class }
    };
}
 
Example #12
Source File: FeatureManagerUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Test(expectedExceptions = UserException.WrongFeatureType.class)
public void testRestrictCodecSelectionToWrongFeatureType() {
    final File vcf = new File(FEATURE_MANAGER_TEST_DIRECTORY + "minimal_vcf4_file.vcf");

    // If we require BED Features from this vcf file, we should get a type mismatch exception
    FeatureManager.getCodecForFile(vcf.toPath(), BEDFeature.class);
}
 
Example #13
Source File: AnnotateIntervals.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
private static void checkForOverlaps(final FeatureManager featureManager,
                                     final FeatureInput<BEDFeature> featureTrackPath,
                                     final SAMSequenceDictionary sequenceDictionary) {
    final List<BEDFeature> features = IteratorUtils.toList(featureManager.getFeatureIterator(featureTrackPath));
    final GenomeLocParser parser = new GenomeLocParser(sequenceDictionary);
    final List<GenomeLoc> genomeLocs = IntervalUtils.genomeLocsFromLocatables(parser, features);
    final List<GenomeLoc> mergedGenomeLocs = IntervalUtils.mergeIntervalLocations(
            genomeLocs, IntervalMergingRule.OVERLAPPING_ONLY);
    if (genomeLocs.size() != mergedGenomeLocs.size()) {
        throw new UserException.BadInput(String.format("Feature track %s contains overlapping intervals; " +
                "these should be merged.", featureTrackPath));
    }
}
 
Example #14
Source File: AnnotateIntervals.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
SegmentalDuplicationContentAnnotator(final FeatureInput<BEDFeature> segmentalDuplicationTrackPath) {
    super(segmentalDuplicationTrackPath);
}
 
Example #15
Source File: AnnotateIntervals.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
MappabilityAnnotator(final FeatureInput<BEDFeature> mappabilityTrackPath) {
    super(mappabilityTrackPath);
}
 
Example #16
Source File: AnnotateIntervals.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
BEDLengthWeightedAnnotator(final FeatureInput<BEDFeature> trackPath) {
    this.trackPath = trackPath;
}
 
Example #17
Source File: ExampleFeatureWalker.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
@Override
protected boolean isAcceptableFeatureType(Class<? extends Feature> featureType) {
    return featureType.isAssignableFrom(BEDFeature.class);
}
 
Example #18
Source File: BedToIntervalList.java    From picard with MIT License 4 votes vote down vote up
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsReadable(SEQUENCE_DICTIONARY);
    IOUtil.assertFileIsWritable(OUTPUT);
    try {
        // create a new header that we will assign the dictionary provided by the SAMSequenceDictionaryExtractor to.
        final SAMFileHeader header = new SAMFileHeader();
        final SAMSequenceDictionary samSequenceDictionary = SAMSequenceDictionaryExtractor.extractDictionary(SEQUENCE_DICTIONARY.toPath());
        header.setSequenceDictionary(samSequenceDictionary);
        // set the sort order to be sorted by coordinate, which is actually done below
        // by getting the .uniqued() intervals list before we write out the file
        header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
        final IntervalList intervalList = new IntervalList(header);

        final FeatureReader<BEDFeature> bedReader = AbstractFeatureReader.getFeatureReader(INPUT.getAbsolutePath(), new BEDCodec(), false);
        final CloseableTribbleIterator<BEDFeature> iterator = bedReader.iterator();
        final ProgressLogger progressLogger = new ProgressLogger(LOG, (int) 1e6);

        while (iterator.hasNext()) {
            final BEDFeature bedFeature = iterator.next();
            final String sequenceName = bedFeature.getContig();
            final int start = bedFeature.getStart();
            final int end = bedFeature.getEnd();
            // NB: do not use an empty name within an interval
            final String name;
            if (bedFeature.getName().isEmpty()) {
                name = null;
            } else {
                name = bedFeature.getName();
            }

            final SAMSequenceRecord sequenceRecord = header.getSequenceDictionary().getSequence(sequenceName);

            // Do some validation
            if (null == sequenceRecord) {
                if (DROP_MISSING_CONTIGS) {
                    LOG.info(String.format("Dropping interval with missing contig: %s:%d-%d", sequenceName, bedFeature.getStart(), bedFeature.getEnd()));
                    missingIntervals++;
                    missingRegion += bedFeature.getEnd() - bedFeature.getStart();
                    continue;
                }
                throw new PicardException(String.format("Sequence '%s' was not found in the sequence dictionary", sequenceName));
            } else if (start < 1) {
                throw new PicardException(String.format("Start on sequence '%s' was less than one: %d", sequenceName, start));
            } else if (sequenceRecord.getSequenceLength() < start) {
                throw new PicardException(String.format("Start on sequence '%s' was past the end: %d < %d", sequenceName, sequenceRecord.getSequenceLength(), start));
            } else if ((end == 0 && start != 1 ) //special case for 0-length interval at the start of a contig
                    || end < 0 ) {
                throw new PicardException(String.format("End on sequence '%s' was less than one: %d", sequenceName, end));
            } else if (sequenceRecord.getSequenceLength() < end) {
                throw new PicardException(String.format("End on sequence '%s' was past the end: %d < %d", sequenceName, sequenceRecord.getSequenceLength(), end));
            } else if (end < start - 1) {
                throw new PicardException(String.format("On sequence '%s', end < start-1: %d <= %d", sequenceName, end, start));
            }

            final boolean isNegativeStrand = bedFeature.getStrand() == Strand.NEGATIVE;
            final Interval interval = new Interval(sequenceName, start, end, isNegativeStrand, name);
            intervalList.add(interval);

            progressLogger.record(sequenceName, start);
        }
        CloserUtil.close(bedReader);

        if (DROP_MISSING_CONTIGS) {
            if (missingRegion == 0) {
                LOG.info("There were no missing regions.");
            } else {
                LOG.warn(String.format("There were %d missing regions with a total of %d bases", missingIntervals, missingRegion));
            }
        }
        // Sort and write the output
        IntervalList out = intervalList;
        if (SORT) {
            out = out.sorted();
        }
        if (UNIQUE) {
            out = out.uniqued();
        }
        out.write(OUTPUT);
        LOG.info(String.format("Wrote %d intervals spanning a total of %d bases", out.getIntervals().size(),out.getBaseCount()));

    } catch (final IOException e) {
        throw new RuntimeException(e);
    }

    return 0;
}
 
Example #19
Source File: ConvertBedToTargetFile.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
protected static Target createTargetFromBEDFeature(final BEDFeature bedFeature) {
    return new Target(bedFeature.getName(), new SimpleInterval(bedFeature.getContig(), bedFeature.getStart(), bedFeature.getEnd()));
}
 
Example #20
Source File: ReplicationOriginAnnotator.java    From hmftools with GNU General Public License v3.0 4 votes vote down vote up
public void loadReplicationOrigins(final String filename)
{
    if(filename.isEmpty())
        return;

    try
    {
        String currentChr = "";
        List<ReplicationOriginRegion> regionList = null;
        int regionCount = 0;

        final AbstractFeatureReader<BEDFeature, LineIterator> reader = getFeatureReader(filename, new BEDCodec(), false);

        for (final BEDFeature bedFeature : reader.iterator())
        {
            final String chromosome = refGenomeChromosome(bedFeature.getContig(), RG_VERSION);

            if (!chromosome.equals(currentChr))
            {
                currentChr = chromosome;
                regionList = mReplicationOrigins.get(chromosome);

                if (regionList == null)
                {
                    regionList = Lists.newArrayList();
                    mReplicationOrigins.put(currentChr, regionList);
                }
            }

            ++regionCount;

            double originValue = Double.parseDouble(bedFeature.getName()) / 100;

            regionList.add(new ReplicationOriginRegion(
                    chromosome,
                    bedFeature.getStart(),
                    bedFeature.getEnd(),
                    originValue));
        }

        LNX_LOGGER.debug("loaded {} replication origins", regionCount);
    }
    catch(IOException exception)
    {
        LNX_LOGGER.error("Failed to read replication origin BED file({})", filename);
    }
}