htsjdk.tribble.bed.BEDCodec Java Examples

The following examples show how to use htsjdk.tribble.bed.BEDCodec. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: BEDFileLoader.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
@NotNull
public static SortedSetMultimap<String, GenomeRegion> fromBedFile(@NotNull String bedFile) throws IOException {
    final SortedSetMultimap<String, GenomeRegion> regionMap = TreeMultimap.create();

    String prevChromosome = null;
    GenomeRegion prevRegion = null;
    try (final AbstractFeatureReader<BEDFeature, LineIterator> reader = getFeatureReader(bedFile, new BEDCodec(), false)) {
        for (final BEDFeature bedFeature : reader.iterator()) {
            final String chromosome = bedFeature.getContig();
            final long start = bedFeature.getStart();
            final long end = bedFeature.getEnd();

            if (end < start) {
                LOGGER.warn("Invalid genome region found in chromosome {}: start={}, end={}", chromosome, start, end);
            } else {
                final GenomeRegion region = GenomeRegions.create(chromosome, start, end);
                if (prevRegion != null && chromosome.equals(prevChromosome) && prevRegion.end() >= start) {
                    LOGGER.warn("BED file is not sorted, please fix! Current={}, Previous={}", region, prevRegion);
                } else {
                    regionMap.put(chromosome, region);
                    prevChromosome = chromosome;
                    prevRegion = region;
                }
            }
        }
    }

    return regionMap;
}
 
Example #2
Source File: FeatureManagerUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@DataProvider(name = "DetectCorrectFileFormatTestData")
public Object[][] getDetectCorrectFileFormatTestData() {
    return new Object[][] {
            { new File(FEATURE_MANAGER_TEST_DIRECTORY + "minimal_vcf4_file.vcf"), VCFCodec.class },
            { new File(FEATURE_MANAGER_TEST_DIRECTORY + "minimal_vcf3_file.vcf"), VCF3Codec.class },
            { new File(FEATURE_MANAGER_TEST_DIRECTORY + "minimal_bcf_file.bcf"), BCF2Codec.class },
            { new File(FEATURE_MANAGER_TEST_DIRECTORY + "minimal_bed_file.bed"), BEDCodec.class},
            { new File(FEATURE_MANAGER_TEST_DIRECTORY + "minimal_table_file.table"), TableCodec.class}
    };
}
 
Example #3
Source File: LongISLNDReadAlignmentMap.java    From varsim with BSD 2-Clause "Simplified" License 5 votes vote down vote up
public LongISLNDReadAlignmentMap(final Collection<String> readAlignmentMapFiles) throws IOException {
    for (final String readAlignmentMapFileName : readAlignmentMapFiles) {
        log.info("Reading in read map from " + readAlignmentMapFileName);
        final AbstractFeatureReader<BEDFeature, LineIterator> featureReader = AbstractFeatureReader.getFeatureReader(readAlignmentMapFileName, new BEDCodec(), false);
        try {
            final CloseableTribbleIterator<BEDFeature> featureIterator = featureReader.iterator();
            while (featureIterator.hasNext()) {
                final BEDFeature feature = featureIterator.next();
                readAlignmentMap.put(feature.getName(), new LongISLNDReadMapRecord(feature).toReadMapRecord());
            }
        } finally {
            featureReader.close();
        }
    }
}
 
Example #4
Source File: SimpleReference.java    From varsim with BSD 2-Clause "Simplified" License 5 votes vote down vote up
public long getNumNonNBases(final File regions) throws IOException {
    loadAllSequences();
    long count = 0;

    final FeatureCodec<BEDFeature, LineIterator> bedCodec = new BEDCodec(BEDCodec.StartOffset.ONE);
    final LineIterator lineIterator = new AsciiLineReaderIterator(new AsciiLineReader(new FileInputStream(regions)));

    while (lineIterator.hasNext()) {
        final BEDFeature bedFeature = bedCodec.decode(lineIterator);
        count += data.get(new ChrString(bedFeature.getContig())).getNumNonNBases(bedFeature.getStart(), bedFeature.getEnd());
    }
    return count;
}
 
Example #5
Source File: ReplicationOriginAnnotator.java    From hmftools with GNU General Public License v3.0 4 votes vote down vote up
public void loadReplicationOrigins(final String filename)
{
    if(filename.isEmpty())
        return;

    try
    {
        String currentChr = "";
        List<ReplicationOriginRegion> regionList = null;
        int regionCount = 0;

        final AbstractFeatureReader<BEDFeature, LineIterator> reader = getFeatureReader(filename, new BEDCodec(), false);

        for (final BEDFeature bedFeature : reader.iterator())
        {
            final String chromosome = refGenomeChromosome(bedFeature.getContig(), RG_VERSION);

            if (!chromosome.equals(currentChr))
            {
                currentChr = chromosome;
                regionList = mReplicationOrigins.get(chromosome);

                if (regionList == null)
                {
                    regionList = Lists.newArrayList();
                    mReplicationOrigins.put(currentChr, regionList);
                }
            }

            ++regionCount;

            double originValue = Double.parseDouble(bedFeature.getName()) / 100;

            regionList.add(new ReplicationOriginRegion(
                    chromosome,
                    bedFeature.getStart(),
                    bedFeature.getEnd(),
                    originValue));
        }

        LNX_LOGGER.debug("loaded {} replication origins", regionCount);
    }
    catch(IOException exception)
    {
        LNX_LOGGER.error("Failed to read replication origin BED file({})", filename);
    }
}
 
Example #6
Source File: BedToIntervalList.java    From picard with MIT License 4 votes vote down vote up
@Override
protected int doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsReadable(SEQUENCE_DICTIONARY);
    IOUtil.assertFileIsWritable(OUTPUT);
    try {
        // create a new header that we will assign the dictionary provided by the SAMSequenceDictionaryExtractor to.
        final SAMFileHeader header = new SAMFileHeader();
        final SAMSequenceDictionary samSequenceDictionary = SAMSequenceDictionaryExtractor.extractDictionary(SEQUENCE_DICTIONARY.toPath());
        header.setSequenceDictionary(samSequenceDictionary);
        // set the sort order to be sorted by coordinate, which is actually done below
        // by getting the .uniqued() intervals list before we write out the file
        header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
        final IntervalList intervalList = new IntervalList(header);

        final FeatureReader<BEDFeature> bedReader = AbstractFeatureReader.getFeatureReader(INPUT.getAbsolutePath(), new BEDCodec(), false);
        final CloseableTribbleIterator<BEDFeature> iterator = bedReader.iterator();
        final ProgressLogger progressLogger = new ProgressLogger(LOG, (int) 1e6);

        while (iterator.hasNext()) {
            final BEDFeature bedFeature = iterator.next();
            final String sequenceName = bedFeature.getContig();
            final int start = bedFeature.getStart();
            final int end = bedFeature.getEnd();
            // NB: do not use an empty name within an interval
            final String name;
            if (bedFeature.getName().isEmpty()) {
                name = null;
            } else {
                name = bedFeature.getName();
            }

            final SAMSequenceRecord sequenceRecord = header.getSequenceDictionary().getSequence(sequenceName);

            // Do some validation
            if (null == sequenceRecord) {
                if (DROP_MISSING_CONTIGS) {
                    LOG.info(String.format("Dropping interval with missing contig: %s:%d-%d", sequenceName, bedFeature.getStart(), bedFeature.getEnd()));
                    missingIntervals++;
                    missingRegion += bedFeature.getEnd() - bedFeature.getStart();
                    continue;
                }
                throw new PicardException(String.format("Sequence '%s' was not found in the sequence dictionary", sequenceName));
            } else if (start < 1) {
                throw new PicardException(String.format("Start on sequence '%s' was less than one: %d", sequenceName, start));
            } else if (sequenceRecord.getSequenceLength() < start) {
                throw new PicardException(String.format("Start on sequence '%s' was past the end: %d < %d", sequenceName, sequenceRecord.getSequenceLength(), start));
            } else if ((end == 0 && start != 1 ) //special case for 0-length interval at the start of a contig
                    || end < 0 ) {
                throw new PicardException(String.format("End on sequence '%s' was less than one: %d", sequenceName, end));
            } else if (sequenceRecord.getSequenceLength() < end) {
                throw new PicardException(String.format("End on sequence '%s' was past the end: %d < %d", sequenceName, sequenceRecord.getSequenceLength(), end));
            } else if (end < start - 1) {
                throw new PicardException(String.format("On sequence '%s', end < start-1: %d <= %d", sequenceName, end, start));
            }

            final boolean isNegativeStrand = bedFeature.getStrand() == Strand.NEGATIVE;
            final Interval interval = new Interval(sequenceName, start, end, isNegativeStrand, name);
            intervalList.add(interval);

            progressLogger.record(sequenceName, start);
        }
        CloserUtil.close(bedReader);

        if (DROP_MISSING_CONTIGS) {
            if (missingRegion == 0) {
                LOG.info("There were no missing regions.");
            } else {
                LOG.warn(String.format("There were %d missing regions with a total of %d bases", missingIntervals, missingRegion));
            }
        }
        // Sort and write the output
        IntervalList out = intervalList;
        if (SORT) {
            out = out.sorted();
        }
        if (UNIQUE) {
            out = out.uniqued();
        }
        out.write(OUTPUT);
        LOG.info(String.format("Wrote %d intervals spanning a total of %d bases", out.getIntervals().size(),out.getBaseCount()));

    } catch (final IOException e) {
        throw new RuntimeException(e);
    }

    return 0;
}