htsjdk.tribble.FeatureCodec Java Examples

The following examples show how to use htsjdk.tribble.FeatureCodec. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ConvertBedToTargetFile.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Converts the input BED file into a target file.
 *
 * Looks up the codec for {@code inputBedFile}; if it produces {@link BEDFeature}s, every feature is
 * mapped to a {@code Target} and the resulting list is written to {@code outFile}.
 *
 * @return the string {@code "SUCCESS"} when the targets were written
 * @throws UserException.BadInput if the codec for the file does not produce BED features, or if a
 *                                {@link TribbleException} is raised while reading the file
 */
@Override
protected Object doWork() {
    final FeatureCodec<? extends Feature, ?> codec = FeatureManager.getCodecForFile(inputBedFile);
    final Class<? extends Feature> featureType = codec.getFeatureType();
    if (!BEDFeature.class.isAssignableFrom(featureType)) {
        throw new UserException.BadInput(String.format("'%s' does not seem to be a BED file.", inputBedFile.getAbsolutePath()));
    }

    // try-with-resources so the data source is closed on both the success and the failure path;
    // previously it was never closed.
    try (final FeatureDataSource<? extends BEDFeature> source = new FeatureDataSource<>(inputBedFile)) {
        final List<Target> targets = StreamSupport.stream(source.spliterator(), false)
                .map(ConvertBedToTargetFile::createTargetFromBEDFeature)
                .collect(Collectors.toList());
        TargetWriter.writeTargetsToFile(outFile, targets);
    } catch (final TribbleException e) {
        // NOTE(review): the cause is not attached here; pass `e` along if BadInput offers a
        // (String, Throwable) constructor -- confirm against UserException.
        throw new UserException.BadInput(String.format("'%s' has a .bed extension but does not seem to be a valid BED file.", inputBedFile.getAbsolutePath()));
    }
    return "SUCCESS";
}
 
Example #2
Source File: FeatureDataSource.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Get a new FeatureCodec instance to use for a FeatureInput. Avoid re-discovering which codec class to
 * use by checking to see if the FeatureInput already has a cached codec class. If not, discover the codec class
 * and cache it for next time.
 *
 * @param featureInput      input for which a codec is needed; its cached codec class is read and,
 *                          when absent, set by this method
 * @param targetFeatureType feature type passed to {@code FeatureManager.getCodecForFile} when the
 *                          codec has to be discovered
 * @return A new FeatureCodec instance to use for the FeatureInput.
 * @throws GATKException if the cached codec class cannot be instantiated reflectively
 */
@SuppressWarnings("unchecked")
private static <T extends Feature> FeatureCodec<T, ?> getCodecForFeatureInput(final FeatureInput<T> featureInput,
                                                                              final Class<? extends Feature> targetFeatureType) {
    final FeatureCodec<T, ?> codec;
    final Class<FeatureCodec<T, ?>> codecClass = featureInput.getFeatureCodecClass();
    if (codecClass == null) {
        // No cached class yet: discover the codec from the file and remember its class on the input.
        final Path featurePath = featureInput.toPath();
        IOUtils.assertFileIsReadable(featurePath);
        codec = (FeatureCodec<T, ?>) FeatureManager.getCodecForFile(featurePath, targetFeatureType);
        featureInput.setFeatureCodecClass((Class<FeatureCodec<T, ?>>) codec.getClass());
    } else {
        try {
            codec = codecClass.getDeclaredConstructor().newInstance();
        } catch (final InstantiationException | IllegalAccessException | NoSuchMethodException | InvocationTargetException e) {
            // Keep the reflective failure as the cause so the real reason is not lost.
            throw new GATKException("Unable to automatically instantiate codec " + codecClass.getName(), e);
        }
    }
    return codec;
}
 
Example #3
Source File: FeatureDataSource.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Builds a Tribble feature reader for the given input using the supplied codec.
 *
 * The reader is created without requiring an index. The cloud wrappers are passed to the reader
 * factory only when {@code BucketUtils.isEligibleForPrefetching(featureInput)} is true; otherwise
 * identity functions are passed instead.
 *
 * @param featureInput      input whose raw input string locates the data
 * @param codec             codec used to decode the data; must not be null
 * @param cloudWrapper      wrapper for the data channel, used only for prefetch-eligible inputs
 * @param cloudIndexWrapper wrapper for the index channel, used only for prefetch-eligible inputs
 * @return the feature reader produced by {@code AbstractFeatureReader.getFeatureReader}
 * @throws GATKException if a {@link TribbleException} is raised while creating the reader
 */
private static <T extends Feature> AbstractFeatureReader<T, ?> getTribbleFeatureReader(
        final FeatureInput<T> featureInput,
        final FeatureCodec<T, ?> codec,
        final Function<SeekableByteChannel, SeekableByteChannel> cloudWrapper,
        final Function<SeekableByteChannel, SeekableByteChannel> cloudIndexWrapper) {
    Utils.nonNull(codec);
    try {
        final String rawInputPath = featureInput.getRawInputString();

        // Wrappers only pay off for remote inputs that benefit from prefetching; everything else
        // gets a pass-through.
        final boolean prefetchEligible = BucketUtils.isEligibleForPrefetching(featureInput);
        final Function<SeekableByteChannel, SeekableByteChannel> dataWrapper =
                prefetchEligible ? cloudWrapper : Utils.identityFunction();
        final Function<SeekableByteChannel, SeekableByteChannel> indexWrapper =
                prefetchEligible ? cloudIndexWrapper : Utils.identityFunction();

        // The factory is told not to require an index (fourth argument); an index is demanded
        // later, as soon as a query by interval is attempted.
        return AbstractFeatureReader.getFeatureReader(rawInputPath, null, codec, false, dataWrapper, indexWrapper);
    } catch (final TribbleException e) {
        throw new GATKException("Error initializing feature reader for path " + featureInput.getFeaturePath(), e);
    }
}
 
Example #4
Source File: FeatureManager.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Returns a List of all codecs in DISCOVERED_CODECS that claim to be able to decode the specified file
 * according to their {@link FeatureCodec#canDecode(String)} methods.
 *
 * A fresh instance of every discovered codec class is created through its no-argument constructor and
 * asked whether it can decode the file's absolute URI.
 *
 * @param featureFile file for which to find potential codecs
 * @return A List of all codecs in DISCOVERED_CODECS for which {@link FeatureCodec#canDecode(String)} returns true on the specified file
 * @throws GATKException if a discovered codec class cannot be instantiated reflectively
 */
private static List<FeatureCodec<? extends Feature, ?>> getCandidateCodecsForFile( final Path featureFile )  {
    final List<FeatureCodec<? extends Feature, ?>> candidateCodecs = new ArrayList<>();

    // The URI string is identical for every codec, so build it once instead of once per iteration.
    final String featureFileUri = featureFile.toAbsolutePath().toUri().toString();

    for ( final Class<?> codecClass : DISCOVERED_CODECS ) {
        try {
            final FeatureCodec<? extends Feature, ?> codec = (FeatureCodec<? extends Feature, ?>)codecClass.getDeclaredConstructor().newInstance();
            if ( codec.canDecode(featureFileUri) ) {
                candidateCodecs.add(codec);
            }
        }
        catch (InstantiationException | IllegalAccessException | NoSuchMethodException | InvocationTargetException e ) {
            // Keep the reflective failure as the cause so the real reason is not lost.
            throw new GATKException("Unable to automatically instantiate codec " + codecClass.getName(), e);
        }
    }

    return candidateCodecs;
}
 
Example #5
Source File: GATKVariantContextUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Asserts that the codec selected for a written file matches the one expected for its extension.
 *
 * Extensions {@code .vcf}, {@code .vcf.bgz}, {@code .vcf.gz} and {@code .tmp} expect {@code VCFCodec};
 * {@code .bcf} expects {@code BCF2Codec}.
 *
 * @param resultVCFFile   file whose codec is looked up through {@code FeatureManager.getCodecForFile}
 * @param outputExtension extension that determines the expected codec class
 * @throws IllegalArgumentException if the extension is none of the ones listed above
 */
private void verifyFileType(
        final File resultVCFFile,
        final String outputExtension) {
    final FeatureCodec<? extends Feature, ?> featureCodec = FeatureManager.getCodecForFile(resultVCFFile.toPath());

    final Class<?> expectedCodecClass;
    switch (outputExtension) {
        case ".vcf":
        case ".vcf.bgz":
        case ".vcf.gz":
        case ".tmp":
            expectedCodecClass = VCFCodec.class;
            break;
        case ".bcf":
            expectedCodecClass = BCF2Codec.class;
            break;
        default:
            throw new IllegalArgumentException("Unknown file extension in createVCFWriter test validation");
    }

    Assert.assertEquals(featureCodec.getClass(), expectedCodecClass,
            "Wrong codec selected for file " + resultVCFFile.getAbsolutePath());
}
 
Example #6
Source File: FeatureWalker.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Sets up the driving feature source from the driving feature file.
 *
 * The codec for the file is looked up first; if its feature type is not acceptable a
 * {@code UserException} is thrown. Otherwise {@code drivingFeatures} is assigned a new data source
 * for the file, and the file is also registered with {@code features} under the name
 * {@code "drivingFeatureFile"}.
 */
@SuppressWarnings("unchecked")
private void initializeDrivingFeatures() {
    final File drivingFile = getDrivingFeatureFile();
    final FeatureCodec<? extends Feature, ?> codec = FeatureManager.getCodecForFile(drivingFile.toPath());

    // Reject the file up front when its codec produces an unacceptable feature type.
    if (!isAcceptableFeatureType(codec.getFeatureType())) {
        throw new UserException("File " + drivingFile + " contains features of the wrong type.");
    }

    final String drivingFilePath = drivingFile.getAbsolutePath();
    drivingFeatures = new FeatureDataSource<>(
            new FeatureInput<>(drivingFilePath),
            FeatureDataSource.DEFAULT_QUERY_LOOKAHEAD_BASES,
            null,
            cloudPrefetchBuffer,
            cloudIndexPrefetchBuffer,
            referenceArguments.getReferencePath());

    final FeatureInput<F> drivingFeaturesInput = new FeatureInput<>(drivingFilePath, "drivingFeatureFile");
    features.addToFeatureSources(
            0,
            drivingFeaturesInput,
            codec.getFeatureType(),
            cloudPrefetchBuffer,
            cloudIndexPrefetchBuffer,
            referenceArguments.getReferencePath());
}
 
Example #7
Source File: ProgressReportingDelegatingCodec.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Creates a codec that wraps {@code delegatee} and owns a {@code ProgressMeter} constructed with
 * the given update interval.
 *
 * @param delegatee             codec stored as this object's delegatee
 * @param secondsBetweenUpdates interval handed to the {@code ProgressMeter}; must be greater than 0.0
 * @throws IllegalArgumentException if {@code secondsBetweenUpdates} is not greater than 0.0
 *                                  (this includes NaN)
 */
public ProgressReportingDelegatingCodec(final FeatureCodec<A, B> delegatee, final double secondsBetweenUpdates){
    // Written as a negated ">" so that NaN, for which every comparison is false, is rejected too;
    // a "<= 0.0" test would let it through.
    if ( !(secondsBetweenUpdates > 0.0) ) {
        throw new IllegalArgumentException("secondsBetweenUpdates must be > 0.0");
    }
    this.delegatee = delegatee;
    this.pm = new ProgressMeter(secondsBetweenUpdates);
}
 
Example #8
Source File: FeatureInputUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Builds a FeatureInput for the minimal test VCF and stores on it the class of the codec that
 * {@code FeatureManager.getCodecForFile} selects for that file.
 *
 * Asserts that the freshly created input has no codec class before one is set.
 *
 * @return the FeatureInput with its codec class set
 */
@SuppressWarnings("unchecked")
private FeatureInput<VariantContext> getVariantFeatureInputWithCachedCodec() {
    final String vcfPath = new File(FEATURE_INPUT_TEST_DIRECTORY, "minimal_vcf4_file.vcf").getAbsolutePath();
    final FeatureInput<VariantContext> featureInput = new FeatureInput<>(vcfPath);
    Assert.assertNull(featureInput.getFeatureCodecClass());

    final Class<?> discoveredCodecClass = FeatureManager.getCodecForFile(featureInput.toPath()).getClass();
    featureInput.setFeatureCodecClass((Class<FeatureCodec<VariantContext, ?>>) discoveredCodecClass);

    return featureInput;
}
 
Example #9
Source File: FeatureManagerUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Checks that {@code FeatureManager.getCodecForFile} selects the expected codec class for a file,
 * both without a feature-type restriction and when restricted to the feature type reported by an
 * instance of the expected codec.
 *
 * @param file               file whose codec is looked up
 * @param expectedCodecClass codec class that must be selected in both lookups
 */
@Test(dataProvider = "DetectCorrectFileFormatTestData")
public void testDetectCorrectFileFormat( final File file, final Class<? extends FeatureCodec<? extends Feature, ?>> expectedCodecClass ) throws Exception {
    final String absoluteFilePath = file.getAbsolutePath();

    final Class<?> unrestrictedCodecClass = FeatureManager.getCodecForFile(file.toPath()).getClass();
    Assert.assertEquals(unrestrictedCodecClass, expectedCodecClass,
                        "Wrong codec selected for file " + absoluteFilePath);

    // The same codec must be chosen when the lookup is narrowed to the expected Feature type.
    final FeatureCodec<? extends Feature, ?> expectedCodec = expectedCodecClass.getDeclaredConstructor().newInstance();
    @SuppressWarnings("unchecked")
    final Class<? extends Feature> expectedCodecFeatureType = expectedCodec.getFeatureType();

    final Class<?> restrictedCodecClass = FeatureManager.getCodecForFile(file.toPath(), expectedCodecFeatureType).getClass();
    Assert.assertEquals(restrictedCodecClass, expectedCodecClass,
            "Wrong codec selected for file " + absoluteFilePath + " after subsetting to the expected Feature type");
}
 
Example #10
Source File: SimpleReference.java    From varsim with BSD 2-Clause "Simplified" License 5 votes vote down vote up
/**
 * Sums the non-N base counts over every region listed in a BED file.
 *
 * All sequences are loaded first. Each line of {@code regions} is then decoded with a
 * {@code BEDCodec} built with {@code StartOffset.ONE}, and the per-contig count for the feature's
 * start/end range is added to the total.
 *
 * @param regions BED file listing the regions to count
 * @return the summed count over all decoded features
 * @throws IOException if the regions file cannot be opened or closed
 */
public long getNumNonNBases(final File regions) throws IOException {
    loadAllSequences();
    long count = 0;

    final FeatureCodec<BEDFeature, LineIterator> bedCodec = new BEDCodec(BEDCodec.StartOffset.ONE);

    // try-with-resources so the file handle is released even when decoding fails; previously the
    // stream was never closed.
    try (final FileInputStream regionsStream = new FileInputStream(regions)) {
        final LineIterator lineIterator = new AsciiLineReaderIterator(new AsciiLineReader(regionsStream));

        while (lineIterator.hasNext()) {
            final BEDFeature bedFeature = bedCodec.decode(lineIterator);
            // The codec may return null for a line that carries no feature (e.g. header, comment
            // or blank lines); skip those rather than dereferencing null.
            if (bedFeature == null) {
                continue;
            }
            count += data.get(new ChrString(bedFeature.getContig())).getNumNonNBases(bedFeature.getStart(), bedFeature.getEnd());
        }
    }
    return count;
}
 
Example #11
Source File: FeatureManager.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 * Picks the single codec that should be used to read Features from the given path, optionally
 * restricted to codecs producing a particular Feature type.
 *
 * Only codecs whose {@link FeatureCodec#canDecode(String)} accepts the file are candidates, and
 * when featureType is non-null a candidate must also produce Features assignable to that type.
 *
 * Fails if the path is not readable, if no codec accepts the file (a user error: unsupported
 * format), if no accepting codec produces the requested type, or if more than one codec remains
 * (a configuration error on the codec authors' part).
 *
 * @param featurePath Path for which to find the right codec
 * @param featureType If non-null, only codecs producing Features of this type are considered;
 *                    if null, all codecs are considered.
 * @return the codec suitable for decoding the provided file
 */
public static FeatureCodec<? extends Feature, ?> getCodecForFile( final Path featurePath, final Class<? extends Feature> featureType ) {
    // The path has to exist and be readable before any codec is consulted
    if ( ! Files.isReadable(featurePath) ) {
        throw new UserException.CouldNotReadInputFile(featurePath.toUri().toString());
    }

    // Every discovered codec whose canDecode() accepts the file
    final List<FeatureCodec<? extends Feature, ?>> candidateCodecs = getCandidateCodecsForFile(featurePath);

    // Nothing can read it: the user supplied a file in an unsupported format
    if ( candidateCodecs.isEmpty() ) {
        throw new UserException.NoSuitableCodecs(featurePath);
    }

    if ( featureType != null ) {
        // Record what was found before filtering so the error can report it
        final List<String> discoveredCodecsFeatureTypes = candidateCodecs.stream()
                .map(candidate -> candidate.getFeatureType().getSimpleName())
                .collect(Collectors.toList());

        candidateCodecs.removeIf(candidate -> ! featureType.isAssignableFrom(candidate.getFeatureType()));

        if ( candidateCodecs.isEmpty() ) {
            throw new UserException.WrongFeatureType(featurePath, featureType, discoveredCodecsFeatureTypes);
        }
    }

    // More than one survivor means the codec authors misconfigured something
    if ( candidateCodecs.size() > 1 ) {
        // Each name is followed by a single space, including the last one
        final String multiCodecMatches = candidateCodecs.stream()
                .map(candidate -> candidate.getClass().getCanonicalName())
                .collect(Collectors.joining(" ", "", " "));

        throw new GATKException("Multiple codecs found able to decode file " + featurePath.toAbsolutePath().toUri() +
                                ". This indicates a misconfiguration on the part of the codec authors. " +
                                "Matching codecs are: " + multiCodecMatches);
    }

    final FeatureCodec<? extends Feature, ?> selectedCodec = candidateCodecs.get(0);
    logger.info("Using codec " + selectedCodec.getClass().getSimpleName() + " to read file " + featurePath.toAbsolutePath().toUri());
    return selectedCodec;
}
 
Example #12
Source File: FeatureInput.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 * Returns the FeatureCodec class currently stored on this input.
 *
 * @return the stored FeatureCodec class for this input, or {@code null} if none has been established
 */
public Class<FeatureCodec<T, ?>> getFeatureCodecClass() {
    return featureCodecClass;
}
 
Example #13
Source File: ProgressReportingDelegatingCodec.java    From gatk with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 * @return the codec stored in this object's {@code delegatee} field
 */
public FeatureCodec<A, B> getDelegatee() {
    return this.delegatee;
}
 
Example #14
Source File: FeatureManager.java    From gatk with BSD 3-Clause "New" or "Revised" License 2 votes vote down vote up
/**
 * Picks the single codec that should be used to read Features from the given path, considering
 * codecs of every Feature type.
 *
 * Delegates to the two-argument overload with a null feature type. Codecs MUST correctly implement
 * {@link FeatureCodec#canDecode(String)} to be considered as candidates for decoding the file.
 *
 * Throws an exception if no suitable codecs are found (a user error, since the file is of an
 * unsupported format), or if more than one codec claims to be able to decode the file (a
 * configuration error on the codec authors' part).
 *
 * @param featurePath path for which to find the right codec
 * @return the codec suitable for decoding the provided file
 */
public static FeatureCodec<? extends Feature, ?> getCodecForFile( final Path featurePath ) {
    // A null feature type means "do not restrict by Feature type"
    final Class<? extends Feature> noFeatureTypeRestriction = null;
    return getCodecForFile(featurePath, noFeatureTypeRestriction);
}
 
Example #15
Source File: FeatureInput.java    From gatk with BSD 3-Clause "New" or "Revised" License 2 votes vote down vote up
/**
 * Remember the FeatureCodec class for this input the first time it is discovered so we can bypass dynamic codec
 * discovery when multiple FeatureDataSources are created for the same input.
 *
 * The value is stored as-is and replaces any previously stored class.
 *
 * @param featureCodecClass the FeatureCodec class to store on this input
 */
public void setFeatureCodecClass(final Class<FeatureCodec<T, ?>> featureCodecClass) {
    this.featureCodecClass = featureCodecClass;
}