htsjdk.tribble.Feature Java Examples

The following examples show how to use htsjdk.tribble.Feature. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: CosmicFuncotationFactoryUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Checks that {@code CosmicFuncotationFactory.createFuncotationsOnVariant} produces the expected
 * funcotations for every case supplied by the {@code provideForTestCreateFuncotations} data provider.
 */
@Test(dataProvider = "provideForTestCreateFuncotations")
public void testCreateFuncotations(final VariantContext variant,
                                   final ReferenceContext referenceContext,
                                   final List<Feature> featureList,
                                   final List<GencodeFuncotation> gencodeFuncotations,
                                   final List<Funcotation> expected) {

    // Factory under test, backed by the test database.
    final CosmicFuncotationFactory factoryUnderTest = new CosmicFuncotationFactory(PATH_TO_TEST_DB);

    // Annotate the variant, then compare against the expectation from the data provider.
    final List<Funcotation> actual =
            factoryUnderTest.createFuncotationsOnVariant(variant, referenceContext, featureList, gencodeFuncotations);

    Assert.assertEquals(actual, expected);
}
 
Example #2
Source File: RefSeqCodec.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Decodes only the location of the next RefSeq line.
 *
 * @param lineIterator iterator positioned at the line to decode; its next line is consumed.
 * @return a {@link RefSeqFeature} wrapping the interval parsed from the line, or {@code null}
 *         when the line starts with {@code COMMENT_LINE_CHARACTER}.
 * @throws TribbleException if the line has fewer than {@code MINIMUM_LINE_FIELD_COUNT} fields.
 * @throws UserException.MalformedFile if either interval bound is not a parseable integer.
 */
@Override
public Feature decodeLoc(final LineIterator lineIterator) {
    final String line = lineIterator.next();
    if (line.startsWith(COMMENT_LINE_CHARACTER)) {
        return null;
    }
    final String[] fields = line.split(LINE_DELIMITER);
    if (fields.length < MINIMUM_LINE_FIELD_COUNT) {
        // Report the actual minimum rather than a hard-coded number so the message cannot drift from the check.
        throw new TribbleException("RefSeq (decodeLoc) : Unable to parse line -> " + line + ", we expected at least " + MINIMUM_LINE_FIELD_COUNT + " columns, we saw " + fields.length);
    }
    final String contigName = fields[CONTIG_INDEX];
    try {
        // The left bound is shifted by one before building the interval
        // (presumably converting a 0-based start to 1-based -- TODO confirm against the file format).
        final int start = Integer.parseInt(fields[INTERVAL_LEFT_BOUND_INDEX]) + 1;
        final int end = Integer.parseInt(fields[INTERVAL_RIGHT_BOUND_INDEX]);
        //TODO maybe except for malformed simple intervals? Genome locs had that
        return new RefSeqFeature(new SimpleInterval(contigName, start, end));
    } catch (final NumberFormatException e) {
        // NOTE(review): the NumberFormatException is not chained here; consider passing it as the cause
        // if UserException.MalformedFile offers a suitable constructor.
        throw new UserException.MalformedFile("Could not parse location from line: " + line);
    }
}
 
Example #3
Source File: GATKVariantContextUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Asserts that the codec {@link FeatureManager} selects for the given file matches the codec
 * expected for the given output extension.
 *
 * @param resultVCFFile   file whose codec is looked up.
 * @param outputExtension extension that determines which codec class is expected.
 * @throws IllegalArgumentException if the extension is not one this check knows about.
 */
private void verifyFileType(
        final File resultVCFFile,
        final String outputExtension) {
    final FeatureCodec<? extends Feature, ?> selectedCodec = FeatureManager.getCodecForFile(resultVCFFile.toPath());

    // Map the extension onto the codec class we expect to have been chosen.
    final Class<?> expectedCodecClass;
    switch (outputExtension) {
        case ".vcf":
        case ".vcf.bgz":
        case ".vcf.gz":
        case ".tmp":
            expectedCodecClass = VCFCodec.class;
            break;
        case ".bcf":
            expectedCodecClass = BCF2Codec.class;
            break;
        default:
            throw new IllegalArgumentException("Unknown file extension in createVCFWriter test validation");
    }

    Assert.assertEquals(selectedCodec.getClass(), expectedCodecClass,
            "Wrong codec selected for file " + resultVCFFile.getAbsolutePath());
}
 
Example #4
Source File: FeatureDataSource.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Opens a Tribble feature reader for the given feature input using the supplied codec.
 *
 * @param featureInput      input whose raw input string is handed to the reader factory.
 * @param codec             codec used to decode the input; must not be {@code null}.
 * @param cloudWrapper      channel wrapper for the data file, applied only when the input is eligible for prefetching.
 * @param cloudIndexWrapper channel wrapper for the index, applied only when the input is eligible for prefetching.
 * @param <T>               type of {@link Feature} produced by the reader.
 * @return a feature reader that does not itself require an index.
 * @throws GATKException if the reader factory fails with a {@link TribbleException}.
 */
private static <T extends Feature> AbstractFeatureReader<T, ?> getTribbleFeatureReader(final FeatureInput<T> featureInput, final FeatureCodec<T, ?> codec, final Function<SeekableByteChannel, SeekableByteChannel> cloudWrapper, final Function<SeekableByteChannel, SeekableByteChannel> cloudIndexWrapper) {
    Utils.nonNull(codec);
    try {
        // The data file location is taken from the feature input's raw input string.
        final String rawInputPath = featureInput.getRawInputString();

        // The factory is told not to insist on an index; per the original note, an index is
        // demanded separately once a query by interval is attempted.
        final boolean indexRequired = false;

        // Inputs that would not benefit from prefetching get pass-through wrappers.
        if (!BucketUtils.isEligibleForPrefetching(featureInput)) {
            return AbstractFeatureReader.getFeatureReader(rawInputPath, null, codec, indexRequired, Utils.identityFunction(), Utils.identityFunction());
        }
        return AbstractFeatureReader.getFeatureReader(rawInputPath, null, codec, indexRequired, cloudWrapper, cloudIndexWrapper);
    } catch (final TribbleException e) {
        throw new GATKException("Error initializing feature reader for path " + featureInput.getFeaturePath(), e);
    }
}
 
Example #5
Source File: DataSourceUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Creates a {@link FeatureInput} for a data source through the given tool instance, which
 * (per the original note) also registers it with the engine so it can be queried later.
 *
 * @param configFilePath              path to the data source config file; must not be {@code null}.
 * @param dataSourceProperties        properties of the data source; must not be {@code null}.
 * @param funcotatorToolInstance      tool through which the feature input is created.
 * @param lookaheadFeatureCachingInBp lookahead cache size used when the properties yield {@code -1}.
 * @param featureType                 type of feature the input is expected to provide.
 * @param useConfigFilePath           if {@code true} the config file itself is the source file;
 *                                    otherwise the source file named in the properties is resolved
 *                                    against the config file path.
 * @return the feature input returned by the tool instance.
 */
private static FeatureInput<? extends Feature> createAndRegisterFeatureInputs(final Path configFilePath,
                                                                              final Properties dataSourceProperties,
                                                                              final GATKTool funcotatorToolInstance,
                                                                              final int lookaheadFeatureCachingInBp,
                                                                              final Class<? extends Feature> featureType,
                                                                              final boolean useConfigFilePath) {
    Utils.nonNull(configFilePath);
    Utils.nonNull(dataSourceProperties);

    final String dataSourceName = dataSourceProperties.getProperty(CONFIG_FILE_FIELD_NAME_NAME);

    // Work out the URI string of the file backing this data source.
    final String sourceUri;
    if (useConfigFilePath) {
        sourceUri = configFilePath.toUri().toString();
    } else {
        final String configuredSourceFile = dataSourceProperties.getProperty(CONFIG_FILE_FIELD_NAME_SRC_FILE);
        sourceUri = resolveFilePathStringFromKnownPath( configuredSourceFile, configFilePath ).toUri().toString();
    }

    // A property value of -1 means "fall back to the caller-supplied lookahead".
    final int configuredLookahead = getLookAheadCacheBpPropertyValue(dataSourceProperties);
    final int effectiveLookahead = (configuredLookahead != -1) ? configuredLookahead : lookaheadFeatureCachingInBp;

    logger.info( "Setting lookahead cache for data source: " + dataSourceName + " : " + effectiveLookahead );

    return funcotatorToolInstance.addFeatureInputsAfterInitialization(sourceUri, dataSourceName, featureType, effectiveLookahead);
}
 
Example #6
Source File: DataSourceFuncotationFactory.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Chooses which funcotation-creation routine to run for the given variant.
 *
 * @param variant             variant to annotate.
 * @param referenceContext    reference context for the variant.
 * @param featureList         features to annotate with.
 * @param gencodeFuncotations previously created gencode funcotations; may be {@code null}.
 * @return the funcotations produced by the selected routine.
 */
private List<Funcotation> determineFuncotations(final VariantContext variant, final ReferenceContext referenceContext, final List<Feature> featureList, final List<GencodeFuncotation> gencodeFuncotations) {

    // Segment path: taken only when the variant is a segment AND this factory supports segments.
    if (FuncotatorUtils.isSegmentVariantContext(variant, minBasesForValidSegment) && isSupportingSegmentFuncotation()) {
        return createFuncotationsOnSegment(variant, referenceContext, featureList);
    }

    // Variant path without gencode funcotations.
    if (gencodeFuncotations == null) {
        return createFuncotationsOnVariant(variant, referenceContext, featureList);
    }

    // Variant path with gencode funcotations.
    return createFuncotationsOnVariant(variant, referenceContext, featureList, gencodeFuncotations);
}
 
Example #7
Source File: DataSourceFuncotationFactory.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Sanity-checks whether the given feature list can be annotated by this {@link DataSourceFuncotationFactory}.
 * If this factory does not require features as per {@link #requiresFeatures()}, the list is always
 * considered compatible. Otherwise only the first non-null element is inspected.
 *
 * @param featureList {@link List} of {@link Feature} that might be applicable to this factory for annotation.
 * @return {@code true} if features are not required, or if the first non-null feature in
 *         {@code featureList} is assignable to {@link #getAnnotationFeatureClass()}; {@code false}
 *         otherwise (including when the list holds no non-null feature).
 */
private boolean isFeatureListCompatible(final List<Feature> featureList) {
    // Factories that do not need features accept any list.
    if ( !requiresFeatures() ) {
        return true;
    }

    // The first non-null feature alone decides the outcome.
    for ( final Feature candidate : featureList ) {
        if (candidate == null) {
            continue;
        }
        return getAnnotationFeatureClass().isAssignableFrom(candidate.getClass());
    }

    // No non-null feature was present.
    return false;
}
 
Example #8
Source File: FeatureManager.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Returns a List of all codecs in DISCOVERED_CODECS that claim to be able to decode the specified file
 * according to their {@link FeatureCodec#canDecode(String)} methods.
 *
 * Each discovered codec class is instantiated through its no-argument constructor and asked whether
 * it can decode the URI string of the file's absolute path.
 *
 * @param featureFile file for which to find potential codecs
 * @return A List of all codecs in DISCOVERED_CODECS for which {@link FeatureCodec#canDecode(String)} returns true on the specified file
 * @throws GATKException if a discovered codec class cannot be instantiated reflectively; the
 *         underlying reflective exception is attached as the cause.
 */
private static List<FeatureCodec<? extends Feature, ?>> getCandidateCodecsForFile( final Path featureFile )  {
    final List<FeatureCodec<? extends Feature, ?>> candidateCodecs = new ArrayList<>();

    for ( final Class<?> codecClass : DISCOVERED_CODECS ) {
        try {
            final FeatureCodec<? extends Feature, ?> codec = (FeatureCodec<? extends Feature, ?>)codecClass.getDeclaredConstructor().newInstance();
            if ( codec.canDecode(featureFile.toAbsolutePath().toUri().toString()) ) {
                candidateCodecs.add(codec);
            }
        }
        catch (InstantiationException | IllegalAccessException | NoSuchMethodException | InvocationTargetException e ) {
            // Keep the original exception as the cause so the real reason for the failure is not lost.
            throw new GATKException("Unable to automatically instantiate codec " + codecClass.getName(), e);
        }
    }

    return candidateCodecs;
}
 
Example #9
Source File: GencodeFuncotationFactory.java    From gatk with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Create a {@link GencodeFuncotationFactory}.
 *
 * Delegates to the constructor taking an additional final argument, passing
 * {@code FuncotatorUtils.DEFAULT_MIN_NUM_BASES_FOR_VALID_SEGMENT} for it.
 *
 * @param gencodeTranscriptFastaFilePath {@link Path} to the FASTA file containing the sequences of all transcripts in the Gencode data source.
 * @param version The version {@link String} of Gencode from which {@link Funcotation}s will be made.
 * @param name A {@link String} containing the name of this {@link GencodeFuncotationFactory}.
 * @param transcriptSelectionMode The {@link TranscriptSelectionMode} by which representative/verbose transcripts will be chosen for overlapping variants.
 * @param userRequestedTranscripts A {@link Set} of {@link String} containing Gencode TranscriptIDs that the user requests to be annotated with priority over all other transcripts for overlapping variants.
 * @param annotationOverrides A {@link LinkedHashMap} of {@code String -> String} containing user-specified overrides for specific {@link Funcotation}s.
 * @param mainFeatureInput The backing {@link FeatureInput} for this {@link GencodeFuncotationFactory}, from which all {@link Funcotation}s will be created.
 * @param flankSettings Settings object containing our 5'/3' flank sizes
 * @param isDataSourceB37 If {@code true}, indicates that the data source behind this {@link GencodeFuncotationFactory} contains B37 data.
 * @param ncbiBuildVersion The NCBI build version for this {@link GencodeFuncotationFactory} (can be found in the datasource config file)
 * @param isSegmentFuncotationEnabled Presumably whether this factory should produce funcotations for segments -- TODO confirm against the delegated constructor.
 */
public GencodeFuncotationFactory(final Path gencodeTranscriptFastaFilePath,
                                 final String version,
                                 final String name,
                                 final TranscriptSelectionMode transcriptSelectionMode,
                                 final Set<String> userRequestedTranscripts,
                                 final LinkedHashMap<String, String> annotationOverrides,
                                 final FeatureInput<? extends Feature> mainFeatureInput,
                                 final FlankSettings flankSettings,
                                 final boolean isDataSourceB37,
                                 final String ncbiBuildVersion,
                                 final boolean isSegmentFuncotationEnabled) {
    this(gencodeTranscriptFastaFilePath, version, name,
            transcriptSelectionMode, userRequestedTranscripts, annotationOverrides, mainFeatureInput,
            flankSettings, isDataSourceB37, ncbiBuildVersion, isSegmentFuncotationEnabled,
            FuncotatorUtils.DEFAULT_MIN_NUM_BASES_FOR_VALID_SEGMENT);
}
 
Example #10
Source File: ConvertBedToTargetFile.java    From gatk-protected with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Converts the input BED file into a target file.
 *
 * The codec chosen for {@code inputBedFile} must produce {@link BEDFeature}s; every feature is
 * converted with {@code createTargetFromBEDFeature} and the resulting targets are written to
 * {@code outFile}.
 *
 * @return the string {@code "SUCCESS"} when the targets have been written.
 * @throws UserException.BadInput if the input is not recognised as a BED file, or if reading it
 *         fails with a {@link TribbleException}.
 */
@Override
protected Object doWork() {
    final FeatureCodec<? extends Feature, ?> codec = FeatureManager.getCodecForFile(inputBedFile);
    final Class<? extends Feature> featureType = codec.getFeatureType();
    if (BEDFeature.class.isAssignableFrom(featureType)) {
        // try-with-resources guarantees the data source is closed even when reading or writing fails.
        try (final FeatureDataSource<? extends BEDFeature> source = new FeatureDataSource<>(inputBedFile)) {
            // The inner try keeps the original scope of the TribbleException handler: only failures
            // while reading/writing are translated, not failures while opening or closing the source.
            try {
                final List<Target> targets = StreamSupport.stream(source.spliterator(), false).map(ConvertBedToTargetFile::createTargetFromBEDFeature)
                        .collect(Collectors.toList());
                TargetWriter.writeTargetsToFile(outFile, targets);
            } catch (final TribbleException e) {
                throw new UserException.BadInput(String.format("'%s' has a .bed extension but does not seem to be a valid BED file.", inputBedFile.getAbsolutePath()));
            }
        }
    } else {
        throw new UserException.BadInput(String.format("'%s' does not seem to be a BED file.", inputBedFile.getAbsolutePath()));
    }
    return "SUCCESS";
}
 
Example #11
Source File: GenomeLocParserUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Checks that a {@link GenomeLoc} built from a {@link Feature} carries the same contig, start and end.
 */
@Test
public void testCreationFromFeature() {
    final Feature source = new SimpleFeature("1", 1, 5);

    final GenomeLoc created = genomeLocParser.createGenomeLoc(source);

    // Each coordinate of the created location must mirror the source feature.
    Assert.assertEquals(created.getContig(), source.getContig());
    Assert.assertEquals(created.getStart(), source.getStart());
    Assert.assertEquals(created.getStop(), source.getEnd());
}
 
Example #12
Source File: FeatureManager.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Looks up the data source registered for the given FeatureInput.
 *
 * @param featureDescriptor FeatureInput whose data source to retrieve
 * @param <T> type of Feature in our FeatureInput
 * @return the data source stored in {@code featureSources} for the provided FeatureInput
 * @throws GATKException if no data source is stored for the provided FeatureInput
 */
private <T extends Feature> FeatureDataSource<T> lookupDataSource( final FeatureInput<T> featureDescriptor ) {
    @SuppressWarnings("unchecked")
    final FeatureDataSource<T> registeredSource = (FeatureDataSource<T>) featureSources.get(featureDescriptor);

    if ( registeredSource != null ) {
        return registeredSource;
    }

    // An unknown FeatureInput means it was not discovered on the tool; explain how discovery works.
    throw new GATKException(String.format("FeatureInput %s not found in feature manager's database for tool %s. " +
                                          "In order to be detected, FeatureInputs must be declared in the tool class " +
                                          "itself, a superclass of the tool class, or an @ArgumentCollection declared " +
                                          "in the tool class or a superclass. They must also be annotated as an @Argument.",
                                          featureDescriptor.getName(), toolInstanceSimpleClassName));
}
 
Example #13
Source File: FeatureWalker.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Sets up the driving feature source from the driving feature file and registers it with
 * {@code features}.
 *
 * @throws UserException if the codec selected for the driving file produces a feature type
 *         that {@code isAcceptableFeatureType} rejects.
 */
@SuppressWarnings("unchecked")
private void initializeDrivingFeatures() {
    final File drivingFile = getDrivingFeatureFile();
    final FeatureCodec<? extends Feature, ?> codec = FeatureManager.getCodecForFile(drivingFile.toPath());

    // Reject files whose features are not of a type this walker accepts.
    if (!isAcceptableFeatureType(codec.getFeatureType())) {
        throw new UserException("File " + drivingFile + " contains features of the wrong type.");
    }

    // Data source used to iterate over the driving features.
    drivingFeatures = new FeatureDataSource<>(
            new FeatureInput<>(drivingFile.getAbsolutePath()),
            FeatureDataSource.DEFAULT_QUERY_LOOKAHEAD_BASES,
            null,
            cloudPrefetchBuffer,
            cloudIndexPrefetchBuffer,
            referenceArguments.getReferencePath());

    // The same file is additionally added to the feature sources under a fixed name.
    final FeatureInput<F> namedDrivingInput = new FeatureInput<>(drivingFile.getAbsolutePath(), "drivingFeatureFile");
    features.addToFeatureSources(
            0,
            namedDrivingInput,
            codec.getFeatureType(),
            cloudPrefetchBuffer,
            cloudIndexPrefetchBuffer,
            referenceArguments.getReferencePath());
}
 
Example #14
Source File: DataSourceUtils.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Create a {@link VcfFuncotationFactory} from filesystem resources and field overrides.
 * The name, source file and version are read from the given properties; the source file is
 * resolved against {@code dataSourceFile}.
 *
 * @param dataSourceFile {@link Path} to the data source file.  Must not be {@code null}.
 * @param dataSourceProperties {@link Properties} consisting of the contents of the config file for the data source.  Must not be {@code null}.
 * @param annotationOverridesMap {@link LinkedHashMap} of {@code String -> String} containing any annotation overrides to be included in the resulting data source.  Must not be {@code null}.
 * @param featureInput The {@link FeatureInput} object for the VCF data source we are creating.
 * @param minBasesForValidSegment The minimum number of bases for a segment to be considered valid.
 * @return A new {@link VcfFuncotationFactory} based on the given data source file information and field overrides map.
 */
private static VcfFuncotationFactory createVcfDataSource(final Path dataSourceFile,
                                                         final Properties dataSourceProperties,
                                                         final LinkedHashMap<String, String> annotationOverridesMap,
                                                         final FeatureInput<? extends Feature> featureInput,
                                                         final int minBasesForValidSegment) {

    Utils.nonNull(dataSourceFile);
    Utils.nonNull(dataSourceProperties);
    Utils.nonNull(annotationOverridesMap);

    // Pull the data source metadata out of the config properties.
    final String factoryName = dataSourceProperties.getProperty(CONFIG_FILE_FIELD_NAME_NAME);
    final String configuredSourceFile = dataSourceProperties.getProperty(CONFIG_FILE_FIELD_NAME_SRC_FILE);
    final String factoryVersion = dataSourceProperties.getProperty(CONFIG_FILE_FIELD_NAME_VERSION);
    final boolean dataSourceIsB37 = getIsB37PropertyValue(dataSourceProperties);

    // Assemble the factory.
    return new VcfFuncotationFactory(
            factoryName,
            factoryVersion,
            resolveFilePathStringFromKnownPath(configuredSourceFile, dataSourceFile),
            annotationOverridesMap,
            featureInput,
            dataSourceIsB37,
            minBasesForValidSegment
    );
}
 
Example #15
Source File: ProgressReportingDelegatingCodec.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Decodes a feature location through the delegate codec and reports it to {@code pm}.
 * {@code pm} is started first if it has not been started yet.
 *
 * @param b source handed unchanged to the delegate's {@code decodeLoc}.
 * @return whatever the delegate decoded.
 * @throws IllegalStateException if no delegate has been set.
 * @throws IOException declared by this method's signature; not raised directly by this body.
 */
@Override
public Feature decodeLoc(final B b) throws IOException {
    if (delegatee == null) {
        throw new IllegalStateException("this codec cannot be used without a delegatee.");
    }

    // Start reporting on first use.
    if (!pm.started()) {
        pm.start();
    }

    final Feature decoded = delegatee.decodeLoc(b);
    pm.update(decoded);
    return decoded;
}
 
Example #16
Source File: VcfFuncotationFactoryUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Exercises the hit/miss counters of the {@link VcfFuncotationFactory} cache.
 *
 * Per the original notes, the cache is keyed purely on object references, so the same triple
 * instances must be reused (always taken from the list below) to produce hits. The content of
 * the funcotations is deliberately not checked here, only the counters.
 */
@Test
public void testCacheOnObjectReference(){
    final int maxEntries = VcfFuncotationFactory.LRUCache.MAX_ENTRIES;

    // One more dummy input than the cache can hold.
    final List<String> alleles = Arrays.asList("G", "C", "T");
    final List<Triple<VariantContext, ReferenceContext, List<Feature>>> cacheInputs =
            IntStream.range(0, maxEntries + 1)
                    .mapToObj(position -> createDummyCacheTriples(alleles, position))
                    .collect(Collectors.toList());

    // Factory under test.
    final VcfFuncotationFactory factoryUnderTest =
            createVcfFuncotationFactory(FACTORY_NAME, FACTORY_VERSION, IOUtils.getPath(EXAC_SNIPPET));

    // First pass: every distinct input is a miss, and there are no hits.
    for (int position = 0; position < maxEntries; position++) {
        funcotateForCacheTest(factoryUnderTest, cacheInputs.get(position));
        Assert.assertEquals(factoryUnderTest.cacheHits, 0);
        Assert.assertEquals(factoryUnderTest.cacheMisses, position + 1);  // One miss per call so far.
    }

    // Second pass over all inputs, including the extra one that is expected to evict [0].
    for (final Triple<VariantContext, ReferenceContext, List<Feature>> input : cacheInputs) {
        funcotateForCacheTest(factoryUnderTest, input);
    }
    Assert.assertEquals(factoryUnderTest.cacheHits, maxEntries);

    // Index 0 is expected to have been purged by now, so this is counted as a further miss.
    funcotateForCacheTest(factoryUnderTest, cacheInputs.get(0));
    Assert.assertEquals(factoryUnderTest.cacheMisses, (maxEntries + 2));
}
 
Example #17
Source File: FeatureInputUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Checks that the literal string "null" is accepted as a feature name in a tagged argument.
 */
@Test
public void testNullOKAsFeatureName() {
    final FeatureInput<Feature> parsedInput = runCommandLineWithTaggedFeatureInput("argName:null", "myFile");

    final String actualPath = parsedInput.getFeaturePath();
    final String actualName = parsedInput.getName();

    Assert.assertEquals(actualPath, "myFile", "Wrong File in FeatureInput");
    Assert.assertEquals(actualName, "null", "Wrong name in FeatureInput");
}
 
Example #18
Source File: FeatureInputUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Builds a {@link FeatureInput} for the minimal test VCF and stores on it the codec class that
 * {@link FeatureManager} selects for that file.
 *
 * @return the feature input with its codec class set.
 */
@SuppressWarnings("unchecked")
private FeatureInput<VariantContext> getVariantFeatureInputWithCachedCodec() {
    final String vcfPath = new File(FEATURE_INPUT_TEST_DIRECTORY, "minimal_vcf4_file.vcf").getAbsolutePath();
    final FeatureInput<VariantContext> input = new FeatureInput<>(vcfPath);

    // A freshly created input has no codec class yet.
    Assert.assertNull(input.getFeatureCodecClass());

    // Resolve the codec for the file and remember its class on the input.
    final FeatureCodec<? extends Feature, ?> resolvedCodec = FeatureManager.getCodecForFile(input.toPath());
    input.setFeatureCodecClass((Class<FeatureCodec<VariantContext, ?>>) resolvedCodec.getClass());

    return input;
}
 
Example #19
Source File: IntervalStratification.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Reports whether anything from the walker's intervals file is returned by the feature context
 * for the current position.
 *
 * @return an empty list when {@code eval} is {@code null}; otherwise {@code OVERLAPPING} if the
 *         feature context returns at least one value for the intervals file, else {@code NOT_OVERLAPPING}.
 */
public List<Object> getRelevantStates(ReferenceContext referenceContext, ReadsContext readsContext, FeatureContext featureContext, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName, String FamilyName) {
    // Without an eval variant there is nothing to stratify.
    if (eval == null) {
        return Collections.emptyList();
    }

    final List<Feature> intervalHits = featureContext.getValues(getVariantEvalWalker().intervalsFile);
    if (intervalHits.isEmpty()) {
        return NOT_OVERLAPPING;
    }
    return OVERLAPPING;
}
 
Example #20
Source File: LocatableXsvFuncotationFactoryUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Checks that {@code LocatableXsvFuncotationFactory.createFuncotationsOnVariant} returns the
 * expected funcotations, both with and without gencode funcotations supplied.
 */
@Test(dataProvider = "provideForTestCreateFuncotations")
public void testCreateFuncotations(final VariantContext variant,
                                   final ReferenceContext referenceContext,
                                   final List<String> reportableFuncotationFieldNames,
                                   final List<Feature> featureList,
                                   final List<GencodeFuncotation> gencodeFuncotations,
                                   final List<Funcotation> expected) {

    // The "backing data" file holds nothing but a header line.
    final Path headerOnlyDataPath = createTempPath("headerBackingDataFile", "csv");
    final String headerLine = "CONTIG,START,END," + String.join(",", reportableFuncotationFieldNames);

    final Path configFilePath;
    try {
        Files.write(headerOnlyDataPath, headerLine.getBytes());

        // The config file points at the header-only backing data file.
        configFilePath = createTemporaryConfigFile(headerOnlyDataPath);
    }
    catch (final IOException ex) {
        throw new GATKException("Could not write to temp file for testing: " + headerOnlyDataPath.toUri(), ex);
    }

    final FeatureInput<? extends Feature> featureInput =
            FeatureInputTestTools.createFeatureInput( configFilePath.toUri().toString(), defaultDataSourceName );
    final LocatableXsvFuncotationFactory factoryUnderTest =
            new LocatableXsvFuncotationFactory(defaultDataSourceName, DataSourceFuncotationFactory.DEFAULT_VERSION_STRING, new LinkedHashMap<>(), featureInput);
    factoryUnderTest.setSupportedFuncotationFields(headerOnlyDataPath);

    // Overload without gencode funcotations.
    Assert.assertEquals(
            factoryUnderTest.createFuncotationsOnVariant( variant, referenceContext, featureList ),
            expected
    );

    // Overload with gencode funcotations.
    Assert.assertEquals(
            factoryUnderTest.createFuncotationsOnVariant( variant, referenceContext, featureList, gencodeFuncotations ),
            expected
    );
}
 
Example #21
Source File: VariantFiltration.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Adds the mask name to the variant's filters when the masking condition holds.
 * The condition holds when "no mask features were returned for the variant" equals
 * {@code filterRecordsNotInMask}.
 *
 * @param vc             variant to possibly filter.
 * @param featureContext context queried for mask features, padded by {@code maskExtension} on both sides.
 * @return a new VariantContext with the mask filter added, or {@code vc} itself when the condition does not hold.
 */
private VariantContext addMaskIfCoversVariant(final VariantContext vc, final FeatureContext featureContext) {
    final List<Feature> maskHits = featureContext.getValues(mask, maskExtension, maskExtension);

    final boolean shouldApplyMask = maskHits.isEmpty() == filterRecordsNotInMask;
    if (!shouldApplyMask) {
        return vc;
    }

    // Existing filters plus the mask name.
    final Set<String> filtersIncludingMask = Sets.union(vc.getFilters(), singleton(maskName));
    return new VariantContextBuilder(vc).filters(filtersIncludingMask).make();
}
 
Example #22
Source File: FeatureInputUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Checks that the literal string "null" is accepted as a file name for a tagged feature input.
 */
@Test
public void testNullOKAsFileName() {
    final FeatureInput<Feature> parsedInput = runCommandLineWithTaggedFeatureInput("argName:myName", "null");

    final String actualPath = parsedInput.getFeaturePath();
    final String actualName = parsedInput.getName();

    Assert.assertEquals(actualPath, "null", "Wrong File in FeatureInput");
    Assert.assertEquals(actualName, "myName", "Wrong name in FeatureInput");
}
 
Example #23
Source File: FeatureInputUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Checks that a feature input given without a name reports the supplied file as its path and
 * the file's absolute path as its name.
 */
@Test(dataProvider = "ValidFileOnlyFeatureArgumentValuesDataProvider")
public void testNoFeatureNameSpecified(final String validFileOnlyFeatureArgumentValue) {
    final FeatureInput<Feature> parsedInput = runCommandLineWithTaggedFeatureInput("argName", validFileOnlyFeatureArgumentValue);

    Assert.assertEquals(parsedInput.getFeaturePath(), validFileOnlyFeatureArgumentValue, "Wrong File in FeatureInput");

    // With no explicit name, the name is expected to be the file's absolute path.
    final String expectedDefaultName = new File(validFileOnlyFeatureArgumentValue).getAbsolutePath();
    Assert.assertEquals(parsedInput.getName(), expectedDefaultName, "Wrong default name in FeatureInput");
}
 
Example #24
Source File: FuncotateSegments.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Builds the interval for the given feature.
 * When the configured reference version equals
 * {@code BaseFuncotatorArgumentCollection.FuncotatorReferenceVersionHg19}, the feature's contig is
 * passed through {@code FuncotatorUtils.convertB37ContigToHg19Contig}; otherwise the interval is
 * built directly from the feature.
 *
 * @param feature feature to build an interval for.
 * @param <T>     type of the feature.
 * @return the interval for {@code feature}.
 */
@Override
protected <T extends Feature> SimpleInterval makeFeatureInterval(final T feature) {
    final boolean referenceIsHg19 =
            funcotatorArgs.referenceVersion.equals(BaseFuncotatorArgumentCollection.FuncotatorReferenceVersionHg19);

    if (!referenceIsHg19) {
        return new SimpleInterval(feature);
    }

    final String convertedContig = FuncotatorUtils.convertB37ContigToHg19Contig(feature.getContig());
    return new SimpleInterval(convertedContig, feature.getStart(), feature.getEnd());
}
 
Example #25
Source File: FeatureInputUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Checks that two different keys carrying the same value are both retained as attributes,
 * alongside the name and file of the feature input.
 */
@Test
public void testFeatureKeyValuePairsSpecifiedSameValue() {
    final FeatureInput<Feature> parsedInput = runCommandLineWithTaggedFeatureInput("argName:myName,key1=value,key2=value", "myFile");

    // Both keys must resolve to the shared value.
    final String firstAttribute = parsedInput.getAttribute("key1");
    final String secondAttribute = parsedInput.getAttribute("key2");
    Assert.assertEquals(firstAttribute, "value", "wrong attribute value for key1");
    Assert.assertEquals(secondAttribute, "value", "wrong attribute value for key2");

    // Name and path come through as given.
    Assert.assertEquals(parsedInput.getName(), "myName");
    Assert.assertEquals(parsedInput.getFeaturePath(), "myFile");
}
 
Example #26
Source File: DataSourceFuncotationFactory.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Creates a {@link List} of {@link Funcotation} for the given {@code variant}, {@code referenceContext}, {@code featureContext}, and {@code gencodeFuncotations}.
 * Features are pulled from the feature context; if they are not compatible with this factory, or if
 * no funcotations result, default funcotations are returned instead. Override values are applied
 * to funcotations created from compatible features.
 *
 * @param variant {@link VariantContext} to annotate.  Never {@code null}.
 * @param referenceContext {@link ReferenceContext} corresponding to the given {@code variant}.  Never {@code null}.
 * @param featureContext {@link FeatureContext} corresponding to the variant.  Never {@code null}.
 * @param gencodeFuncotations {@link List} of {@link GencodeFuncotation} that have already been created for the given {@code variant}/{@code referenceContext}/{@code featureContext}.
 *   {@code null} is acceptable if there are no corresponding gencode funcotations.
 * @return {@link List} of {@link Funcotation} given the {@code variant}, {@code referenceContext}, and {@code featureContext}.
 */
public List<Funcotation> createFuncotations(final VariantContext variant, final ReferenceContext referenceContext, final FeatureContext featureContext, final List<GencodeFuncotation> gencodeFuncotations) {

    Utils.nonNull(variant);
    Utils.nonNull(referenceContext);
    Utils.nonNull(featureContext);

    // Ask this factory for the features overlapping the context.
    // NOTE (carried over from the original): only LOCATABLE features are returned,
    //       which corresponds to requiresFeatures() returning true.
    final List<Feature> overlappingFeatures = getFeaturesFromFeatureContext(featureContext);

    // Incompatible features: fall back to the defaults right away.
    if ( !isFeatureListCompatible(overlappingFeatures) ) {
        return createDefaultFuncotationsOnVariant(variant, referenceContext);
    }

    final List<Funcotation> funcotations = determineFuncotations(variant, referenceContext, overlappingFeatures, gencodeFuncotations);

    // Apply the override values.
    setOverrideValuesInFuncotations(funcotations);

    // Nothing produced: fall back to the defaults as well.
    if ((funcotations == null) || funcotations.isEmpty()) {
        return createDefaultFuncotationsOnVariant(variant, referenceContext);
    }
    return funcotations;
}
 
Example #27
Source File: DataSourceFuncotationFactory.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Fetches the Features of {@link #mainSourceFileAsFeatureInput} from the given FeatureContext.
 *
 * The query interval starts as the FeatureContext's own interval.  If {@code dataSourceIsB37} is set, its
 * contig name is converted from hg19 to b37 naming; the result is then passed through
 * {@link #transformFeatureQueryInterval}.  When the final interval equals the FeatureContext's interval
 * the context is queried without an explicit interval, otherwise the final interval is used for the query.
 *
 * @param featureContext the FeatureContext to query
 * @return Features from {@link #mainSourceFileAsFeatureInput} obtained from {@code featureContext}
 */
@SuppressWarnings("unchecked")
private List<Feature> queryFeaturesFromFeatureContext(final FeatureContext featureContext) {
    final SimpleInterval contextInterval = featureContext.getInterval();

    // Fuzzy hg19 / b37 conversion: only the contig name changes, the coordinates are kept.
    final SimpleInterval contigAdjustedInterval = dataSourceIsB37
            ? new SimpleInterval(
                    FuncotatorUtils.convertHG19ContigToB37Contig(contextInterval.getContig()),
                    contextInterval.getStart(),
                    contextInterval.getEnd())
            : contextInterval;

    // Apply any further transformation of the query interval.
    final SimpleInterval finalInterval = transformFeatureQueryInterval(contigAdjustedInterval);

    // An unchanged interval means the context's own interval can be used for the lookup.
    final boolean intervalUnchanged = finalInterval.equals(featureContext.getInterval());
    if ( intervalUnchanged ) {
        return (List<Feature>) featureContext.getValues(mainSourceFileAsFeatureInput);
    }

    return (List<Feature>) featureContext.getValues(mainSourceFileAsFeatureInput, finalInterval);
}
 
Example #28
Source File: FuncotationMapUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Test helper that creates the {@link GencodeFuncotation}s for a single variant.
 *
 * A {@link VariantContext} and {@link ReferenceContext} are built for the given position and alleles, a
 * {@link GencodeFuncotationFactory} is created from the transcript FASTA / GTF files, and the factory's
 * funcotations for the variant are returned cast to {@link GencodeFuncotation}.
 *
 * @param contig contig of the variant interval
 * @param start start of the variant interval
 * @param end end of the variant interval
 * @param ref reference allele string handed to {@code createVariantContext}
 * @param alt alternate allele string handed to {@code createVariantContext}
 * @param referenceFileName reference file name handed to {@code createVariantContext}
 * @param referenceDataSource data source backing the {@link ReferenceContext}
 * @param featureReader reader queried once at the variant's position
 * @param transcriptFastaFile path of the transcript FASTA given to the factory
 * @param transcriptGtfFile path of the GTF file wrapped in the factory's {@link FeatureInput}
 * @param transcriptSelectionMode transcript selection mode given to the factory
 * @return the funcotations produced by the factory, each cast to {@link GencodeFuncotation}
 * @throws GATKException if querying {@code featureReader} fails with an {@link IOException}
 */
private static List<GencodeFuncotation> createGencodeFuncotations(final String contig, final int start, final int end, final String ref, final String alt, final String referenceFileName, final ReferenceDataSource referenceDataSource, final FeatureReader<GencodeGtfFeature> featureReader, final String transcriptFastaFile, final String transcriptGtfFile, final TranscriptSelectionMode transcriptSelectionMode) {
    final SimpleInterval variantInterval = new SimpleInterval( contig, start, end );
    final VariantContext variantContext = createVariantContext(contig, start, end, ref, alt, referenceFileName);

    final ReferenceContext referenceContext = new ReferenceContext(referenceDataSource, variantInterval );

    // Query the gene features at the variant's position.  The iterator is a closeable resource, so it is
    // opened in a try-with-resources block to guarantee it is released even if advancing it fails.
    try ( final CloseableTribbleIterator<GencodeGtfFeature> gtfFeatureIterator =
                  featureReader.query(variantContext.getContig(), variantContext.getStart(), variantContext.getEnd()) ) {
        // The feature itself is not used below; the iterator is advanced once only to exercise the query.
        gtfFeatureIterator.next();
    }
    catch (final IOException ex) {
        throw new GATKException("Could not finish the test!", ex);
    }

    final String gencode_test = "GENCODE_TEST";
    final GencodeFuncotationFactory gencodeFactory = new GencodeFuncotationFactory(Paths.get(transcriptFastaFile),
    "TEST", gencode_test, transcriptSelectionMode, new HashSet<>(), new LinkedHashMap<>(),
            new FeatureInput<>(transcriptGtfFile, gencode_test, Collections.emptyMap()), "TEST");

    final FeatureContext featureContext = FuncotatorTestUtils.createFeatureContext(Collections.singletonList(gencodeFactory), "FuncotationMapUnitTest",
            variantInterval, 0, 0, 0, null);

    return gencodeFactory.createFuncotations(variantContext, referenceContext, featureContext).stream()
        .map(f -> (GencodeFuncotation) f).collect(Collectors.toList());
}
 
Example #29
Source File: VcfFuncotationFactory.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * {@inheritDoc}
 * {@link VcfFuncotationFactory} can be used with or without Gencode annotations.
 */
@Override
protected List<Funcotation> createFuncotationsOnVariant(final VariantContext variant, final ReferenceContext referenceContext, final List<Feature> featureList, final List<GencodeFuncotation> gencodeFuncotations) {
    // The Gencode funcotations are not consulted here; delegate to the three-argument overload.
    return createFuncotationsOnVariant(variant, referenceContext, featureList);
}
 
Example #30
Source File: FeatureInputUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Checks that a logical name supplied as a tag ("argName:myName") ends up as the name of the
 * {@link FeatureInput}, and that the feature path is the file argument.
 */
@Test
public void testFeatureNameSpecified() {
    final String expectedPath = "myFile";
    final String expectedName = "myName";

    final FeatureInput<Feature> namedInput = runCommandLineWithTaggedFeatureInput("argName:" + expectedName, expectedPath);

    Assert.assertEquals(namedInput.getFeaturePath(), expectedPath, "Wrong File in FeatureInput");
    Assert.assertEquals(namedInput.getName(), expectedName, "Wrong name in FeatureInput");
}