org.elasticsearch.plugins.AnalysisPlugin Java Examples

The following examples show how to use org.elasticsearch.plugins.AnalysisPlugin. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: AnalysisModule.java    From crate with Apache License 2.0 6 votes vote down vote up
/**
 * Collects the pre-configured token filters: the two registered directly here
 * ("lowercase" and the deprecated "standard") followed by every filter returned
 * by the supplied plugins.
 *
 * @param plugins analysis plugins whose pre-configured token filters are registered
 * @return the registry contents wrapped by {@code unmodifiableMap}, keyed by registration name
 */
static Map<String, PreConfiguredTokenFilter> setupPreConfiguredTokenFilters(List<AnalysisPlugin> plugins) {
    NamedRegistry<PreConfiguredTokenFilter> registry = new NamedRegistry<>("pre-configured token_filter");

    // Filters that ship with lucene-core.
    PreConfiguredTokenFilter lowercase = PreConfiguredTokenFilter.singleton("lowercase", true, LowerCaseFilter::new);
    registry.register("lowercase", lowercase);

    // "standard" only logs a deprecation warning and hands the stream back untouched.
    PreConfiguredTokenFilter standard = PreConfiguredTokenFilter.singletonWithVersion("standard", false, (stream, version) -> {
        DEPRECATION_LOGGER.deprecatedAndMaybeLog("standard_deprecation",
            "The [standard] token filter is deprecated and will be removed in a future version.");
        return stream;
    });
    registry.register("standard", standard);

    /* Note that "stop" is available in lucene-core, but its pre-built
     * version uses a set of English stop words that live in
     * lucene-analyzers-common, so "stop" is defined in the analysis-common
     * module. */

    // Plugin-provided filters are registered under the name each filter reports.
    for (AnalysisPlugin contributor : plugins) {
        contributor.getPreConfiguredTokenFilters()
            .forEach(tokenFilter -> registry.register(tokenFilter.getName(), tokenFilter));
    }
    return unmodifiableMap(registry.getRegistry());
}
 
Example #2
Source File: AnalysisModule.java    From crate with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the analysis registry from the built-in components plus everything the
 * given plugins contribute.
 *
 * @param environment environment handed to the hunspell service and the analysis registry
 * @param plugins     analysis plugins to pull components from
 * @throws IOException declared by this constructor; raised by the setup steps it calls
 */
public AnalysisModule(Environment environment, List<AnalysisPlugin> plugins) throws IOException {
    // Configurable components. The hunspell service must exist before the token
    // filters are set up because the "hunspell" filter is created from it.
    NamedRegistry<AnalysisProvider<CharFilterFactory>> charFilterRegistry = setupCharFilters(plugins);
    NamedRegistry<org.apache.lucene.analysis.hunspell.Dictionary> dictionaryRegistry = setupHunspellDictionaries(plugins);
    HunspellService hunspell = new HunspellService(environment.settings(), environment, dictionaryRegistry.getRegistry());
    NamedRegistry<AnalysisProvider<TokenFilterFactory>> tokenFilterRegistry = setupTokenFilters(plugins, hunspell);
    NamedRegistry<AnalysisProvider<TokenizerFactory>> tokenizerRegistry = setupTokenizers(plugins);
    NamedRegistry<AnalysisProvider<AnalyzerProvider<?>>> analyzerRegistry = setupAnalyzers(plugins);
    NamedRegistry<AnalysisProvider<AnalyzerProvider<?>>> normalizerRegistry = setupNormalizers();

    // Pre-configured / pre-built components.
    Map<String, PreConfiguredCharFilter> preCharFilters = setupPreConfiguredCharFilters(plugins);
    Map<String, PreConfiguredTokenFilter> preTokenFilters = setupPreConfiguredTokenFilters(plugins);
    Map<String, PreConfiguredTokenizer> preTokenizers = setupPreConfiguredTokenizers(plugins);
    Map<String, PreBuiltAnalyzerProviderFactory> preAnalyzers = setupPreBuiltAnalyzerProviderFactories(plugins);

    this.analysisRegistry = new AnalysisRegistry(
        environment,
        charFilterRegistry.getRegistry(),
        tokenFilterRegistry.getRegistry(),
        tokenizerRegistry.getRegistry(),
        analyzerRegistry.getRegistry(),
        normalizerRegistry.getRegistry(),
        preCharFilters,
        preTokenFilters,
        preTokenizers,
        preAnalyzers);
}
 
Example #3
Source File: AnalysisTestsHelper.java    From crate with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@code TestAnalysis} for an index named "test" from the given settings.
 * When the settings carry no {@code IndexMetaData.SETTING_VERSION_CREATED} entry,
 * a copy with that entry set to {@code Version.CURRENT} is used instead.
 *
 * @param settings   settings used both for the index and for the environment
 * @param configPath config path passed to the {@code Environment} constructor
 * @param plugins    analysis plugins handed to the {@code AnalysisModule}
 * @return the analyzers plus token filter, tokenizer and char filter factories built for the index
 * @throws IOException if building the analysis module or its components fails
 */
public static ESTestCase.TestAnalysis createTestAnalysisFromSettings(
        final Settings settings,
        final Path configPath,
        final AnalysisPlugin... plugins) throws IOException {
    final boolean versionMissing = settings.get(IndexMetaData.SETTING_VERSION_CREATED) == null;
    final Settings effectiveSettings = versionMissing
        ? Settings.builder().put(settings).put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build()
        : settings;

    final IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("test", effectiveSettings);
    final Environment environment = new Environment(effectiveSettings, configPath);
    final AnalysisModule module = new AnalysisModule(environment, Arrays.asList(plugins));
    final AnalysisRegistry registry = module.getAnalysisRegistry();

    return new ESTestCase.TestAnalysis(
        registry.build(idxSettings),
        registry.buildTokenFilterFactories(idxSettings),
        registry.buildTokenizerFactories(idxSettings),
        registry.buildCharFilterFactories(idxSettings));
}
 
Example #4
Source File: AnalysisModule.java    From crate with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the token filter registry: "stop", the deprecated "standard", "shingle"
 * and "hunspell" are registered here, then the token filters of every plugin.
 *
 * @param plugins         analysis plugins whose token filters are registered
 * @param hunspellService service passed to each {@code HunspellTokenFilterFactory}
 * @return the populated registry
 */
private NamedRegistry<AnalysisProvider<TokenFilterFactory>> setupTokenFilters(List<AnalysisPlugin> plugins, HunspellService
    hunspellService) {
    NamedRegistry<AnalysisProvider<TokenFilterFactory>> registry = new NamedRegistry<>("token_filter");

    registry.register("stop", StopTokenFilterFactory::new);

    // "standard" is kept for compatibility: it warns and produces a pass-through factory.
    registry.register("standard", (idxSettings, env, filterName, filterSettings) -> {
        DEPRECATION_LOGGER.deprecatedAndMaybeLog("standard_deprecation",
            "The [standard] token filter name is deprecated and will be removed in a future version.");
        return new AbstractTokenFilterFactory(idxSettings, filterName, filterSettings) {
            @Override
            public TokenStream create(TokenStream input) {
                return input;
            }
        };
    });

    registry.register("shingle", ShingleTokenFilterFactory::new);

    // The hunspell provider is wrapped with requiresAnalysisSettings before registration.
    registry.register("hunspell", requiresAnalysisSettings(
        (idxSettings, env, filterName, filterSettings) ->
            new HunspellTokenFilterFactory(idxSettings, filterName, filterSettings, hunspellService)));

    registry.extractAndRegister(plugins, AnalysisPlugin::getTokenFilters);
    return registry;
}
 
Example #5
Source File: AnalysisModule.java    From crate with Apache License 2.0 5 votes vote down vote up
/**
 * Collects the pre-built analyzer provider factories offered by the given plugins.
 * Nothing is registered here besides what the plugins return.
 *
 * @param plugins analysis plugins to query
 * @return the registry contents wrapped by {@code unmodifiableMap}, keyed by factory name
 */
static Map<String, PreBuiltAnalyzerProviderFactory> setupPreBuiltAnalyzerProviderFactories(List<AnalysisPlugin> plugins) {
    NamedRegistry<PreBuiltAnalyzerProviderFactory> analyzerFactories = new NamedRegistry<>("pre-built analyzer");
    for (AnalysisPlugin contributor : plugins) {
        for (PreBuiltAnalyzerProviderFactory analyzerFactory : contributor.getPreBuiltAnalyzerProviderFactories()) {
            String key = analyzerFactory.getName();
            analyzerFactories.register(key, analyzerFactory);
        }
    }
    return unmodifiableMap(analyzerFactories.getRegistry());
}
 
Example #6
Source File: AnalysisModule.java    From crate with Apache License 2.0 5 votes vote down vote up
/**
 * Collects the pre-configured char filters offered by the given plugins.
 *
 * @param plugins analysis plugins to query
 * @return the registry contents wrapped by {@code unmodifiableMap}, keyed by filter name
 */
static Map<String, PreConfiguredCharFilter> setupPreConfiguredCharFilters(List<AnalysisPlugin> plugins) {
    NamedRegistry<PreConfiguredCharFilter> registry = new NamedRegistry<>("pre-configured char_filter");

    // No char filters are available in lucene-core, so none are built in to
    // Elasticsearch core; everything registered below comes from plugins.
    for (AnalysisPlugin contributor : plugins) {
        contributor.getPreConfiguredCharFilters()
            .forEach(charFilter -> registry.register(charFilter.getName(), charFilter));
    }
    return unmodifiableMap(registry.getRegistry());
}
 
Example #7
Source File: ESTestCase.java    From crate with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a TestAnalysis from an {@code AnalysisModule} built with the given
 * node settings and plugins.
 *
 * @param indexSettings   index settings the analysis components are built for
 * @param nodeSettings    settings used to create the test environment
 * @param analysisPlugins analysis plugins handed to the {@code AnalysisModule}
 * @return the analyzers plus token filter, tokenizer and char filter factories built for the index
 * @throws IOException if building the analysis module or its components fails
 */
public static TestAnalysis createTestAnalysis(IndexSettings indexSettings, Settings nodeSettings,
                                              AnalysisPlugin... analysisPlugins) throws IOException {
    AnalysisRegistry registry =
        new AnalysisModule(TestEnvironment.newEnvironment(nodeSettings), Arrays.asList(analysisPlugins))
            .getAnalysisRegistry();
    return new TestAnalysis(
        registry.build(indexSettings),
        registry.buildTokenFilterFactories(indexSettings),
        registry.buildTokenizerFactories(indexSettings),
        registry.buildCharFilterFactories(indexSettings));
}
 
Example #8
Source File: AnalysisModule.java    From crate with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the analyzer registry: the analyzers registered here ("default",
 * "standard", "simple", "stop", "whitespace", "keyword") followed by the
 * analyzers of every plugin.
 *
 * @param plugins analysis plugins whose analyzers are registered
 * @return the populated registry
 */
private NamedRegistry<AnalysisProvider<AnalyzerProvider<?>>> setupAnalyzers(List<AnalysisPlugin> plugins) {
    NamedRegistry<AnalysisProvider<AnalyzerProvider<?>>> registry = new NamedRegistry<>("analyzer");

    // "default" and "standard" are both backed by the standard analyzer provider.
    registry.register("default", StandardAnalyzerProvider::new);
    registry.register("standard", StandardAnalyzerProvider::new);

    registry.register("simple", SimpleAnalyzerProvider::new);
    registry.register("stop", StopAnalyzerProvider::new);
    registry.register("whitespace", WhitespaceAnalyzerProvider::new);
    registry.register("keyword", KeywordAnalyzerProvider::new);

    registry.extractAndRegister(plugins, AnalysisPlugin::getAnalyzers);
    return registry;
}
 
Example #9
Source File: AnalysisTestsHelper.java    From crate with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@code TestAnalysis} from a settings file found on the classpath.
 * The loaded settings are extended with {@code Environment.PATH_HOME_SETTING}
 * pointing at {@code baseDir}.
 *
 * @param baseDir  directory stored as the path-home setting
 * @param resource classpath resource name, resolved relative to {@code AnalysisTestsHelper}
 * @param plugins  analysis plugins handed on to {@code createTestAnalysisFromSettings}
 * @return the test analysis built from the loaded settings
 * @throws IOException if reading the resource or building the analysis fails
 */
public static ESTestCase.TestAnalysis createTestAnalysisFromClassPath(final Path baseDir,
                                                                      final String resource,
                                                                      final AnalysisPlugin... plugins) throws IOException {
    final Settings settings;
    // This method opens the stream, so it also closes it, even when loading or
    // building fails. try-with-resources skips close() for a null resource, so a
    // missing classpath entry is still passed to loadFromStream exactly as before.
    try (java.io.InputStream resourceStream = AnalysisTestsHelper.class.getResourceAsStream(resource)) {
        settings = Settings.builder()
                .loadFromStream(resource, resourceStream, false)
                .put(Environment.PATH_HOME_SETTING.getKey(), baseDir.toString())
                .build();
    }

    return createTestAnalysisFromSettings(settings, plugins);
}
 
Example #10
Source File: ESTestCase.java    From crate with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a TestAnalysis for the given index. The supplied index-level settings
 * are copied and {@code IndexMetaData.SETTING_VERSION_CREATED} is set to
 * {@code Version.CURRENT} before delegating to the {@code IndexSettings} overload.
 *
 * @param index           index the settings are created for
 * @param nodeSettings    node-level settings passed through unchanged
 * @param settings        index-level settings to start from
 * @param analysisPlugins analysis plugins passed through unchanged
 * @return the test analysis produced by the delegate
 * @throws IOException if the delegate fails
 */
public static TestAnalysis createTestAnalysis(Index index, Settings nodeSettings, Settings settings,
                                              AnalysisPlugin... analysisPlugins) throws IOException {
    Settings withCreatedVersion = Settings.builder()
        .put(settings)
        .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
        .build();
    IndexSettings idxSettings = IndexSettingsModule.newIndexSettings(index, withCreatedVersion);
    return createTestAnalysis(idxSettings, nodeSettings, analysisPlugins);
}
 
Example #11
Source File: AnalysisFactoryTestCase.java    From crate with Apache License 2.0 4 votes vote down vote up
public AnalysisFactoryTestCase(AnalysisPlugin plugin) {
    this.plugin = Objects.requireNonNull(plugin, "plugin is required. use an empty plugin for core");
}
 
Example #12
Source File: SQLExecutor.java    From crate with Apache License 2.0 4 votes vote down vote up
/**
 * Static factory for {@code Builder}; forwards all arguments to its constructor.
 *
 * @param clusterService  cluster service handed to the builder
 * @param numNodes        node count handed to the builder
 * @param random          random source handed to the builder
 * @param analysisPlugins analysis plugins handed to the builder
 * @return a new builder
 */
public static Builder builder(ClusterService clusterService,
                              int numNodes,
                              Random random,
                              List<AnalysisPlugin> analysisPlugins) {
    final Builder created = new Builder(clusterService, numNodes, random, analysisPlugins);
    return created;
}
 
Example #13
Source File: SQLExecutor.java    From crate with Apache License 2.0 4 votes vote down vote up
/**
 * Wires up everything the builder needs: cluster nodes, schema infos, the
 * analysis registry, statement analyzers and the allocation service, and
 * finally publishes the initial cluster state.
 *
 * The statements below are order-dependent (several of them have side effects
 * on the cluster service or on fields read by later statements), so they must
 * not be reordered casually.
 *
 * @param clusterService  cluster service shared by the components created here
 * @param numNodes        number of nodes to add to the cluster state; must be at least 1
 * @param random          random source stored on the builder
 * @param analysisPlugins analysis plugins passed to the {@code AnalysisModule}
 * @throws IllegalArgumentException if {@code numNodes} is less than 1
 * @throws RuntimeException wrapping the {@code IOException} if the analysis module cannot be created
 */
private Builder(ClusterService clusterService,
                int numNodes,
                Random random,
                List<AnalysisPlugin> analysisPlugins) {
    // Validate before touching any state.
    if (numNodes < 1) {
        throw new IllegalArgumentException("Must have at least 1 node");
    }
    this.random = random;
    this.clusterService = clusterService;
    // Called only after this.clusterService is assigned.
    // NOTE(review): presumably reads that field - confirm against addNodesToClusterState.
    addNodesToClusterState(numNodes);
    functions = getFunctions();
    UserDefinedFunctionService udfService = new UserDefinedFunctionService(clusterService, functions);

    // Schema infos registered by name: "sys", "information_schema", pg_catalog and blob.
    Map<String, SchemaInfo> schemaInfoByName = new HashMap<>();
    CrateSettings crateSettings = new CrateSettings(clusterService, clusterService.getSettings());
    schemaInfoByName.put("sys", new SysSchemaInfo(clusterService, crateSettings, new CeLicenseService()));
    schemaInfoByName.put("information_schema", new InformationSchemaInfo());
    // tableStats is not assigned in this constructor.
    // NOTE(review): presumably a field initialised at its declaration - confirm.
    schemaInfoByName.put(PgCatalogSchemaInfo.NAME, new PgCatalogSchemaInfo(udfService, tableStats));
    IndexNameExpressionResolver indexNameExpressionResolver = new IndexNameExpressionResolver();
    schemaInfoByName.put(
        BlobSchemaInfo.NAME,
        new BlobSchemaInfo(
            clusterService,
            new TestingBlobTableInfoFactory(
                Collections.emptyMap(), indexNameExpressionResolver, createTempDir())));


    // The doc-table map starts empty; the same map instance is handed to the table info factory.
    Map<RelationName, DocTableInfo> docTables = new HashMap<>();
    DocTableInfoFactory tableInfoFactory = new TestingDocTableInfoFactory(
        docTables, functions, indexNameExpressionResolver);
    // View factory stub: always returns null.
    ViewInfoFactory testingViewInfoFactory = (ident, state) -> null;

    schemas = new Schemas(
        schemaInfoByName,
        clusterService,
        new DocSchemaInfoFactory(tableInfoFactory, testingViewInfoFactory, functions, udfService)
    );
    schemas.start();  // start listening to cluster state changes

    // Environment rooted in a fresh temp dir: path-home is the dir itself and
    // the config path is its "config" child.
    File homeDir = createTempDir();
    Environment environment = new Environment(
        Settings.builder().put(PATH_HOME_SETTING.getKey(), homeDir.getAbsolutePath()).build(),
        homeDir.toPath().resolve("config")
    );
    try {
        analysisRegistry = new AnalysisModule(environment, analysisPlugins).getAnalysisRegistry();
    } catch (IOException e) {
        // Rethrown unchecked, with the original exception kept as the cause.
        throw new RuntimeException(e);
    }
    fulltextAnalyzerResolver = new FulltextAnalyzerResolver(clusterService, analysisRegistry);
    createTableStatementAnalyzer = new CreateTableStatementAnalyzer(functions);
    createBlobTableAnalyzer = new CreateBlobTableAnalyzer(
        schemas,
        functions
    );
    // Allocation service with two deciders, a test gateway allocator, a balanced
    // shards allocator built from empty settings, and the empty cluster info service.
    allocationService = new AllocationService(
        new AllocationDeciders(
            Arrays.asList(
                new SameShardAllocationDecider(Settings.EMPTY, clusterService.getClusterSettings()),
                new ReplicaAfterPrimaryActiveAllocationDecider()
            )
        ),
        new TestGatewayAllocator(),
        new BalancedShardsAllocator(Settings.EMPTY),
        EmptyClusterInfoService.INSTANCE
    );

    // Last step, after all collaborators above have been created.
    publishInitialClusterState();
}
 
Example #14
Source File: ESTestCase.java    From crate with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a TestAnalysis for the given index using node settings that contain
 * only {@code Environment.PATH_HOME_SETTING}, set to a newly created temp directory.
 *
 * @param index           index the analysis is created for
 * @param settings        index-level settings passed through unchanged
 * @param analysisPlugins analysis plugins passed through unchanged
 * @return the test analysis produced by the delegate
 * @throws IOException if the delegate fails
 */
public static TestAnalysis createTestAnalysis(Index index, Settings settings, AnalysisPlugin... analysisPlugins)
        throws IOException {
    Settings tempHomeNodeSettings = Settings.builder()
        .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir())
        .build();
    return createTestAnalysis(index, tempHomeNodeSettings, settings, analysisPlugins);
}
 
Example #15
Source File: AnalysisTestsHelper.java    From crate with Apache License 2.0 4 votes vote down vote up
/**
 * Convenience overload that delegates to the three-argument variant with a
 * {@code null} config path.
 *
 * @param settings settings passed through unchanged
 * @param plugins  analysis plugins passed through unchanged
 * @return the test analysis produced by the delegate
 * @throws IOException if the delegate fails
 */
public static ESTestCase.TestAnalysis createTestAnalysisFromSettings(
        final Settings settings,
        final AnalysisPlugin... plugins) throws IOException {
    // No config path is supplied by this overload.
    final ESTestCase.TestAnalysis analysis = createTestAnalysisFromSettings(settings, null, plugins);
    return analysis;
}
 
Example #16
Source File: AnalysisModule.java    From crate with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the tokenizer registry: "standard" is registered here, followed by
 * the tokenizers of every plugin.
 *
 * @param plugins analysis plugins whose tokenizers are registered
 * @return the populated registry
 */
private NamedRegistry<AnalysisProvider<TokenizerFactory>> setupTokenizers(List<AnalysisPlugin> plugins) {
    NamedRegistry<AnalysisProvider<TokenizerFactory>> registry = new NamedRegistry<>("tokenizer");

    // Built-in tokenizer first, then plugin contributions.
    registry.register("standard", StandardTokenizerFactory::new);
    registry.extractAndRegister(plugins, AnalysisPlugin::getTokenizers);

    return registry;
}
 
Example #17
Source File: AnalysisModule.java    From crate with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the hunspell dictionary registry, filled only with the dictionaries
 * the given plugins provide.
 *
 * @param plugins analysis plugins whose hunspell dictionaries are registered
 * @return the populated registry
 */
public NamedRegistry<org.apache.lucene.analysis.hunspell.Dictionary> setupHunspellDictionaries(List<AnalysisPlugin> plugins) {
    NamedRegistry<org.apache.lucene.analysis.hunspell.Dictionary> registry = new NamedRegistry<>("dictionary");

    // Nothing is built in; every entry comes from a plugin.
    registry.extractAndRegister(plugins, AnalysisPlugin::getHunspellDictionaries);

    return registry;
}
 
Example #18
Source File: AnalysisModule.java    From crate with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the char filter registry, filled only with the char filters the
 * given plugins provide.
 *
 * @param plugins analysis plugins whose char filters are registered
 * @return the populated registry
 */
private NamedRegistry<AnalysisProvider<CharFilterFactory>> setupCharFilters(List<AnalysisPlugin> plugins) {
    NamedRegistry<AnalysisProvider<CharFilterFactory>> registry = new NamedRegistry<>("char_filter");

    // Nothing is built in; every entry comes from a plugin.
    registry.extractAndRegister(plugins, AnalysisPlugin::getCharFilters);

    return registry;
}