org.elasticsearch.index.analysis.TokenizerFactory Java Examples

The following examples show how to use org.elasticsearch.index.analysis.TokenizerFactory. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: CommonAnalysisPlugin.java    From crate with Apache License 2.0 6 votes vote down vote up
@Override
public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
    // TreeMap keeps the registry sorted by tokenizer name; the puts below are
    // listed alphabetically for readability (insertion order is irrelevant).
    Map<String, AnalysisProvider<TokenizerFactory>> registry = new TreeMap<>();
    registry.put("PathHierarchy", PathHierarchyTokenizerFactory::new);
    registry.put("char_group", CharGroupTokenizerFactory::new);
    registry.put("classic", ClassicTokenizerFactory::new);
    registry.put("edge_ngram", EdgeNGramTokenizerFactory::new);
    registry.put("keyword", KeywordTokenizerFactory::new);
    registry.put("letter", LetterTokenizerFactory::new);
    registry.put("lowercase", LowerCaseTokenizerFactory::new);
    registry.put("ngram", NGramTokenizerFactory::new);
    // Both spellings map to the same factory ("PathHierarchy" above is the legacy alias).
    registry.put("path_hierarchy", PathHierarchyTokenizerFactory::new);
    registry.put("pattern", PatternTokenizerFactory::new);
    registry.put("simple_pattern", SimplePatternTokenizerFactory::new);
    registry.put("simple_pattern_split", SimplePatternSplitTokenizerFactory::new);
    registry.put("thai", ThaiTokenizerFactory::new);
    registry.put("uax_url_email", UAX29URLEmailTokenizerFactory::new);
    registry.put("whitespace", WhitespaceTokenizerFactory::new);
    return registry;
}
 
Example #2
Source File: AnalysisModule.java    From crate with Apache License 2.0 6 votes vote down vote up
/**
 * Wires up the analysis registry from the built-in components plus whatever the
 * given plugins contribute for each category.
 */
public AnalysisModule(Environment environment, List<AnalysisPlugin> plugins) throws IOException {
    // Per-category registries, each seeded with defaults and extended by plugins.
    NamedRegistry<AnalysisProvider<CharFilterFactory>> charFilterRegistry = setupCharFilters(plugins);
    NamedRegistry<org.apache.lucene.analysis.hunspell.Dictionary> dictionaryRegistry = setupHunspellDictionaries(plugins);
    // The hunspell service is built first because token filter setup takes it as input.
    HunspellService hunspell = new HunspellService(environment.settings(), environment, dictionaryRegistry.getRegistry());
    NamedRegistry<AnalysisProvider<TokenFilterFactory>> tokenFilterRegistry = setupTokenFilters(plugins, hunspell);
    NamedRegistry<AnalysisProvider<TokenizerFactory>> tokenizerRegistry = setupTokenizers(plugins);
    NamedRegistry<AnalysisProvider<AnalyzerProvider<?>>> analyzerRegistry = setupAnalyzers(plugins);
    NamedRegistry<AnalysisProvider<AnalyzerProvider<?>>> normalizerRegistry = setupNormalizers();

    // Pre-configured components contributed by plugins.
    Map<String, PreConfiguredCharFilter> preBuiltCharFilters = setupPreConfiguredCharFilters(plugins);
    Map<String, PreConfiguredTokenFilter> preBuiltTokenFilters = setupPreConfiguredTokenFilters(plugins);
    Map<String, PreConfiguredTokenizer> preBuiltTokenizers = setupPreConfiguredTokenizers(plugins);
    Map<String, PreBuiltAnalyzerProviderFactory> preBuiltAnalyzers = setupPreBuiltAnalyzerProviderFactories(plugins);

    analysisRegistry = new AnalysisRegistry(environment,
            charFilterRegistry.getRegistry(),
            tokenFilterRegistry.getRegistry(),
            tokenizerRegistry.getRegistry(),
            analyzerRegistry.getRegistry(),
            normalizerRegistry.getRegistry(),
            preBuiltCharFilters,
            preBuiltTokenFilters,
            preBuiltTokenizers,
            preBuiltAnalyzers);
}
 
Example #3
Source File: IcuAnalysisTests.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 6 votes vote down vote up
public void testDefaultsIcuAnalysis() throws IOException {
    TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), Settings.EMPTY,
            new BundlePlugin(Settings.EMPTY));

    // Each ICU component name must resolve to the bundle's factory implementation.
    assertThat(analysis.charFilter.get("icu_normalizer"),
            instanceOf(IcuNormalizerCharFilterFactory.class));
    assertThat(analysis.tokenizer.get("icu_tokenizer"),
            instanceOf(IcuTokenizerFactory.class));
    assertThat(analysis.tokenFilter.get("icu_normalizer"),
            instanceOf(IcuNormalizerTokenFilterFactory.class));
    assertThat(analysis.tokenFilter.get("icu_folding"),
            instanceOf(IcuFoldingTokenFilterFactory.class));
    assertThat(analysis.tokenFilter.get("icu_transform"),
            instanceOf(IcuTransformTokenFilterFactory.class));

    // The collation analyzer is registered as a named analyzer on the index.
    Analyzer collationAnalyzer = analysis.indexAnalyzers.get("icu_collation");
    assertThat(collationAnalyzer, instanceOf(NamedAnalyzer.class));
}
 
Example #4
Source File: IkESPluginTest.java    From es-ik with Apache License 2.0 6 votes vote down vote up
@Test
public void testDefaultsIcuAnalysis() {
    Index index = new Index("test");

    Settings settings = ImmutableSettings.settingsBuilder()
            .put("path.home", "none")
            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
            .build();

    // Parent injector provides node-level services; the child injector adds the
    // index-level analysis bindings with the IK processor installed.
    Injector parentInjector = new ModulesBuilder()
            .add(new SettingsModule(ImmutableSettings.EMPTY),
                    new EnvironmentModule(new Environment(settings)),
                    new IndicesAnalysisModule())
            .createInjector();
    Injector injector = new ModulesBuilder()
            .add(new IndexSettingsModule(index, settings),
                    new IndexNameModule(index),
                    new AnalysisModule(ImmutableSettings.EMPTY,
                            parentInjector.getInstance(IndicesAnalysisService.class))
                            .addProcessor(new IKAnalysisBinderProcessor()))
            .createChildInjector(parentInjector);

    AnalysisService analysisService = injector.getInstance(AnalysisService.class);

    // The processor should have registered the "ik_tokenizer" factory.
    TokenizerFactory factory = analysisService.tokenizer("ik_tokenizer");
    MatcherAssert.assertThat(factory, instanceOf(IKTokenizerFactory.class));
}
 
Example #5
Source File: AnalysisJiebaPlugin.java    From elasticsearch-jieba-plugin with MIT License 5 votes vote down vote up
@Override
public Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> getTokenizers() {
  // Expose the two jieba modes: "search" (query-time) and "index" (index-time).
  Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> tokenizers = new HashMap<>();
  tokenizers.put("jieba_search", JiebaTokenizerFactory::getJiebaSearchTokenizerFactory);
  tokenizers.put("jieba_index", JiebaTokenizerFactory::getJiebaIndexTokenizerFactory);
  return tokenizers;
}
 
Example #6
Source File: MockKeywordPlugin.java    From crate with Apache License 2.0 5 votes vote down vote up
@Override
public Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> getTokenizers() {
    // Replace the real "keyword" tokenizer with a Lucene MockTokenizer so tests
    // can run without the full analysis machinery.
    return singletonMap("keyword", (indexSettings, environment, name, settings) ->
            new TokenizerFactory() {
                @Override
                public Tokenizer create() {
                    return new MockTokenizer(MockTokenizer.KEYWORD, false);
                }
            });
}
 
Example #7
Source File: ESTestCase.java    From crate with Apache License 2.0 5 votes vote down vote up
/**
 * Value holder capturing the per-category analysis registries so tests can
 * look components up by name.
 */
public TestAnalysis(IndexAnalyzers indexAnalyzers,
                    Map<String, TokenFilterFactory> tokenFilter,
                    Map<String, TokenizerFactory> tokenizer,
                    Map<String, CharFilterFactory> charFilter) {
    // Plain assignments; order is irrelevant.
    this.charFilter = charFilter;
    this.tokenizer = tokenizer;
    this.tokenFilter = tokenFilter;
    this.indexAnalyzers = indexAnalyzers;
}
 
Example #8
Source File: BundlePlugin.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 5 votes vote down vote up
@Override
public Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> getTokenizers() {
    // LinkedHashMap preserves registration order; each feature group can be
    // toggled off via its plugins.xbib.*.enabled setting (all default to true).
    Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> tokenizers = new LinkedHashMap<>();
    if (settings.getAsBoolean("plugins.xbib.icu.enabled", true)) {
        tokenizers.put("icu_collation_tokenizer", IcuCollationTokenizerFactory::new);
        tokenizers.put("icu_tokenizer", IcuTokenizerFactory::new);
    }
    if (settings.getAsBoolean("plugins.xbib.hyphen.enabled", true)) {
        tokenizers.put("hyphen", HyphenTokenizerFactory::new);
    }
    if (settings.getAsBoolean("plugins.xbib.naturalsort.enabled", true)) {
        tokenizers.put("naturalsort", NaturalSortKeyTokenizerFactory::new);
    }
    return tokenizers;
}
 
Example #9
Source File: SortformAnalyzerProvider.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Assembles the custom sortform analyzer from the configured char filters and
 * token filters, resolved by name from the supplied registries.
 * Throws IllegalArgumentException when a configured name is unknown.
 */
@Override
public void build(final Map<String, TokenizerFactory> tokenizers,
                  final Map<String, CharFilterFactory> charFilters,
                  final Map<String, TokenFilterFactory> tokenFilters) {
    // Resolve configured char filters, failing fast on unknown names.
    List<CharFilterFactory> myCharFilters = new ArrayList<>();
    for (String charFilterName : analyzerSettings.getAsList("char_filter")) {
        CharFilterFactory charFilter = charFilters.get(charFilterName);
        if (charFilter == null) {
            throw new IllegalArgumentException("Sortform Analyzer [" + name() +
                    "] failed to find char_filter under name [" + charFilterName + "]");
        }
        myCharFilters.add(charFilter);
    }
    // Resolve configured token filters the same way.
    List<TokenFilterFactory> myTokenFilters = new ArrayList<>();
    for (String tokenFilterName : analyzerSettings.getAsList("filter")) {
        TokenFilterFactory tokenFilter = tokenFilters.get(tokenFilterName);
        if (tokenFilter == null) {
            throw new IllegalArgumentException("Sortform Analyzer [" + name() +
                    "] failed to find filter under name [" + tokenFilterName + "]");
        }
        myTokenFilters.add(tokenFilter);
    }
    int positionOffsetGap = analyzerSettings.getAsInt("position_offset_gap", 0);
    int offsetGap = analyzerSettings.getAsInt("offset_gap", -1);
    // toArray(new T[0]) is the preferred idiom over a pre-sized array.
    this.customAnalyzer = new CustomAnalyzer(name(), tokenizerFactory,
            myCharFilters.toArray(new CharFilterFactory[0]),
            myTokenFilters.toArray(new TokenFilterFactory[0]),
            positionOffsetGap,
            offsetGap);
}
 
Example #10
Source File: HyphenAnalyzerProvider.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Assembles the custom hyphen analyzer. The hyphen token filter factory is
 * always installed first, followed by any filters configured by name.
 * Throws IllegalArgumentException when a configured name is unknown.
 */
@Override
public void build(final Map<String, TokenizerFactory> tokenizers,
                  final Map<String, CharFilterFactory> charFilters,
                  final Map<String, TokenFilterFactory> tokenFilters) {
    // Resolve configured char filters, failing fast on unknown names.
    List<CharFilterFactory> myCharFilters = new ArrayList<>();
    for (String charFilterName : analyzerSettings.getAsList("char_filter")) {
        CharFilterFactory charFilter = charFilters.get(charFilterName);
        if (charFilter == null) {
            throw new IllegalArgumentException("hyphen analyzer [" + name()
                    + "] failed to find char_filter under name [" + charFilterName + "]");
        }
        myCharFilters.add(charFilter);
    }
    // The built-in hyphen filter always runs before the configured filters.
    List<TokenFilterFactory> myTokenFilters = new ArrayList<>();
    myTokenFilters.add(tokenFilterFactory);
    for (String tokenFilterName : analyzerSettings.getAsList("filter")) {
        TokenFilterFactory tokenFilter = tokenFilters.get(tokenFilterName);
        if (tokenFilter == null) {
            throw new IllegalArgumentException("hyphen analyzer [" + name()
                    + "] failed to find filter under name [" + tokenFilterName + "]");
        }
        myTokenFilters.add(tokenFilter);
    }
    int positionOffsetGap = analyzerSettings.getAsInt("position_offset_gap", 0);
    int offsetGap = analyzerSettings.getAsInt("offset_gap", -1);
    // toArray(new T[0]) is the preferred idiom over a pre-sized array.
    this.customAnalyzer = new CustomAnalyzer(name(), tokenizerFactory,
            myCharFilters.toArray(new CharFilterFactory[0]),
            myTokenFilters.toArray(new TokenFilterFactory[0]),
            positionOffsetGap,
            offsetGap);
}
 
Example #11
Source File: ChineseWordPlugin.java    From word with Apache License 2.0 5 votes vote down vote up
@Override
public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
    // Both names resolve to the same factory; "word_sentence" is an alias.
    Map<String, AnalysisProvider<TokenizerFactory>> tokenizers = new HashMap<>();
    tokenizers.put("word_tokenizer", ChineseWordTokenizerFactory::new);
    tokenizers.put("word_sentence", ChineseWordTokenizerFactory::new);
    return tokenizers;
}
 
Example #12
Source File: BosonNLPIndicesAnalysis.java    From elasticsearch-analysis-bosonnlp with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the BosonNLP API connection and segmentation options from node settings
 * and registers the pre-built "bosonnlp" analyzer and tokenizer globally.
 */
@Inject
public BosonNLPIndicesAnalysis(final Settings settings, IndicesAnalysisService indicesAnalysisService) {
    super(settings);
    // Get all the arguments from settings. settings.get(...) already returns a
    // String, so the previous .toString() calls were redundant and are removed.
    this.TAG_URL = settings.get("API_URL", "");
    this.BOSONNLP_API_TOKEN = settings.get("API_TOKEN", "");
    this.spaceMode = Integer.parseInt(settings.get("space_mode", "0"));
    this.oovLevel = Integer.parseInt(settings.get("oov_level", "3"));
    this.t2s = Integer.parseInt(settings.get("t2s", "0"));
    // NOTE(review): the settings key "spechial_char_conv" is misspelled, but
    // renaming it would break existing configurations — kept as-is.
    this.specialCharConv = Integer.parseInt(settings.get("spechial_char_conv", "0"));

    // Register the bosonnlp type analyzer.
    indicesAnalysisService.analyzerProviderFactories().put("bosonnlp",
            new PreBuiltAnalyzerProviderFactory("bosonnlp", AnalyzerScope.GLOBAL,
                    new BosonNLPAnalyzer(TAG_URL, BOSONNLP_API_TOKEN, spaceMode, oovLevel, t2s, specialCharConv)));

    // Register the bosonnlp type tokenizer.
    indicesAnalysisService.tokenizerFactories().put("bosonnlp",
            new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() {

                @Override
                public String name() {
                    return "bosonnlp";
                }

                @Override
                public Tokenizer create() {
                    BosonNLPTokenizer BToken = null;
                    try {
                        BToken = new BosonNLPTokenizer(TAG_URL, BOSONNLP_API_TOKEN, spaceMode, oovLevel, t2s, specialCharConv);
                    } catch (JSONException | IOException | UnirestException e) {
                        // NOTE(review): swallowing this failure returns a null tokenizer,
                        // which will NPE downstream; a logger would be preferable, but no
                        // logger field is visible in this scope.
                        e.printStackTrace();
                    }
                    return BToken;
                }
            }));
}
 
Example #13
Source File: AnalysisLcPinyinPlugin.java    From elasticsearch-analysis-lc-pinyin with Artistic License 2.0 5 votes vote down vote up
/**
 * Registers the lc-pinyin tokenizers: "lc_index" for index-time full splits
 * and "lc_search" for query-time smart pinyin segmentation.
 */
@Override
public Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> getTokenizers() {
    // Diamond operator: the explicit type arguments were redundant (Java 7+).
    Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> providerMap = new HashMap<>();

    providerMap.put("lc_index", LcPinyinTokenizerFactory::getLcIndexTokenizerFactory);
    providerMap.put("lc_search", LcPinyinTokenizerFactory::getLcSmartPinyinTokenizerFactory);

    return providerMap;
}
 
Example #14
Source File: MynlpPlugin.java    From mynlp with Apache License 2.0 5 votes vote down vote up
@Override
public Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> getTokenizers() {
    // "mynlp" and "mynlp-core" share one factory; "mynlp-cws" is added only
    // when the CWS model is enabled.
    Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> tokenizers = new HashMap<>(16);
    tokenizers.put("mynlp", MynlpTokenizerFactory::new);
    tokenizers.put("mynlp-core", MynlpTokenizerFactory::new);
    if (enableCws) {
        tokenizers.put("mynlp-cws", MynlpTokenizerFactory::new);
    }
    return tokenizers;
}
 
Example #15
Source File: AnalysisHanLPPlugin.java    From elasticsearch-analysis-hanlp with Apache License 2.0 5 votes vote down vote up
/**
 * Registers the HanLP tokenizer variants; "hanlp" is an alias for the
 * standard segmenter.
 */
@Override
public Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> getTokenizers() {
    // Program to the interface: declare the local as Map rather than HashMap.
    Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> tokenizers = new HashMap<>();
    tokenizers.put("hanlp", HanLPTokenizerFactory::createStandard);
    tokenizers.put("hanlp-standard", HanLPTokenizerFactory::createStandard);
    tokenizers.put("hanlp-nlp", HanLPTokenizerFactory::createNLP);
    tokenizers.put("hanlp-index", HanLPIndexAnalyzerFactory::new);
    tokenizers.put("hanlp-nshort", HanLPTokenizerFactory::createNShort);
    tokenizers.put("hanlp-shortest", HanLPTokenizerFactory::createShortest);
    tokenizers.put("hanlp-crf", HanLPTokenizerFactory::createCRF);
    tokenizers.put("hanlp-speed", HanLPTokenizerFactory::createSpeed);
    return tokenizers;
}
 
Example #16
Source File: AnalysisHanLPPlugin.java    From elasticsearch-analysis-hanlp with Apache License 2.0 5 votes vote down vote up
@Override
public Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> getTokenizers() {
    // Each entry delegates to a static factory method on HanLPTokenizerFactory.
    Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> providers = new HashMap<>();
    providers.put("hanlp", HanLPTokenizerFactory::getHanLPTokenizerFactory);
    providers.put("hanlp_standard", HanLPTokenizerFactory::getHanLPStandardTokenizerFactory);
    providers.put("hanlp_index", HanLPTokenizerFactory::getHanLPIndexTokenizerFactory);
    providers.put("hanlp_nlp", HanLPTokenizerFactory::getHanLPNLPTokenizerFactory);
    providers.put("hanlp_n_short", HanLPTokenizerFactory::getHanLPNShortTokenizerFactory);
    providers.put("hanlp_dijkstra", HanLPTokenizerFactory::getHanLPDijkstraTokenizerFactory);
    providers.put("hanlp_crf", HanLPTokenizerFactory::getHanLPCRFTokenizerFactory);
    providers.put("hanlp_speed", HanLPTokenizerFactory::getHanLPSpeedTokenizerFactory);
    return providers;
}
 
Example #17
Source File: AnalysiaHLSegPlugin.java    From elasticsearch-analysis-hlseg with Apache License 2.0 5 votes vote down vote up
@Override
public Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> getTokenizers() {
    // Single tokenizer registered under "hlseg_search".
    Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> tokenizers = new HashMap<>();
    tokenizers.put("hlseg_search", HLSegTokenizerFactory::getHLSegSearchTokenizerFactory);
    return tokenizers;
}
 
Example #18
Source File: KomoranPlugin.java    From KOMORAN with Apache License 2.0 4 votes vote down vote up
@Override
public Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> getTokenizers() {
    // Only one tokenizer is exposed, so an immutable singleton map suffices.
    Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> tokenizers =
            singletonMap("komoran-tokenizer", KomoranTokenizerFactory::new);
    return tokenizers;
}
 
Example #19
Source File: SynonymPlugin.java    From elasticsearch-analysis-synonym with Apache License 2.0 4 votes vote down vote up
@Override
public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
    // Only one tokenizer is exposed, so an immutable singleton map suffices.
    Map<String, AnalysisProvider<TokenizerFactory>> tokenizers =
            singletonMap("ngram_synonym", NGramSynonymTokenizerFactory::new);
    return tokenizers;
}
 
Example #20
Source File: SynonymTokenFilterFactory.java    From elasticsearch-analysis-synonym with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a synonym token filter. The synonym rules themselves are parsed with a
 * configurable tokenizer (setting "tokenizer", default "whitespace"), optionally
 * lowercased when "ignore_case" is set.
 *
 * @throws IOException              if loading the synonyms fails
 * @throws IllegalArgumentException if the configured tokenizer is unknown, or if
 *                                  neither `synonyms` nor `synonyms_path` is configured
 */
public SynonymTokenFilterFactory(final IndexSettings indexSettings, final Environment environment, final String name, final Settings settings,
        final AnalysisRegistry analysisRegistry) throws IOException {
    super(indexSettings, name, settings);

    this.ignoreCase = settings.getAsBoolean("ignore_case", false);
    final boolean expand = settings.getAsBoolean("expand", true);

    final String tokenizerName = settings.get("tokenizer", "whitespace");

    // The registry may be null (e.g. in restricted contexts); in that case the
    // analyzer below falls back to a plain WhitespaceTokenizer.
    AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory = null;
    if (analysisRegistry != null) {
        tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(tokenizerName, indexSettings);
        if (tokenizerFactoryFactory == null) {
            throw new IllegalArgumentException("failed to find tokenizer [" + tokenizerName + "] for synonym token filter");
        }
    }

    final TokenizerFactory tokenizerFactory = tokenizerFactoryFactory == null ? null
            : tokenizerFactoryFactory.get(indexSettings, environment, tokenizerName, AnalysisRegistry
                    .getSettingsFromIndexSettings(indexSettings, AnalysisRegistry.INDEX_ANALYSIS_TOKENIZER + "." + tokenizerName));

    // Analyzer used only for parsing the synonym rules, not for indexing.
    final Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(final String fieldName) {
            final Tokenizer tokenizer = tokenizerFactory == null ? new WhitespaceTokenizer() : tokenizerFactory.create();
            final TokenStream stream = ignoreCase ? new LowerCaseFilter(tokenizer) : tokenizer;
            return new TokenStreamComponents(tokenizer, stream);
        }
    };

    synonymLoader = new SynonymLoader(environment, settings, expand, analyzer);
    // An absent synonym map is tolerated (with a warning) when a synonyms source
    // was configured but produced nothing; it is an error when none was configured.
    if (synonymLoader.getSynonymMap() == null) {
        if (settings.getAsList("synonyms", null) != null) {
            logger.warn("synonyms values are empty.");
        } else if (settings.get("synonyms_path") != null) {
            logger.warn("synonyms_path[{}] is empty.", settings.get("synonyms_path"));
        } else {
            throw new IllegalArgumentException("synonym requires either `synonyms` or `synonyms_path` to be configured");
        }
    }
}
 
Example #21
Source File: AnalysisURLPlugin.java    From elasticsearch-analysis-url with Apache License 2.0 4 votes vote down vote up
@Override
public Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> getTokenizers() {
    // Only one tokenizer is exposed, so an immutable singleton map suffices.
    Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> tokenizers =
            singletonMap("url", URLTokenizerFactory::new);
    return tokenizers;
}
 
Example #22
Source File: StandardnumberAnalyzer.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Captures the tokenizer and standard-number token filter factories used to
 * build this analyzer's token stream.
 */
public StandardnumberAnalyzer(TokenizerFactory tokenizerFactory,
                              StandardnumberTokenFilterFactory stdnumTokenFilterFactory) {
    // Plain assignments; order is irrelevant.
    this.stdnumTokenFilterFactory = stdnumTokenFilterFactory;
    this.tokenizerFactory = tokenizerFactory;
}
 
Example #23
Source File: DynamicSynonymTokenFilterFactory.java    From elasticsearch-analysis-dynamic-synonym with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a synonym token filter whose synonym map is periodically reloaded from
 * the location given by `synonyms_path` (HTTP URL or local file).
 *
 * @throws IOException              if loading the synonym file fails
 * @throws IllegalArgumentException if `synonyms_path` is missing or the
 *                                  configured tokenizer is unknown
 */
public DynamicSynonymTokenFilterFactory(IndexSettings indexSettings, Environment env, AnalysisRegistry analysisRegistry,
                                        String name, Settings settings) throws IOException {

    // Load configuration from the filter settings.
    super(indexSettings, name, settings);
    this.indexName = indexSettings.getIndex().getName();
    this.interval = settings.getAsInt("interval", 60);
    this.ignoreCase = settings.getAsBoolean("ignore_case", false);
    this.expand = settings.getAsBoolean("expand", true);
    this.format = settings.get("format", "");
    this.location = settings.get("synonyms_path");

    logger.info("indexName:{} synonyms_path:{} interval:{} ignore_case:{} expand:{} format:{}",
            indexName, location, interval, ignoreCase, expand, format);

    // Validate required properties: synonyms_path is mandatory.
    if (this.location == null) {
        throw new IllegalArgumentException(
                "dynamic synonym requires `synonyms_path` to be configured");
    }

    // Resolve the tokenizer used to parse the synonym rules (default: whitespace).
    String tokenizerName = settings.get("tokenizer", "whitespace");
    AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory =
            analysisRegistry.getTokenizerProvider(tokenizerName, indexSettings);
    if (tokenizerFactoryFactory == null) {
        throw new IllegalArgumentException("failed to find tokenizer [" + tokenizerName + "] for synonym token filter");
    }
    final TokenizerFactory tokenizerFactory = tokenizerFactoryFactory.get(indexSettings, env, tokenizerName,
            AnalysisRegistry.getSettingsFromIndexSettings(indexSettings, AnalysisRegistry.INDEX_ANALYSIS_TOKENIZER + "." + tokenizerName));
    // Analyzer used only for parsing the synonym rules, not for indexing.
    Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = tokenizerFactory == null ? new WhitespaceTokenizer() : tokenizerFactory.create();
            TokenStream stream = ignoreCase ? new LowerCaseFilter(tokenizer) : tokenizer;
            return new TokenStreamComponents(tokenizer, stream);
        }
    };


    // Choose the synonym reload strategy from the location prefix (HTTP vs local file).
    SynonymFile synonymFile;
    if (location.startsWith("http://")) {
        synonymFile = new RemoteSynonymFile(env, analyzer, expand, format, location);
    } else {
        synonymFile = new LocalSynonymFile(env, analyzer, expand, format, location);
    }
    synonymMap = synonymFile.reloadSynonymMap();

    // Register with the monitor pool so the file is re-checked every `interval` seconds.
    scheduledFutures.putIfAbsent(this.indexName, new CopyOnWriteArrayList<ScheduledFuture>());
    scheduledFutures.get(this.indexName)
            .add(monitorPool.scheduleAtFixedRate(new Monitor(synonymFile), interval, interval, TimeUnit.SECONDS));
}
 
Example #24
Source File: AnalysisModule.java    From crate with Apache License 2.0 4 votes vote down vote up
private NamedRegistry<AnalysisProvider<TokenizerFactory>> setupTokenizers(List<AnalysisPlugin> plugins) {
    // Seed the registry with the built-in "standard" tokenizer, then let each
    // plugin contribute its own entries.
    NamedRegistry<AnalysisProvider<TokenizerFactory>> registry = new NamedRegistry<>("tokenizer");
    registry.register("standard", StandardTokenizerFactory::new);
    registry.extractAndRegister(plugins, AnalysisPlugin::getTokenizers);
    return registry;
}
 
Example #25
Source File: AnalysisIkPlugin.java    From Elasticsearch-Tutorial-zh-CN with GNU General Public License v3.0 3 votes vote down vote up
@Override
public Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> getTokenizers() {
    // Register the two IK modes: coarse-grained "ik_smart" and fine-grained "ik_max_word".
    Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> tokenizers = new HashMap<>();
    tokenizers.put("ik_smart", IkTokenizerFactory::getIkSmartTokenizerFactory);
    tokenizers.put("ik_max_word", IkTokenizerFactory::getIkTokenizerFactory);
    return tokenizers;
}
 
Example #26
Source File: AnalysisPlugin.java    From crate with Apache License 2.0 2 votes vote down vote up
/**
 * Override to add additional {@link Tokenizer}s. See {@link #requiresAnalysisSettings(AnalysisProvider)}
 * for how to get the configuration from the index.
 */
default Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
    return emptyMap();
}