Java Code Examples for org.elasticsearch.index.analysis.TokenizerFactory

The following examples show how to use org.elasticsearch.index.analysis.TokenizerFactory. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: es-ik — Source File: IkESPluginTest.java — License: Apache License 2.0
@Test
public void testDefaultsIcuAnalysis() {
    // Verify that the IK plugin registers its tokenizer under the name "ik_tokenizer".
    Index index = new Index("test");

    Settings indexSettings = ImmutableSettings.settingsBuilder()
            .put("path.home", "none")
            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
            .build();

    // Parent injector provides the node-level (indices) analysis services.
    Injector parent = new ModulesBuilder()
            .add(new SettingsModule(ImmutableSettings.EMPTY),
                 new EnvironmentModule(new Environment(indexSettings)),
                 new IndicesAnalysisModule())
            .createInjector();

    // Child injector wires the index-level analysis module with the IK binder processor.
    Injector child = new ModulesBuilder()
            .add(new IndexSettingsModule(index, indexSettings),
                 new IndexNameModule(index),
                 new AnalysisModule(ImmutableSettings.EMPTY,
                         parent.getInstance(IndicesAnalysisService.class))
                         .addProcessor(new IKAnalysisBinderProcessor()))
            .createChildInjector(parent);

    AnalysisService analysisService = child.getInstance(AnalysisService.class);

    // The registered name must resolve to the plugin's factory type.
    MatcherAssert.assertThat(analysisService.tokenizer("ik_tokenizer"),
            instanceOf(IKTokenizerFactory.class));
}
 
Example 2
public void testDefaultsIcuAnalysis() throws IOException {
    // Build a test analysis chain with the ICU bundle plugin installed.
    TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), Settings.EMPTY,
            new BundlePlugin(Settings.EMPTY));

    // Char filter registration.
    assertThat(analysis.charFilter.get("icu_normalizer"),
            instanceOf(IcuNormalizerCharFilterFactory.class));

    // Tokenizer registration.
    assertThat(analysis.tokenizer.get("icu_tokenizer"),
            instanceOf(IcuTokenizerFactory.class));

    // Token filter registrations.
    assertThat(analysis.tokenFilter.get("icu_normalizer"),
            instanceOf(IcuNormalizerTokenFilterFactory.class));
    assertThat(analysis.tokenFilter.get("icu_folding"),
            instanceOf(IcuFoldingTokenFilterFactory.class));
    assertThat(analysis.tokenFilter.get("icu_transform"),
            instanceOf(IcuTransformTokenFilterFactory.class));

    // Analyzer registration.
    assertThat(analysis.indexAnalyzers.get("icu_collation"),
            instanceOf(NamedAnalyzer.class));
}
 
Example 3
Source Project: crate — Source File: AnalysisModule.java — License: Apache License 2.0
/**
 * Wires up the analysis registry from the built-in components plus any
 * components contributed by the given {@link AnalysisPlugin}s.
 *
 * @param environment node environment, used for settings and hunspell dictionary loading
 * @param plugins     analysis plugins whose factories are merged into the registries
 * @throws IOException propagated from hunspell dictionary loading
 */
public AnalysisModule(Environment environment, List<AnalysisPlugin> plugins) throws IOException {
    // Collect named providers for each analysis component type from core + plugins.
    NamedRegistry<AnalysisProvider<CharFilterFactory>> charFilters = setupCharFilters(plugins);
    NamedRegistry<org.apache.lucene.analysis.hunspell.Dictionary> hunspellDictionaries = setupHunspellDictionaries(plugins);
    // The hunspell service is built first because token filter setup receives it.
    HunspellService hunspellService = new HunspellService(environment.settings(), environment, hunspellDictionaries.getRegistry());
    NamedRegistry<AnalysisProvider<TokenFilterFactory>> tokenFilters = setupTokenFilters(plugins, hunspellService);
    NamedRegistry<AnalysisProvider<TokenizerFactory>> tokenizers = setupTokenizers(plugins);
    NamedRegistry<AnalysisProvider<AnalyzerProvider<?>>> analyzers = setupAnalyzers(plugins);
    NamedRegistry<AnalysisProvider<AnalyzerProvider<?>>> normalizers = setupNormalizers();

    // Pre-configured components are ready-made instances that need no index settings.
    Map<String, PreConfiguredCharFilter> preConfiguredCharFilters = setupPreConfiguredCharFilters(plugins);
    Map<String, PreConfiguredTokenFilter> preConfiguredTokenFilters = setupPreConfiguredTokenFilters(plugins);
    Map<String, PreConfiguredTokenizer> preConfiguredTokenizers = setupPreConfiguredTokenizers(plugins);
    Map<String, PreBuiltAnalyzerProviderFactory> preConfiguredAnalyzers = setupPreBuiltAnalyzerProviderFactories(plugins);

    analysisRegistry = new AnalysisRegistry(environment,
            charFilters.getRegistry(), tokenFilters.getRegistry(), tokenizers.getRegistry(),
            analyzers.getRegistry(), normalizers.getRegistry(),
            preConfiguredCharFilters, preConfiguredTokenFilters, preConfiguredTokenizers, preConfiguredAnalyzers);
}
 
Example 4
Source Project: crate — Source File: CommonAnalysisPlugin.java — License: Apache License 2.0
@Override
public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
    // TreeMap keeps the registry sorted by tokenizer name; registrations are
    // listed alphabetically here for readability (insertion order is irrelevant).
    Map<String, AnalysisProvider<TokenizerFactory>> tokenizers = new TreeMap<>();
    // "PathHierarchy" is the legacy camel-case alias of "path_hierarchy".
    tokenizers.put("PathHierarchy", PathHierarchyTokenizerFactory::new);
    tokenizers.put("char_group", CharGroupTokenizerFactory::new);
    tokenizers.put("classic", ClassicTokenizerFactory::new);
    tokenizers.put("edge_ngram", EdgeNGramTokenizerFactory::new);
    tokenizers.put("keyword", KeywordTokenizerFactory::new);
    tokenizers.put("letter", LetterTokenizerFactory::new);
    tokenizers.put("lowercase", LowerCaseTokenizerFactory::new);
    tokenizers.put("ngram", NGramTokenizerFactory::new);
    tokenizers.put("path_hierarchy", PathHierarchyTokenizerFactory::new);
    tokenizers.put("pattern", PatternTokenizerFactory::new);
    tokenizers.put("simple_pattern", SimplePatternTokenizerFactory::new);
    tokenizers.put("simple_pattern_split", SimplePatternSplitTokenizerFactory::new);
    tokenizers.put("thai", ThaiTokenizerFactory::new);
    tokenizers.put("uax_url_email", UAX29URLEmailTokenizerFactory::new);
    tokenizers.put("whitespace", WhitespaceTokenizerFactory::new);
    return tokenizers;
}
 
Example 5
@Override
public Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> getTokenizers() {
    // Expose the HLSeg search tokenizer under the name "hlseg_search".
    Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> tokenizers = new HashMap<>();
    tokenizers.put("hlseg_search", HLSegTokenizerFactory::getHLSegSearchTokenizerFactory);
    return tokenizers;
}
 
Example 6
Source Project: elasticsearch-jieba-plugin — Source File: AnalysisJiebaPlugin.java — License: MIT License
@Override
public Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> getTokenizers() {
  // Register the two jieba modes: search-oriented and index-oriented segmentation.
  Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> tokenizers = new HashMap<>();
  tokenizers.put("jieba_search", JiebaTokenizerFactory::getJiebaSearchTokenizerFactory);
  tokenizers.put("jieba_index", JiebaTokenizerFactory::getJiebaIndexTokenizerFactory);
  return tokenizers;
}
 
Example 7
@Override
public Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> getTokenizers() {
    // One entry per HanLP segmentation algorithm; "hanlp" is the default/standard one.
    Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> providers = new HashMap<>();
    providers.put("hanlp", HanLPTokenizerFactory::getHanLPTokenizerFactory);
    providers.put("hanlp_standard", HanLPTokenizerFactory::getHanLPStandardTokenizerFactory);
    providers.put("hanlp_index", HanLPTokenizerFactory::getHanLPIndexTokenizerFactory);
    providers.put("hanlp_nlp", HanLPTokenizerFactory::getHanLPNLPTokenizerFactory);
    providers.put("hanlp_n_short", HanLPTokenizerFactory::getHanLPNShortTokenizerFactory);
    providers.put("hanlp_dijkstra", HanLPTokenizerFactory::getHanLPDijkstraTokenizerFactory);
    providers.put("hanlp_crf", HanLPTokenizerFactory::getHanLPCRFTokenizerFactory);
    providers.put("hanlp_speed", HanLPTokenizerFactory::getHanLPSpeedTokenizerFactory);
    return providers;
}
 
Example 8
@Override
public Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> getTokenizers() {
    // Dash-separated HanLP tokenizer names; "hanlp" aliases the standard variant.
    Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> registrations = new HashMap<>();
    registrations.put("hanlp", HanLPTokenizerFactory::createStandard);
    registrations.put("hanlp-standard", HanLPTokenizerFactory::createStandard);
    registrations.put("hanlp-nlp", HanLPTokenizerFactory::createNLP);
    registrations.put("hanlp-index", HanLPIndexAnalyzerFactory::new);
    registrations.put("hanlp-nshort", HanLPTokenizerFactory::createNShort);
    registrations.put("hanlp-shortest", HanLPTokenizerFactory::createShortest);
    registrations.put("hanlp-crf", HanLPTokenizerFactory::createCRF);
    registrations.put("hanlp-speed", HanLPTokenizerFactory::createSpeed);
    return registrations;
}
 
Example 9
Source Project: mynlp — Source File: MynlpPlugin.java — License: Apache License 2.0
@Override
public Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> getTokenizers() {
    // All names map to the same factory; "mynlp-cws" is gated by the enableCws flag.
    Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> tokenizers = new HashMap<>(16);
    tokenizers.put("mynlp", MynlpTokenizerFactory::new);
    tokenizers.put("mynlp-core", MynlpTokenizerFactory::new);
    if (enableCws) {
        tokenizers.put("mynlp-cws", MynlpTokenizerFactory::new);
    }
    return tokenizers;
}
 
Example 10
@Override
public Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> getTokenizers() {
    // lc_index: full pinyin expansion for indexing; lc_search: smart pinyin for querying.
    Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> providers = new HashMap<>();
    providers.put("lc_index", LcPinyinTokenizerFactory::getLcIndexTokenizerFactory);
    providers.put("lc_search", LcPinyinTokenizerFactory::getLcSmartPinyinTokenizerFactory);
    return providers;
}
 
Example 11
@Inject
public BosonNLPIndicesAnalysis(final Settings settings, IndicesAnalysisService indicesAnalysisService) {
    super(settings);
    // Read the BosonNLP endpoint/token and segmentation options from node settings.
    // (settings.get already returns String, so the previous .toString() calls were redundant.)
    this.TAG_URL = settings.get("API_URL", "");
    this.BOSONNLP_API_TOKEN = settings.get("API_TOKEN", "");
    this.spaceMode = Integer.parseInt(settings.get("space_mode", "0"));
    this.oovLevel = Integer.parseInt(settings.get("oov_level", "3"));
    this.t2s = Integer.parseInt(settings.get("t2s", "0"));
    // NOTE(review): "spechial_char_conv" is the key existing configs use; do not "fix"
    // the spelling without a migration, since it would silently break those configs.
    this.specialCharConv = Integer.parseInt(settings.get("spechial_char_conv", "0"));

    // Register the global "bosonnlp" analyzer.
    indicesAnalysisService.analyzerProviderFactories().put("bosonnlp",
            new PreBuiltAnalyzerProviderFactory("bosonnlp", AnalyzerScope.GLOBAL,
                    new BosonNLPAnalyzer(TAG_URL, BOSONNLP_API_TOKEN, spaceMode, oovLevel, t2s, specialCharConv)));

    // Register the "bosonnlp" tokenizer.
    indicesAnalysisService.tokenizerFactories().put("bosonnlp",
            new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() {

                @Override
                public String name() {
                    return "bosonnlp";
                }

                @Override
                public Tokenizer create() {
                    try {
                        return new BosonNLPTokenizer(TAG_URL, BOSONNLP_API_TOKEN, spaceMode, oovLevel, t2s, specialCharConv);
                    } catch (JSONException | IOException | UnirestException e) {
                        // Previously the exception was printed and null returned, which
                        // surfaced later as an opaque NullPointerException; fail fast
                        // with the original cause attached instead.
                        throw new IllegalStateException("failed to create BosonNLP tokenizer", e);
                    }
                }

            }));
}
 
Example 12
Source Project: word — Source File: ChineseWordPlugin.java — License: Apache License 2.0
@Override
public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
    // Both names resolve to the same word-segmentation tokenizer factory.
    Map<String, AnalysisProvider<TokenizerFactory>> tokenizers = new HashMap<>();
    tokenizers.put("word_tokenizer", ChineseWordTokenizerFactory::new);
    tokenizers.put("word_sentence", ChineseWordTokenizerFactory::new);
    return tokenizers;
}
 
Example 13
@Override
public void build(final Map<String, TokenizerFactory> tokenizers,
                  final Map<String, CharFilterFactory> charFilters,
                  final Map<String, TokenFilterFactory> tokenFilters) {
    // Resolve the configured char filters by name, failing loudly on unknown names.
    List<CharFilterFactory> resolvedCharFilters = new ArrayList<>();
    for (String charFilterName : analyzerSettings.getAsList("char_filter")) {
        CharFilterFactory factory = charFilters.get(charFilterName);
        if (factory == null) {
            throw new IllegalArgumentException("hyphen analyzer [" + name()
                    + "] failed to find char_filter under name [" + charFilterName + "]");
        }
        resolvedCharFilters.add(factory);
    }
    // The hyphen token filter always runs first, followed by any configured filters.
    List<TokenFilterFactory> resolvedTokenFilters = new ArrayList<>();
    resolvedTokenFilters.add(tokenFilterFactory);
    for (String tokenFilterName : analyzerSettings.getAsList("filter")) {
        TokenFilterFactory factory = tokenFilters.get(tokenFilterName);
        if (factory == null) {
            throw new IllegalArgumentException("hyphen analyzer [" + name()
                    + "] failed to find filter under name [" + tokenFilterName + "]");
        }
        resolvedTokenFilters.add(factory);
    }
    // Assemble the custom analyzer with the configured position/offset gaps.
    this.customAnalyzer = new CustomAnalyzer(name(), tokenizerFactory,
            resolvedCharFilters.toArray(new CharFilterFactory[0]),
            resolvedTokenFilters.toArray(new TokenFilterFactory[0]),
            analyzerSettings.getAsInt("position_offset_gap", 0),
            analyzerSettings.getAsInt("offset_gap", -1)
    );
}
 
Example 14
@Override
public void build(final Map<String, TokenizerFactory> tokenizers,
                  final Map<String, CharFilterFactory> charFilters,
                  final Map<String, TokenFilterFactory> tokenFilters) {
    // Look up each configured char filter; an unknown name is a configuration error.
    List<CharFilterFactory> charFilterChain = new ArrayList<>();
    for (String charFilterName : analyzerSettings.getAsList("char_filter")) {
        CharFilterFactory charFilter = charFilters.get(charFilterName);
        if (charFilter == null) {
            throw new IllegalArgumentException("Sortform Analyzer [" + name() +
                    "] failed to find char_filter under name [" + charFilterName + "]");
        }
        charFilterChain.add(charFilter);
    }
    // Look up each configured token filter the same way.
    List<TokenFilterFactory> tokenFilterChain = new ArrayList<>();
    for (String tokenFilterName : analyzerSettings.getAsList("filter")) {
        TokenFilterFactory tokenFilter = tokenFilters.get(tokenFilterName);
        if (tokenFilter == null) {
            throw new IllegalArgumentException("Sortform Analyzer [" + name() +
                    "] failed to find filter under name [" + tokenFilterName + "]");
        }
        tokenFilterChain.add(tokenFilter);
    }
    // Gap settings control position/offset increments between multi-valued fields.
    int positionOffsetGap = analyzerSettings.getAsInt("position_offset_gap", 0);
    int offsetGap = analyzerSettings.getAsInt("offset_gap", -1);
    this.customAnalyzer = new CustomAnalyzer(name(), tokenizerFactory,
            charFilterChain.toArray(new CharFilterFactory[0]),
            tokenFilterChain.toArray(new TokenFilterFactory[0]),
            positionOffsetGap,
            offsetGap
    );
}
 
Example 15
@Override
public Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> getTokenizers() {
    // LinkedHashMap preserves registration order; each feature group can be
    // switched off via its "plugins.xbib.*.enabled" setting (all default to true).
    Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> registrations = new LinkedHashMap<>();
    if (settings.getAsBoolean("plugins.xbib.icu.enabled", true)) {
        registrations.put("icu_collation_tokenizer", IcuCollationTokenizerFactory::new);
        registrations.put("icu_tokenizer", IcuTokenizerFactory::new);
    }
    if (settings.getAsBoolean("plugins.xbib.hyphen.enabled", true)) {
        registrations.put("hyphen", HyphenTokenizerFactory::new);
    }
    if (settings.getAsBoolean("plugins.xbib.naturalsort.enabled", true)) {
        registrations.put("naturalsort", NaturalSortKeyTokenizerFactory::new);
    }
    return registrations;
}
 
Example 16
Source Project: crate — Source File: ESTestCase.java — License: Apache License 2.0
public TestAnalysis(IndexAnalyzers indexAnalyzers,
                    Map<String, TokenFilterFactory> tokenFilter,
                    Map<String, TokenizerFactory> tokenizer,
                    Map<String, CharFilterFactory> charFilter) {
    // Plain value holder: capture the registries built for the test.
    this.charFilter = charFilter;
    this.tokenizer = tokenizer;
    this.tokenFilter = tokenFilter;
    this.indexAnalyzers = indexAnalyzers;
}
 
Example 17
Source Project: crate — Source File: MockKeywordPlugin.java — License: Apache License 2.0
@Override
public Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> getTokenizers() {
    // Register a "keyword" tokenizer backed by Lucene's MockTokenizer for tests.
    return singletonMap("keyword", (indexSettings, environment, name, settings) ->
            new TokenizerFactory() {
                @Override
                public Tokenizer create() {
                    // KEYWORD mode emits the whole input as a single token; no lowercasing.
                    return new MockTokenizer(MockTokenizer.KEYWORD, false);
                }
            });
}
 
Example 18
/**
 * Builds a synonym token filter whose synonym map is loaded from a local or
 * remote file and periodically reloaded on a fixed interval.
 *
 * @throws IOException propagated from reading the synonym file
 */
public DynamicSynonymTokenFilterFactory(IndexSettings indexSettings, Environment env, AnalysisRegistry analysisRegistry,
                                        String name, Settings settings) throws IOException {

    // Load configuration.
    super(indexSettings, name, settings);
    this.indexName = indexSettings.getIndex().getName();
    this.interval = settings.getAsInt("interval", 60);
    this.ignoreCase = settings.getAsBoolean("ignore_case", false);
    this.expand = settings.getAsBoolean("expand", true);
    this.format = settings.get("format", "");
    this.location = settings.get("synonyms_path");

    logger.info("indexName:{} synonyms_path:{} interval:{} ignore_case:{} expand:{} format:{}",
            indexName, location, interval, ignoreCase, expand, format);

    // Validate required settings.
    if (this.location == null) {
        throw new IllegalArgumentException(
                "dynamic synonym requires `synonyms_path` to be configured");
    }

    // Resolve the tokenizer used to parse the synonym rules themselves.
    String tokenizerName = settings.get("tokenizer", "whitespace");
    AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory =
            analysisRegistry.getTokenizerProvider(tokenizerName, indexSettings);
    if (tokenizerFactoryFactory == null) {
        throw new IllegalArgumentException("failed to find tokenizer [" + tokenizerName + "] for synonym token filter");
    }
    final TokenizerFactory tokenizerFactory = tokenizerFactoryFactory.get(indexSettings, env, tokenizerName,
            AnalysisRegistry.getSettingsFromIndexSettings(indexSettings, AnalysisRegistry.INDEX_ANALYSIS_TOKENIZER + "." + tokenizerName));
    Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            // tokenizerFactory cannot be null here (checked above); the null guard is defensive.
            Tokenizer tokenizer = tokenizerFactory == null ? new WhitespaceTokenizer() : tokenizerFactory.create();
            TokenStream stream = ignoreCase ? new LowerCaseFilter(tokenizer) : tokenizer;
            return new TokenStreamComponents(tokenizer, stream);
        }
    };


    // Choose the reload strategy based on the location prefix.
    // NOTE(review): only "http://" selects the remote loader; "https://" would fall
    // through to local file loading — confirm this is intended.
    SynonymFile synonymFile;
    if (location.startsWith("http://")) {
        synonymFile = new RemoteSynonymFile(env, analyzer, expand, format, location);
    } else {
        synonymFile = new LocalSynonymFile(env, analyzer, expand, format, location);
    }
    synonymMap = synonymFile.reloadSynonymMap();

    // Enqueue for periodic monitoring so the synonym map is reloaded every `interval` seconds.
    scheduledFutures.putIfAbsent(this.indexName, new CopyOnWriteArrayList<ScheduledFuture>());
    scheduledFutures.get(this.indexName)
            .add(monitorPool.scheduleAtFixedRate(new Monitor(synonymFile), interval, interval, TimeUnit.SECONDS));
}
 
Example 19
@Override
public Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> getTokenizers() {
    // Single registration: the "url" tokenizer name maps to URLTokenizerFactory.
    return singletonMap("url", URLTokenizerFactory::new);
}
 
Example 20
Source Project: KOMORAN — Source File: KomoranPlugin.java — License: Apache License 2.0
@Override
public Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> getTokenizers() {
    // Single registration: "komoran-tokenizer" maps to the KOMORAN factory.
    return singletonMap("komoran-tokenizer", KomoranTokenizerFactory::new);
}
 
Example 21
@Override
public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
    // Single registration: "ngram_synonym" maps to the n-gram synonym factory.
    return singletonMap("ngram_synonym", NGramSynonymTokenizerFactory::new);
}
 
Example 22
/**
 * Builds a synonym token filter, resolving the tokenizer used to parse the
 * synonym rules and loading the synonym map via {@code SynonymLoader}.
 *
 * @throws IOException propagated from synonym loading
 */
public SynonymTokenFilterFactory(final IndexSettings indexSettings, final Environment environment, final String name, final Settings settings,
        final AnalysisRegistry analysisRegistry) throws IOException {
    super(indexSettings, name, settings);

    this.ignoreCase = settings.getAsBoolean("ignore_case", false);
    final boolean expand = settings.getAsBoolean("expand", true);

    // Tokenizer used to parse the synonym rules themselves (not the indexed text).
    final String tokenizerName = settings.get("tokenizer", "whitespace");

    // The registry may be absent; in that case the analyzer below falls back to whitespace.
    AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory = null;
    if (analysisRegistry != null) {
        tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(tokenizerName, indexSettings);
        if (tokenizerFactoryFactory == null) {
            throw new IllegalArgumentException("failed to find tokenizer [" + tokenizerName + "] for synonym token filter");
        }
    }

    final TokenizerFactory tokenizerFactory = tokenizerFactoryFactory == null ? null
            : tokenizerFactoryFactory.get(indexSettings, environment, tokenizerName, AnalysisRegistry
                    .getSettingsFromIndexSettings(indexSettings, AnalysisRegistry.INDEX_ANALYSIS_TOKENIZER + "." + tokenizerName));

    final Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(final String fieldName) {
            final Tokenizer tokenizer = tokenizerFactory == null ? new WhitespaceTokenizer() : tokenizerFactory.create();
            final TokenStream stream = ignoreCase ? new LowerCaseFilter(tokenizer) : tokenizer;
            return new TokenStreamComponents(tokenizer, stream);
        }
    };

    synonymLoader = new SynonymLoader(environment, settings, expand, analyzer);
    if (synonymLoader.getSynonymMap() == null) {
        // An empty map is tolerated (with a warning) only when synonyms were configured;
        // having neither `synonyms` nor `synonyms_path` is a hard configuration error.
        if (settings.getAsList("synonyms", null) != null) {
            logger.warn("synonyms values are empty.");
        } else if (settings.get("synonyms_path") != null) {
            logger.warn("synonyms_path[{}] is empty.", settings.get("synonyms_path"));
        } else {
            throw new IllegalArgumentException("synonym requires either `synonyms` or `synonyms_path` to be configured");
        }
    }
}
 
Example 23
public StandardnumberAnalyzer(TokenizerFactory tokenizerFactory,
                              StandardnumberTokenFilterFactory stdnumTokenFilterFactory) {
    // Keep both factories; the analyzer assembles its token stream from them on demand.
    this.stdnumTokenFilterFactory = stdnumTokenFilterFactory;
    this.tokenizerFactory = tokenizerFactory;
}
 
Example 24
Source Project: crate — Source File: AnalysisModule.java — License: Apache License 2.0
/**
 * Builds the tokenizer registry: the built-in "standard" tokenizer plus any
 * tokenizers contributed by the given analysis plugins.
 */
private NamedRegistry<AnalysisProvider<TokenizerFactory>> setupTokenizers(List<AnalysisPlugin> plugins) {
    NamedRegistry<AnalysisProvider<TokenizerFactory>> tokenizers = new NamedRegistry<>("tokenizer");
    tokenizers.register("standard", StandardTokenizerFactory::new);
    // Merge in plugin-provided tokenizers by name.
    tokenizers.extractAndRegister(plugins, AnalysisPlugin::getTokenizers);
    return tokenizers;
}
 
Example 25
@Override
public Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> getTokenizers() {
    // ik_smart: coarse-grained segmentation; ik_max_word: finest-grained segmentation.
    Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> tokenizers = new HashMap<>();
    tokenizers.put("ik_smart", IkTokenizerFactory::getIkSmartTokenizerFactory);
    tokenizers.put("ik_max_word", IkTokenizerFactory::getIkTokenizerFactory);
    return tokenizers;
}
 
Example 26
Source Project: crate — Source File: AnalysisPlugin.java — License: Apache License 2.0
/**
 * Override to add additional {@link Tokenizer}s. See {@link #requiresAnalysisSettings(AnalysisProvider)}
 * for how to get the configuration from the index.
 */
default Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
    return emptyMap();
}