org.elasticsearch.index.analysis.PreBuiltAnalyzerProviderFactory Java Examples
The following examples show how to use
org.elasticsearch.index.analysis.PreBuiltAnalyzerProviderFactory.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: AnnotationIndicesAnalysis.java From elasticsearch-analysis-annotation with Apache License 2.0 | 6 votes |
@Inject public AnnotationIndicesAnalysis(Settings settings, IndicesAnalysisService indicesAnalysisService) { super(settings); indicesAnalysisService.analyzerProviderFactories().put( "default", new PreBuiltAnalyzerProviderFactory("default", AnalyzerScope.INDICES, new AnnotationAnalyzer( Lucene.ANALYZER_VERSION))); indicesAnalysisService.tokenFilterFactories().put("annotation_filter", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() { @Override public String name() { return "annotation_filter"; } @Override public TokenStream create(TokenStream tokenStream) { return new InlineAnnotationFilter(tokenStream); } })); }
Example #2
Source File: AnalysisModule.java From crate with Apache License 2.0 | 6 votes |
public AnalysisModule(Environment environment, List<AnalysisPlugin> plugins) throws IOException { NamedRegistry<AnalysisProvider<CharFilterFactory>> charFilters = setupCharFilters(plugins); NamedRegistry<org.apache.lucene.analysis.hunspell.Dictionary> hunspellDictionaries = setupHunspellDictionaries(plugins); HunspellService hunspellService = new HunspellService(environment.settings(), environment, hunspellDictionaries.getRegistry()); NamedRegistry<AnalysisProvider<TokenFilterFactory>> tokenFilters = setupTokenFilters(plugins, hunspellService); NamedRegistry<AnalysisProvider<TokenizerFactory>> tokenizers = setupTokenizers(plugins); NamedRegistry<AnalysisProvider<AnalyzerProvider<?>>> analyzers = setupAnalyzers(plugins); NamedRegistry<AnalysisProvider<AnalyzerProvider<?>>> normalizers = setupNormalizers(); Map<String, PreConfiguredCharFilter> preConfiguredCharFilters = setupPreConfiguredCharFilters(plugins); Map<String, PreConfiguredTokenFilter> preConfiguredTokenFilters = setupPreConfiguredTokenFilters(plugins); Map<String, PreConfiguredTokenizer> preConfiguredTokenizers = setupPreConfiguredTokenizers(plugins); Map<String, PreBuiltAnalyzerProviderFactory> preConfiguredAnalyzers = setupPreBuiltAnalyzerProviderFactories(plugins); analysisRegistry = new AnalysisRegistry(environment, charFilters.getRegistry(), tokenFilters.getRegistry(), tokenizers.getRegistry(), analyzers.getRegistry(), normalizers.getRegistry(), preConfiguredCharFilters, preConfiguredTokenFilters, preConfiguredTokenizers, preConfiguredAnalyzers); }
Example #3
Source File: BosonNLPIndicesAnalysis.java From elasticsearch-analysis-bosonnlp with Apache License 2.0 | 5 votes |
@Inject public BosonNLPIndicesAnalysis(final Settings settings, IndicesAnalysisService indicesAnalysisService) { super(settings); // Get all the arguments from settings this.TAG_URL = settings.get("API_URL", "").toString(); this.BOSONNLP_API_TOKEN = settings.get("API_TOKEN", "").toString(); this.spaceMode = Integer.parseInt(settings.get("space_mode", "0")); this.oovLevel = Integer.parseInt(settings.get("oov_level", "3")); this.t2s = Integer.parseInt(settings.get("t2s", "0")); this.specialCharConv = Integer.parseInt(settings.get("spechial_char_conv", "0")); // Register the bosonnlp type analyzer indicesAnalysisService.analyzerProviderFactories().put("bosonnlp", new PreBuiltAnalyzerProviderFactory("bosonnlp", AnalyzerScope.GLOBAL, new BosonNLPAnalyzer(TAG_URL, BOSONNLP_API_TOKEN, spaceMode, oovLevel, t2s, specialCharConv))); // Register the bosonnlp type tokenizer indicesAnalysisService.tokenizerFactories().put("bosonnlp", new PreBuiltTokenizerFactoryFactory(new TokenizerFactory(){ @Override public String name() { return "bosonnlp"; } @Override public Tokenizer create() { BosonNLPTokenizer BToken = null; try { BToken = new BosonNLPTokenizer(TAG_URL, BOSONNLP_API_TOKEN, spaceMode, oovLevel, t2s, specialCharConv); } catch (JSONException | IOException | UnirestException e) { e.printStackTrace(); } return BToken; } })); }
Example #4
Source File: AnalysisModule.java From crate with Apache License 2.0 | 5 votes |
static Map<String, PreBuiltAnalyzerProviderFactory> setupPreBuiltAnalyzerProviderFactories(List<AnalysisPlugin> plugins) { NamedRegistry<PreBuiltAnalyzerProviderFactory> preConfiguredCharFilters = new NamedRegistry<>("pre-built analyzer"); for (AnalysisPlugin plugin : plugins) { for (PreBuiltAnalyzerProviderFactory factory : plugin.getPreBuiltAnalyzerProviderFactories()) { preConfiguredCharFilters.register(factory.getName(), factory); } } return unmodifiableMap(preConfiguredCharFilters.getRegistry()); }
Example #5
Source File: AnalysisPlugin.java From crate with Apache License 2.0 | 4 votes |
/** * Override to add additional pre-configured {@link Analyzer}s. */ default List<PreBuiltAnalyzerProviderFactory> getPreBuiltAnalyzerProviderFactories() { return emptyList(); }
Example #6
Source File: CommonAnalysisPlugin.java From crate with Apache License 2.0 | 4 votes |
@Override public List<PreBuiltAnalyzerProviderFactory> getPreBuiltAnalyzerProviderFactories() { List<PreBuiltAnalyzerProviderFactory> analyzers = new ArrayList<>(); analyzers.add(new PreBuiltAnalyzerProviderFactory("standard_html_strip", CachingStrategy.ELASTICSEARCH, () -> new StandardHtmlStripAnalyzer(CharArraySet.EMPTY_SET))); analyzers.add(new PreBuiltAnalyzerProviderFactory("pattern", CachingStrategy.ELASTICSEARCH, () -> new PatternAnalyzer(Regex.compile("\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/, null), true, CharArraySet.EMPTY_SET))); analyzers.add(new PreBuiltAnalyzerProviderFactory("snowball", CachingStrategy.LUCENE, () -> new SnowballAnalyzer("English", EnglishAnalyzer.ENGLISH_STOP_WORDS_SET))); // Language analyzers: analyzers.add(new PreBuiltAnalyzerProviderFactory("arabic", CachingStrategy.LUCENE, ArabicAnalyzer::new)); analyzers.add(new PreBuiltAnalyzerProviderFactory("armenian", CachingStrategy.LUCENE, ArmenianAnalyzer::new)); analyzers.add(new PreBuiltAnalyzerProviderFactory("basque", CachingStrategy.LUCENE, BasqueAnalyzer::new)); analyzers.add(new PreBuiltAnalyzerProviderFactory("bengali", CachingStrategy.LUCENE, BengaliAnalyzer::new)); analyzers.add(new PreBuiltAnalyzerProviderFactory("brazilian", CachingStrategy.LUCENE, BrazilianAnalyzer::new)); analyzers.add(new PreBuiltAnalyzerProviderFactory("bulgarian", CachingStrategy.LUCENE, BulgarianAnalyzer::new)); analyzers.add(new PreBuiltAnalyzerProviderFactory("catalan", CachingStrategy.LUCENE, CatalanAnalyzer::new)); // chinese analyzer: only for old indices, best effort analyzers.add(new PreBuiltAnalyzerProviderFactory("chinese", CachingStrategy.ONE, StandardAnalyzer::new)); analyzers.add(new PreBuiltAnalyzerProviderFactory("cjk", CachingStrategy.LUCENE, CJKAnalyzer::new)); analyzers.add(new PreBuiltAnalyzerProviderFactory("czech", CachingStrategy.LUCENE, CzechAnalyzer::new)); analyzers.add(new PreBuiltAnalyzerProviderFactory("danish", CachingStrategy.LUCENE, DanishAnalyzer::new)); analyzers.add(new PreBuiltAnalyzerProviderFactory("dutch", CachingStrategy.LUCENE, DutchAnalyzer::new)); analyzers.add(new PreBuiltAnalyzerProviderFactory("english", CachingStrategy.LUCENE, EnglishAnalyzer::new)); analyzers.add(new PreBuiltAnalyzerProviderFactory("finnish", CachingStrategy.LUCENE, FinnishAnalyzer::new)); analyzers.add(new PreBuiltAnalyzerProviderFactory("french", CachingStrategy.LUCENE, FrenchAnalyzer::new)); analyzers.add(new PreBuiltAnalyzerProviderFactory("galician", CachingStrategy.LUCENE, GalicianAnalyzer::new)); analyzers.add(new PreBuiltAnalyzerProviderFactory("german", CachingStrategy.LUCENE, GermanAnalyzer::new)); analyzers.add(new PreBuiltAnalyzerProviderFactory("greek", CachingStrategy.LUCENE, GreekAnalyzer::new)); analyzers.add(new PreBuiltAnalyzerProviderFactory("hindi", CachingStrategy.LUCENE, HindiAnalyzer::new)); analyzers.add(new PreBuiltAnalyzerProviderFactory("hungarian", CachingStrategy.LUCENE, HungarianAnalyzer::new)); analyzers.add(new PreBuiltAnalyzerProviderFactory("indonesian", CachingStrategy.LUCENE, IndonesianAnalyzer::new)); analyzers.add(new PreBuiltAnalyzerProviderFactory("irish", CachingStrategy.LUCENE, IrishAnalyzer::new)); analyzers.add(new PreBuiltAnalyzerProviderFactory("italian", CachingStrategy.LUCENE, ItalianAnalyzer::new)); analyzers.add(new PreBuiltAnalyzerProviderFactory("latvian", CachingStrategy.LUCENE, LatvianAnalyzer::new)); analyzers.add(new PreBuiltAnalyzerProviderFactory("lithuanian", CachingStrategy.LUCENE, LithuanianAnalyzer::new)); analyzers.add(new PreBuiltAnalyzerProviderFactory("norwegian", CachingStrategy.LUCENE, NorwegianAnalyzer::new)); analyzers.add(new PreBuiltAnalyzerProviderFactory("persian", CachingStrategy.LUCENE, PersianAnalyzer::new)); analyzers.add(new PreBuiltAnalyzerProviderFactory("portuguese", CachingStrategy.LUCENE, PortugueseAnalyzer::new)); analyzers.add(new PreBuiltAnalyzerProviderFactory("romanian", CachingStrategy.LUCENE, RomanianAnalyzer::new)); analyzers.add(new PreBuiltAnalyzerProviderFactory("russian", CachingStrategy.LUCENE, RussianAnalyzer::new)); analyzers.add(new PreBuiltAnalyzerProviderFactory("sorani", CachingStrategy.LUCENE, SoraniAnalyzer::new)); analyzers.add(new PreBuiltAnalyzerProviderFactory("spanish", CachingStrategy.LUCENE, SpanishAnalyzer::new)); analyzers.add(new PreBuiltAnalyzerProviderFactory("swedish", CachingStrategy.LUCENE, SwedishAnalyzer::new)); analyzers.add(new PreBuiltAnalyzerProviderFactory("turkish", CachingStrategy.LUCENE, TurkishAnalyzer::new)); analyzers.add(new PreBuiltAnalyzerProviderFactory("thai", CachingStrategy.LUCENE, ThaiAnalyzer::new)); return analyzers; }