Java Code Examples for org.elasticsearch.index.analysis.PreBuiltAnalyzerProviderFactory

The following examples show how to use org.elasticsearch.index.analysis.PreBuiltAnalyzerProviderFactory. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
/**
 * Registers the annotation analysis components with the indices analysis service:
 * an {@code AnnotationAnalyzer} under the analyzer name "default" and a token filter
 * factory producing {@code InlineAnnotationFilter} under the name "annotation_filter".
 *
 * @param settings               settings handed to the superclass constructor
 * @param indicesAnalysisService service whose analyzer and token filter registries are populated
 */
@Inject
public AnnotationIndicesAnalysis(Settings settings,
		IndicesAnalysisService indicesAnalysisService) {
	super(settings);

	// Analyzer factory published under the name "default" with INDICES scope.
	PreBuiltAnalyzerProviderFactory defaultAnalyzerFactory = new PreBuiltAnalyzerProviderFactory(
			"default", AnalyzerScope.INDICES,
			new AnnotationAnalyzer(Lucene.ANALYZER_VERSION));
	indicesAnalysisService.analyzerProviderFactories().put("default", defaultAnalyzerFactory);

	// Token filter factory that wraps the incoming stream in an InlineAnnotationFilter.
	TokenFilterFactory annotationFilterFactory = new TokenFilterFactory() {
		@Override
		public String name() {
			return "annotation_filter";
		}

		@Override
		public TokenStream create(TokenStream tokenStream) {
			return new InlineAnnotationFilter(tokenStream);
		}
	};
	indicesAnalysisService.tokenFilterFactories().put("annotation_filter",
			new PreBuiltTokenFilterFactoryFactory(annotationFilterFactory));
}
 
Example 2
Source Project: crate   Source File: AnalysisModule.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Assembles the {@code AnalysisRegistry} from the components contributed by the given plugins.
 *
 * <p>The named registries (char filters, token filters, tokenizers, analyzers, normalizers)
 * are set up first, followed by the pre-configured component maps; all of them are then
 * handed to the registry constructor together with the environment.
 *
 * @param environment environment whose settings feed the hunspell service and the registry
 * @param plugins     analysis plugins queried by the individual setup steps
 * @throws IOException if one of the setup steps or the registry construction raises it
 */
public AnalysisModule(Environment environment, List<AnalysisPlugin> plugins) throws IOException {
    // Named registries; the token filters need the hunspell service, so it is built in between.
    NamedRegistry<AnalysisProvider<CharFilterFactory>> charFilterRegistry = setupCharFilters(plugins);
    NamedRegistry<org.apache.lucene.analysis.hunspell.Dictionary> dictionaryRegistry = setupHunspellDictionaries(plugins);
    HunspellService hunspell = new HunspellService(
            environment.settings(), environment, dictionaryRegistry.getRegistry());
    NamedRegistry<AnalysisProvider<TokenFilterFactory>> tokenFilterRegistry = setupTokenFilters(plugins, hunspell);
    NamedRegistry<AnalysisProvider<TokenizerFactory>> tokenizerRegistry = setupTokenizers(plugins);
    NamedRegistry<AnalysisProvider<AnalyzerProvider<?>>> analyzerRegistry = setupAnalyzers(plugins);
    NamedRegistry<AnalysisProvider<AnalyzerProvider<?>>> normalizerRegistry = setupNormalizers();

    // Pre-configured components, keyed by name.
    Map<String, PreConfiguredCharFilter> builtInCharFilters = setupPreConfiguredCharFilters(plugins);
    Map<String, PreConfiguredTokenFilter> builtInTokenFilters = setupPreConfiguredTokenFilters(plugins);
    Map<String, PreConfiguredTokenizer> builtInTokenizers = setupPreConfiguredTokenizers(plugins);
    Map<String, PreBuiltAnalyzerProviderFactory> builtInAnalyzers = setupPreBuiltAnalyzerProviderFactories(plugins);

    this.analysisRegistry = new AnalysisRegistry(
            environment,
            charFilterRegistry.getRegistry(),
            tokenFilterRegistry.getRegistry(),
            tokenizerRegistry.getRegistry(),
            analyzerRegistry.getRegistry(),
            normalizerRegistry.getRegistry(),
            builtInCharFilters,
            builtInTokenFilters,
            builtInTokenizers,
            builtInAnalyzers);
}
 
Example 3
/**
 * Reads the BosonNLP options from the settings and registers the "bosonnlp" analyzer and
 * tokenizer with the indices analysis service.
 *
 * <p>Settings read (with defaults): {@code API_URL} (""), {@code API_TOKEN} (""),
 * {@code space_mode} (0), {@code oov_level} (3), {@code t2s} (0) and
 * {@code special_char_conv} (0). The historical misspelling {@code spechial_char_conv}
 * is still honoured when the correctly spelled key is absent.
 *
 * @param settings               source of the options listed above; also passed to the superclass
 * @param indicesAnalysisService service whose analyzer and tokenizer registries are populated
 * @throws NumberFormatException if one of the numeric settings is not a valid integer
 */
@Inject
public BosonNLPIndicesAnalysis(final Settings settings, IndicesAnalysisService indicesAnalysisService) {
    super(settings);
    // Get all the arguments from settings
    this.TAG_URL = settings.get("API_URL", "");
    this.BOSONNLP_API_TOKEN = settings.get("API_TOKEN", "");
    this.spaceMode = Integer.parseInt(settings.get("space_mode", "0"));
    this.oovLevel = Integer.parseInt(settings.get("oov_level", "3"));
    this.t2s = Integer.parseInt(settings.get("t2s", "0"));
    // Prefer the correctly spelled key; fall back to the legacy misspelled one so that
    // existing configurations keep working.
    this.specialCharConv = Integer.parseInt(
            settings.get("special_char_conv", settings.get("spechial_char_conv", "0")));

    // Register the bosonnlp type analyzer
    indicesAnalysisService.analyzerProviderFactories().put("bosonnlp",
            new PreBuiltAnalyzerProviderFactory("bosonnlp", AnalyzerScope.GLOBAL,
                    new BosonNLPAnalyzer(TAG_URL, BOSONNLP_API_TOKEN, spaceMode, oovLevel, t2s, specialCharConv)));

    // Register the bosonnlp type tokenizer
    indicesAnalysisService.tokenizerFactories().put("bosonnlp",
            new PreBuiltTokenizerFactoryFactory(new TokenizerFactory() {

                @Override
                public String name() {
                    return "bosonnlp";
                }

                @Override
                public Tokenizer create() {
                    try {
                        return new BosonNLPTokenizer(TAG_URL, BOSONNLP_API_TOKEN, spaceMode, oovLevel, t2s, specialCharConv);
                    } catch (JSONException | IOException | UnirestException e) {
                        // Fail here with the real cause instead of handing back null, which
                        // would only surface later as an unrelated NullPointerException.
                        // The API token is deliberately kept out of the message.
                        throw new IllegalStateException("Failed to create bosonnlp tokenizer", e);
                    }
                }

            }));

}
 
Example 4
Source Project: crate   Source File: AnalysisModule.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Gathers the pre-built analyzer provider factories of all plugins into one map keyed by
 * factory name.
 *
 * @param plugins plugins whose {@code getPreBuiltAnalyzerProviderFactories()} results are registered
 * @return an unmodifiable view of the registry contents
 */
static Map<String, PreBuiltAnalyzerProviderFactory> setupPreBuiltAnalyzerProviderFactories(List<AnalysisPlugin> plugins) {
    NamedRegistry<PreBuiltAnalyzerProviderFactory> preBuiltAnalyzers = new NamedRegistry<>("pre-built analyzer");
    // Register every factory under its own name, plugin by plugin, in list order.
    plugins.forEach(plugin ->
        plugin.getPreBuiltAnalyzerProviderFactories()
            .forEach(factory -> preBuiltAnalyzers.register(factory.getName(), factory)));
    return unmodifiableMap(preBuiltAnalyzers.getRegistry());
}
 
Example 5
Source Project: crate   Source File: AnalysisPlugin.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Returns the pre-built {@link Analyzer} provider factories contributed by this plugin.
 * The default implementation contributes none; override it to add pre-configured analyzers.
 *
 * @return the factories to register, an empty list by default
 */
default List<PreBuiltAnalyzerProviderFactory> getPreBuiltAnalyzerProviderFactories()
{
    return emptyList();
}
 
Example 6
Source Project: crate   Source File: CommonAnalysisPlugin.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Lists the pre-built analyzer provider factories supplied by this plugin: three general
 * purpose analyzers ("standard_html_strip", "pattern", "snowball") followed by the
 * language analyzers, each paired with its caching strategy.
 *
 * @return a new mutable list of factories in registration order
 */
@Override
public List<PreBuiltAnalyzerProviderFactory> getPreBuiltAnalyzerProviderFactories() {
    List<PreBuiltAnalyzerProviderFactory> factories = new ArrayList<>();

    // General purpose analyzers:
    factories.add(new PreBuiltAnalyzerProviderFactory(
        "standard_html_strip",
        CachingStrategy.ELASTICSEARCH,
        () -> new StandardHtmlStripAnalyzer(CharArraySet.EMPTY_SET)));
    factories.add(new PreBuiltAnalyzerProviderFactory(
        "pattern",
        CachingStrategy.ELASTICSEARCH,
        () -> new PatternAnalyzer(
            Regex.compile("\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/, null), true, CharArraySet.EMPTY_SET)));
    factories.add(new PreBuiltAnalyzerProviderFactory(
        "snowball",
        CachingStrategy.LUCENE,
        () -> new SnowballAnalyzer("English", EnglishAnalyzer.ENGLISH_STOP_WORDS_SET)));

    // Language analyzers:
    factories.add(new PreBuiltAnalyzerProviderFactory("arabic", CachingStrategy.LUCENE, ArabicAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("armenian", CachingStrategy.LUCENE, ArmenianAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("basque", CachingStrategy.LUCENE, BasqueAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("bengali", CachingStrategy.LUCENE, BengaliAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("brazilian", CachingStrategy.LUCENE, BrazilianAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("bulgarian", CachingStrategy.LUCENE, BulgarianAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("catalan", CachingStrategy.LUCENE, CatalanAnalyzer::new));
    // chinese analyzer: only for old indices, best effort
    factories.add(new PreBuiltAnalyzerProviderFactory("chinese", CachingStrategy.ONE, StandardAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("cjk", CachingStrategy.LUCENE, CJKAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("czech", CachingStrategy.LUCENE, CzechAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("danish", CachingStrategy.LUCENE, DanishAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("dutch", CachingStrategy.LUCENE, DutchAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("english", CachingStrategy.LUCENE, EnglishAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("finnish", CachingStrategy.LUCENE, FinnishAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("french", CachingStrategy.LUCENE, FrenchAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("galician", CachingStrategy.LUCENE, GalicianAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("german", CachingStrategy.LUCENE, GermanAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("greek", CachingStrategy.LUCENE, GreekAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("hindi", CachingStrategy.LUCENE, HindiAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("hungarian", CachingStrategy.LUCENE, HungarianAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("indonesian", CachingStrategy.LUCENE, IndonesianAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("irish", CachingStrategy.LUCENE, IrishAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("italian", CachingStrategy.LUCENE, ItalianAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("latvian", CachingStrategy.LUCENE, LatvianAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("lithuanian", CachingStrategy.LUCENE, LithuanianAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("norwegian", CachingStrategy.LUCENE, NorwegianAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("persian", CachingStrategy.LUCENE, PersianAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("portuguese", CachingStrategy.LUCENE, PortugueseAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("romanian", CachingStrategy.LUCENE, RomanianAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("russian", CachingStrategy.LUCENE, RussianAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("sorani", CachingStrategy.LUCENE, SoraniAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("spanish", CachingStrategy.LUCENE, SpanishAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("swedish", CachingStrategy.LUCENE, SwedishAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("turkish", CachingStrategy.LUCENE, TurkishAnalyzer::new));
    factories.add(new PreBuiltAnalyzerProviderFactory("thai", CachingStrategy.LUCENE, ThaiAnalyzer::new));
    return factories;
}