org.elasticsearch.index.analysis.Analysis Java Examples

The following examples show how to use org.elasticsearch.index.analysis.Analysis. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: DutchAnalyzerProvider.java    From crate with Apache License 2.0 5 votes vote down vote up
DutchAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    analyzer = new DutchAnalyzer(
        Analysis.parseStopWords(env, settings, DutchAnalyzer.getDefaultStopSet()),
        Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)
    );
    analyzer.setVersion(version);
}
 
Example #2
Source File: GermanAnalyzerProvider.java    From crate with Apache License 2.0 5 votes vote down vote up
GermanAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    analyzer = new GermanAnalyzer(
        Analysis.parseStopWords(env, settings, GermanAnalyzer.getDefaultStopSet()),
        Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)
    );
    analyzer.setVersion(version);
}
 
Example #3
Source File: EnglishAnalyzerProvider.java    From crate with Apache License 2.0 5 votes vote down vote up
EnglishAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    analyzer = new EnglishAnalyzer(
        Analysis.parseStopWords(env, settings, EnglishAnalyzer.getDefaultStopSet()),
        Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)
    );
    analyzer.setVersion(version);
}
 
Example #4
Source File: BrazilianAnalyzerProvider.java    From crate with Apache License 2.0 5 votes vote down vote up
BrazilianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    analyzer = new BrazilianAnalyzer(
        Analysis.parseStopWords(env, settings, BrazilianAnalyzer.getDefaultStopSet()),
        Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)
    );
    analyzer.setVersion(version);
}
 
Example #5
Source File: AbstractCompoundWordTokenFilterFactory.java    From crate with Apache License 2.0 5 votes vote down vote up
protected AbstractCompoundWordTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);

    minWordSize = settings.getAsInt("min_word_size", CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE);
    minSubwordSize = settings.getAsInt("min_subword_size", CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE);
    maxSubwordSize = settings.getAsInt("max_subword_size", CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE);
    onlyLongestMatch = settings.getAsBoolean("only_longest_match", false);
    wordList = Analysis.getWordSet(env, settings, "word_list");
    if (wordList == null) {
        throw new IllegalArgumentException("word_list must be provided for [" + name + "], either as a path to a file, or directly");
    }
}
 
Example #6
Source File: ItalianAnalyzerProvider.java    From crate with Apache License 2.0 5 votes vote down vote up
ItalianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    analyzer = new ItalianAnalyzer(
        Analysis.parseStopWords(env, settings, ItalianAnalyzer.getDefaultStopSet()),
        Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)
    );
    analyzer.setVersion(version);
}
 
Example #7
Source File: BengaliAnalyzerProvider.java    From crate with Apache License 2.0 5 votes vote down vote up
BengaliAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    analyzer = new BengaliAnalyzer(
        Analysis.parseStopWords(env, settings, BengaliAnalyzer.getDefaultStopSet()),
        Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)
    );
    analyzer.setVersion(version);
}
 
Example #8
Source File: WordDelimiterGraphTokenFilterFactory.java    From crate with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the word-delimiter-graph options from {@code settings}.
 *
 * <p>The constructor resolves, in this order: the optional {@code type_table}, the bit flags
 * (each read through {@code getFlag} with its own default), the optional
 * {@code protected_words} set, and {@code adjust_offsets} (default {@code true}).
 */
public WordDelimiterGraphTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);

    // Sample format for the type table:
    // $ => DIGIT
    // % => DIGIT
    // . => DIGIT
    // \u002C => DIGIT
    // \u200D => ALPHANUM
    final List<String> typeTableRules = Analysis.getWordList(env, settings, "type_table");
    // No custom table configured: use the iterator's default delimiter table.
    this.charTypeTable = typeTableRules != null
        ? parseTypes(typeTableRules)
        : WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE;

    final int combinedFlags =
        // If set, causes parts of words to be generated: "PowerShot" => "Power" "Shot"
        getFlag(GENERATE_WORD_PARTS, settings, "generate_word_parts", true)
        // If set, causes number subwords to be generated: "500-42" => "500" "42"
        | getFlag(GENERATE_NUMBER_PARTS, settings, "generate_number_parts", true)
        // If set, causes maximum runs of word parts to be catenated: "wi-fi" => "wifi"
        | getFlag(CATENATE_WORDS, settings, "catenate_words", false)
        // If set, causes maximum runs of number parts to be catenated: "500-42" => "50042"
        | getFlag(CATENATE_NUMBERS, settings, "catenate_numbers", false)
        // If set, causes all subword parts to be catenated: "wi-fi-4000" => "wifi4000"
        | getFlag(CATENATE_ALL, settings, "catenate_all", false)
        // If set, causes "PowerShot" to be two tokens ("Power-Shot" remains two parts regardless)
        | getFlag(SPLIT_ON_CASE_CHANGE, settings, "split_on_case_change", true)
        // If set, includes original words in subwords: "500-42" => "500" "42" "500-42"
        | getFlag(PRESERVE_ORIGINAL, settings, "preserve_original", false)
        // If set, causes "j2se" to be three tokens: "j" "2" "se"
        | getFlag(SPLIT_ON_NUMERICS, settings, "split_on_numerics", true)
        // If set, causes trailing "'s" to be removed for each subword: "O'Neil's" => "O", "Neil"
        | getFlag(STEM_ENGLISH_POSSESSIVE, settings, "stem_english_possessive", true);

    // If not null, this is the set of tokens to protect from being delimited.
    final Set<?> protectedWords = Analysis.getWordSet(env, settings, "protected_words");
    if (protectedWords != null) {
        this.protoWords = CharArraySet.copy(protectedWords);
    } else {
        this.protoWords = null;
    }

    this.flags = combinedFlags;
    this.adjustOffsets = settings.getAsBoolean("adjust_offsets", true);
}
 
Example #9
Source File: FrenchAnalyzerProvider.java    From crate with Apache License 2.0 5 votes vote down vote up
FrenchAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    analyzer = new FrenchAnalyzer(
        Analysis.parseStopWords(env, settings, FrenchAnalyzer.getDefaultStopSet()),
        Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)
    );
    analyzer.setVersion(version);
}
 
Example #10
Source File: HungarianAnalyzerProvider.java    From crate with Apache License 2.0 5 votes vote down vote up
HungarianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    analyzer = new HungarianAnalyzer(
        Analysis.parseStopWords(env, settings, HungarianAnalyzer.getDefaultStopSet()),
        Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)
    );
    analyzer.setVersion(version);
}
 
Example #11
Source File: CzechAnalyzerProvider.java    From crate with Apache License 2.0 5 votes vote down vote up
CzechAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    analyzer = new CzechAnalyzer(
        Analysis.parseStopWords(env, settings, CzechAnalyzer.getDefaultStopSet()),
        Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)
    );
    analyzer.setVersion(version);
}
 
Example #12
Source File: NorwegianAnalyzerProvider.java    From crate with Apache License 2.0 5 votes vote down vote up
NorwegianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    analyzer = new NorwegianAnalyzer(
        Analysis.parseStopWords(env, settings, NorwegianAnalyzer.getDefaultStopSet()),
        Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)
    );
    analyzer.setVersion(version);
}
 
Example #13
Source File: FinnishAnalyzerProvider.java    From crate with Apache License 2.0 5 votes vote down vote up
FinnishAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    analyzer = new FinnishAnalyzer(
        Analysis.parseStopWords(env, settings, FinnishAnalyzer.getDefaultStopSet()),
        Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)
    );
    analyzer.setVersion(version);
}
 
Example #14
Source File: AbstractCompoundWordTokenFilterFactory.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
public AbstractCompoundWordTokenFilterFactory(Index index, Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettings, name, settings);

    minWordSize = settings.getAsInt("min_word_size", CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE);
    minSubwordSize = settings.getAsInt("min_subword_size", CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE);
    maxSubwordSize = settings.getAsInt("max_subword_size", CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE);
    onlyLongestMatch = settings.getAsBoolean("only_longest_match", false);
    wordList = Analysis.getWordSet(env, settings, "word_list");
    if (wordList == null) {
        throw new IllegalArgumentException("word_list must be provided for [" + name + "], either as a path to a file, or directly");
    }
}
 
Example #15
Source File: RussianAnalyzerProvider.java    From crate with Apache License 2.0 5 votes vote down vote up
RussianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    analyzer = new RussianAnalyzer(
        Analysis.parseStopWords(env, settings, RussianAnalyzer.getDefaultStopSet()),
        Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)
    );
    analyzer.setVersion(version);
}
 
Example #16
Source File: BulgarianAnalyzerProvider.java    From crate with Apache License 2.0 5 votes vote down vote up
BulgarianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    analyzer = new BulgarianAnalyzer(
        Analysis.parseStopWords(env, settings, BulgarianAnalyzer.getDefaultStopSet()),
        Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)
    );
    analyzer.setVersion(version);
}
 
Example #17
Source File: TurkishAnalyzerProvider.java    From crate with Apache License 2.0 5 votes vote down vote up
TurkishAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    analyzer = new TurkishAnalyzer(
        Analysis.parseStopWords(env, settings, TurkishAnalyzer.getDefaultStopSet()),
        Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)
    );
    analyzer.setVersion(version);
}
 
Example #18
Source File: CatalanAnalyzerProvider.java    From crate with Apache License 2.0 5 votes vote down vote up
CatalanAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    analyzer = new CatalanAnalyzer(
        Analysis.parseStopWords(env, settings, CatalanAnalyzer.getDefaultStopSet()),
        Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)
    );
    analyzer.setVersion(version);
}
 
Example #19
Source File: SoraniAnalyzerProvider.java    From crate with Apache License 2.0 5 votes vote down vote up
SoraniAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    analyzer = new SoraniAnalyzer(
        Analysis.parseStopWords(env, settings, SoraniAnalyzer.getDefaultStopSet()),
        Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)
    );
    analyzer.setVersion(version);
}
 
Example #20
Source File: HindiAnalyzerProvider.java    From crate with Apache License 2.0 5 votes vote down vote up
HindiAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    analyzer = new HindiAnalyzer(
        Analysis.parseStopWords(env, settings, HindiAnalyzer.getDefaultStopSet()),
        Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)
    );
    analyzer.setVersion(version);
}
 
Example #21
Source File: LithuanianAnalyzerProvider.java    From crate with Apache License 2.0 5 votes vote down vote up
LithuanianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    analyzer = new LithuanianAnalyzer(
        Analysis.parseStopWords(env, settings, LithuanianAnalyzer.getDefaultStopSet()),
        Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)
    );
    analyzer.setVersion(version);
}
 
Example #22
Source File: KeepWordFilterFactory.java    From crate with Apache License 2.0 5 votes vote down vote up
/**
 * Validates the keep-word settings and loads the keep-word set.
 *
 * <p>Exactly one of {@code KEEP_WORDS_KEY} and {@code KEEP_WORDS_PATH_KEY} must be configured,
 * and {@code ENABLE_POS_INC_KEY} must not be set at all. The word set itself is loaded through
 * {@code Analysis.getWordSet} under {@code KEEP_WORDS_KEY}.
 *
 * @throws IllegalArgumentException if both or neither keep-word sources are configured, or if
 *         {@code ENABLE_POS_INC_KEY} is present
 */
KeepWordFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);

    final List<String> inlineWords = settings.getAsList(KEEP_WORDS_KEY, null);
    final String wordsFilePath = settings.get(KEEP_WORDS_PATH_KEY, null);
    final boolean hasInlineWords = inlineWords != null;
    final boolean hasWordsFile = wordsFilePath != null;
    // Equal flags mean "both configured" or "neither configured"; neither is allowed.
    if (hasInlineWords == hasWordsFile) {
        throw new IllegalArgumentException("keep requires either `" + KEEP_WORDS_KEY + "` or `"
                + KEEP_WORDS_PATH_KEY + "` to be configured");
    }

    final String legacyPositionIncrements = settings.get(ENABLE_POS_INC_KEY);
    if (legacyPositionIncrements != null) {
        throw new IllegalArgumentException(ENABLE_POS_INC_KEY + " is not supported anymore. Please fix your analysis chain");
    }

    this.keepWords = Analysis.getWordSet(env, settings, KEEP_WORDS_KEY);
}
 
Example #23
Source File: IrishAnalyzerProvider.java    From crate with Apache License 2.0 5 votes vote down vote up
IrishAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    analyzer = new IrishAnalyzer(
        Analysis.parseStopWords(env, settings, IrishAnalyzer.getDefaultStopSet()),
        Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)
    );
    analyzer.setVersion(version);
}
 
Example #24
Source File: LatvianAnalyzerProvider.java    From crate with Apache License 2.0 5 votes vote down vote up
LatvianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    analyzer = new LatvianAnalyzer(
        Analysis.parseStopWords(env, settings, LatvianAnalyzer.getDefaultStopSet()),
        Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)
    );
    analyzer.setVersion(version);
}
 
Example #25
Source File: FingerprintAnalyzerProvider.java    From crate with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the provider and builds its {@link FingerprintAnalyzer}.
 *
 * <p>The separator comes from {@code parseSeparator(settings)}. The maximum output size is read
 * under {@code MAX_OUTPUT_SIZE.getPreferredName()} with {@code DEFAULT_MAX_OUTPUT_SIZE} as the
 * default. Stop words come from {@code Analysis.parseStopWords}, called with
 * {@code DEFAULT_STOP_WORDS}.
 */
FingerprintAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);

    final char tokenSeparator = parseSeparator(settings);
    final String maxOutputSizeKey = MAX_OUTPUT_SIZE.getPreferredName();
    final int outputSizeLimit = settings.getAsInt(maxOutputSizeKey, DEFAULT_MAX_OUTPUT_SIZE);
    final CharArraySet configuredStopWords = Analysis.parseStopWords(env, settings, DEFAULT_STOP_WORDS);

    this.analyzer = new FingerprintAnalyzer(configuredStopWords, tokenSeparator, outputSizeLimit);
}
 
Example #26
Source File: MappingCharFilterFactory.java    From crate with Apache License 2.0 5 votes vote down vote up
/**
 * Loads the mapping rules and compiles them into a {@link NormalizeCharMap}.
 *
 * <p>Rules are read through {@code Analysis.getWordList} under the {@code mappings} key, passed
 * to {@code parseRules}, and the built map is stored in {@code normMap}.
 *
 * @throws IllegalArgumentException if no mapping rules could be resolved
 */
MappingCharFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name);

    final List<String> mappingRules = Analysis.getWordList(env, settings, "mappings");
    if (null == mappingRules) {
        throw new IllegalArgumentException("mapping requires either `mappings` or `mappings_path` to be configured");
    }

    final NormalizeCharMap.Builder charMapBuilder = new NormalizeCharMap.Builder();
    parseRules(mappingRules, charMapBuilder);
    this.normMap = charMapBuilder.build();
}
 
Example #27
Source File: IndonesianAnalyzerProvider.java    From crate with Apache License 2.0 5 votes vote down vote up
IndonesianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    analyzer = new IndonesianAnalyzer(
        Analysis.parseStopWords(env, settings, IndonesianAnalyzer.getDefaultStopSet()),
        Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)
    );
    analyzer.setVersion(version);
}
 
Example #28
Source File: ArabicAnalyzerProvider.java    From crate with Apache License 2.0 5 votes vote down vote up
ArabicAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    arabicAnalyzer = new ArabicAnalyzer(
        Analysis.parseStopWords(env, settings, ArabicAnalyzer.getDefaultStopSet()),
        Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)
    );
    arabicAnalyzer.setVersion(version);
}
 
Example #29
Source File: SnowballAnalyzerProvider.java    From crate with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the provider and builds its {@link SnowballAnalyzer}.
 *
 * <p>The language is read from the {@code language} setting, then the {@code name} setting,
 * and finally defaults to {@code "English"}. Stop words come from
 * {@code Analysis.parseStopWords}, called with the {@code DEFAULT_LANGUAGE_STOPWORDS} entry for
 * that language, or {@code CharArraySet.EMPTY_SET} when there is no entry. The analyzer is then
 * given this provider's {@code version}.
 */
SnowballAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);

    final String nameOrEnglish = settings.get("name", "English");
    final String language = settings.get("language", nameOrEnglish);
    final CharArraySet stopWords = Analysis.parseStopWords(
        env,
        settings,
        DEFAULT_LANGUAGE_STOPWORDS.getOrDefault(language, CharArraySet.EMPTY_SET)
    );

    this.analyzer = new SnowballAnalyzer(language, stopWords);
    this.analyzer.setVersion(version);
}
 
Example #30
Source File: SwedishAnalyzerProvider.java    From crate with Apache License 2.0 5 votes vote down vote up
SwedishAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    analyzer = new SwedishAnalyzer(
        Analysis.parseStopWords(env, settings, SwedishAnalyzer.getDefaultStopSet()),
        Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)
    );
    analyzer.setVersion(version);
}