org.elasticsearch.index.settings.IndexSettingsService Java Examples

The following examples show how to use org.elasticsearch.index.settings.IndexSettingsService. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example #1

Source File: PathHierarchyTokenizerFactory.java From Elasticsearch with Apache License 2.0

7 votes

/**
 * Builds the factory from the tokenizer's own {@code settings}.
 *
 * <p>Settings read: {@code buffer_size} (default 1024), {@code delimiter}
 * (default {@code PathHierarchyTokenizer.DEFAULT_DELIMITER}), {@code replacement}
 * (defaults to the resolved delimiter), {@code skip}
 * (default {@code PathHierarchyTokenizer.DEFAULT_SKIP}) and {@code reverse} (default false).
 *
 * @param index                the index this factory belongs to
 * @param indexSettingsService supplies the index-level settings handed to the superclass
 * @param name                 the configured name of this tokenizer
 * @param settings             the tokenizer-specific settings
 * @throws IllegalArgumentException if {@code delimiter} or {@code replacement} is set
 *                                  but is not exactly one character long
 */
@Inject
public PathHierarchyTokenizerFactory(Index index, IndexSettingsService indexSettingsService, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);
    bufferSize = settings.getAsInt("buffer_size", 1024);
    String delimiter = settings.get("delimiter");
    if (delimiter == null) {
        this.delimiter = PathHierarchyTokenizer.DEFAULT_DELIMITER;
    } else if (delimiter.length() != 1) {
        // Also rejects the empty string, which would otherwise fail in charAt(0)
        // with a StringIndexOutOfBoundsException instead of a clear message.
        throw new IllegalArgumentException("delimiter can only be a one char value");
    } else {
        this.delimiter = delimiter.charAt(0);
    }

    String replacement = settings.get("replacement");
    if (replacement == null) {
        // No explicit replacement: keep the delimiter unchanged in the output.
        this.replacement = this.delimiter;
    } else if (replacement.length() != 1) {
        // Same reasoning as for the delimiter: empty values are invalid too.
        throw new IllegalArgumentException("replacement can only be a one char value");
    } else {
        this.replacement = replacement.charAt(0);
    }
    this.skip = settings.getAsInt("skip", PathHierarchyTokenizer.DEFAULT_SKIP);
    this.reverse = settings.getAsBoolean("reverse", false);
}

Example #2

Source File: DelimitedPayloadTokenFilterFactory.java From Elasticsearch with Apache License 2.0

6 votes

/**
 * Builds the factory from the filter's own {@code settings}.
 *
 * <p>The {@code DELIMITER} setting supplies the delimiter character (its first
 * character is used); when absent, {@code DEFAULT_DELIMITER} applies. The
 * {@code ENCODING} setting selects the payload encoder: {@code "float"},
 * {@code "int"} or {@code "identity"}. When it is absent or not one of those
 * values, {@code DEFAULT_ENCODER} is used so that {@code encoder} is never left
 * unassigned.
 *
 * @param index                the index this factory belongs to
 * @param indexSettingsService supplies the index-level settings handed to the superclass
 * @param env                  the node environment (not used by this constructor)
 * @param name                 the configured name of this token filter
 * @param settings             the filter-specific settings
 * @throws IllegalArgumentException if the delimiter setting is present but empty
 */
@Inject
public DelimitedPayloadTokenFilterFactory(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name,
        @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);
    String delimiterConf = settings.get(DELIMITER);
    if (delimiterConf == null) {
        delimiter = DEFAULT_DELIMITER;
    } else if (delimiterConf.isEmpty()) {
        // charAt(0) on an empty value would throw StringIndexOutOfBoundsException;
        // report the misconfiguration explicitly instead.
        throw new IllegalArgumentException("[" + DELIMITER + "] must not be empty");
    } else {
        delimiter = delimiterConf.charAt(0);
    }

    // Read the setting once instead of once per comparison.
    String encodingConf = settings.get(ENCODING);
    if ("float".equals(encodingConf)) {
        encoder = new FloatEncoder();
    } else if ("int".equals(encodingConf)) {
        encoder = new IntegerEncoder();
    } else if ("identity".equals(encodingConf)) {
        encoder = new IdentityEncoder();
    } else {
        // Covers both "not configured" and an unrecognised value; the latter
        // previously left the encoder unassigned.
        encoder = DEFAULT_ENCODER;
    }
}

Example #3

Source File: HunspellTokenFilterFactory.java From Elasticsearch with Apache License 2.0

6 votes

/**
 * Builds the factory and looks up the hunspell dictionary for the configured locale.
 *
 * <p>The locale is taken from the {@code locale} setting, falling back to
 * {@code language} and then {@code lang}. {@code dedup} defaults to true and
 * {@code longest_only} defaults to false.
 *
 * @throws IllegalArgumentException if none of the locale settings is present, or
 *                                  if {@code hunspellService} returns no dictionary for it
 */
@Inject
public HunspellTokenFilterFactory(Index index, IndexSettingsService indexSettingsService, @Assisted String name, @Assisted Settings settings, HunspellService hunspellService)  {
    super(index, indexSettingsService.getSettings(), name, settings);

    // Resolve from the lowest-priority key upwards; each lookup uses the previous result as its default.
    final String fromLang = settings.get("lang", null);
    final String fromLanguage = settings.get("language", fromLang);
    final String localeName = settings.get("locale", fromLanguage);
    if (localeName == null) {
        throw new IllegalArgumentException("missing [locale | language | lang] configuration for hunspell token filter");
    }

    dictionary = hunspellService.getDictionary(localeName);
    if (dictionary == null) {
        final String message = String.format(Locale.ROOT, "Unknown hunspell dictionary for locale [%s]", localeName);
        throw new IllegalArgumentException(message);
    }

    longestOnly = settings.getAsBoolean("longest_only", false);
    dedup = settings.getAsBoolean("dedup", true);
}

Example #4

Source File: IndexQueryParserService.java From Elasticsearch with Apache License 2.0

6 votes

/**
 * Wires the query parser service to its collaborators and reads its defaults
 * from the index settings.
 *
 * <p>Index settings read: {@code DEFAULT_FIELD} (default {@code AllFieldMapper.NAME}),
 * {@code QUERY_STRING_LENIENT} (default false) and {@code ALLOW_UNMAPPED} (default true).
 * A {@code ParseFieldMatcher} is also created from the same settings.
 *
 * @param similarityService may be {@code null}
 */
@Inject
public IndexQueryParserService(Index index, IndexSettingsService indexSettingsService,
                               IndicesQueriesRegistry indicesQueriesRegistry,
                               ScriptService scriptService, AnalysisService analysisService,
                               MapperService mapperService, IndexCache indexCache, IndexFieldDataService fieldDataService,
                               BitsetFilterCache bitsetFilterCache,
                               @Nullable SimilarityService similarityService) {
    super(index, indexSettingsService.getSettings());

    // Collaborators handed in by the injector.
    this.indexSettingsService = indexSettingsService;
    this.indicesQueriesRegistry = indicesQueriesRegistry;
    this.scriptService = scriptService;
    this.analysisService = analysisService;
    this.mapperService = mapperService;
    this.indexCache = indexCache;
    this.fieldDataService = fieldDataService;
    this.bitsetFilterCache = bitsetFilterCache;
    this.similarityService = similarityService;

    // Defaults derived from the index-level settings.
    final Settings current = indexSettingsService.getSettings();
    this.defaultField = current.get(DEFAULT_FIELD, AllFieldMapper.NAME);
    this.queryStringLenient = current.getAsBoolean(QUERY_STRING_LENIENT, false);
    this.parseFieldMatcher = new ParseFieldMatcher(current);
    this.defaultAllowUnmappedFields = current.getAsBoolean(ALLOW_UNMAPPED, true);
}

Example #5

Source File: IndexStore.java From Elasticsearch with Apache License 2.0

6 votes

/**
 * Creates the index store, configures store throttling from the index settings
 * and registers {@code applySettings} as a settings listener.
 *
 * <p>{@code INDEX_STORE_THROTTLE_TYPE} defaults to {@code "none"}; the value
 * {@code "node"} (case-insensitive) switches on node-level rate limiting, any
 * other value is passed to {@code rateLimiting.setType}.
 * {@code INDEX_STORE_THROTTLE_MAX_BYTES_PER_SEC} defaults to a zero byte size.
 */
@Inject
public IndexStore(Index index, IndexSettingsService settingsService, IndicesStore indicesStore) {
    super(index, settingsService.getSettings());
    final Settings current = settingsService.getSettings();
    this.indicesStore = indicesStore;

    this.rateLimitingType = current.get(INDEX_STORE_THROTTLE_TYPE, "none");
    final boolean throttleAtNodeLevel = rateLimitingType.equalsIgnoreCase("node");
    nodeRateLimiting = throttleAtNodeLevel;
    if (!throttleAtNodeLevel) {
        // Only an index-level throttle type is applied to this store's own limiter.
        rateLimiting.setType(rateLimitingType);
    }

    final ByteSizeValue unlimited = new ByteSizeValue(0);
    this.rateLimitingThrottle = current.getAsBytesSize(INDEX_STORE_THROTTLE_MAX_BYTES_PER_SEC, unlimited);
    rateLimiting.setMaxRate(rateLimitingThrottle);

    logger.debug("using index.store.throttle.type [{}], with index.store.throttle.max_bytes_per_sec [{}]", rateLimitingType, rateLimitingThrottle);

    this.settingsService = settingsService;
    settingsService.addListener(applySettings);
}

Example #6

Source File: HanLpTokenizerTokenizerFactory.java From elasticsearch-analysis-hanlp with Apache License 2.0

6 votes

/**
 * Builds the tokenizer factory, overriding each option from {@code settings}
 * when the corresponding key is present.
 *
 * <p>Every option uses the field's current value as the default passed to the
 * settings lookup.
 */
@Inject
public HanLpTokenizerTokenizerFactory(Index index, IndexSettingsService indexSettingsService, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);

    // Segmentation mode.
    this.indexMode = settings.getAsBoolean(INDEX_MODE, this.indexMode);

    // Named-entity recognition switches.
    this.nameRecognize = settings.getAsBoolean(NAME_RECOGNIZE, this.nameRecognize);
    this.translatedNameRecognize = settings.getAsBoolean(TRANSLATED_NAME_RECOGNIZE, this.translatedNameRecognize);
    this.japaneseNameRecognize = settings.getAsBoolean(JAPANESE_NAME_RECOGNIZE, this.japaneseNameRecognize);
    this.placeRecognize = settings.getAsBoolean(PLACE_RECOGNIZE, this.placeRecognize);
    this.organizationRecognize = settings.getAsBoolean(ORGANIZATION_RECOGNIZE, this.organizationRecognize);

    // Remaining toggles.
    // NOTE(review): the original comments mapped USE_CUSTOM_DICTIONARY to "enableCustomDictionary"
    // and SPEECH_TAGGING to "PorterStemming"; the latter looks unrelated - confirm against HanLP.
    this.useCustomDictionary = settings.getAsBoolean(USE_CUSTOM_DICTIONARY, this.useCustomDictionary);
    this.speechTagging = settings.getAsBoolean(SPEECH_TAGGING, this.speechTagging);
    this.offset = settings.getAsBoolean(OFFSET, this.offset);
    this.numberQuantifierRecognize = settings.getAsBoolean(NUMBER_QUANTIFIER_RECOGNIZE, this.numberQuantifierRecognize);

    // A value greater than 1 means multi-threading is used.
    this.threads = settings.getAsInt(THREADS, this.threads);
}

Example #7

Source File: HanLpAnalyzerProvider.java From elasticsearch-analysis-hanlp with Apache License 2.0

6 votes

/**
 * Builds the provider and its {@code HanLPAnalyzer} from {@code settings}.
 *
 * <p>Defaults: {@code NAME_RECOGNIZE}, {@code TRANSLATED_NAME_RECOGNIZE} and
 * {@code USE_CUSTOM_DICTIONARY} are true; every other boolean option is false;
 * {@code THREADS} is 1. The last analyzer argument is passed as {@code null}.
 */
@Inject
public HanLpAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);

    final boolean useIndexMode = settings.getAsBoolean(INDEX_MODE, false);
    final boolean recognizeNames = settings.getAsBoolean(NAME_RECOGNIZE, true);
    final boolean recognizeTranslatedNames = settings.getAsBoolean(TRANSLATED_NAME_RECOGNIZE, true);
    final boolean recognizeJapaneseNames = settings.getAsBoolean(JAPANESE_NAME_RECOGNIZE, false);
    final boolean recognizePlaces = settings.getAsBoolean(PLACE_RECOGNIZE, false);
    final boolean recognizeOrganizations = settings.getAsBoolean(ORGANIZATION_RECOGNIZE, false);
    final boolean customDictionary = settings.getAsBoolean(USE_CUSTOM_DICTIONARY, true);
    final boolean tagSpeech = settings.getAsBoolean(SPEECH_TAGGING, false);
    final boolean withOffset = settings.getAsBoolean(OFFSET, false);
    final boolean recognizeNumberQuantifiers = settings.getAsBoolean(NUMBER_QUANTIFIER_RECOGNIZE, false);
    // A value greater than 1 means multi-threading is used.
    final int threadCount = settings.getAsInt(THREADS, 1);

    analyzer = new HanLPAnalyzer(
            useIndexMode,
            recognizeNames,
            recognizeTranslatedNames,
            recognizeJapaneseNames,
            recognizePlaces,
            recognizeOrganizations,
            customDictionary,
            tagSpeech,
            withOffset,
            recognizeNumberQuantifiers,
            threadCount,
            null);
}

Example #8

Source File: KeywordMarkerTokenFilterFactory.java From Elasticsearch with Apache License 2.0

5 votes

/**
 * Builds the factory and its keyword lookup set.
 *
 * <p>The keywords are loaded through {@code Analysis.getWordSet} under the
 * {@code keywords} setting name; {@code ignore_case} (default false) controls
 * the case sensitivity of the resulting {@code CharArraySet}.
 *
 * @throws IllegalArgumentException if no keyword set could be resolved
 */
@Inject
public KeywordMarkerTokenFilterFactory(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);

    final boolean caseInsensitive = settings.getAsBoolean("ignore_case", false);
    final Set<?> keywords = Analysis.getWordSet(env, settings, "keywords");
    if (keywords != null) {
        keywordLookup = new CharArraySet(keywords, caseInsensitive);
        return;
    }
    throw new IllegalArgumentException("keyword filter requires either `keywords` or `keywords_path` to be configured");
}

Example #9

Source File: SwedishAnalyzerProvider.java From Elasticsearch with Apache License 2.0

5 votes

@Inject
public SwedishAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);
    analyzer = new SwedishAnalyzer(Analysis.parseStopWords(env, settings, SwedishAnalyzer.getDefaultStopSet()),
                                   Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
    analyzer.setVersion(version);
}

Example #10

Source File: StopAnalyzerProvider.java From Elasticsearch with Apache License 2.0

5 votes

/**
 * Builds the provider and its {@code StopAnalyzer}.
 *
 * <p>Stop words come from {@code Analysis.parseStopWords}, which is given
 * {@code StopAnalyzer.ENGLISH_STOP_WORDS_SET}.
 */
@Inject
public StopAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);
    this.stopAnalyzer = new StopAnalyzer(
            Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET));
    this.stopAnalyzer.setVersion(version);
}

Example #11

Source File: PatternReplaceCharFilterFactory.java From Elasticsearch with Apache License 2.0

5 votes

@Inject
public PatternReplaceCharFilterFactory(Index index, IndexSettingsService indexSettingsService, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name);

    if (!Strings.hasLength(settings.get("pattern"))) {
        throw new IllegalArgumentException("pattern is missing for [" + name + "] char filter of type 'pattern_replace'");
    }
    pattern = Pattern.compile(settings.get("pattern"));
    replacement = settings.get("replacement", ""); // when not set or set to "", use "".
}

Example #12

Source File: HindiAnalyzerProvider.java From Elasticsearch with Apache License 2.0

5 votes

@Inject
public HindiAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);
    analyzer = new HindiAnalyzer(Analysis.parseStopWords(env, settings, HindiAnalyzer.getDefaultStopSet()),
                                 Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
    analyzer.setVersion(version);
}

Example #13

Source File: DutchAnalyzerProvider.java From Elasticsearch with Apache License 2.0

5 votes

@Inject
public DutchAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);
    analyzer = new DutchAnalyzer(Analysis.parseStopWords(env, settings, DutchAnalyzer.getDefaultStopSet()),
                                 Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
    analyzer.setVersion(version);
}

Example #14

Source File: MapperService.java From Elasticsearch with Apache License 2.0

5 votes

/**
 * Convenience constructor that takes raw index settings.
 *
 * <p>Wraps {@code indexSettings} in a new {@code IndexSettingsService} for the
 * given index and delegates to the constructor that accepts the service.
 */
public MapperService(Index index, Settings indexSettings, AnalysisService analysisService,
                     SimilarityLookupService similarityLookupService,
                     ScriptService scriptService, MapperRegistry mapperRegistry,
                     DynamicArrayFieldMapperBuilderFactoryProvider dynamicArrayFieldMapperBuilderFactoryProvider) {
    this(
        index,
        new IndexSettingsService(index, indexSettings),
        analysisService,
        similarityLookupService,
        scriptService,
        mapperRegistry,
        dynamicArrayFieldMapperBuilderFactoryProvider
    );
}

Example #15

Source File: GalicianAnalyzerProvider.java From Elasticsearch with Apache License 2.0

5 votes

@Inject
public GalicianAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);
    analyzer = new GalicianAnalyzer(Analysis.parseStopWords(env, settings, GalicianAnalyzer.getDefaultStopSet()),
                                    Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
    analyzer.setVersion(version);
}

Example #16

Source File: IndexService.java From Elasticsearch with Apache License 2.0

5 votes

@Inject
public IndexService(Injector injector, Index index, NodeEnvironment nodeEnv,
                    AnalysisService analysisService, MapperService mapperService, IndexQueryParserService queryParserService,
                    SimilarityService similarityService, IndexAliasesService aliasesService, IndexCache indexCache,
                    IndexSettingsService settingsService,
                    IndexFieldDataService indexFieldData, BitsetFilterCache bitSetFilterCache, IndicesService indicesServices) {

    super(index, settingsService.getSettings());
    this.injector = injector;
    this.analysisService = analysisService;
    this.mapperService = mapperService;
    this.queryParserService = queryParserService;
    this.similarityService = similarityService;
    this.aliasesService = aliasesService;
    this.indexCache = indexCache;
    this.indexFieldData = indexFieldData;
    this.settingsService = settingsService;
    this.bitsetFilterCache = bitSetFilterCache;

    this.pluginsService = injector.getInstance(PluginsService.class);
    this.indicesServices = indicesServices;
    this.indicesLifecycle = (InternalIndicesLifecycle) injector.getInstance(IndicesLifecycle.class);

    // inject workarounds for cyclic dep
    indexFieldData.setListener(new FieldDataCacheListener(this));
    bitSetFilterCache.setListener(new BitsetCacheListener(this));
    this.nodeEnv = nodeEnv;
}

Example #17

Source File: TrimTokenFilterFactory.java From Elasticsearch with Apache License 2.0

5 votes

@Inject
public TrimTokenFilterFactory(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);
    if (version.onOrAfter(Version.LUCENE_4_4_0) && settings.get(UPDATE_OFFSETS_KEY) != null) {
        throw new IllegalArgumentException(UPDATE_OFFSETS_KEY +  " is not supported anymore. Please fix your analysis chain or use"
                + " an older compatibility version (<=4.3) but beware that it might cause highlighting bugs.");
    }
    this.updateOffsets = settings.getAsBoolean("update_offsets", false);
}

Example #18

Source File: KeepTypesFilterFactory.java From Elasticsearch with Apache License 2.0

5 votes

/**
 * Builds the factory and collects the configured token types to keep.
 *
 * <p>The types are read as an array from the {@code KEEP_TYPES_KEY} setting and
 * stored as a {@code HashSet}.
 *
 * @throws IllegalArgumentException if the setting yields no array
 */
@Inject
public KeepTypesFilterFactory(Index index, IndexSettingsService indexSettingsService,
                             Environment env, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);

    final String[] configuredTypes = settings.getAsArray(KEEP_TYPES_KEY, null);
    if (configuredTypes != null) {
        this.keepTypes = new HashSet<>(Arrays.asList(configuredTypes));
        return;
    }
    throw new IllegalArgumentException("keep_types requires `" + KEEP_TYPES_KEY + "` to be configured");
}

Example #19

Source File: IndexFieldDataService.java From Elasticsearch with Apache License 2.0

5 votes

/**
 * Creates the field data service for an index and stores its collaborators:
 * the field data cache, the circuit breaker service and the mapper service.
 */
@Inject
public IndexFieldDataService(Index index, IndexSettingsService indexSettingsService, IndicesFieldDataCache indicesFieldDataCache,
                             CircuitBreakerService circuitBreakerService, MapperService mapperService) {
    super(index, indexSettingsService.getSettings());
    this.mapperService = mapperService;
    this.circuitBreakerService = circuitBreakerService;
    this.indicesFieldDataCache = indicesFieldDataCache;
}

Example #20

Source File: StopTokenFilterFactory.java From Elasticsearch with Apache License 2.0

5 votes

@Inject
public StopTokenFilterFactory(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);
    this.ignoreCase = settings.getAsBoolean("ignore_case", false);
    this.removeTrailing = settings.getAsBoolean("remove_trailing", true);
    this.stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, ignoreCase);
    if (version.onOrAfter(Version.LUCENE_4_4) && settings.get("enable_position_increments") != null) {
        throw new IllegalArgumentException("enable_position_increments is not supported anymore as of Lucene 4.4 as it can create broken token streams."
                + " Please fix your analysis chain or use an older compatibility version (<= 4.3).");
    }
    this.enablePositionIncrements = settings.getAsBoolean("enable_position_increments", true);
}

Example #21

Source File: BlobShard.java From Elasticsearch with Apache License 2.0

5 votes

/**
 * Creates the blob shard and its {@code BlobContainer}.
 *
 * <p>The container directory is obtained from {@code blobDir(blobEnvironment)}
 * and logged at info level before the container is created.
 */
@Inject
public BlobShard(ShardId shardId,
                 IndexSettingsService indexSettingsService,
                 BlobEnvironment blobEnvironment,
                 IndexShard indexShard) {
    super(shardId, indexSettingsService.getSettings());
    // Assigned before the directory lookup, matching the original statement order.
    this.indexShard = indexShard;

    final File containerDir = blobDir(blobEnvironment);
    logger.info("creating BlobContainer at {}", containerDir);
    this.blobContainer = new BlobContainer(containerDir);
}

Example #22

Source File: SpanishAnalyzerProvider.java From Elasticsearch with Apache License 2.0

5 votes

@Inject
public SpanishAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);
    analyzer = new SpanishAnalyzer(Analysis.parseStopWords(env, settings, SpanishAnalyzer.getDefaultStopSet()),
                                   Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
    analyzer.setVersion(version);
}

Example #23

Source File: BosonNLPAnalyzerProvider.java From elasticsearch-analysis-bosonnlp with Apache License 2.0

5 votes

@Inject
public BosonNLPAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {

    super(index, indexSettingsService.getSettings(), name, settings);
    this.TAG_URL = settings.get("API_URL", "").toString();
    this.BOSONNLP_API_TOKEN = settings.get("API_TOKEN", "").toString();
    this.spaceMode = Integer.parseInt(settings.get("space_mode", "0"));
    this.oovLevel = Integer.parseInt(settings.get("oov_level", "3"));
    this.t2s = Integer.parseInt(settings.get("t2s", "0"));
    this.specialCharConv = Integer.parseInt(settings.get("spechial_char_conv", "0"));

    this.analyzer = new BosonNLPAnalyzer(TAG_URL, BOSONNLP_API_TOKEN, spaceMode, oovLevel, t2s, specialCharConv);

}

Example #24

Source File: MappingCharFilterFactory.java From Elasticsearch with Apache License 2.0

5 votes

/**
 * Builds the factory and its normalization map.
 *
 * <p>The mapping rules are loaded through {@code Analysis.getWordList} under the
 * {@code mappings} setting name, fed to {@code parseRules} and built into
 * {@code normMap}.
 *
 * @throws IllegalArgumentException if no rule list could be resolved
 */
@Inject
public MappingCharFilterFactory(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name);

    final List<String> mappingRules = Analysis.getWordList(env, settings, "mappings");
    if (mappingRules == null) {
        throw new IllegalArgumentException("mapping requires either `mappings` or `mappings_path` to be configured");
    }

    final NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
    parseRules(mappingRules, builder);
    normMap = builder.build();
}

Example #25

Source File: HungarianAnalyzerProvider.java From Elasticsearch with Apache License 2.0

5 votes

@Inject
public HungarianAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);
    analyzer = new HungarianAnalyzer(Analysis.parseStopWords(env, settings, HungarianAnalyzer.getDefaultStopSet()),
                                     Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
    analyzer.setVersion(version);
}

Example #26

Source File: LithuanianAnalyzerProvider.java From Elasticsearch with Apache License 2.0

5 votes

@Inject
public LithuanianAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);
    analyzer = new LithuanianAnalyzer(Analysis.parseStopWords(env, settings, LithuanianAnalyzer.getDefaultStopSet()),
                                  Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
    analyzer.setVersion(version);
}

Example #27

Source File: DanishAnalyzerProvider.java From Elasticsearch with Apache License 2.0

5 votes

@Inject
public DanishAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);
    analyzer = new DanishAnalyzer(Analysis.parseStopWords(env, settings, DanishAnalyzer.getDefaultStopSet()),
                                  Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
    analyzer.setVersion(version);
}

Example #28

Source File: WordDelimiterTokenFilterFactory.java From Elasticsearch with Apache License 2.0

5 votes

/**
 * Builds the factory: resolves the character type table, assembles the filter
 * flags from the boolean settings and loads the optional protected words.
 *
 * <p>Each flag is contributed by {@code getFlag}, which is given the flag
 * constant, the settings, the setting key and that key's default.
 */
@Inject
public WordDelimiterTokenFilterFactory(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);

    // Example entries for the type table:
    //   $ => DIGIT
    //   % => DIGIT
    //   . => DIGIT
    //   \u002C => DIGIT
    //   \u200D => ALPHANUM
    final List<String> typeTableEntries = Analysis.getWordList(env, settings, "type_table");
    this.charTypeTable = typeTableEntries == null
            ? WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE
            : parseTypes(typeTableEntries);

    // Generate parts of words: "PowerShot" => "Power" "Shot"
    int combined = getFlag(GENERATE_WORD_PARTS, settings, "generate_word_parts", true);
    // Generate number subwords: "500-42" => "500" "42"
    combined |= getFlag(GENERATE_NUMBER_PARTS, settings, "generate_number_parts", true);
    // Catenate maximum runs of word parts: "wi-fi" => "wifi"
    combined |= getFlag(CATENATE_WORDS, settings, "catenate_words", false);
    // Catenate maximum runs of number parts: "500-42" => "50042"
    combined |= getFlag(CATENATE_NUMBERS, settings, "catenate_numbers", false);
    // Catenate all subword parts: "wi-fi-4000" => "wifi4000"
    combined |= getFlag(CATENATE_ALL, settings, "catenate_all", false);
    // Split on case changes: "PowerShot" becomes two tokens ("Power-Shot" is two parts regardless)
    combined |= getFlag(SPLIT_ON_CASE_CHANGE, settings, "split_on_case_change", true);
    // Keep the original word alongside its subwords: "500-42" => "500" "42" "500-42"
    combined |= getFlag(PRESERVE_ORIGINAL, settings, "preserve_original", false);
    // Split on letter/number transitions: "j2se" => "j" "2" "se"
    combined |= getFlag(SPLIT_ON_NUMERICS, settings, "split_on_numerics", true);
    // Remove a trailing "'s" from each subword: "O'Neil's" => "O", "Neil"
    combined |= getFlag(STEM_ENGLISH_POSSESSIVE, settings, "stem_english_possessive", true);

    // When present, these tokens are protected from being delimited.
    final Set<?> configuredProtected = Analysis.getWordSet(env, settings, "protected_words");
    if (configuredProtected == null) {
        this.protoWords = null;
    } else {
        this.protoWords = CharArraySet.copy(configuredProtected);
    }
    this.flags = combined;
}

Example #29

Source File: IrishAnalyzerProvider.java From Elasticsearch with Apache License 2.0

5 votes

@Inject
public IrishAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);
    analyzer = new IrishAnalyzer(Analysis.parseStopWords(env, settings, IrishAnalyzer.getDefaultStopSet()),
                                 Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
    analyzer.setVersion(version);
}

Example #30

Source File: ShingleTokenFilterFactory.java From Elasticsearch with Apache License 2.0

5 votes

/**
 * Builds the factory and the inner {@code Factory} named {@code "shingle"}.
 *
 * <p>Settings read: {@code max_shingle_size} and {@code min_shingle_size}
 * (defaults from {@code ShingleFilter}), {@code output_unigrams} (default true),
 * {@code output_unigrams_if_no_shingles} (default false), {@code token_separator}
 * and {@code filler_token} (defaults from {@code ShingleFilter}).
 */
@Inject
public ShingleTokenFilterFactory(Index index, IndexSettingsService indexSettingsService, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);

    // Shingle size bounds.
    final Integer maxSize = settings.getAsInt("max_shingle_size", ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE);
    final Integer minSize = settings.getAsInt("min_shingle_size", ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE);

    // Unigram output behaviour.
    final Boolean emitUnigrams = settings.getAsBoolean("output_unigrams", true);
    final Boolean emitUnigramsWithoutShingles = settings.getAsBoolean("output_unigrams_if_no_shingles", false);

    // Token joining and gap filling.
    final String separator = settings.get("token_separator", ShingleFilter.DEFAULT_TOKEN_SEPARATOR);
    final String filler = settings.get("filler_token", ShingleFilter.DEFAULT_FILLER_TOKEN);

    factory = new Factory(
            "shingle",
            minSize,
            maxSize,
            emitUnigrams,
            emitUnigramsWithoutShingles,
            separator,
            filler);
}