org.elasticsearch.env.Environment Java Examples

The following examples show how to use org.elasticsearch.env.Environment. They are drawn from open source projects; the source file, project, and license are noted above each example.
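
Before the examples, a minimal sketch of how an Environment is typically constructed: from node Settings plus a config path. The exact constructor signature varies across Elasticsearch versions, and the paths below are hypothetical.

import java.nio.file.Path;
import java.nio.file.Paths;

import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;

public class EnvironmentDemo {
    public static void main(String[] args) {
        // path.home is the required root; config, data, and logs paths
        // default to directories beneath it unless set explicitly.
        Settings settings = Settings.builder()
                .put("path.home", "/tmp/es-home")           // hypothetical home dir
                .build();
        Path configPath = Paths.get("/tmp/es-home/config"); // hypothetical config dir
        Environment env = new Environment(settings, configPath);

        System.out.println(env.configFile()); // used by the analysis factories below
        System.out.println(env.tmpFile());    // used by Bootstrap in Example #20
    }
}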
Example #1
Source File: AzureRepository.java    From crate with Apache License 2.0
public AzureRepository(RepositoryMetaData metadata,
                       Environment environment,
                       NamedXContentRegistry namedXContentRegistry,
                       AzureStorageService storageService,
                       ThreadPool threadPool) {
    super(metadata, environment.settings(), namedXContentRegistry, threadPool, buildBasePath(metadata));
    this.chunkSize = Repository.CHUNK_SIZE_SETTING.get(metadata.settings());
    this.storageService = storageService;

    // If the user did not explicitly define a readonly value, derive it from the location mode:
    // for the secondary_only location mode, the repository must be read-only.
    final LocationMode locationMode = Repository.LOCATION_MODE_SETTING.get(metadata.settings());
    if (Repository.READONLY_SETTING.exists(metadata.settings())) {
        this.readonly = Repository.READONLY_SETTING.get(metadata.settings());
    } else {
        this.readonly = locationMode == LocationMode.SECONDARY_ONLY;
    }
}
 
Example #2
Source File: HyphenationCompoundWordTokenFilterFactory.java    From crate with Apache License 2.0
HyphenationCompoundWordTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, env, name, settings);

    String hyphenationPatternsPath = settings.get("hyphenation_patterns_path", null);
    if (hyphenationPatternsPath == null) {
        throw new IllegalArgumentException("hyphenation_patterns_path is a required setting.");
    }

    Path hyphenationPatternsFile = env.configFile().resolve(hyphenationPatternsPath);

    try (InputStream in = Files.newInputStream(hyphenationPatternsFile)) {
        // try-with-resources closes the stream once the tree has been parsed
        hyphenationTree = HyphenationCompoundWordTokenFilter.getHyphenationTree(new InputSource(in));
    } catch (Exception e) {
        throw new IllegalArgumentException("Exception while reading hyphenation_patterns_path.", e);
    }
}
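
For reference, a hypothetical settings block that would satisfy this factory. The path is resolved against env.configFile(), so it is relative to the node's config directory; the word_list requirement comes from the compound-word base class shown in Example #25.

Settings filterSettings = Settings.builder()
        .put("type", "hyphenation_decompounder")
        .put("hyphenation_patterns_path", "analysis/hyphenation_patterns.xml") // relative to config dir
        .putList("word_list", "kaffee", "tasse") // required by the base class (see Example #25)
        .build();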
 
Example #3
Source File: DelimitedPayloadTokenFilterFactory.java    From Elasticsearch with Apache License 2.0
@Inject
public DelimitedPayloadTokenFilterFactory(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name,
        @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);
    String delimiterConf = settings.get(DELIMITER);
    if (delimiterConf != null) {
        delimiter = delimiterConf.charAt(0);
    } else {
        delimiter = DEFAULT_DELIMITER;
    }
        
    String encoding = settings.get(ENCODING);
    if (encoding != null) {
        if (encoding.equals("float")) {
            encoder = new FloatEncoder();
        } else if (encoding.equals("int")) {
            encoder = new IntegerEncoder();
        } else if (encoding.equals("identity")) {
            encoder = new IdentityEncoder();
        }
        // An unrecognized value falls through and leaves the encoder unassigned.
    } else {
        encoder = DEFAULT_ENCODER;
    }
}
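
A hypothetical settings block exercising the branches above, assuming the DELIMITER and ENCODING constants hold the keys "delimiter" and "encoding" (as the constant names suggest). It uses the older Settings.settingsBuilder() style seen in Example #20, since this class comes from the same 2.x-era codebase.

Settings filterSettings = Settings.settingsBuilder()
        .put("delimiter", "|")    // only the first character is used
        .put("encoding", "float") // one of "float", "int", "identity"
        .build();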
 
Example #4
Source File: Analysis.java    From Elasticsearch with Apache License 2.0
/**
 * @return a reader over the file pointed to by the setting, or <code>null</code>
 *         if no value is set for "settingPrefix"
 * @throws IllegalArgumentException if the reader cannot be instantiated
 */
public static Reader getReaderFromFile(Environment env, Settings settings, String settingPrefix) {
    String filePath = settings.get(settingPrefix, null);

    if (filePath == null) {
        return null;
    }

    final Path path = env.configFile().resolve(filePath);

    try {
        return FileSystemUtils.newBufferedReader(path.toUri().toURL(), Charsets.UTF_8);
    } catch (IOException ioe) {
        String message = String.format(Locale.ROOT, "IOException while reading %s_path: %s", settingPrefix, ioe.getMessage());
        throw new IllegalArgumentException(message, ioe);
    }
}
 
Example #5
Source File: ShingleTokenFilterFactory.java    From crate with Apache License 2.0
public ShingleTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    int maxAllowedShingleDiff = indexSettings.getMaxShingleDiff();
    Integer maxShingleSize = settings.getAsInt("max_shingle_size", ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE);
    Integer minShingleSize = settings.getAsInt("min_shingle_size", ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE);
    Boolean outputUnigrams = settings.getAsBoolean("output_unigrams", true);
    Boolean outputUnigramsIfNoShingles = settings.getAsBoolean("output_unigrams_if_no_shingles", false);

    int shingleDiff = maxShingleSize - minShingleSize + (outputUnigrams ? 1 : 0);
    if (shingleDiff > maxAllowedShingleDiff) {
        deprecationLogger.deprecated("Deprecated big difference between maxShingleSize and minShingleSize in Shingle TokenFilter,"
            + "expected difference must be less than or equal to: [" + maxAllowedShingleDiff + "]");
    }
    String tokenSeparator = settings.get("token_separator", ShingleFilter.DEFAULT_TOKEN_SEPARATOR);
    String fillerToken = settings.get("filler_token", ShingleFilter.DEFAULT_FILLER_TOKEN);
    factory = new Factory("shingle", minShingleSize, maxShingleSize, outputUnigrams, outputUnigramsIfNoShingles, tokenSeparator, fillerToken);
}
 
Example #6
Source File: AnalysisRegistry.java    From crate with Apache License 2.0
public AnalysisRegistry(Environment environment,
                        Map<String, AnalysisProvider<CharFilterFactory>> charFilters,
                        Map<String, AnalysisProvider<TokenFilterFactory>> tokenFilters,
                        Map<String, AnalysisProvider<TokenizerFactory>> tokenizers,
                        Map<String, AnalysisProvider<AnalyzerProvider<?>>> analyzers,
                        Map<String, AnalysisProvider<AnalyzerProvider<?>>> normalizers,
                        Map<String, PreConfiguredCharFilter> preConfiguredCharFilters,
                        Map<String, PreConfiguredTokenFilter> preConfiguredTokenFilters,
                        Map<String, PreConfiguredTokenizer> preConfiguredTokenizers,
                        Map<String, PreBuiltAnalyzerProviderFactory> preConfiguredAnalyzers) {
    this.environment = environment;
    this.charFilters = unmodifiableMap(charFilters);
    this.tokenFilters = unmodifiableMap(tokenFilters);
    this.tokenizers = unmodifiableMap(tokenizers);
    this.analyzers = unmodifiableMap(analyzers);
    this.normalizers = unmodifiableMap(normalizers);
    prebuiltAnalysis =
        new PrebuiltAnalysis(preConfiguredCharFilters, preConfiguredTokenFilters, preConfiguredTokenizers, preConfiguredAnalyzers);
}
 
Example #7
Source File: SynonymTokenFilterFactory.java    From crate with Apache License 2.0
public SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env, AnalysisRegistry analysisRegistry,
                                  String name, Settings settings) throws IOException {
    super(indexSettings, name, settings);
    this.settings = settings;
    this.ignoreCase = settings.getAsBoolean("ignore_case", false);
    if (settings.get("ignore_case") != null) {
        deprecationLogger.deprecated(
            "The ignore_case option on the synonym_graph filter is deprecated. " +
                "Instead, insert a lowercase filter in the filter chain before the synonym_graph filter.");
    }

    this.expand = settings.getAsBoolean("expand", true);
    this.lenient = settings.getAsBoolean("lenient", false);
    this.format = settings.get("format", "");
    this.environment = env;
}
 
Example #8
Source File: NodeSettingsTest.java    From crate with Apache License 2.0
@Before
public void doSetup() throws Exception {
    tmp.create();
    Path configPath = createConfigPath();
    Map<String, String> settings = new HashMap<>();
    settings.put("node.name", "node-test");
    settings.put("node.data", "true");
    settings.put(PATH_HOME_SETTING.getKey(), configPath.toString());
    // Avoid connecting to other test nodes
    settings.put("discovery.type", "single-node");

    Environment environment = InternalSettingsPreparer.prepareEnvironment(Settings.EMPTY, settings, configPath, () -> "node-test");
    node = new CrateNode(environment);
    node.start();
    sqlOperations = node.injector().getInstance(SQLOperations.class);
}
 
Example #9
Source File: WordDelimiterTokenFilterFactory.java    From Elasticsearch with Apache License 2.0
@Inject
public WordDelimiterTokenFilterFactory(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);

    // Sample Format for the type table:
    // $ => DIGIT
    // % => DIGIT
    // . => DIGIT
    // \u002C => DIGIT
    // \u200D => ALPHANUM
    List<String> charTypeTableValues = Analysis.getWordList(env, settings, "type_table");
    if (charTypeTableValues == null) {
        this.charTypeTable = WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE;
    } else {
        this.charTypeTable = parseTypes(charTypeTableValues);
    }
    int flags = 0;
    // If set, causes parts of words to be generated: "PowerShot" => "Power" "Shot"
    flags |= getFlag(GENERATE_WORD_PARTS, settings, "generate_word_parts", true);
    // If set, causes number subwords to be generated: "500-42" => "500" "42"
    flags |= getFlag(GENERATE_NUMBER_PARTS, settings, "generate_number_parts", true);
    // If set, causes maximum runs of word parts to be catenated: "wi-fi" => "wifi"
    flags |= getFlag(CATENATE_WORDS, settings, "catenate_words", false);
    // If set, causes maximum runs of number parts to be catenated: "500-42" => "50042"
    flags |= getFlag(CATENATE_NUMBERS, settings, "catenate_numbers", false);
    // If set, causes all subword parts to be catenated: "wi-fi-4000" => "wifi4000"
    flags |= getFlag(CATENATE_ALL, settings, "catenate_all", false);
    // 1, causes "PowerShot" to be two tokens; ("Power-Shot" remains two parts regards)
    flags |= getFlag(SPLIT_ON_CASE_CHANGE, settings, "split_on_case_change", true);
    // If set, includes original words in subwords: "500-42" => "500" "42" "500-42"
    flags |= getFlag(PRESERVE_ORIGINAL, settings, "preserve_original", false);
    // 1, causes "j2se" to be three tokens; "j" "2" "se"
    flags |= getFlag(SPLIT_ON_NUMERICS, settings, "split_on_numerics", true);
    // If set, causes trailing "'s" to be removed for each subword: "O'Neil's" => "O", "Neil"
    flags |= getFlag(STEM_ENGLISH_POSSESSIVE, settings, "stem_english_possessive", true);
    // If not null, the set of tokens to protect from being delimited
    Set<?> protectedWords = Analysis.getWordSet(env, settings, "protected_words");
    this.protoWords = protectedWords == null ? null : CharArraySet.copy(protectedWords);
    this.flags = flags;
}
 
Example #10
Source File: IrishAnalyzerProvider.java    From Elasticsearch with Apache License 2.0
@Inject
public IrishAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);
    analyzer = new IrishAnalyzer(Analysis.parseStopWords(env, settings, IrishAnalyzer.getDefaultStopSet()),
                                 Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
    analyzer.setVersion(version);
}
 
Example #11
Source File: PatternReplaceCharFilterFactory.java    From crate with Apache License 2.0
PatternReplaceCharFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name);

    String sPattern = settings.get("pattern");
    if (!Strings.hasLength(sPattern)) {
        throw new IllegalArgumentException("pattern is missing for [" + name + "] char filter of type 'pattern_replace'");
    }
    pattern = Regex.compile(sPattern, settings.get("flags"));
    replacement = settings.get("replacement", ""); // when not set or set to "", use "".
}
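
A hypothetical settings block for this char filter. The flags string is handed to Elasticsearch's Regex.compile, which accepts pipe-separated java.util.regex flag names.

Settings filterSettings = Settings.builder()
        .put("pattern", "(\\d+)-(?=\\d)")          // e.g. rewrites "123-456" to "123_456"
        .put("replacement", "$1_")
        .put("flags", "CASE_INSENSITIVE|COMMENTS") // optional
        .build();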
 
Example #12
Source File: RepositoriesModule.java    From crate with Apache License 2.0
public RepositoriesModule(Environment env,
                          List<RepositoryPlugin> repoPlugins,
                          NamedXContentRegistry namedXContentRegistry,
                          ThreadPool threadPool) {
    Map<String, Repository.Factory> factories = new HashMap<>();
    factories.put(FsRepository.TYPE, new Repository.Factory() {

        @Override
        public TypeSettings settings() {
            return new TypeSettings(FsRepository.mandatorySettings(), FsRepository.optionalSettings());
        }

        @Override
        public Repository create(RepositoryMetaData metadata) throws Exception {
            return new FsRepository(metadata, env, namedXContentRegistry, threadPool);
        }
    });
    for (RepositoryPlugin repoPlugin : repoPlugins) {
        Map<String, Repository.Factory> newRepoTypes = repoPlugin.getRepositories(env, namedXContentRegistry, threadPool);
        for (Map.Entry<String, Repository.Factory> entry : newRepoTypes.entrySet()) {
            if (factories.put(entry.getKey(), entry.getValue()) != null) {
                throw new IllegalArgumentException("Repository type [" + entry.getKey() + "] is already registered");
            }
        }
    }
    repositoryTypes = Collections.unmodifiableMap(factories);
}
 
Example #13
Source File: SoraniAnalyzerProvider.java    From Elasticsearch with Apache License 2.0
@Inject
public SoraniAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);
    analyzer = new SoraniAnalyzer(Analysis.parseStopWords(env, settings, SoraniAnalyzer.getDefaultStopSet()),
                                  Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
    analyzer.setVersion(version);
}
 
Example #14
Source File: Analysis.java    From crate with Apache License 2.0
public static CharArraySet getWordSet(Environment env, Settings settings, String settingsPrefix) {
    List<String> wordList = getWordList(env, settings, settingsPrefix);
    if (wordList == null) {
        return null;
    }
    boolean ignoreCase =
        settings.getAsBoolean(settingsPrefix + "_case", false);
    return new CharArraySet(wordList, ignoreCase);
}
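
A hedged sketch of calling this helper. getWordList (not shown) accepts either an inline list under the prefix or a file referenced by "<prefix>_path" relative to the config directory; the "_case" suffix read above toggles case-insensitive matching.

Settings s = Settings.builder()
        .putList("protected_words", "wi-fi", "j2se") // inline list form
        .put("protected_words_case", true)           // true => ignore case when matching
        .build();
CharArraySet words = Analysis.getWordSet(env, s, "protected_words");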
 
Example #15
Source File: WordDelimiterTokenFilterFactory.java    From crate with Apache License 2.0
public WordDelimiterTokenFilterFactory(IndexSettings indexSettings, Environment env,
        String name, Settings settings) {
    super(indexSettings, name, settings);

    // Sample Format for the type table:
    // $ => DIGIT
    // % => DIGIT
    // . => DIGIT
    // \u002C => DIGIT
    // \u200D => ALPHANUM
    List<String> charTypeTableValues = Analysis.getWordList(env, settings, "type_table");
    if (charTypeTableValues == null) {
        this.charTypeTable = WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE;
    } else {
        this.charTypeTable = parseTypes(charTypeTableValues);
    }
    int flags = 0;
    // If set, causes parts of words to be generated: "PowerShot" => "Power" "Shot"
    flags |= getFlag(GENERATE_WORD_PARTS, settings, "generate_word_parts", true);
    // If set, causes number subwords to be generated: "500-42" => "500" "42"
    flags |= getFlag(GENERATE_NUMBER_PARTS, settings, "generate_number_parts", true);
    // If set, causes maximum runs of word parts to be catenated: "wi-fi" => "wifi"
    flags |= getFlag(CATENATE_WORDS, settings, "catenate_words", false);
    // If set, causes maximum runs of number parts to be catenated: "500-42" => "50042"
    flags |= getFlag(CATENATE_NUMBERS, settings, "catenate_numbers", false);
    // If set, causes all subword parts to be catenated: "wi-fi-4000" => "wifi4000"
    flags |= getFlag(CATENATE_ALL, settings, "catenate_all", false);
    // 1, causes "PowerShot" to be two tokens; ("Power-Shot" remains two parts regards)
    flags |= getFlag(SPLIT_ON_CASE_CHANGE, settings, "split_on_case_change", true);
    // If set, includes original words in subwords: "500-42" => "500" "42" "500-42"
    flags |= getFlag(PRESERVE_ORIGINAL, settings, "preserve_original", false);
    // 1, causes "j2se" to be three tokens; "j" "2" "se"
    flags |= getFlag(SPLIT_ON_NUMERICS, settings, "split_on_numerics", true);
    // If set, causes trailing "'s" to be removed for each subword: "O'Neil's" => "O", "Neil"
    flags |= getFlag(STEM_ENGLISH_POSSESSIVE, settings, "stem_english_possessive", true);
    // If not null, the set of tokens to protect from being delimited
    Set<?> protectedWords = Analysis.getWordSet(env, settings, "protected_words");
    this.protoWords = protectedWords == null ? null : CharArraySet.copy(protectedWords);
    this.flags = flags;
}
 
Example #16
Source File: NGramTokenizerFactory.java    From crate with Apache License 2.0
NGramTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
    super(indexSettings, name, settings);
    int maxAllowedNgramDiff = indexSettings.getMaxNgramDiff();
    this.minGram = settings.getAsInt("min_gram", NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE);
    this.maxGram = settings.getAsInt("max_gram", NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE);
    int ngramDiff = maxGram - minGram;
    if (ngramDiff > maxAllowedNgramDiff) {
        deprecationLogger.deprecated("Deprecated big difference between max_gram and min_gram in NGram Tokenizer,"
            + "expected difference must be less than or equal to: [" + maxAllowedNgramDiff + "]");
    }
    this.matcher = parseTokenChars(settings.getAsList("token_chars"));
}
 
Example #17
Source File: NaturalSortKeyAnalyzerProvider.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0
public NaturalSortKeyAnalyzerProvider(IndexSettings indexSettings, Environment environment, String name,
                                      Settings settings) {
    super(indexSettings, name, settings);
    this.collator = createCollator(settings);
    this.digits = settings.getAsInt("digits", 1);
    this.maxTokens = settings.getAsInt("maxTokens", 2);
    this.bufferSize = settings.getAsInt("bufferSize", KeywordTokenizer.DEFAULT_BUFFER_SIZE);
}
 
Example #18
Source File: HdfsRepository.java    From crate with Apache License 2.0
public HdfsRepository(RepositoryMetaData metadata, Environment environment,
                      NamedXContentRegistry namedXContentRegistry, ThreadPool threadPool) {
    super(metadata, environment.settings(), namedXContentRegistry, threadPool, BlobPath.cleanPath());

    this.environment = environment;
    this.chunkSize = metadata.settings().getAsBytesSize("chunk_size", null);

    String uriSetting = getMetadata().settings().get("uri");
    if (Strings.hasText(uriSetting) == false) {
        throw new IllegalArgumentException("No 'uri' defined for hdfs snapshot/restore");
    }
    uri = URI.create(uriSetting);
    if ("hdfs".equalsIgnoreCase(uri.getScheme()) == false) {
        throw new IllegalArgumentException(String.format(Locale.ROOT,
            "Invalid scheme [%s] specified in uri [%s]; only 'hdfs' uri allowed for hdfs snapshot/restore",
            uri.getScheme(), uriSetting));
    }
    if (Strings.hasLength(uri.getPath()) && uri.getPath().equals("/") == false) {
        throw new IllegalArgumentException(String.format(Locale.ROOT,
            "Use 'path' option to specify a path [%s], not the uri [%s] for hdfs snapshot/restore", uri.getPath(), uriSetting));
    }

    pathSetting = getMetadata().settings().get("path");
    if (pathSetting == null) {
        throw new IllegalArgumentException("No 'path' defined for hdfs snapshot/restore");
    }
}
 
Example #19
Source File: SynonymLoader.java    From elasticsearch-analysis-synonym with Apache License 2.0
public SynonymLoader(final Environment env, final Settings settings, final boolean expand, final Analyzer analyzer) {
    this.env = env;
    this.settings = settings;
    this.expand = expand;
    this.analyzer = analyzer;

    createSynonymMap(false);
}
 
Example #20
Source File: Bootstrap.java    From Elasticsearch with Apache License 2.0
private void setup(boolean addShutdownHook, Settings settings, Environment environment) throws Exception {
    initializeNatives(environment.tmpFile(),
                      settings.getAsBoolean("bootstrap.mlockall", false),
                      settings.getAsBoolean("bootstrap.seccomp", true),
                      settings.getAsBoolean("bootstrap.ctrlhandler", true));

    // initialize probes before the security manager is installed
    initializeProbes();

    if (addShutdownHook) {
        Runtime.getRuntime().addShutdownHook(new Thread() {
            @Override
            public void run() {
                if (node != null) {
                    node.close();
                }
            }
        });
    }

    // look for jar hell
    JarHell.checkJarHell();

    // install the SecurityManager after natives, shutdown hooks, etc.
    setupSecurity(settings, environment);

    // We do not need to reload system properties here as we have already applied them in building the settings and
    // reloading could cause multiple prompts to the user for values if a system property was specified with a prompt
    // placeholder
    Settings nodeSettings = Settings.settingsBuilder()
            .put(settings)
            .put(InternalSettingsPreparer.IGNORE_SYSTEM_PROPERTIES_SETTING, true)
            .build();

    NodeBuilder nodeBuilder = NodeBuilder.nodeBuilder().settings(nodeSettings);
    node = nodeBuilder.build();
}
 
Example #21
Source File: WordDelimiterGraphTokenFilterFactory.java    From crate with Apache License 2.0
public WordDelimiterGraphTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);

    // Sample Format for the type table:
    // $ => DIGIT
    // % => DIGIT
    // . => DIGIT
    // \u002C => DIGIT
    // \u200D => ALPHANUM
    List<String> charTypeTableValues = Analysis.getWordList(env, settings, "type_table");
    if (charTypeTableValues == null) {
        this.charTypeTable = WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE;
    } else {
        this.charTypeTable = parseTypes(charTypeTableValues);
    }
    int flags = 0;
    // If set, causes parts of words to be generated: "PowerShot" => "Power" "Shot"
    flags |= getFlag(GENERATE_WORD_PARTS, settings, "generate_word_parts", true);
    // If set, causes number subwords to be generated: "500-42" => "500" "42"
    flags |= getFlag(GENERATE_NUMBER_PARTS, settings, "generate_number_parts", true);
    // If set, causes maximum runs of word parts to be catenated: "wi-fi" => "wifi"
    flags |= getFlag(CATENATE_WORDS, settings, "catenate_words", false);
    // If set, causes maximum runs of number parts to be catenated: "500-42" => "50042"
    flags |= getFlag(CATENATE_NUMBERS, settings, "catenate_numbers", false);
    // If set, causes all subword parts to be catenated: "wi-fi-4000" => "wifi4000"
    flags |= getFlag(CATENATE_ALL, settings, "catenate_all", false);
    // 1, causes "PowerShot" to be two tokens; ("Power-Shot" remains two parts regards)
    flags |= getFlag(SPLIT_ON_CASE_CHANGE, settings, "split_on_case_change", true);
    // If set, includes original words in subwords: "500-42" => "500" "42" "500-42"
    flags |= getFlag(PRESERVE_ORIGINAL, settings, "preserve_original", false);
    // 1, causes "j2se" to be three tokens; "j" "2" "se"
    flags |= getFlag(SPLIT_ON_NUMERICS, settings, "split_on_numerics", true);
    // If set, causes trailing "'s" to be removed for each subword: "O'Neil's" => "O", "Neil"
    flags |= getFlag(STEM_ENGLISH_POSSESSIVE, settings, "stem_english_possessive", true);
    // If not null, the set of tokens to protect from being delimited
    Set<?> protectedWords = Analysis.getWordSet(env, settings, "protected_words");
    this.protoWords = protectedWords == null ? null : CharArraySet.copy(protectedWords);
    this.flags = flags;
    this.adjustOffsets = settings.getAsBoolean("adjust_offsets", true);
}
 
Example #22
Source File: BulgarianAnalyzerProvider.java    From Elasticsearch with Apache License 2.0
@Inject
public BulgarianAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);
    analyzer = new BulgarianAnalyzer(Analysis.parseStopWords(env, settings, BulgarianAnalyzer.getDefaultStopSet()),
                                     Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
    analyzer.setVersion(version);
}
 
Example #23
Source File: URLTokenFilterFactory.java    From elasticsearch-analysis-url with Apache License 2.0
public URLTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
    super(indexSettings, name, settings);

    this.parts = Arrays.stream(settings.getAsArray("part", new String[]{"whole"}))
            .map(URLPart::fromString)
            .collect(Collectors.toList());

    this.urlDecode = settings.getAsBoolean("url_decode", false);
    this.tokenizeHost = settings.getAsBoolean("tokenize_host", true);
    this.tokenizePath = settings.getAsBoolean("tokenize_path", true);
    this.tokenizeQuery = settings.getAsBoolean("tokenize_query", true);
    this.allowMalformed = settings.getAsBoolean("allow_malformed", false);
    this.tokenizeMalformed = settings.getAsBoolean("tokenize_malformed", false);
    this.passthrough = settings.getAsBoolean("passthrough", false);
}
 
Example #24
Source File: Security.java    From Elasticsearch with Apache License 2.0
/** Returns dynamic Permissions for the configured paths and bind ports. */
static Permissions createPermissions(Environment environment) throws IOException {
    Permissions policy = new Permissions();
    addClasspathPermissions(policy);
    addFilePermissions(policy, environment);
    addBindPermissions(policy, environment.settings());
    return policy;
}
 
Example #25
Source File: AbstractCompoundWordTokenFilterFactory.java    From crate with Apache License 2.0
protected AbstractCompoundWordTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);

    minWordSize = settings.getAsInt("min_word_size", CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE);
    minSubwordSize = settings.getAsInt("min_subword_size", CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE);
    maxSubwordSize = settings.getAsInt("max_subword_size", CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE);
    onlyLongestMatch = settings.getAsBoolean("only_longest_match", false);
    wordList = Analysis.getWordSet(env, settings, "word_list");
    if (wordList == null) {
        throw new IllegalArgumentException("word_list must be provided for [" + name + "], either as a path to a file, or directly");
    }
}
 
Example #26
Source File: IcuNormalizerCharFilterFactory.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0
public IcuNormalizerCharFilterFactory(IndexSettings indexSettings, Environment environment, String name,
                                      Settings settings) {
    super(indexSettings, name);
    Normalizer2 base = Normalizer2.getInstance(getNormalizationResource(settings),
            getNormalizationName(settings), getNormalizationMode(settings));
    String unicodeSetFilter = settings.get("unicode_set_filter");
    this.normalizer = unicodeSetFilter != null ?
            new FilteredNormalizer2(base, new UnicodeSet(unicodeSetFilter).freeze()) : base;
}
 
Example #27
Source File: IKAnalyzerProvider.java    From es-ik with Apache License 2.0
@Inject
public IKAnalyzerProvider(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettings, name, settings);

    loader = ServiceLoader.load(Configuration.class);
    Iterator<Configuration> iterator = loader.iterator();
    if (!iterator.hasNext()) {
        throw new NotFoundIKAnalyzerConfigurationImplementation();
    }
    analyzer = new IKAnalyzer(iterator.next().init(index, indexSettings, env, name, settings));
}
 
Example #28
Source File: MockNode.java    From crate with Apache License 2.0
private MockNode(
        final Environment environment,
        final Collection<Class<? extends Plugin>> classpathPlugins,
        final boolean forbidPrivateIndexSettings) {
    super(environment, classpathPlugins, forbidPrivateIndexSettings);
    this.classpathPlugins = classpathPlugins;
}
 
Example #29
Source File: BasqueAnalyzerProvider.java    From Elasticsearch with Apache License 2.0
@Inject
public BasqueAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);
    analyzer = new BasqueAnalyzer(Analysis.parseStopWords(env, settings, BasqueAnalyzer.getDefaultStopSet()),
                                  Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
    analyzer.setVersion(version);
}
 
Example #30
Source File: StandardHtmlStripAnalyzerProvider.java    From Elasticsearch with Apache License 2.0
@Inject
public StandardHtmlStripAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, Environment env,  @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);
    this.esVersion = Version.indexCreated(indexSettingsService.getSettings());
    final CharArraySet defaultStopwords;
    if (esVersion.onOrAfter(Version.V_1_0_0_RC1)) {
        defaultStopwords = CharArraySet.EMPTY_SET;
    } else {
        defaultStopwords = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
    }
    CharArraySet stopWords = Analysis.parseStopWords(env, settings, defaultStopwords);
    analyzer = new StandardHtmlStripAnalyzer(stopWords);
    analyzer.setVersion(version);
}