org.elasticsearch.env.Environment Java Examples

The following examples show how to use org.elasticsearch.env.Environment. They are drawn from open-source projects; the source file, originating project, and license are listed above each example.
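
Most of the snippets below receive an Environment through the node's dependency injection rather than building one themselves. For experimentation, here is a minimal sketch of constructing one by hand, assuming a version of Elasticsearch that exposes the two-argument Environment constructor and the PATH_HOME_SETTING constant; the paths are placeholders:

import java.nio.file.Path;
import java.nio.file.Paths;

import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;

public class EnvironmentSketch {
    public static void main(String[] args) {
        // path.home is the one required setting; Environment derives the config,
        // data, and logs locations from it unless they are set explicitly.
        Path home = Paths.get("/tmp/es-home");
        Settings settings = Settings.builder()
            .put(Environment.PATH_HOME_SETTING.getKey(), home.toString())
            .build();

        // Pass an explicit config path, or null to fall back to ${path.home}/config.
        Environment env = new Environment(settings, home.resolve("config"));

        System.out.println(env.configFile()); // /tmp/es-home/config
    }
}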
Example #1
Source File: DelimitedPayloadTokenFilterFactory.java    From Elasticsearch with Apache License 2.0
@Inject
public DelimitedPayloadTokenFilterFactory(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name,
        @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);
    String delimiterConf = settings.get(DELIMITER);
    if (delimiterConf != null) {
        delimiter = delimiterConf.charAt(0);
    } else {
        delimiter = DEFAULT_DELIMITER;
    }
        
    String encoding = settings.get(ENCODING);
    if (encoding != null) {
        if ("float".equals(encoding)) {
            encoder = new FloatEncoder();
        } else if ("int".equals(encoding)) {
            encoder = new IntegerEncoder();
        } else if ("identity".equals(encoding)) {
            encoder = new IdentityEncoder();
        }
        // Any other value falls through and leaves the encoder unset.
    } else {
        encoder = DEFAULT_ENCODER;
    }
}
 
Example #2
Source File: SynonymTokenFilterFactory.java    From crate with Apache License 2.0
public SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env, AnalysisRegistry analysisRegistry,
                                  String name, Settings settings) throws IOException {
    super(indexSettings, name, settings);
    this.settings = settings;
    this.ignoreCase = settings.getAsBoolean("ignore_case", false);
    if (settings.get("ignore_case") != null) {
        deprecationLogger.deprecated(
            "The ignore_case option on the synonym_graph filter is deprecated. " +
                "Instead, insert a lowercase filter in the filter chain before the synonym_graph filter.");
    }

    this.expand = settings.getAsBoolean("expand", true);
    this.lenient = settings.getAsBoolean("lenient", false);
    this.format = settings.get("format", "");
    this.environment = env;
}
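
For context, a hypothetical Settings instance that would exercise the deprecation branch above; the keys come straight from the snippet:

Settings settings = Settings.builder()
    .put("ignore_case", true)   // explicitly present, so the deprecation warning fires
    .put("expand", true)
    .put("format", "solr")
    .build();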
 
Example #3
Source File: NodeSettingsTest.java    From crate with Apache License 2.0
@Before
public void doSetup() throws Exception {
    tmp.create();
    Path configPath = createConfigPath();
    Map<String, String> settings = new HashMap<>();
    settings.put("node.name", "node-test");
    settings.put("node.data", "true");
    settings.put(PATH_HOME_SETTING.getKey(), configPath.toString());
    // Avoid connecting to other test nodes
    settings.put("discovery.type", "single-node");

    Environment environment = InternalSettingsPreparer.prepareEnvironment(Settings.EMPTY, settings, configPath, () -> "node-test");
    node = new CrateNode(environment);
    node.start();
    sqlOperations = node.injector().getInstance(SQLOperations.class);
}
 
Example #4
Source File: ShingleTokenFilterFactory.java    From crate with Apache License 2.0
public ShingleTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    int maxAllowedShingleDiff = indexSettings.getMaxShingleDiff();
    Integer maxShingleSize = settings.getAsInt("max_shingle_size", ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE);
    Integer minShingleSize = settings.getAsInt("min_shingle_size", ShingleFilter.DEFAULT_MIN_SHINGLE_SIZE);
    Boolean outputUnigrams = settings.getAsBoolean("output_unigrams", true);
    Boolean outputUnigramsIfNoShingles = settings.getAsBoolean("output_unigrams_if_no_shingles", false);

    int shingleDiff = maxShingleSize - minShingleSize + (outputUnigrams ? 1 : 0);
    if (shingleDiff > maxAllowedShingleDiff) {
        deprecationLogger.deprecated("Deprecated big difference between maxShingleSize and minShingleSize in Shingle TokenFilter,"
            + "expected difference must be less than or equal to: [" + maxAllowedShingleDiff + "]");
    }
    String tokenSeparator = settings.get("token_separator", ShingleFilter.DEFAULT_TOKEN_SEPARATOR);
    String fillerToken = settings.get("filler_token", ShingleFilter.DEFAULT_FILLER_TOKEN);
    factory = new Factory("shingle", minShingleSize, maxShingleSize, outputUnigrams, outputUnigramsIfNoShingles, tokenSeparator, fillerToken);
}
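
As an illustration, a sketch of filter settings that stay within the shingle-diff limit, assuming the usual default of 3 for index.max_shingle_diff:

Settings settings = Settings.builder()
    .put("min_shingle_size", 2)
    .put("max_shingle_size", 4)   // diff = 4 - 2 + 1 (unigrams) = 3, at the limit
    .put("output_unigrams", true)
    .build();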
 
Example #5
Source File: AnalysisRegistry.java    From crate with Apache License 2.0
public AnalysisRegistry(Environment environment,
                        Map<String, AnalysisProvider<CharFilterFactory>> charFilters,
                        Map<String, AnalysisProvider<TokenFilterFactory>> tokenFilters,
                        Map<String, AnalysisProvider<TokenizerFactory>> tokenizers,
                        Map<String, AnalysisProvider<AnalyzerProvider<?>>> analyzers,
                        Map<String, AnalysisProvider<AnalyzerProvider<?>>> normalizers,
                        Map<String, PreConfiguredCharFilter> preConfiguredCharFilters,
                        Map<String, PreConfiguredTokenFilter> preConfiguredTokenFilters,
                        Map<String, PreConfiguredTokenizer> preConfiguredTokenizers,
                        Map<String, PreBuiltAnalyzerProviderFactory> preConfiguredAnalyzers) {
    this.environment = environment;
    this.charFilters = unmodifiableMap(charFilters);
    this.tokenFilters = unmodifiableMap(tokenFilters);
    this.tokenizers = unmodifiableMap(tokenizers);
    this.analyzers = unmodifiableMap(analyzers);
    this.normalizers = unmodifiableMap(normalizers);
    prebuiltAnalysis =
        new PrebuiltAnalysis(preConfiguredCharFilters, preConfiguredTokenFilters, preConfiguredTokenizers, preConfiguredAnalyzers);
}
 
Example #6
Source File: AzureRepository.java    From crate with Apache License 2.0
public AzureRepository(RepositoryMetaData metadata,
                       Environment environment,
                       NamedXContentRegistry namedXContentRegistry,
                       AzureStorageService storageService,
                       ThreadPool threadPool) {
    super(metadata, environment.settings(), namedXContentRegistry, threadPool, buildBasePath(metadata));
    this.chunkSize = Repository.CHUNK_SIZE_SETTING.get(metadata.settings());
    this.storageService = storageService;

    // If the user did not explicitly set a readonly value, derive it from the location mode:
    // with secondary_only, the repository must be read-only.
    final LocationMode locationMode = Repository.LOCATION_MODE_SETTING.get(metadata.settings());
    if (Repository.READONLY_SETTING.exists(metadata.settings())) {
        this.readonly = Repository.READONLY_SETTING.get(metadata.settings());
    } else {
        this.readonly = locationMode == LocationMode.SECONDARY_ONLY;
    }
}
 
Example #7
Source File: HyphenationCompoundWordTokenFilterFactory.java    From crate with Apache License 2.0
HyphenationCompoundWordTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, env, name, settings);

    String hyphenationPatternsPath = settings.get("hyphenation_patterns_path", null);
    if (hyphenationPatternsPath == null) {
        throw new IllegalArgumentException("hyphenation_patterns_path is a required setting.");
    }

    Path hyphenationPatternsFile = env.configFile().resolve(hyphenationPatternsPath);

    try (InputStream in = Files.newInputStream(hyphenationPatternsFile)) {
        // try-with-resources so the pattern file is closed once the tree is parsed
        hyphenationTree = HyphenationCompoundWordTokenFilter.getHyphenationTree(new InputSource(in));
    } catch (Exception e) {
        throw new IllegalArgumentException("Exception while reading hyphenation_patterns_path.", e);
    }
}
 
Example #8
Source File: Analysis.java    From Elasticsearch with Apache License 2.0
/**
 * @return a {@link Reader} for the file referenced by {@code settingPrefix},
 *         or <code>null</code> if no value is set for {@code settingPrefix}
 * @throws IllegalArgumentException
 *          if the Reader cannot be instantiated
 */
public static Reader getReaderFromFile(Environment env, Settings settings, String settingPrefix) {
    String filePath = settings.get(settingPrefix, null);

    if (filePath == null) {
        return null;
    }

    final Path path = env.configFile().resolve(filePath);

    try {
        return FileSystemUtils.newBufferedReader(path.toUri().toURL(), Charsets.UTF_8);
    } catch (IOException ioe) {
        String message = String.format(Locale.ROOT, "IOException while reading %s_path: %s", settingPrefix, ioe.getMessage());
        throw new IllegalArgumentException(message);
    }
}
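
A hypothetical call site; "synonyms_path" is illustrative, and any file-path setting key works the same way:

Reader reader = Analysis.getReaderFromFile(env, settings, "synonyms_path");
if (reader != null) {
    // the setting was present; parse the file contents here
}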
 
Example #9
Source File: CliTool.java    From Elasticsearch with Apache License 2.0
@Override
public ExitStatus execute(Settings settings, Environment env) throws Exception {
    if (msg != null) {
        if (status != ExitStatus.OK) {
            terminal.printError(msg);
        } else {
            terminal.println(msg);
        }
    }
    return status;
}
 
Example #10
Source File: SynonymLoader.java    From elasticsearch-analysis-synonym with Apache License 2.0
public SynonymLoader(final Environment env, final Settings settings, final boolean expand, final Analyzer analyzer) {
    this.env = env;
    this.settings = settings;
    this.expand = expand;
    this.analyzer = analyzer;

    createSynonymMap(false);
}
 
Example #11
Source File: OpenDistroSecuritySSLPlugin.java    From deprecated-security-ssl with Apache License 2.0
@Override
public Collection<Object> createComponents(Client localClient, ClusterService clusterService, ThreadPool threadPool,
        ResourceWatcherService resourceWatcherService, ScriptService scriptService, NamedXContentRegistry xContentRegistry,
        Environment environment, NodeEnvironment nodeEnvironment, NamedWriteableRegistry namedWriteableRegistry) {

    final List<Object> components = new ArrayList<>(1);
    
    if (client) {
        return components;
    }

    final String principalExtractorClass = settings.get(SSLConfigConstants.OPENDISTRO_SECURITY_SSL_TRANSPORT_PRINCIPAL_EXTRACTOR_CLASS, null);

    if (principalExtractorClass == null) {
        principalExtractor = new com.amazon.opendistroforelasticsearch.security.ssl.transport.DefaultPrincipalExtractor();
    } else {
        try {
            log.debug("Attempting to load and instantiate '{}'", principalExtractorClass);
            Class<?> principalExtractorClazz = Class.forName(principalExtractorClass);
            principalExtractor = (PrincipalExtractor) principalExtractorClazz.newInstance();
        } catch (Exception e) {
            log.error("Unable to load '{}'", principalExtractorClass, e);
            throw new ElasticsearchException(e);
        }
    }
    
    components.add(principalExtractor);
    
    return components;
}
 
Example #12
Source File: DetachClusterCommand.java    From crate with Apache License 2.0
@Override
protected void processNodePaths(Terminal terminal, Path[] dataPaths, Environment env) throws IOException {
    final Tuple<Manifest, MetaData> manifestMetaDataTuple = loadMetaData(terminal, dataPaths);
    final Manifest manifest = manifestMetaDataTuple.v1();
    final MetaData metaData = manifestMetaDataTuple.v2();

    confirm(terminal, CONFIRMATION_MSG);

    writeNewMetaData(terminal, manifest, updateCurrentTerm(), metaData, updateMetaData(metaData), dataPaths);

    terminal.println(NODE_DETACHED_MSG);
}
 
Example #13
Source File: ItalianAnalyzerProvider.java    From crate with Apache License 2.0
ItalianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    analyzer = new ItalianAnalyzer(
        Analysis.parseStopWords(env, settings, ItalianAnalyzer.getDefaultStopSet()),
        Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)
    );
    analyzer.setVersion(version);
}
 
Example #14
Source File: ArmenianAnalyzerProvider.java    From crate with Apache License 2.0
ArmenianAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    analyzer = new ArmenianAnalyzer(
        Analysis.parseStopWords(env, settings, ArmenianAnalyzer.getDefaultStopSet()),
        Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET)
    );
    analyzer.setVersion(version);
}
 
Example #15
Source File: FsRepository.java    From crate with Apache License 2.0
/**
 * Constructs a shared file system repository.
 */
public FsRepository(RepositoryMetaData metadata, Environment environment, NamedXContentRegistry namedXContentRegistry,
                    ThreadPool threadPool) {
    super(metadata, environment.settings(), namedXContentRegistry, threadPool, BlobPath.cleanPath());
    this.environment = environment;
    String location = REPOSITORIES_LOCATION_SETTING.get(metadata.settings());
    if (location.isEmpty()) {
        LOGGER.warn("the repository location is missing, it should point to a shared file system location"
            + " that is available on all master and data nodes");
        throw new RepositoryException(metadata.name(), "missing location");
    }
    Path locationFile = environment.resolveRepoFile(location);
    if (locationFile == null) {
        if (environment.repoFiles().length > 0) {
            LOGGER.warn("The specified location [{}] doesn't start with any "
                + "repository paths specified by the path.repo setting: [{}] ", location, environment.repoFiles());
            throw new RepositoryException(metadata.name(), "location [" + location
                + "] doesn't match any of the locations specified by path.repo");
        } else {
            LOGGER.warn("The specified location [{}] should start with a repository path specified by"
                + " the path.repo setting, but the path.repo setting was not set on this node", location);
            throw new RepositoryException(metadata.name(), "location [" + location
                + "] doesn't match any of the locations specified by path.repo because this setting is empty");
        }
    }

    if (CHUNK_SIZE_SETTING.exists(metadata.settings())) {
        this.chunkSize = CHUNK_SIZE_SETTING.get(metadata.settings());
    } else {
        this.chunkSize = REPOSITORIES_CHUNK_SIZE_SETTING.get(environment.settings());
    }
    this.basePath = BlobPath.cleanPath();
}
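
A sketch of repository settings that would satisfy the checks above, assuming every master and data node is started with a matching path.repo entry:

// elasticsearch.yml on each node: path.repo: ["/mnt/backups"]
Settings repoSettings = Settings.builder()
    .put("location", "/mnt/backups/my_fs_repo") // must resolve under a path.repo entry
    .put("chunk_size", "1g")                    // optional; otherwise the node-level default applies
    .build();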
 
Example #16
Source File: NaturalSortKeyAnalyzerProvider.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0
public NaturalSortKeyAnalyzerProvider(IndexSettings indexSettings, Environment environment, String name,
                                      Settings settings) {
    super(indexSettings, name, settings);
    this.collator = createCollator(settings);
    this.digits = settings.getAsInt("digits", 1);
    this.maxTokens = settings.getAsInt("maxTokens", 2);
    this.bufferSize = settings.getAsInt("bufferSize", KeywordTokenizer.DEFAULT_BUFFER_SIZE);
}
 
Example #17
Source File: JiebaAnalyzerProvider.java    From elasticsearch-jieba-plugin with MIT License
public JiebaAnalyzerProvider(IndexSettings indexSettings,
                            Environment environment,
                            String name,
                            Settings settings,
                            JiebaSegmenter.SegMode mode) {
  super(indexSettings, name, settings);
  if (null != mode) {
    jiebaAnalyzer = new JiebaAnalyzer(mode.name());
  } else {
    jiebaAnalyzer = new JiebaAnalyzer(settings.get("segMode", JiebaSegmenter.SegMode.SEARCH.name()));
  }

  JiebaDict.init(environment);
}
 
Example #18
Source File: NGramTokenizerFactory.java    From crate with Apache License 2.0
NGramTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
    super(indexSettings, name, settings);
    int maxAllowedNgramDiff = indexSettings.getMaxNgramDiff();
    this.minGram = settings.getAsInt("min_gram", NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE);
    this.maxGram = settings.getAsInt("max_gram", NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE);
    int ngramDiff = maxGram - minGram;
    if (ngramDiff > maxAllowedNgramDiff) {
        deprecationLogger.deprecated("Deprecated big difference between max_gram and min_gram in NGram Tokenizer,"
            + "expected difference must be less than or equal to: [" + maxAllowedNgramDiff + "]");
    }
    this.matcher = parseTokenChars(settings.getAsList("token_chars"));
}
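
For reference, a hypothetical configuration that would trigger the warning above, assuming the usual default of 1 for index.max_ngram_diff:

Settings settings = Settings.builder()
    .put("min_gram", 2)
    .put("max_gram", 10)   // diff = 8 exceeds max_ngram_diff, so the warning fires
    .putList("token_chars", "letter", "digit")
    .build();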
 
Example #19
Source File: KeepWordFilterFactory.java    From crate with Apache License 2.0
KeepWordFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);

    final List<String> arrayKeepWords = settings.getAsList(KEEP_WORDS_KEY, null);
    final String keepWordsPath = settings.get(KEEP_WORDS_PATH_KEY, null);
    if ((arrayKeepWords == null && keepWordsPath == null) || (arrayKeepWords != null && keepWordsPath != null)) {
        // we don't allow both or none
        throw new IllegalArgumentException("keep requires either `" + KEEP_WORDS_KEY + "` or `"
                + KEEP_WORDS_PATH_KEY + "` to be configured");
    }
    if (settings.get(ENABLE_POS_INC_KEY) != null) {
        throw new IllegalArgumentException(ENABLE_POS_INC_KEY + " is not supported anymore. Please fix your analysis chain");
    }
    this.keepWords = Analysis.getWordSet(env, settings, KEEP_WORDS_KEY);
}
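
A sketch of the two mutually exclusive ways to configure the filter, assuming the conventional key names behind KEEP_WORDS_KEY and KEEP_WORDS_PATH_KEY; exactly one must be present:

// Inline word list:
Settings inline = Settings.builder()
    .putList("keep_words", "alpha", "beta")
    .build();

// Or a file resolved against the config directory (hypothetical path):
Settings fromFile = Settings.builder()
    .put("keep_words_path", "analysis/keep_words.txt")
    .build();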
 
Example #20
Source File: JiebaTokenizerFactory.java    From elasticsearch-jieba-plugin with MIT License
public static TokenizerFactory getJiebaSearchTokenizerFactory(IndexSettings indexSettings,
                                                              Environment environment,
                                                              String s,
                                                              Settings settings) {
  JiebaTokenizerFactory jiebaTokenizerFactory = new JiebaTokenizerFactory(indexSettings,
      environment,
      settings);
  jiebaTokenizerFactory.setSegMode(JiebaSegmenter.SegMode.SEARCH.name());
  return jiebaTokenizerFactory;
}
 
Example #21
Source File: EnglishAnalyzerProvider.java    From Elasticsearch with Apache License 2.0
@Inject
public EnglishAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);
    analyzer = new EnglishAnalyzer(Analysis.parseStopWords(env, settings, EnglishAnalyzer.getDefaultStopSet()),
                                   Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
    analyzer.setVersion(version);
}
 
Example #22
Source File: AzureDiscoveryPlugin.java    From crate with Apache License 2.0
@Override
public Collection<Object> createComponents(Client client,
                                           ClusterService clusterService,
                                           ThreadPool threadPool,
                                           NamedXContentRegistry xContentRegistry,
                                           Environment environment,
                                           NodeEnvironment nodeEnvironment,
                                           NamedWriteableRegistry namedWriteableRegistry) {
    if (AzureConfiguration.isDiscoveryReady(settings, logger)) {
        return Collections.singletonList(azureComputeService());
    }
    return Collections.emptyList();
}
 
Example #23
Source File: WordDelimiterTokenFilterFactory.java    From crate with Apache License 2.0
public WordDelimiterTokenFilterFactory(IndexSettings indexSettings, Environment env,
        String name, Settings settings) {
    super(indexSettings, name, settings);

    // Sample Format for the type table:
    // $ => DIGIT
    // % => DIGIT
    // . => DIGIT
    // \u002C => DIGIT
    // \u200D => ALPHANUM
    List<String> charTypeTableValues = Analysis.getWordList(env, settings, "type_table");
    if (charTypeTableValues == null) {
        this.charTypeTable = WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE;
    } else {
        this.charTypeTable = parseTypes(charTypeTableValues);
    }
    int flags = 0;
    // If set, causes parts of words to be generated: "PowerShot" => "Power" "Shot"
    flags |= getFlag(GENERATE_WORD_PARTS, settings, "generate_word_parts", true);
    // If set, causes number subwords to be generated: "500-42" => "500" "42"
    flags |= getFlag(GENERATE_NUMBER_PARTS, settings, "generate_number_parts", true);
    // If set, causes maximum runs of word parts to be catenated: "wi-fi" => "wifi"
    flags |= getFlag(CATENATE_WORDS, settings, "catenate_words", false);
    // If set, causes maximum runs of number parts to be catenated: "500-42" => "50042"
    flags |= getFlag(CATENATE_NUMBERS, settings, "catenate_numbers", false);
    // If set, causes all subword parts to be catenated: "wi-fi-4000" => "wifi4000"
    flags |= getFlag(CATENATE_ALL, settings, "catenate_all", false);
    // 1, causes "PowerShot" to be two tokens; ("Power-Shot" remains two parts regards)
    flags |= getFlag(SPLIT_ON_CASE_CHANGE, settings, "split_on_case_change", true);
    // If set, includes original words in subwords: "500-42" => "500" "42" "500-42"
    flags |= getFlag(PRESERVE_ORIGINAL, settings, "preserve_original", false);
    // 1, causes "j2se" to be three tokens; "j" "2" "se"
    flags |= getFlag(SPLIT_ON_NUMERICS, settings, "split_on_numerics", true);
    // If set, causes trailing "'s" to be removed for each subword: "O'Neil's" => "O", "Neil"
    flags |= getFlag(STEM_ENGLISH_POSSESSIVE, settings, "stem_english_possessive", true);
    // If not null, the set of tokens to protect from being delimited
    Set<?> protectedWords = Analysis.getWordSet(env, settings, "protected_words");
    this.protoWords = protectedWords == null ? null : CharArraySet.copy(protectedWords);
    this.flags = flags;
}
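
Tying the flags together, a hypothetical filter configuration using the type_table format shown in the comments above:

Settings settings = Settings.builder()
    .put("catenate_words", true)            // "wi-fi" => "wifi"
    .put("preserve_original", true)         // keep "500-42" alongside "500" "42"
    .putList("type_table", "$ => DIGIT", "\\u200D => ALPHANUM")
    .putList("protected_words", "wi-fi")    // never delimited
    .build();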
 
Example #24
Source File: Analysis.java    From crate with Apache License 2.0
public static CharArraySet getWordSet(Environment env, Settings settings, String settingsPrefix) {
    List<String> wordList = getWordList(env, settings, settingsPrefix);
    if (wordList == null) {
        return null;
    }
    boolean ignoreCase =
        settings.getAsBoolean(settingsPrefix + "_case", false);
    return new CharArraySet(wordList, ignoreCase);
}
 
Example #25
Source File: SoraniAnalyzerProvider.java    From Elasticsearch with Apache License 2.0
@Inject
public SoraniAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);
    analyzer = new SoraniAnalyzer(Analysis.parseStopWords(env, settings, SoraniAnalyzer.getDefaultStopSet()),
                                  Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
    analyzer.setVersion(version);
}
 
Example #26
Source File: RepositoriesModule.java    From crate with Apache License 2.0
public RepositoriesModule(Environment env,
                          List<RepositoryPlugin> repoPlugins,
                          NamedXContentRegistry namedXContentRegistry,
                          ThreadPool threadPool) {
    Map<String, Repository.Factory> factories = new HashMap<>();
    factories.put(FsRepository.TYPE, new Repository.Factory() {

        @Override
        public TypeSettings settings() {
            return new TypeSettings(FsRepository.mandatorySettings(), FsRepository.optionalSettings());
        }

        @Override
        public Repository create(RepositoryMetaData metadata) throws Exception {
            return new FsRepository(metadata, env, namedXContentRegistry, threadPool);
        }
    });
    for (RepositoryPlugin repoPlugin : repoPlugins) {
        Map<String, Repository.Factory> newRepoTypes = repoPlugin.getRepositories(env, namedXContentRegistry, threadPool);
        for (Map.Entry<String, Repository.Factory> entry : newRepoTypes.entrySet()) {
            if (factories.put(entry.getKey(), entry.getValue()) != null) {
                throw new IllegalArgumentException("Repository type [" + entry.getKey() + "] is already registered");
            }
        }
    }
    repositoryTypes = Collections.unmodifiableMap(factories);
}
 
Example #27
Source File: PatternReplaceCharFilterFactory.java    From crate with Apache License 2.0
PatternReplaceCharFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name);

    String sPattern = settings.get("pattern");
    if (!Strings.hasLength(sPattern)) {
        throw new IllegalArgumentException("pattern is missing for [" + name + "] char filter of type 'pattern_replace'");
    }
    pattern = Regex.compile(sPattern, settings.get("flags"));
    replacement = settings.get("replacement", ""); // defaults to the empty string when not set
}
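
A hypothetical configuration for this char filter; only pattern is required:

Settings settings = Settings.builder()
    .put("pattern", "\\d+")           // required; a missing or empty pattern throws above
    .put("replacement", "#")          // defaults to ""
    .put("flags", "CASE_INSENSITIVE") // parsed by Regex.compile
    .build();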
 
Example #28
Source File: WordDelimiterTokenFilterFactory.java    From Elasticsearch with Apache License 2.0
@Inject
public WordDelimiterTokenFilterFactory(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);

    // Sample Format for the type table:
    // $ => DIGIT
    // % => DIGIT
    // . => DIGIT
    // \u002C => DIGIT
    // \u200D => ALPHANUM
    List<String> charTypeTableValues = Analysis.getWordList(env, settings, "type_table");
    if (charTypeTableValues == null) {
        this.charTypeTable = WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE;
    } else {
        this.charTypeTable = parseTypes(charTypeTableValues);
    }
    int flags = 0;
    // If set, causes parts of words to be generated: "PowerShot" => "Power" "Shot"
    flags |= getFlag(GENERATE_WORD_PARTS, settings, "generate_word_parts", true);
    // If set, causes number subwords to be generated: "500-42" => "500" "42"
    flags |= getFlag(GENERATE_NUMBER_PARTS, settings, "generate_number_parts", true);
    // If set, causes maximum runs of word parts to be catenated: "wi-fi" => "wifi"
    flags |= getFlag(CATENATE_WORDS, settings, "catenate_words", false);
    // If set, causes maximum runs of number parts to be catenated: "500-42" => "50042"
    flags |= getFlag(CATENATE_NUMBERS, settings, "catenate_numbers", false);
    // If set, causes all subword parts to be catenated: "wi-fi-4000" => "wifi4000"
    flags |= getFlag(CATENATE_ALL, settings, "catenate_all", false);
    // 1, causes "PowerShot" to be two tokens; ("Power-Shot" remains two parts regards)
    flags |= getFlag(SPLIT_ON_CASE_CHANGE, settings, "split_on_case_change", true);
    // If set, includes original words in subwords: "500-42" => "500" "42" "500-42"
    flags |= getFlag(PRESERVE_ORIGINAL, settings, "preserve_original", false);
    // 1, causes "j2se" to be three tokens; "j" "2" "se"
    flags |= getFlag(SPLIT_ON_NUMERICS, settings, "split_on_numerics", true);
    // If set, causes trailing "'s" to be removed for each subword: "O'Neil's" => "O", "Neil"
    flags |= getFlag(STEM_ENGLISH_POSSESSIVE, settings, "stem_english_possessive", true);
    // If not null, the set of tokens to protect from being delimited
    Set<?> protectedWords = Analysis.getWordSet(env, settings, "protected_words");
    this.protoWords = protectedWords == null ? null : CharArraySet.copy(protectedWords);
    this.flags = flags;
}
 
Example #29
Source File: HdfsRepository.java    From crate with Apache License 2.0
public HdfsRepository(RepositoryMetaData metadata, Environment environment,
                      NamedXContentRegistry namedXContentRegistry, ThreadPool threadPool) {
    super(metadata, environment.settings(), namedXContentRegistry, threadPool, BlobPath.cleanPath());

    this.environment = environment;
    this.chunkSize = metadata.settings().getAsBytesSize("chunk_size", null);

    String uriSetting = getMetadata().settings().get("uri");
    if (Strings.hasText(uriSetting) == false) {
        throw new IllegalArgumentException("No 'uri' defined for hdfs snapshot/restore");
    }
    uri = URI.create(uriSetting);
    if ("hdfs".equalsIgnoreCase(uri.getScheme()) == false) {
        throw new IllegalArgumentException(String.format(Locale.ROOT,
            "Invalid scheme [%s] specified in uri [%s]; only 'hdfs' uri allowed for hdfs snapshot/restore",
            uri.getScheme(), uriSetting));
    }
    if (Strings.hasLength(uri.getPath()) && uri.getPath().equals("/") == false) {
        throw new IllegalArgumentException(String.format(Locale.ROOT,
            "Use 'path' option to specify a path [%s], not the uri [%s] for hdfs snapshot/restore", uri.getPath(), uriSetting));
    }

    pathSetting = getMetadata().settings().get("path");
    if (pathSetting == null) {
        throw new IllegalArgumentException("No 'path' defined for hdfs snapshot/restore");
    }
}
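
Given the validation above, a sketch of repository settings that would pass; host and path are placeholders:

Settings repoSettings = Settings.builder()
    .put("uri", "hdfs://namenode.example.com:8020") // scheme must be "hdfs", with no path component
    .put("path", "backups/cluster1")                // required, and supplied separately from the uri
    .put("chunk_size", "512mb")                     // optional
    .build();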
 
Example #30
Source File: IrishAnalyzerProvider.java    From Elasticsearch with Apache License 2.0
@Inject
public IrishAnalyzerProvider(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);
    analyzer = new IrishAnalyzer(Analysis.parseStopWords(env, settings, IrishAnalyzer.getDefaultStopSet()),
                                 Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET));
    analyzer.setVersion(version);
}