Java Code Examples for org.elasticsearch.common.settings.Settings#getAsArray()

The following examples show how to use org.elasticsearch.common.settings.Settings#getAsArray(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: URLTokenizerFactory.java    From elasticsearch-analysis-url with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the tokenizer factory from its settings: the optional list of URL parts stored
 * under "part", followed by the boolean switches read from the same settings object.
 */
public URLTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
    super(indexSettings, name, settings);

    final String[] configuredParts = settings.getAsArray("part");
    final boolean hasConfiguredParts = configuredParts != null && configuredParts.length > 0;
    if (hasConfiguredParts) {
        // Map every configured part name onto its URLPart value.
        this.parts = Arrays.stream(configuredParts)
                .map(URLPart::fromString)
                .collect(Collectors.toList());
    }

    // Boolean switches; the second argument is the value passed as the default.
    this.urlDecode = settings.getAsBoolean("url_decode", false);
    this.tokenizeHost = settings.getAsBoolean("tokenize_host", true);
    this.tokenizePath = settings.getAsBoolean("tokenize_path", true);
    this.tokenizeQuery = settings.getAsBoolean("tokenize_query", true);
    this.allowMalformed = settings.getAsBoolean("allow_malformed", false);
    this.tokenizeMalformed = settings.getAsBoolean("tokenize_malformed", false);
}
 
Example 2
Source File: OpenshiftRequestContextFactory.java    From openshift-elasticsearch-plugin with Apache License 2.0 6 votes vote down vote up
public OpenshiftRequestContextFactory(
        final Settings settings,
        final RequestUtils utils,
        final OpenshiftAPIService apiService,
        final ThreadContext threadContext){
    this.threadContext = threadContext;
    this.apiService = apiService;
    this.utils = utils;
    this.operationsProjects = settings.getAsArray(ConfigurationSettings.OPENSHIFT_CONFIG_OPS_PROJECTS,
            ConfigurationSettings.DEFAULT_OPENSHIFT_OPS_PROJECTS);
    this.kibanaPrefix = settings.get(ConfigurationSettings.KIBANA_CONFIG_INDEX_NAME,
            ConfigurationSettings.DEFAULT_USER_PROFILE_PREFIX);
    this.kibanaIndexMode = settings.get(ConfigurationSettings.OPENSHIFT_KIBANA_INDEX_MODE, UNIQUE);
    if (!ArrayUtils.contains(new String[] { UNIQUE, SHARED_OPS, SHARED_NON_OPS }, kibanaIndexMode.toLowerCase())) {
        this.kibanaIndexMode = UNIQUE;
    }
    LOGGER.info("Using kibanaIndexMode: '{}'", this.kibanaIndexMode);
    
    contextCache = CacheBuilder.newBuilder()
            .maximumSize(settings.getAsInt(ConfigurationSettings.OPENSHIFT_CONTEXT_CACHE_MAXSIZE, 
                    ConfigurationSettings.DEFAULT_OPENSHIFT_CONTEXT_CACHE_MAXSIZE))
            .expireAfterWrite(settings.getAsLong(ConfigurationSettings.OPENSHIFT_CONTEXT_CACHE_EXPIRE_SECONDS, 
                    ConfigurationSettings.DEFAULT_OPENSHIFT_CONTEXT_CACHE_EXPIRE_SECONDS), TimeUnit.SECONDS)
            .removalListener(this)
            .build(this);
}
 
Example 3
Source File: AnalysisSetting.java    From elasticsearch-analysis-lc-pinyin with Artistic License 2.0 6 votes vote down vote up
/**
 * Turns the configured index analysis modes into a bit mask of IndexAnalysisSetting flags.
 *
 * Recognised mode names (case-insensitive) are "chinese_char", "first_letter" and
 * "full_pinyin". When settings is null, or no recognised mode is present, all three
 * flags are returned.
 *
 * @param settings settings to read the modes from; may be null
 * @return the combined flag mask, never 0
 */
public static int parseIndexAnalysisSettings(Settings settings) {
    final int allModes = IndexAnalysisSetting.chinese_char | IndexAnalysisSetting.first_letter | IndexAnalysisSetting.full_pinyin;
    if (settings == null) {
        return allModes;
    }

    final String[] fallbackModes = new String[]{"chinese_char", "first_letter", "full_pinyin"};
    int mask = 0;
    for (String mode : settings.getAsArray(analysisMode, fallbackModes)) {
        // The three names are mutually exclusive, so at most one branch matches per item.
        if ("chinese_char".equalsIgnoreCase(mode)) {
            mask |= IndexAnalysisSetting.chinese_char;
        } else if ("first_letter".equalsIgnoreCase(mode)) {
            mask |= IndexAnalysisSetting.first_letter;
        } else if ("full_pinyin".equalsIgnoreCase(mode)) {
            mask |= IndexAnalysisSetting.full_pinyin;
        }
    }
    return mask == 0 ? allModes : mask;
}
 
Example 4
Source File: CJKBigramFilterFactory.java    From Elasticsearch with Apache License 2.0 6 votes vote down vote up
/**
 * Reads "output_unigrams" and computes the CJKBigramFilter script flags: the starting set is
 * han, hiragana, katakana and hangul, minus every script listed under "ignored_scripts".
 */
@Inject
public CJKBigramFilterFactory(Index index, IndexSettingsService indexSettingsService, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);
    outputUnigrams = settings.getAsBoolean("output_unigrams", false);

    final String[] ignoredScripts = settings.getAsArray("ignored_scripts");
    final Set<String> enabledScripts = new HashSet<>(Arrays.asList("han", "hiragana", "katakana", "hangul"));
    if (ignoredScripts != null) {
        enabledScripts.removeAll(Arrays.asList(ignoredScripts));
    }

    // OR together the flag of every script that is still enabled.
    int scriptFlags = 0;
    for (String enabled : enabledScripts) {
        switch (enabled) {
            case "han":
                scriptFlags |= CJKBigramFilter.HAN;
                break;
            case "hiragana":
                scriptFlags |= CJKBigramFilter.HIRAGANA;
                break;
            case "katakana":
                scriptFlags |= CJKBigramFilter.KATAKANA;
                break;
            case "hangul":
                scriptFlags |= CJKBigramFilter.HANGUL;
                break;
            default:
                break;
        }
    }
    this.flags = scriptFlags;
}
 
Example 5
Source File: KeepWordFilterFactory.java    From Elasticsearch with Apache License 2.0 6 votes vote down vote up
/**
 * Validates the keep-words configuration and loads the word set.
 *
 * Exactly one of KEEP_WORDS_KEY and KEEP_WORDS_PATH_KEY must be configured. On Lucene 4.4
 * or later, ENABLE_POS_INC_KEY must not be set and position increments are always enabled.
 *
 * @throws IllegalArgumentException if both or neither word sources are configured, or if
 *         ENABLE_POS_INC_KEY is set on Lucene 4.4 or later
 */
@Inject
public KeepWordFilterFactory(Index index, IndexSettingsService indexSettingsService,
                             Environment env, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);

    final String[] arrayKeepWords = settings.getAsArray(KEEP_WORDS_KEY, null);
    final String keepWordsPath = settings.get(KEEP_WORDS_PATH_KEY, null);
    final boolean hasInlineWords = arrayKeepWords != null;
    final boolean hasWordsPath = keepWordsPath != null;
    if (hasInlineWords == hasWordsPath) {
        // both present or both absent: neither is allowed
        throw new IllegalArgumentException("keep requires either `" + KEEP_WORDS_KEY + "` or `"
                + KEEP_WORDS_PATH_KEY + "` to be configured");
    }

    final boolean modernLucene = version.onOrAfter(Version.LUCENE_4_4);
    if (modernLucene && settings.get(ENABLE_POS_INC_KEY) != null) {
        throw new IllegalArgumentException(ENABLE_POS_INC_KEY + " is not supported anymore. Please fix your analysis chain or use"
                + " an older compatibility version (<=4.3) but beware that it might cause highlighting bugs.");
    }
    // The setting is only consulted for versions before Lucene 4.4.
    enablePositionIncrements = modernLucene || settings.getAsBoolean(ENABLE_POS_INC_KEY, true);

    this.keepWords = Analysis.getWordSet(env, settings, KEEP_WORDS_KEY);
}
 
Example 6
Source File: AwarenessAllocationDecider.java    From Elasticsearch with Apache License 2.0 6 votes vote down vote up
/**
 * Reads the awareness attributes and the forced-awareness groups from settings, then
 * registers an ApplySettings listener on the node settings service.
 *
 * A force group is recorded only when its "values" array is non-empty.
 */
@Inject
public AwarenessAllocationDecider(Settings settings, NodeSettingsService nodeSettingsService) {
    super(settings);
    this.awarenessAttributes = settings.getAsArray(CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTES);

    forcedAwarenessAttributes = Maps.newHashMap();
    final Map<String, Settings> groupsByAttribute = settings.getGroups(CLUSTER_ROUTING_ALLOCATION_AWARENESS_FORCE_GROUP);
    for (Map.Entry<String, Settings> group : groupsByAttribute.entrySet()) {
        final String[] forcedValues = group.getValue().getAsArray("values");
        if (forcedValues.length == 0) {
            continue;
        }
        forcedAwarenessAttributes.put(group.getKey(), forcedValues);
    }

    nodeSettingsService.addListener(new ApplySettings());
}
 
Example 7
Source File: Analysis.java    From Elasticsearch with Apache License 2.0 6 votes vote down vote up
/**
 * Resolves the stem exclusion set from the "stem_exclusion" setting.
 *
 * A plain string value of "_none_" yields the empty set; any other string value is split
 * as a comma-delimited list. Otherwise the setting is read as an array, and the supplied
 * default is returned when no array is configured either.
 *
 * @param settings             settings to read "stem_exclusion" from
 * @param defaultStemExclusion value returned when nothing is configured
 * @return the resolved exclusion set
 */
public static CharArraySet parseStemExclusion(Settings settings, CharArraySet defaultStemExclusion) {
    final String rawValue = settings.get("stem_exclusion");
    if ("_none_".equals(rawValue)) {
        return CharArraySet.EMPTY_SET;
    }
    if (rawValue != null) {
        // LUCENE 4 UPGRADE: Should be settings.getAsBoolean("stem_exclusion_case", false)?
        return new CharArraySet(Strings.commaDelimitedListToSet(rawValue), false);
    }

    final String[] listedWords = settings.getAsArray("stem_exclusion", null);
    // LUCENE 4 UPGRADE: Should be settings.getAsBoolean("stem_exclusion_case", false)?
    return listedWords == null
            ? defaultStemExclusion
            : new CharArraySet(Arrays.asList(listedWords), false);
}
 
Example 8
Source File: Analysis.java    From Elasticsearch with Apache License 2.0 6 votes vote down vote up
/**
 * Fetches a list of words from the given settings. The list is taken either from a file
 * whose path is configured under {@code settingPrefix + "_path"} (resolved against the
 * environment's config directory) or, when no path is configured, from the array stored
 * under {@code settingPrefix}.
 *
 * @param env           environment used to resolve the word list file
 * @param settings      settings holding the inline list or the file path
 * @param settingPrefix key of the inline list; the file path key is this value plus "_path"
 * @return the word list, or {@code null} when neither key is configured
 * @throws IllegalArgumentException
 *          If the configured word list file cannot be read; the underlying
 *          {@link IOException} is attached as the cause.
 */
public static List<String> getWordList(Environment env, Settings settings, String settingPrefix) {
    String wordListPath = settings.get(settingPrefix + "_path", null);

    if (wordListPath == null) {
        // No file configured: fall back to the inline array, if any.
        String[] explicitWordList = settings.getAsArray(settingPrefix, null);
        if (explicitWordList == null) {
            return null;
        } else {
            return Arrays.asList(explicitWordList);
        }
    }

    final Path wordListFile = env.configFile().resolve(wordListPath);

    try (BufferedReader reader = FileSystemUtils.newBufferedReader(wordListFile.toUri().toURL(), Charsets.UTF_8)) {
        return loadWordList(reader, "#");
    } catch (IOException ioe) {
        String message = String.format(Locale.ROOT, "IOException while reading %s_path: %s", settingPrefix, ioe.getMessage());
        // Keep the original exception as the cause so the stack trace is not lost.
        throw new IllegalArgumentException(message, ioe);
    }
}
 
Example 9
Source File: TransportService.java    From Elasticsearch with Apache License 2.0 6 votes vote down vote up
/**
 * Re-reads the tracer include/exclude patterns. Each value comes from the refreshed
 * settings, falling back to the service's own settings and then to the defaults. The
 * fields are updated, and the change logged, only when at least one array differs.
 */
@Override
public void onRefreshSettings(Settings settings) {
    final String[] fallbackInclude =
            TransportService.this.settings.getAsArray(SETTING_TRACE_LOG_INCLUDE, DEFAULT_TRACE_LOG_INCLUDE, true);
    final String[] refreshedInclude = settings.getAsArray(SETTING_TRACE_LOG_INCLUDE, fallbackInclude, true);
    final String[] fallbackExclude =
            TransportService.this.settings.getAsArray(SETTING_TRACE_LOG_EXCLUDE, DEFAULT_TRACE_LOG_EXCLUDE, true);
    final String[] refreshedExclude = settings.getAsArray(SETTING_TRACE_LOG_EXCLUDE, fallbackExclude, true);

    // Arrays.equals also returns true for identical references, so one check covers both cases.
    final boolean includeUnchanged = Arrays.equals(refreshedInclude, TransportService.this.tracerLogInclude);
    final boolean excludeUnchanged = Arrays.equals(refreshedExclude, TransportService.this.tracelLogExclude);
    if (includeUnchanged && excludeUnchanged) {
        return;
    }

    TransportService.this.tracerLogInclude = refreshedInclude;
    TransportService.this.tracelLogExclude = refreshedExclude;
    logger.info("tracer log updated to use include: {}, exclude: {}", refreshedInclude, refreshedExclude);
}
 
Example 10
Source File: HtmlStripCharFilterFactory.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the "escaped_tags" array; a non-empty array is stored as an immutable set, an
 * empty one leaves the field null.
 */
@Inject
public HtmlStripCharFilterFactory(Index index, IndexSettingsService indexSettingsService, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name);
    final String[] configuredTags = settings.getAsArray("escaped_tags");
    this.escapedTags = configuredTags.length > 0 ? ImmutableSet.copyOf(configuredTags) : null;
}
 
Example 11
Source File: AwarenessAllocationDecider.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Applies refreshed awareness settings to the enclosing decider.
 *
 * The awareness attributes come from the refreshed settings, falling back to the decider's
 * own settings; an explicit empty string resets them to an empty array. The forced
 * awareness map is rebuilt from the refreshed force groups, or from the decider's own
 * settings when the refreshed settings contain none.
 */
@Override
public void onRefreshSettings(Settings settings) {
    final AwarenessAllocationDecider decider = AwarenessAllocationDecider.this;

    String[] refreshedAttributes = settings.getAsArray(CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTES,
            decider.settings.getAsArray(CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTES));
    final boolean explicitlyCleared = "".equals(settings.get(CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTES, null));
    if (explicitlyCleared) {
        refreshedAttributes = Strings.EMPTY_ARRAY; // the empty string resets this
    }
    if (refreshedAttributes != null && !Arrays.equals(decider.awarenessAttributes, refreshedAttributes)) {
        logger.info("updating [cluster.routing.allocation.awareness.attributes] from [{}] to [{}]", decider.awarenessAttributes, refreshedAttributes);
        decider.awarenessAttributes = refreshedAttributes;
    }

    Map<String, Settings> groupsByAttribute = settings.getGroups(CLUSTER_ROUTING_ALLOCATION_AWARENESS_FORCE_GROUP);
    if (groupsByAttribute.isEmpty()) {
        // check initial values (from config file)
        groupsByAttribute = decider.settings.getGroups(CLUSTER_ROUTING_ALLOCATION_AWARENESS_FORCE_GROUP);
    }

    // Iterating an empty map is a no-op, so no separate emptiness check is needed.
    final Map<String, String[]> refreshedForced = new HashMap<>();
    for (Map.Entry<String, Settings> group : groupsByAttribute.entrySet()) {
        final String[] forcedValues = group.getValue().getAsArray("values");
        if (forcedValues.length == 0) {
            continue;
        }
        refreshedForced.put(group.getKey(), forcedValues);
    }
    decider.forcedAwarenessAttributes = refreshedForced;
}
 
Example 12
Source File: TribeService.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the tribe service from the "tribe" settings groups.
 *
 * Every group under "tribe" other than "blocks" and "on_conflict" is treated as one tribe
 * client: its settings are passed through buildClientSettings and a TribeClientNode is
 * created for it. When at least one such node exists, the initial no-master and
 * state-not-recovered blocks are removed from the cluster service, the optional write and
 * metadata blocks are added, the per-index block lists are read, and a
 * TribeClusterStateListener is registered on each node's ClusterService.
 */
@Inject
public TribeService(Settings settings, ClusterService clusterService, DiscoveryService discoveryService) {
    super(settings);
    this.clusterService = clusterService;
    Map<String, Settings> nodesSettings = Maps.newHashMap(settings.getGroups("tribe", true));
    nodesSettings.remove("blocks"); // remove prefix settings that don't indicate a client
    nodesSettings.remove("on_conflict"); // remove prefix settings that don't indicate a client
    for (Map.Entry<String, Settings> entry : nodesSettings.entrySet()) {
        Settings clientSettings = buildClientSettings(entry.getKey(), settings, entry.getValue());
        nodes.add(new TribeClientNode(clientSettings));
    }

    // The block index lists stay empty unless at least one tribe node is configured.
    String[] blockIndicesWrite = Strings.EMPTY_ARRAY;
    String[] blockIndicesRead = Strings.EMPTY_ARRAY;
    String[] blockIndicesMetadata = Strings.EMPTY_ARRAY;
    if (!nodes.isEmpty()) {
        // remove the initial election / recovery blocks since we are not going to have a
        // master elected in this single tribe  node local "cluster"
        clusterService.removeInitialStateBlock(discoveryService.getNoMasterBlock());
        clusterService.removeInitialStateBlock(GatewayService.STATE_NOT_RECOVERED_BLOCK);
        // Global write block, only when "tribe.blocks.write" is true.
        if (settings.getAsBoolean("tribe.blocks.write", false)) {
            clusterService.addInitialStateBlock(TRIBE_WRITE_BLOCK);
        }
        blockIndicesWrite = settings.getAsArray("tribe.blocks.write.indices", Strings.EMPTY_ARRAY);
        // Global metadata block, only when "tribe.blocks.metadata" is true.
        if (settings.getAsBoolean("tribe.blocks.metadata", false)) {
            clusterService.addInitialStateBlock(TRIBE_METADATA_BLOCK);
        }
        blockIndicesMetadata = settings.getAsArray("tribe.blocks.metadata.indices", Strings.EMPTY_ARRAY);
        blockIndicesRead = settings.getAsArray("tribe.blocks.read.indices", Strings.EMPTY_ARRAY);
        // Register a TribeClusterStateListener on each tribe node's ClusterService.
        for (Node node : nodes) {
            node.injector().getInstance(ClusterService.class).add(new TribeClusterStateListener(node));
        }
    }
    this.blockIndicesMetadata = blockIndicesMetadata;
    this.blockIndicesRead = blockIndicesRead;
    this.blockIndicesWrite = blockIndicesWrite;

    // ON_CONFLICT_ANY is passed as the default for "tribe.on_conflict".
    this.onConflict = settings.get("tribe.on_conflict", ON_CONFLICT_ANY);
}
 
Example 13
Source File: NettyTransport.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the BoundTransportAddress for a profile from its bound socket addresses.
 *
 * For the default profile the publish hosts come from "transport.netty.publish_host",
 * falling back to "transport.publish_host" and then "transport.host". For any other
 * profile they come from the profile's "publish_host", falling back to the host strings
 * of the bound addresses.
 *
 * @param name            profile name
 * @param profileSettings settings of that profile
 * @param boundAddresses  addresses the profile is bound to
 * @return the bound addresses together with the resolved publish address
 * @throws BindTransportException if the publish hosts cannot be resolved
 */
private BoundTransportAddress createBoundTransportAddress(String name, Settings profileSettings, List<InetSocketAddress> boundAddresses) {
    final int boundCount = boundAddresses.size();
    final String[] boundHostStrings = new String[boundCount];
    final TransportAddress[] boundTransportAddresses = new TransportAddress[boundCount];
    int slot = 0;
    for (InetSocketAddress bound : boundAddresses) {
        boundHostStrings[slot] = bound.getHostString();
        boundTransportAddresses[slot] = new InetSocketTransportAddress(bound);
        slot++;
    }

    final String[] publishHosts;
    if (DEFAULT_PROFILE.equals(name)) {
        // Least specific key is read first so that each lookup can use it as its fallback.
        final String[] hostFallback = settings.getAsArray("transport.host", null);
        final String[] transportFallback = settings.getAsArray("transport.publish_host", hostFallback);
        publishHosts = settings.getAsArray("transport.netty.publish_host", transportFallback);
    } else {
        publishHosts = profileSettings.getAsArray("publish_host", boundHostStrings);
    }

    final InetAddress publishInetAddress;
    try {
        publishInetAddress = networkService.resolvePublishHostAddresses(publishHosts);
    } catch (Exception e) {
        throw new BindTransportException("Failed to resolve publish address", e);
    }

    final int publishPort = resolvePublishPort(name, settings, profileSettings, boundAddresses, publishInetAddress);
    final InetSocketAddress publishSocketAddress = new InetSocketAddress(publishInetAddress, publishPort);
    return new BoundTransportAddress(boundTransportAddresses, new InetSocketTransportAddress(publishSocketAddress));
}
 
Example 14
Source File: TransportService.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the transport service: stores the transport and thread pool, reads the tracer
 * include/exclude patterns from settings, and creates the tracer logger, the adapter and
 * the task manager.
 */
@Inject
public TransportService(Settings settings, Transport transport, ThreadPool threadPool) {
    super(settings);
    this.transport = transport;
    this.threadPool = threadPool;
    // DEFAULT_TRACE_LOG_* are passed as defaults.
    // NOTE(review): the meaning of the trailing `true` flag is not visible here — confirm against Settings#getAsArray.
    this.tracerLogInclude = settings.getAsArray(SETTING_TRACE_LOG_INCLUDE, DEFAULT_TRACE_LOG_INCLUDE, true);
    this.tracelLogExclude = settings.getAsArray(SETTING_TRACE_LOG_EXCLUDE, DEFAULT_TRACE_LOG_EXCLUDE, true);
    // Dedicated logger obtained from this component's logger with the ".tracer" suffix.
    tracerLog = Loggers.getLogger(logger, ".tracer");
    // Factory methods are invoked last, after the fields above have been assigned.
    adapter = createAdapter();
    taskManager = createTaskManager();
}
 
Example 15
Source File: KeepTypesFilterFactory.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the mandatory list of token types to keep from KEEP_TYPES_KEY and stores it as a set.
 *
 * @throws IllegalArgumentException if KEEP_TYPES_KEY is not configured
 */
@Inject
public KeepTypesFilterFactory(Index index, IndexSettingsService indexSettingsService,
                             Environment env, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);

    final String[] configuredTypes = settings.getAsArray(KEEP_TYPES_KEY, null);
    if (configuredTypes == null) {
        throw new IllegalArgumentException("keep_types requires `" + KEEP_TYPES_KEY + "` to be configured");
    }
    this.keepTypes = new HashSet<>(Arrays.asList(configuredTypes));
}
 
Example 16
Source File: PatternCaptureGroupTokenFilterFactory.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Compiles every regular expression configured under PATTERNS_KEY and reads the
 * preserve-original flag from PRESERVE_ORIG_KEY (true is passed as its default).
 *
 * @throws IllegalArgumentException if PATTERNS_KEY is not configured
 */
public PatternCaptureGroupTokenFilterFactory(Index index, Settings indexSettings, String name, Settings settings) {
    super(index, indexSettings, name, settings);

    final String[] configuredRegexes = settings.getAsArray(PATTERNS_KEY, null, false);
    if (configuredRegexes == null) {
        throw new IllegalArgumentException("required setting '" + PATTERNS_KEY + "' is missing for token filter [" + name + "]");
    }

    // Compile in configuration order; the array keeps the same positions.
    final Pattern[] compiled = new Pattern[configuredRegexes.length];
    int next = 0;
    for (String regex : configuredRegexes) {
        compiled[next++] = Pattern.compile(regex);
    }
    patterns = compiled;

    preserveOriginal = settings.getAsBoolean(PRESERVE_ORIG_KEY, true);
}
 
Example 17
Source File: NettyHttpServerTransport.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the CORS configuration from settings.
 *
 * When SETTING_CORS_ENABLED is false, a disabled configuration is returned. Otherwise the
 * allowed origin is interpreted as: empty (no origins), ANY_ORIGIN (any origin), a regex
 * (pattern match) or an explicit list of origins. Credentials, request methods, max age
 * and request headers are then applied from their respective settings.
 *
 * @param settings settings holding the CORS options
 * @return the resulting CORS configuration
 */
private CorsConfig buildCorsConfig(Settings settings) {
    if (!settings.getAsBoolean(SETTING_CORS_ENABLED, false)) {
        return CorsConfigBuilder.forOrigins().disable().build();
    }

    final String allowedOrigin = settings.get(SETTING_CORS_ALLOW_ORIGIN);
    final CorsConfigBuilder builder;
    if (Strings.isNullOrEmpty(allowedOrigin)) {
        builder = CorsConfigBuilder.forOrigins();
    } else if (allowedOrigin.equals(ANY_ORIGIN)) {
        builder = CorsConfigBuilder.forAnyOrigin();
    } else {
        // A null pattern means the origin is not a regex and is treated as a list of origins.
        final Pattern originPattern = RestUtils.checkCorsSettingForRegex(allowedOrigin);
        builder = originPattern == null
                ? CorsConfigBuilder.forOrigins(RestUtils.corsSettingAsArray(allowedOrigin))
                : CorsConfigBuilder.forPattern(originPattern);
    }

    if (settings.getAsBoolean(SETTING_CORS_ALLOW_CREDENTIALS, false)) {
        builder.allowCredentials();
    }

    final String[] methodNames = settings.getAsArray(SETTING_CORS_ALLOW_METHODS, DEFAULT_CORS_METHODS);
    final HttpMethod[] allowedMethods = new HttpMethod[methodNames.length];
    int slot = 0;
    for (String methodName : methodNames) {
        allowedMethods[slot++] = HttpMethod.valueOf(methodName);
    }

    return builder.allowedRequestMethods(allowedMethods)
                  .maxAge(settings.getAsInt(SETTING_CORS_MAX_AGE, DEFAULT_CORS_MAX_AGE))
                  .allowedRequestHeaders(settings.getAsArray(SETTING_CORS_ALLOW_HEADERS, DEFAULT_CORS_HEADERS))
                  .shortCircuit()
                  .build();
}
 
Example 18
Source File: HanLpIndicesAnalysis.java    From elasticsearch-analysis-hanlp with Apache License 2.0 4 votes vote down vote up
/**
 * Loads the analyzer options, the tokenizer options and the global HanLP configuration
 * from the given settings.
 *
 * Most lookups pass the field's (or HanLP.Config's) current value as the second argument,
 * so an unset key leaves that value as it was. ShowTermNature and Normalization instead
 * pass the fixed values true and false. Thread counts below 1 are raised to 1.
 *
 * Note that this method writes to the static fields of HanLP.Config.
 */
private void initSettings(Settings settings) {
    // get analyzer settings
    analyzerIndexMode = settings.getAsBoolean(ANALYZER_CONFIG_PREFIX + INDEX_MODE, analyzerIndexMode);
    analyzerNameRecognize = settings.getAsBoolean(ANALYZER_CONFIG_PREFIX + NAME_RECOGNIZE, analyzerNameRecognize);
    analyzerTranslatedNameRecognize = settings.getAsBoolean(ANALYZER_CONFIG_PREFIX + TRANSLATED_NAME_RECOGNIZE, analyzerTranslatedNameRecognize);
    analyzerJapaneseNameRecognize = settings.getAsBoolean(ANALYZER_CONFIG_PREFIX + JAPANESE_NAME_RECOGNIZE, analyzerJapaneseNameRecognize);
    analyzerPlaceRecognize = settings.getAsBoolean(ANALYZER_CONFIG_PREFIX + PLACE_RECOGNIZE, analyzerPlaceRecognize);
    analyzerOrganizationRecognize = settings.getAsBoolean(ANALYZER_CONFIG_PREFIX + ORGANIZATION_RECOGNIZE, analyzerOrganizationRecognize);
    analyzerUseCustomDictionary = settings.getAsBoolean(ANALYZER_CONFIG_PREFIX + USE_CUSTOM_DICTIONARY, analyzerUseCustomDictionary);
    analyzerSpeechTagging = settings.getAsBoolean(ANALYZER_CONFIG_PREFIX + SPEECH_TAGGING, analyzerSpeechTagging);
    analyzerOffset = settings.getAsBoolean(ANALYZER_CONFIG_PREFIX + OFFSET, analyzerOffset);
    analyzerNumberQuantifierRecognize = settings.getAsBoolean(ANALYZER_CONFIG_PREFIX + NUMBER_QUANTIFIER_RECOGNIZE, analyzerNumberQuantifierRecognize);
    analyzerThreads = settings.getAsInt(ANALYZER_CONFIG_PREFIX + THREADS, analyzerThreads);

    // get tokenizer settings
    tokenizerIndexMode = settings.getAsBoolean(TOKENIZER_CONFIG_PREFIX + INDEX_MODE, tokenizerIndexMode);
    tokenizerNameRecognize = settings.getAsBoolean(TOKENIZER_CONFIG_PREFIX + NAME_RECOGNIZE, tokenizerNameRecognize);
    tokenizerTranslatedNameRecognize = settings.getAsBoolean(TOKENIZER_CONFIG_PREFIX + TRANSLATED_NAME_RECOGNIZE, tokenizerTranslatedNameRecognize);
    tokenizerJapaneseNameRecognize = settings.getAsBoolean(TOKENIZER_CONFIG_PREFIX + JAPANESE_NAME_RECOGNIZE, tokenizerJapaneseNameRecognize);
    tokenizerPlaceRecognize = settings.getAsBoolean(TOKENIZER_CONFIG_PREFIX + PLACE_RECOGNIZE, tokenizerPlaceRecognize);
    tokenizerOrganizationRecognize = settings.getAsBoolean(TOKENIZER_CONFIG_PREFIX + ORGANIZATION_RECOGNIZE, tokenizerOrganizationRecognize);
    tokenizerUseCustomDictionary = settings.getAsBoolean(TOKENIZER_CONFIG_PREFIX + USE_CUSTOM_DICTIONARY, tokenizerUseCustomDictionary);
    tokenizerSpeechTagging = settings.getAsBoolean(TOKENIZER_CONFIG_PREFIX + SPEECH_TAGGING, tokenizerSpeechTagging);
    tokenizerOffset = settings.getAsBoolean(TOKENIZER_CONFIG_PREFIX + OFFSET, tokenizerOffset);
    tokenizerNumberQuantifierRecognize = settings.getAsBoolean(TOKENIZER_CONFIG_PREFIX + NUMBER_QUANTIFIER_RECOGNIZE, tokenizerNumberQuantifierRecognize);
    tokenizerThreads = settings.getAsInt(TOKENIZER_CONFIG_PREFIX + THREADS, tokenizerThreads);

    // fix threads: clamp both thread counts to a minimum of 1
    if (analyzerThreads < 1) {
        analyzerThreads = 1;
    }
    if (tokenizerThreads < 1) {
        tokenizerThreads = 1;
    }

    // get global HanLP settings (these are static fields on HanLP.Config)
    HanLP.Config.CoreDictionaryPath =
        settings.get(CORE_DICTIONARY_PATH, HanLP.Config.CoreDictionaryPath);
    HanLP.Config.CoreDictionaryTransformMatrixDictionaryPath =
        settings.get(CORE_DICTIONARY_TRANSFORM_MATRIX_DICTIONARY_PATH, HanLP.Config.CoreDictionaryTransformMatrixDictionaryPath);
    HanLP.Config.BiGramDictionaryPath =
        settings.get(BI_GRAM_DICTIONARY_PATH, HanLP.Config.BiGramDictionaryPath);
    HanLP.Config.CoreStopWordDictionaryPath =
        settings.get(CORE_STOP_WORD_DICTIONARY_PATH, HanLP.Config.CoreStopWordDictionaryPath);
    HanLP.Config.CoreSynonymDictionaryDictionaryPath =
        settings.get(CORE_SYNONYM_DICTIONARY_DICTIONARY_PATH, HanLP.Config.CoreSynonymDictionaryDictionaryPath);
    HanLP.Config.PersonDictionaryPath =
        settings.get(PERSON_DICTIONARY_PATH, HanLP.Config.PersonDictionaryPath);
    HanLP.Config.PersonDictionaryTrPath =
        settings.get(PERSON_DICTIONARY_TR_PATH, HanLP.Config.PersonDictionaryTrPath);
    // The custom dictionary path is the only entry read as an array rather than a single string.
    HanLP.Config.CustomDictionaryPath =
        settings.getAsArray(CUSTOM_DICTIONARY_PATH, HanLP.Config.CustomDictionaryPath);
    HanLP.Config.TraditionalChineseDictionaryPath =
        settings.get(TRADITIONAL_CHINESE_DICTIONARY_PATH, HanLP.Config.TraditionalChineseDictionaryPath);
    HanLP.Config.SYTDictionaryPath =
        settings.get(SYT_DICTIONARY_PATH, HanLP.Config.SYTDictionaryPath);
    HanLP.Config.PinyinDictionaryPath =
        settings.get(PINYIN_DICTIONARY_PATH, HanLP.Config.PinyinDictionaryPath);
    HanLP.Config.TranslatedPersonDictionaryPath =
        settings.get(TRANSLATED_PERSON_DICTIONARY_PATH, HanLP.Config.TranslatedPersonDictionaryPath);
    HanLP.Config.JapanesePersonDictionaryPath =
        settings.get(JAPANESE_PERSON_DICTIONARY_PATH, HanLP.Config.JapanesePersonDictionaryPath);
    HanLP.Config.PlaceDictionaryPath =
        settings.get(PLACE_DICTIONARY_PATH, HanLP.Config.PlaceDictionaryPath);
    HanLP.Config.PlaceDictionaryTrPath =
        settings.get(PLACE_DICTIONARY_TR_PATH, HanLP.Config.PlaceDictionaryTrPath);
    HanLP.Config.OrganizationDictionaryPath =
        settings.get(ORGANIZATION_DICTIONARY_PATH, HanLP.Config.OrganizationDictionaryPath);
    HanLP.Config.OrganizationDictionaryTrPath =
        settings.get(ORGANIZATION_DICTIONARY_TR_PATH, HanLP.Config.OrganizationDictionaryTrPath);
    HanLP.Config.CharTypePath =
        settings.get(CHAR_TYPE_PATH, HanLP.Config.CharTypePath);
    HanLP.Config.CharTablePath =
        settings.get(CHAR_TABLE_PATH, HanLP.Config.CharTablePath);
    HanLP.Config.WordNatureModelPath =
        settings.get(WORD_NATURE_MODEL_PATH, HanLP.Config.WordNatureModelPath);
    HanLP.Config.MaxEntModelPath =
        settings.get(MAX_ENT_MODEL_PATH, HanLP.Config.MaxEntModelPath);
    HanLP.Config.CRFSegmentModelPath =
        settings.get(CRF_SEGMENT_MODEL_PATH, HanLP.Config.CRFSegmentModelPath);
    HanLP.Config.CRFDependencyModelPath =
        settings.get(CRF_DEPENDENCY_MODEL_PATH, HanLP.Config.CRFDependencyModelPath);
    HanLP.Config.HMMSegmentModelPath =
        settings.get(HMM_SEGMENT_MODEL_PATH, HanLP.Config.HMMSegmentModelPath);
    // Unlike the entries above, these two pass fixed values rather than the current config value.
    HanLP.Config.ShowTermNature = settings.getAsBoolean(SHOW_TERM_NATURE, true);
    HanLP.Config.Normalization = settings.getAsBoolean(NORMALIZATION, false);
}
 
Example 19
Source File: FulltextAnalyzerResolver.java    From Elasticsearch with Apache License 2.0 4 votes vote down vote up
/**
 * Collects every setting needed to apply the custom analyzer ``name`` on an index: the
 * analyzer's own settings plus those of each custom tokenizer, token-filter and
 * char-filter it references. Built-in components contribute no settings.
 *
 * @param name the name of the analyzer to resolve
 * @return Settings ready for inclusion into a CreateIndexRequest
 * @throws AnalyzerInvalidException if a referenced tokenizer, token-filter or char-filter
 *         is neither a known custom component nor a built-in one
 * @throws AnalyzerUnknownException if no custom analyzer with name ``name`` could be found
 */
public Settings resolveFullCustomAnalyzerSettings(String name) throws AnalyzerInvalidException {
    Settings.Builder builder = Settings.builder();
    final Settings analyzerSettings = getCustomAnalyzer(name);
    if (analyzerSettings == null) {
        throw new AnalyzerUnknownException(name);
    }
    builder.put(analyzerSettings);

    // Tokenizer: optional, at most one.
    final String tokenizerKey = String.format(Locale.ENGLISH, "index.analysis.analyzer.%s.tokenizer", name);
    final String tokenizerName = analyzerSettings.get(tokenizerKey);
    if (tokenizerName != null) {
        final Settings tokenizerSettings = getCustomTokenizer(tokenizerName);
        if (tokenizerSettings != null) {
            builder.put(tokenizerSettings);
        } else if (!hasBuiltInTokenizer(tokenizerName)) {
            throw new AnalyzerInvalidException(String.format(Locale.ENGLISH, "Invalid Analyzer: could not resolve tokenizer '%s'", tokenizerName));
        }
    }

    // Token filters, in configured order.
    final String tokenFilterKey = String.format(Locale.ENGLISH, "index.analysis.analyzer.%s.filter", name);
    for (String tokenFilterName : analyzerSettings.getAsArray(tokenFilterKey)) {
        final Settings tokenFilterSettings = getCustomTokenFilter(tokenFilterName);
        if (tokenFilterSettings != null) {
            builder.put(tokenFilterSettings);
        } else if (!hasBuiltInTokenFilter(tokenFilterName)) {
            throw new AnalyzerInvalidException(String.format(Locale.ENGLISH, "Invalid Analyzer: could not resolve token-filter '%s'", tokenFilterName));
        }
    }

    // Char filters, in configured order.
    final String charFilterKey = String.format(Locale.ENGLISH, "index.analysis.analyzer.%s.char_filter", name);
    for (String charFilterName : analyzerSettings.getAsArray(charFilterKey)) {
        final Settings charFilterSettings = getCustomCharFilter(charFilterName);
        if (charFilterSettings != null) {
            builder.put(charFilterSettings);
        } else if (!hasBuiltInCharFilter(charFilterName)) {
            throw new AnalyzerInvalidException(String.format(Locale.ENGLISH, "Invalid Analyzer: could not resolve char-filter '%s'", charFilterName));
        }
    }

    return builder.build();
}
 
Example 20
Source File: Environment.java    From Elasticsearch with Apache License 2.0 4 votes vote down vote up
/**
 * Resolves the node's directory layout from the given settings.
 *
 * "path.home" is mandatory. The config, plugins and logs directories default to
 * subdirectories of home, and scripts defaults to a subdirectory of config. Shared data
 * and the pid file are null when not configured. Each "path.data" entry may carry a value
 * after a '#', which is stored in dataPathLimits ("-1" when absent).
 *
 * @throws IllegalStateException if "path.home" is not configured
 */
public Environment(Settings settings) {
    this.settings = settings;

    if (settings.get("path.home") == null) {
        throw new IllegalStateException("path.home is not configured");
    }
    final Path homeFile = PathUtils.get(cleanPath(settings.get("path.home")));

    configFile = settings.get("path.conf") == null
            ? homeFile.resolve("config")
            : PathUtils.get(cleanPath(settings.get("path.conf")));

    scriptsFile = settings.get("path.scripts") == null
            ? configFile.resolve("scripts")
            : PathUtils.get(cleanPath(settings.get("path.scripts")));

    pluginsFile = settings.get("path.plugins") == null
            ? homeFile.resolve("plugins")
            : PathUtils.get(cleanPath(settings.get("path.plugins")));

    final String[] dataPaths = settings.getAsArray("path.data");
    if (dataPaths.length == 0) {
        // No data path configured: a single "data" directory under home, without a limit.
        dataFiles = new Path[]{homeFile.resolve("data")};
        dataWithClusterFiles = new Path[]{homeFile.resolve("data").resolve(ClusterName.clusterNameFromSettings(settings).value())};
        dataPathLimits = new String[]{"-1"};
    } else {
        dataFiles = new Path[dataPaths.length];
        dataWithClusterFiles = new Path[dataPaths.length];
        dataPathLimits = new String[dataPaths.length];
        for (int i = 0; i < dataPaths.length; i++) {
            // Entry format: <path>[#<limit>]
            final String[] pathAndLimit = dataPaths[i].split("#");
            final Path dataPath = PathUtils.get(pathAndLimit[0]);
            dataFiles[i] = dataPath;
            dataWithClusterFiles[i] = dataPath.resolve(ClusterName.clusterNameFromSettings(settings).value());
            dataPathLimits[i] = pathAndLimit.length > 1 ? pathAndLimit[1] : "-1";
        }
    }

    sharedDataFile = settings.get("path.shared_data") == null
            ? null
            : PathUtils.get(cleanPath(settings.get("path.shared_data")));

    // An empty "path.repo" simply produces an empty array.
    final String[] repoPaths = settings.getAsArray("path.repo");
    repoFiles = new Path[repoPaths.length];
    for (int i = 0; i < repoPaths.length; i++) {
        repoFiles[i] = PathUtils.get(repoPaths[i]);
    }

    logsFile = settings.get("path.logs") == null
            ? homeFile.resolve("logs")
            : PathUtils.get(cleanPath(settings.get("path.logs")));

    pidFile = settings.get("pidfile") == null
            ? null
            : PathUtils.get(cleanPath(settings.get("pidfile")));

    binFile = homeFile.resolve("bin");
    libFile = homeFile.resolve("lib");
    modulesFile = homeFile.resolve("modules");
}