Java Code Examples for org.elasticsearch.common.settings.Settings#getAsArray()

The following examples show how to use org.elasticsearch.common.settings.Settings#getAsArray(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example 1

Source File: URLTokenizerFactory.java From elasticsearch-analysis-url with Apache License 2.0

6 votes

/**
 * Configures the tokenizer factory from the given tokenizer settings.
 *
 * The optional {@code part} array is converted element by element through
 * {@code URLPart.fromString}; the remaining options are plain boolean switches
 * with the defaults spelled out below.
 */
public URLTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
    super(indexSettings, name, settings);

    final String[] configuredParts = settings.getAsArray("part");
    final boolean anyPartConfigured = configuredParts != null && configuredParts.length > 0;
    if (anyPartConfigured) {
        // Leave the field untouched unless at least one part was configured.
        this.parts = Arrays.stream(configuredParts)
                .map(rawPart -> URLPart.fromString(rawPart))
                .collect(Collectors.toList());
    }

    // Decoding and malformed-URL handling are off unless explicitly enabled.
    this.urlDecode = settings.getAsBoolean("url_decode", false);

    // Host, path and query tokenization are on unless explicitly disabled.
    this.tokenizeHost = settings.getAsBoolean("tokenize_host", true);
    this.tokenizePath = settings.getAsBoolean("tokenize_path", true);
    this.tokenizeQuery = settings.getAsBoolean("tokenize_query", true);

    this.allowMalformed = settings.getAsBoolean("allow_malformed", false);
    this.tokenizeMalformed = settings.getAsBoolean("tokenize_malformed", false);
}

Example 2

Source File: OpenshiftRequestContextFactory.java From openshift-elasticsearch-plugin with Apache License 2.0

6 votes

public OpenshiftRequestContextFactory(
        final Settings settings,
        final RequestUtils utils,
        final OpenshiftAPIService apiService,
        final ThreadContext threadContext){
    this.threadContext = threadContext;
    this.apiService = apiService;
    this.utils = utils;
    this.operationsProjects = settings.getAsArray(ConfigurationSettings.OPENSHIFT_CONFIG_OPS_PROJECTS,
            ConfigurationSettings.DEFAULT_OPENSHIFT_OPS_PROJECTS);
    this.kibanaPrefix = settings.get(ConfigurationSettings.KIBANA_CONFIG_INDEX_NAME,
            ConfigurationSettings.DEFAULT_USER_PROFILE_PREFIX);
    this.kibanaIndexMode = settings.get(ConfigurationSettings.OPENSHIFT_KIBANA_INDEX_MODE, UNIQUE);
    if (!ArrayUtils.contains(new String[] { UNIQUE, SHARED_OPS, SHARED_NON_OPS }, kibanaIndexMode.toLowerCase())) {
        this.kibanaIndexMode = UNIQUE;
    }
    LOGGER.info("Using kibanaIndexMode: '{}'", this.kibanaIndexMode);
    
    contextCache = CacheBuilder.newBuilder()
            .maximumSize(settings.getAsInt(ConfigurationSettings.OPENSHIFT_CONTEXT_CACHE_MAXSIZE, 
                    ConfigurationSettings.DEFAULT_OPENSHIFT_CONTEXT_CACHE_MAXSIZE))
            .expireAfterWrite(settings.getAsLong(ConfigurationSettings.OPENSHIFT_CONTEXT_CACHE_EXPIRE_SECONDS, 
                    ConfigurationSettings.DEFAULT_OPENSHIFT_CONTEXT_CACHE_EXPIRE_SECONDS), TimeUnit.SECONDS)
            .removalListener(this)
            .build(this);
}

Example 3

Source File: AnalysisSetting.java From elasticsearch-analysis-lc-pinyin with Artistic License 2.0

6 votes

/**
 * Converts the configured index analysis modes into a bit mask of
 * {@code IndexAnalysisSetting} flags.
 *
 * Mode names are matched case-insensitively and unknown names are ignored.
 * When {@code settings} is {@code null}, or no recognised mode is present,
 * all three flags are returned.
 *
 * @param settings the analysis settings, may be {@code null}
 * @return the combined flag mask, never {@code 0}
 */
public static int parseIndexAnalysisSettings(Settings settings) {
    final int allModes = IndexAnalysisSetting.chinese_char | IndexAnalysisSetting.first_letter | IndexAnalysisSetting.full_pinyin;
    if (settings == null) {
        return allModes;
    }

    final String[] fallbackModes = {"chinese_char", "first_letter", "full_pinyin"};
    int mask = 0;
    for (String mode : settings.getAsArray(analysisMode, fallbackModes)) {
        // The three names are distinct, so at most one branch can match per item.
        if ("chinese_char".equalsIgnoreCase(mode)) {
            mask |= IndexAnalysisSetting.chinese_char;
        } else if ("first_letter".equalsIgnoreCase(mode)) {
            mask |= IndexAnalysisSetting.first_letter;
        } else if ("full_pinyin".equalsIgnoreCase(mode)) {
            mask |= IndexAnalysisSetting.full_pinyin;
        }
    }
    return mask == 0 ? allModes : mask;
}

Example 4

Source File: CJKBigramFilterFactory.java From Elasticsearch with Apache License 2.0

6 votes

/**
 * Builds the factory from the filter settings.
 *
 * Bigram flags are enabled for han, hiragana, katakana and hangul, minus any
 * script listed under {@code ignored_scripts}.
 */
@Inject
public CJKBigramFilterFactory(Index index, IndexSettingsService indexSettingsService, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);
    outputUnigrams = settings.getAsBoolean("output_unigrams", false);

    // Start with every supported script, then drop the ones the user ignores.
    final String[] ignoredScripts = settings.getAsArray("ignored_scripts");
    final Set<String> enabledScripts = new HashSet<>(Arrays.asList("han", "hiragana", "katakana", "hangul"));
    if (ignoredScripts != null) {
        enabledScripts.removeAll(Arrays.asList(ignoredScripts));
    }

    int scriptFlags = 0;
    if (enabledScripts.contains("han")) {
        scriptFlags |= CJKBigramFilter.HAN;
    }
    if (enabledScripts.contains("hiragana")) {
        scriptFlags |= CJKBigramFilter.HIRAGANA;
    }
    if (enabledScripts.contains("katakana")) {
        scriptFlags |= CJKBigramFilter.KATAKANA;
    }
    if (enabledScripts.contains("hangul")) {
        scriptFlags |= CJKBigramFilter.HANGUL;
    }
    this.flags = scriptFlags;
}

Example 5

Source File: KeepWordFilterFactory.java From Elasticsearch with Apache License 2.0

6 votes

/**
 * Builds the keep-words filter factory.
 *
 * Exactly one of {@code KEEP_WORDS_KEY} (inline list) or
 * {@code KEEP_WORDS_PATH_KEY} (file path) must be configured.
 *
 * @throws IllegalArgumentException if both or neither word source is configured,
 *         or if {@code ENABLE_POS_INC_KEY} is set while the version is on or
 *         after Lucene 4.4
 */
@Inject
public KeepWordFilterFactory(Index index, IndexSettingsService indexSettingsService,
                             Environment env, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);

    final boolean hasInlineWords = settings.getAsArray(KEEP_WORDS_KEY, null) != null;
    final boolean hasWordsPath = settings.get(KEEP_WORDS_PATH_KEY, null) != null;
    if (hasInlineWords == hasWordsPath) {
        // both present or both absent: exactly one source is required
        throw new IllegalArgumentException("keep requires either `" + KEEP_WORDS_KEY + "` or `"
                + KEEP_WORDS_PATH_KEY + "` to be configured");
    }

    final boolean onOrAfterLucene44 = version.onOrAfter(Version.LUCENE_4_4);
    if (onOrAfterLucene44 && settings.get(ENABLE_POS_INC_KEY) != null) {
        throw new IllegalArgumentException(ENABLE_POS_INC_KEY + " is not supported anymore. Please fix your analysis chain or use"
                + " an older compatibility version (<=4.3) but beware that it might cause highlighting bugs.");
    }
    // From 4.4 on the flag is always true; older versions read the setting.
    enablePositionIncrements = onOrAfterLucene44 || settings.getAsBoolean(ENABLE_POS_INC_KEY, true);

    this.keepWords = Analysis.getWordSet(env, settings, KEEP_WORDS_KEY);
}

Example 6

Source File: AwarenessAllocationDecider.java From Elasticsearch with Apache License 2.0

6 votes

/**
 * Reads the awareness attributes and the forced-awareness value groups from
 * the node settings, then registers an {@code ApplySettings} listener with
 * the given settings service.
 */
@Inject
public AwarenessAllocationDecider(Settings settings, NodeSettingsService nodeSettingsService) {
    super(settings);
    this.awarenessAttributes = settings.getAsArray(CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTES);

    forcedAwarenessAttributes = Maps.newHashMap();
    for (Map.Entry<String, Settings> group
            : settings.getGroups(CLUSTER_ROUTING_ALLOCATION_AWARENESS_FORCE_GROUP).entrySet()) {
        final String[] forcedValues = group.getValue().getAsArray("values");
        if (forcedValues.length == 0) {
            // groups without any values are not recorded
            continue;
        }
        forcedAwarenessAttributes.put(group.getKey(), forcedValues);
    }

    nodeSettingsService.addListener(new ApplySettings());
}

Example 7

Source File: Analysis.java From Elasticsearch with Apache License 2.0

6 votes

/**
 * Resolves the stem exclusion set from the {@code stem_exclusion} setting.
 *
 * A plain string value wins: {@code _none_} yields the empty set, anything else
 * is split as a comma-delimited list. Otherwise the array form of the setting
 * is used, and if that is absent too the supplied default is returned.
 *
 * @param settings             the analyzer settings
 * @param defaultStemExclusion value returned when nothing is configured
 * @return the configured exclusion set, or {@code defaultStemExclusion}
 */
public static CharArraySet parseStemExclusion(Settings settings, CharArraySet defaultStemExclusion) {
    final String rawValue = settings.get("stem_exclusion");
    if (rawValue != null) {
        // LUCENE 4 UPGRADE: Should be settings.getAsBoolean("stem_exclusion_case", false)?
        return "_none_".equals(rawValue)
                ? CharArraySet.EMPTY_SET
                : new CharArraySet(Strings.commaDelimitedListToSet(rawValue), false);
    }

    final String[] exclusionArray = settings.getAsArray("stem_exclusion", null);
    if (exclusionArray == null) {
        return defaultStemExclusion;
    }
    // LUCENE 4 UPGRADE: Should be settings.getAsBoolean("stem_exclusion_case", false)?
    return new CharArraySet(Arrays.asList(exclusionArray), false);
}

Example 8

Source File: Analysis.java From Elasticsearch with Apache License 2.0

6 votes

/**
 * Fetches a list of words from the specified settings. The list is read either from the key
 * {@code settingPrefix} (an inline array) or from the file named by {@code settingPrefix + "_path"},
 * which is resolved against the environment's config directory. The path setting takes precedence
 * when both are present.
 *
 * @param env           environment used to resolve the word list file
 * @param settings      settings holding the inline list or the file path
 * @param settingPrefix key of the inline list; {@code _path} is appended for the file variant
 * @return the word list, or {@code null} when neither key is configured
 * @throws IllegalArgumentException
 *          If the word list file cannot be read; the underlying {@link IOException} is attached as the cause.
 */
public static List<String> getWordList(Environment env, Settings settings, String settingPrefix) {
    String wordListPath = settings.get(settingPrefix + "_path", null);

    if (wordListPath == null) {
        String[] explicitWordList = settings.getAsArray(settingPrefix, null);
        if (explicitWordList == null) {
            return null;
        } else {
            return Arrays.asList(explicitWordList);
        }
    }

    final Path wordListFile = env.configFile().resolve(wordListPath);

    try (BufferedReader reader = FileSystemUtils.newBufferedReader(wordListFile.toUri().toURL(), Charsets.UTF_8)) {
        // NOTE(review): "#" is presumably the comment prefix understood by loadWordList; confirm there.
        return loadWordList(reader, "#");
    } catch (IOException ioe) {
        String message = String.format(Locale.ROOT, "IOException while reading %s_path: %s", settingPrefix, ioe.getMessage());
        // Keep the original exception as the cause so the stack trace of the I/O failure is not lost.
        throw new IllegalArgumentException(message, ioe);
    }
}

Example 9

Source File: TransportService.java From Elasticsearch with Apache License 2.0

6 votes

/**
 * Re-reads the tracer include/exclude patterns when settings are refreshed.
 *
 * Each list is taken from the refreshed settings, falling back to the
 * service's own settings and then to the built-in default. The fields are
 * updated (and a log line is written) only if at least one list changed.
 */
@Override
public void onRefreshSettings(Settings settings) {
    final String[] fallbackInclude =
            TransportService.this.settings.getAsArray(SETTING_TRACE_LOG_INCLUDE, DEFAULT_TRACE_LOG_INCLUDE, true);
    final String[] updatedInclude = settings.getAsArray(SETTING_TRACE_LOG_INCLUDE, fallbackInclude, true);

    final String[] fallbackExclude =
            TransportService.this.settings.getAsArray(SETTING_TRACE_LOG_EXCLUDE, DEFAULT_TRACE_LOG_EXCLUDE, true);
    final String[] updatedExclude = settings.getAsArray(SETTING_TRACE_LOG_EXCLUDE, fallbackExclude, true);

    // Arrays.equals also returns true for identical references, so one check covers both cases.
    final boolean includeUnchanged = Arrays.equals(updatedInclude, TransportService.this.tracerLogInclude);
    final boolean excludeUnchanged = Arrays.equals(updatedExclude, TransportService.this.tracelLogExclude);
    if (includeUnchanged && excludeUnchanged) {
        return;
    }

    TransportService.this.tracerLogInclude = updatedInclude;
    TransportService.this.tracelLogExclude = updatedExclude;
    logger.info("tracer log updated to use include: {}, exclude: {}", updatedInclude, updatedExclude);
}

Example 10

Source File: HtmlStripCharFilterFactory.java From Elasticsearch with Apache License 2.0

5 votes

/**
 * Builds the factory; {@code escaped_tags} becomes an immutable set, or
 * {@code null} when no tags are configured.
 */
@Inject
public HtmlStripCharFilterFactory(Index index, IndexSettingsService indexSettingsService, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name);
    final String[] configuredTags = settings.getAsArray("escaped_tags");
    // An empty configuration is stored as null rather than as an empty set.
    this.escapedTags = configuredTags.length == 0 ? null : ImmutableSet.copyOf(configuredTags);
}

Example 11

Source File: AwarenessAllocationDecider.java From Elasticsearch with Apache License 2.0

5 votes

/**
 * Applies refreshed awareness settings to the enclosing decider.
 *
 * The attribute list comes from the refreshed settings, falling back to the
 * decider's own settings; an explicit empty string clears it. The
 * forced-awareness map is always rebuilt, from the refreshed groups if any
 * exist and otherwise from the decider's own settings.
 */
@Override
public void onRefreshSettings(Settings settings) {
    final String[] configuredAttributes = settings.getAsArray(CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTES,
            AwarenessAllocationDecider.this.settings.getAsArray(CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTES));
    // the empty string resets this
    final boolean resetRequested = "".equals(settings.get(CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTES, null));
    final String[] refreshedAttributes = resetRequested ? Strings.EMPTY_ARRAY : configuredAttributes;

    if (refreshedAttributes != null
            && !Arrays.equals(AwarenessAllocationDecider.this.awarenessAttributes, refreshedAttributes)) {
        logger.info("updating [cluster.routing.allocation.awareness.attributes] from [{}] to [{}]", AwarenessAllocationDecider.this.awarenessAttributes, refreshedAttributes);
        AwarenessAllocationDecider.this.awarenessAttributes = refreshedAttributes;
    }

    final Map<String, String[]> rebuiltForced = new HashMap<>();
    Map<String, Settings> groups = settings.getGroups(CLUSTER_ROUTING_ALLOCATION_AWARENESS_FORCE_GROUP);
    if (groups.isEmpty()) {
        // check initial values (from config file)
        groups = AwarenessAllocationDecider.this.settings.getGroups(CLUSTER_ROUTING_ALLOCATION_AWARENESS_FORCE_GROUP);
    }
    // Iterating an empty map is a no-op, so no separate emptiness guard is needed.
    for (Map.Entry<String, Settings> group : groups.entrySet()) {
        final String[] forcedValues = group.getValue().getAsArray("values");
        if (forcedValues.length == 0) {
            continue;
        }
        rebuiltForced.put(group.getKey(), forcedValues);
    }
    AwarenessAllocationDecider.this.forcedAwarenessAttributes = rebuiltForced;
}

Example 12

Source File: TribeService.java From Elasticsearch with Apache License 2.0

5 votes

/**
 * Creates the tribe service from the node settings.
 *
 * One {@code TribeClientNode} is created for every settings group under the
 * {@code tribe} prefix, except the {@code blocks} and {@code on_conflict}
 * groups. When at least one such node exists, the initial no-master and
 * state-not-recovered blocks are removed, optional tribe write/metadata blocks
 * are added, the per-index block lists are read, and a
 * {@code TribeClusterStateListener} is registered on each node's cluster service.
 * The per-index block lists stay empty when no tribe node is configured.
 */
@Inject
public TribeService(Settings settings, ClusterService clusterService, DiscoveryService discoveryService) {
    super(settings);
    this.clusterService = clusterService;
    // Copy the groups into a new map before removing entries from it.
    Map<String, Settings> nodesSettings = Maps.newHashMap(settings.getGroups("tribe", true));
    nodesSettings.remove("blocks"); // remove prefix settings that don't indicate a client
    nodesSettings.remove("on_conflict"); // remove prefix settings that don't indicate a client
    for (Map.Entry<String, Settings> entry : nodesSettings.entrySet()) {
        Settings clientSettings = buildClientSettings(entry.getKey(), settings, entry.getValue());
        nodes.add(new TribeClientNode(clientSettings));
    }

    // Defaults used when no tribe client node is configured.
    String[] blockIndicesWrite = Strings.EMPTY_ARRAY;
    String[] blockIndicesRead = Strings.EMPTY_ARRAY;
    String[] blockIndicesMetadata = Strings.EMPTY_ARRAY;
    if (!nodes.isEmpty()) {
        // remove the initial election / recovery blocks since we are not going to have a
        // master elected in this single tribe  node local "cluster"
        clusterService.removeInitialStateBlock(discoveryService.getNoMasterBlock());
        clusterService.removeInitialStateBlock(GatewayService.STATE_NOT_RECOVERED_BLOCK);
        if (settings.getAsBoolean("tribe.blocks.write", false)) {
            clusterService.addInitialStateBlock(TRIBE_WRITE_BLOCK);
        }
        blockIndicesWrite = settings.getAsArray("tribe.blocks.write.indices", Strings.EMPTY_ARRAY);
        if (settings.getAsBoolean("tribe.blocks.metadata", false)) {
            clusterService.addInitialStateBlock(TRIBE_METADATA_BLOCK);
        }
        blockIndicesMetadata = settings.getAsArray("tribe.blocks.metadata.indices", Strings.EMPTY_ARRAY);
        blockIndicesRead = settings.getAsArray("tribe.blocks.read.indices", Strings.EMPTY_ARRAY);
        // Register one cluster state listener per tribe client node.
        for (Node node : nodes) {
            node.injector().getInstance(ClusterService.class).add(new TribeClusterStateListener(node));
        }
    }
    this.blockIndicesMetadata = blockIndicesMetadata;
    this.blockIndicesRead = blockIndicesRead;
    this.blockIndicesWrite = blockIndicesWrite;

    // Conflict resolution mode; ON_CONFLICT_ANY is used when the setting is absent.
    this.onConflict = settings.get("tribe.on_conflict", ON_CONFLICT_ANY);
}

Example 13

Source File: NettyTransport.java From Elasticsearch with Apache License 2.0

5 votes

/**
 * Builds the {@code BoundTransportAddress} for a profile from its bound socket addresses.
 *
 * For the default profile the publish hosts are looked up in
 * {@code transport.netty.publish_host}, then {@code transport.publish_host},
 * then {@code transport.host}. Other profiles use their own
 * {@code publish_host}, falling back to the bound host strings.
 *
 * @param name            the profile name
 * @param profileSettings settings of that profile
 * @param boundAddresses  the socket addresses the profile is bound to
 * @return the bound addresses together with the resolved publish address
 * @throws BindTransportException if the publish hosts cannot be resolved
 */
private BoundTransportAddress createBoundTransportAddress(String name, Settings profileSettings, List<InetSocketAddress> boundAddresses) {
    final int boundCount = boundAddresses.size();
    final String[] boundHostStrings = new String[boundCount];
    final TransportAddress[] boundTransportAddresses = new TransportAddress[boundCount];
    int slot = 0;
    for (InetSocketAddress bound : boundAddresses) {
        boundHostStrings[slot] = bound.getHostString();
        boundTransportAddresses[slot] = new InetSocketTransportAddress(bound);
        slot++;
    }

    final String[] publishHosts;
    if (!DEFAULT_PROFILE.equals(name)) {
        publishHosts = profileSettings.getAsArray("publish_host", boundHostStrings);
    } else {
        // Resolve the fallback chain innermost-first: transport.host, transport.publish_host, netty key.
        final String[] transportHost = settings.getAsArray("transport.host", null);
        final String[] transportPublishHost = settings.getAsArray("transport.publish_host", transportHost);
        publishHosts = settings.getAsArray("transport.netty.publish_host", transportPublishHost);
    }

    final InetAddress publishInetAddress;
    try {
        publishInetAddress = networkService.resolvePublishHostAddresses(publishHosts);
    } catch (Exception e) {
        throw new BindTransportException("Failed to resolve publish address", e);
    }

    final int publishPort = resolvePublishPort(name, settings, profileSettings, boundAddresses, publishInetAddress);
    final InetSocketAddress publishSocketAddress = new InetSocketAddress(publishInetAddress, publishPort);
    return new BoundTransportAddress(boundTransportAddresses, new InetSocketTransportAddress(publishSocketAddress));
}

Example 14

Source File: TransportService.java From Elasticsearch with Apache License 2.0

5 votes

/**
 * Creates the transport service.
 *
 * Stores the transport and thread pool, reads the tracer include/exclude
 * pattern lists from the settings (falling back to the built-in defaults),
 * and creates the tracer logger, the adapter and the task manager.
 */
@Inject
public TransportService(Settings settings, Transport transport, ThreadPool threadPool) {
    super(settings);
    this.transport = transport;
    this.threadPool = threadPool;
    // NOTE(review): the meaning of the trailing `true` flag is defined by Settings#getAsArray; confirm there.
    this.tracerLogInclude = settings.getAsArray(SETTING_TRACE_LOG_INCLUDE, DEFAULT_TRACE_LOG_INCLUDE, true);
    this.tracelLogExclude = settings.getAsArray(SETTING_TRACE_LOG_EXCLUDE, DEFAULT_TRACE_LOG_EXCLUDE, true);
    // Tracer logger is created from this service's logger with the ".tracer" suffix argument.
    tracerLog = Loggers.getLogger(logger, ".tracer");
    // Created last, after the fields above have been assigned.
    adapter = createAdapter();
    taskManager = createTaskManager();
}

Example 15

Source File: KeepTypesFilterFactory.java From Elasticsearch with Apache License 2.0

5 votes

/**
 * Builds the keep-types filter factory from the mandatory {@code KEEP_TYPES_KEY} array.
 *
 * @throws IllegalArgumentException if {@code KEEP_TYPES_KEY} is not configured
 */
@Inject
public KeepTypesFilterFactory(Index index, IndexSettingsService indexSettingsService,
                             Environment env, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name, settings);

    final String[] configuredTypes = settings.getAsArray(KEEP_TYPES_KEY, null);
    if (null == configuredTypes) {
        final String problem = "keep_types requires `" + KEEP_TYPES_KEY + "` to be configured";
        throw new IllegalArgumentException(problem);
    }

    // Duplicates in the configured array collapse in the set.
    this.keepTypes = new HashSet<>(Arrays.asList(configuredTypes));
}

Example 16

Source File: PatternCaptureGroupTokenFilterFactory.java From Elasticsearch with Apache License 2.0

5 votes

/**
 * Builds the factory by compiling every regular expression listed under {@code PATTERNS_KEY}.
 *
 * @throws IllegalArgumentException if {@code PATTERNS_KEY} is not configured
 */
public PatternCaptureGroupTokenFilterFactory(Index index, Settings indexSettings, String name, Settings settings) {
    super(index, indexSettings, name, settings);

    final String[] patternSources = settings.getAsArray(PATTERNS_KEY, null, false);
    if (patternSources == null) {
        throw new IllegalArgumentException("required setting '" + PATTERNS_KEY + "' is missing for token filter [" + name + "]");
    }

    // Compile into a local array first, in configuration order, then publish it to the field.
    final Pattern[] compiled = new Pattern[patternSources.length];
    int next = 0;
    for (String source : patternSources) {
        compiled[next++] = Pattern.compile(source);
    }
    patterns = compiled;

    preserveOriginal = settings.getAsBoolean(PRESERVE_ORIG_KEY, true);
}

Example 17

Source File: NettyHttpServerTransport.java From Elasticsearch with Apache License 2.0

5 votes

/**
 * Builds the CORS configuration from the HTTP settings.
 *
 * Returns a disabled configuration unless {@code SETTING_CORS_ENABLED} is true.
 * Otherwise the allowed origin is interpreted as: empty (no origins),
 * {@code ANY_ORIGIN}, a regular expression, or an explicit list of origins.
 *
 * @param settings the node settings
 * @return the resulting CORS configuration
 */
private CorsConfig buildCorsConfig(Settings settings) {
    if (!settings.getAsBoolean(SETTING_CORS_ENABLED, false)) {
        return CorsConfigBuilder.forOrigins().disable().build();
    }

    final String allowedOrigin = settings.get(SETTING_CORS_ALLOW_ORIGIN);
    final CorsConfigBuilder corsBuilder;
    if (Strings.isNullOrEmpty(allowedOrigin)) {
        corsBuilder = CorsConfigBuilder.forOrigins();
    } else if (allowedOrigin.equals(ANY_ORIGIN)) {
        corsBuilder = CorsConfigBuilder.forAnyOrigin();
    } else {
        // A non-null pattern means the origin setting was written as a regular expression.
        final Pattern originPattern = RestUtils.checkCorsSettingForRegex(allowedOrigin);
        corsBuilder = originPattern != null
                ? CorsConfigBuilder.forPattern(originPattern)
                : CorsConfigBuilder.forOrigins(RestUtils.corsSettingAsArray(allowedOrigin));
    }

    if (settings.getAsBoolean(SETTING_CORS_ALLOW_CREDENTIALS, false)) {
        corsBuilder.allowCredentials();
    }

    final String[] methodNames = settings.getAsArray(SETTING_CORS_ALLOW_METHODS, DEFAULT_CORS_METHODS);
    final HttpMethod[] allowedMethods = new HttpMethod[methodNames.length];
    int slot = 0;
    for (String methodName : methodNames) {
        allowedMethods[slot++] = HttpMethod.valueOf(methodName);
    }

    return corsBuilder.allowedRequestMethods(allowedMethods)
                      .maxAge(settings.getAsInt(SETTING_CORS_MAX_AGE, DEFAULT_CORS_MAX_AGE))
                      .allowedRequestHeaders(settings.getAsArray(SETTING_CORS_ALLOW_HEADERS, DEFAULT_CORS_HEADERS))
                      .shortCircuit()
                      .build();
}

Example 18

Source File: HanLpIndicesAnalysis.java From elasticsearch-analysis-hanlp with Apache License 2.0

4 votes

/**
 * Loads the analyzer, tokenizer and global HanLP configuration from the given settings.
 *
 * Analyzer and tokenizer options share the same option names and differ only in
 * their key prefix ({@code ANALYZER_CONFIG_PREFIX} vs {@code TOKENIZER_CONFIG_PREFIX}).
 * Every option falls back to the value currently held by the corresponding field or
 * {@code HanLP.Config} member, except {@code ShowTermNature} and {@code Normalization},
 * which fall back to {@code true} and {@code false} respectively.
 * Thread counts below 1 are raised to 1.
 *
 * @param settings the settings to read from
 */
private void initSettings(Settings settings) {
    // get analyzer settings
    analyzerIndexMode = settings.getAsBoolean(ANALYZER_CONFIG_PREFIX + INDEX_MODE, analyzerIndexMode);
    analyzerNameRecognize = settings.getAsBoolean(ANALYZER_CONFIG_PREFIX + NAME_RECOGNIZE, analyzerNameRecognize);
    analyzerTranslatedNameRecognize = settings.getAsBoolean(ANALYZER_CONFIG_PREFIX + TRANSLATED_NAME_RECOGNIZE, analyzerTranslatedNameRecognize);
    analyzerJapaneseNameRecognize = settings.getAsBoolean(ANALYZER_CONFIG_PREFIX + JAPANESE_NAME_RECOGNIZE, analyzerJapaneseNameRecognize);
    analyzerPlaceRecognize = settings.getAsBoolean(ANALYZER_CONFIG_PREFIX + PLACE_RECOGNIZE, analyzerPlaceRecognize);
    analyzerOrganizationRecognize = settings.getAsBoolean(ANALYZER_CONFIG_PREFIX + ORGANIZATION_RECOGNIZE, analyzerOrganizationRecognize);
    analyzerUseCustomDictionary = settings.getAsBoolean(ANALYZER_CONFIG_PREFIX + USE_CUSTOM_DICTIONARY, analyzerUseCustomDictionary);
    analyzerSpeechTagging = settings.getAsBoolean(ANALYZER_CONFIG_PREFIX + SPEECH_TAGGING, analyzerSpeechTagging);
    analyzerOffset = settings.getAsBoolean(ANALYZER_CONFIG_PREFIX + OFFSET, analyzerOffset);
    analyzerNumberQuantifierRecognize = settings.getAsBoolean(ANALYZER_CONFIG_PREFIX + NUMBER_QUANTIFIER_RECOGNIZE, analyzerNumberQuantifierRecognize);
    analyzerThreads = settings.getAsInt(ANALYZER_CONFIG_PREFIX + THREADS, analyzerThreads);

    // get tokenizer settings
    tokenizerIndexMode = settings.getAsBoolean(TOKENIZER_CONFIG_PREFIX + INDEX_MODE, tokenizerIndexMode);
    tokenizerNameRecognize = settings.getAsBoolean(TOKENIZER_CONFIG_PREFIX + NAME_RECOGNIZE, tokenizerNameRecognize);
    tokenizerTranslatedNameRecognize = settings.getAsBoolean(TOKENIZER_CONFIG_PREFIX + TRANSLATED_NAME_RECOGNIZE, tokenizerTranslatedNameRecognize);
    tokenizerJapaneseNameRecognize = settings.getAsBoolean(TOKENIZER_CONFIG_PREFIX + JAPANESE_NAME_RECOGNIZE, tokenizerJapaneseNameRecognize);
    tokenizerPlaceRecognize = settings.getAsBoolean(TOKENIZER_CONFIG_PREFIX + PLACE_RECOGNIZE, tokenizerPlaceRecognize);
    tokenizerOrganizationRecognize = settings.getAsBoolean(TOKENIZER_CONFIG_PREFIX + ORGANIZATION_RECOGNIZE, tokenizerOrganizationRecognize);
    tokenizerUseCustomDictionary = settings.getAsBoolean(TOKENIZER_CONFIG_PREFIX + USE_CUSTOM_DICTIONARY, tokenizerUseCustomDictionary);
    tokenizerSpeechTagging = settings.getAsBoolean(TOKENIZER_CONFIG_PREFIX + SPEECH_TAGGING, tokenizerSpeechTagging);
    tokenizerOffset = settings.getAsBoolean(TOKENIZER_CONFIG_PREFIX + OFFSET, tokenizerOffset);
    tokenizerNumberQuantifierRecognize = settings.getAsBoolean(TOKENIZER_CONFIG_PREFIX + NUMBER_QUANTIFIER_RECOGNIZE, tokenizerNumberQuantifierRecognize);
    tokenizerThreads = settings.getAsInt(TOKENIZER_CONFIG_PREFIX + THREADS, tokenizerThreads);

    // fix threads: clamp both thread counts to a minimum of 1
    if (analyzerThreads < 1) {
        analyzerThreads = 1;
    }
    if (tokenizerThreads < 1) {
        tokenizerThreads = 1;
    }

    // get global HanLP settings
    // Each path keeps its current HanLP.Config value unless the setting overrides it.
    HanLP.Config.CoreDictionaryPath =
        settings.get(CORE_DICTIONARY_PATH, HanLP.Config.CoreDictionaryPath);
    HanLP.Config.CoreDictionaryTransformMatrixDictionaryPath =
        settings.get(CORE_DICTIONARY_TRANSFORM_MATRIX_DICTIONARY_PATH, HanLP.Config.CoreDictionaryTransformMatrixDictionaryPath);
    HanLP.Config.BiGramDictionaryPath =
        settings.get(BI_GRAM_DICTIONARY_PATH, HanLP.Config.BiGramDictionaryPath);
    HanLP.Config.CoreStopWordDictionaryPath =
        settings.get(CORE_STOP_WORD_DICTIONARY_PATH, HanLP.Config.CoreStopWordDictionaryPath);
    HanLP.Config.CoreSynonymDictionaryDictionaryPath =
        settings.get(CORE_SYNONYM_DICTIONARY_DICTIONARY_PATH, HanLP.Config.CoreSynonymDictionaryDictionaryPath);
    HanLP.Config.PersonDictionaryPath =
        settings.get(PERSON_DICTIONARY_PATH, HanLP.Config.PersonDictionaryPath);
    HanLP.Config.PersonDictionaryTrPath =
        settings.get(PERSON_DICTIONARY_TR_PATH, HanLP.Config.PersonDictionaryTrPath);
    // The custom dictionary path is the only path read as an array rather than a single string.
    HanLP.Config.CustomDictionaryPath =
        settings.getAsArray(CUSTOM_DICTIONARY_PATH, HanLP.Config.CustomDictionaryPath);
    HanLP.Config.TraditionalChineseDictionaryPath =
        settings.get(TRADITIONAL_CHINESE_DICTIONARY_PATH, HanLP.Config.TraditionalChineseDictionaryPath);
    HanLP.Config.SYTDictionaryPath =
        settings.get(SYT_DICTIONARY_PATH, HanLP.Config.SYTDictionaryPath);
    HanLP.Config.PinyinDictionaryPath =
        settings.get(PINYIN_DICTIONARY_PATH, HanLP.Config.PinyinDictionaryPath);
    HanLP.Config.TranslatedPersonDictionaryPath =
        settings.get(TRANSLATED_PERSON_DICTIONARY_PATH, HanLP.Config.TranslatedPersonDictionaryPath);
    HanLP.Config.JapanesePersonDictionaryPath =
        settings.get(JAPANESE_PERSON_DICTIONARY_PATH, HanLP.Config.JapanesePersonDictionaryPath);
    HanLP.Config.PlaceDictionaryPath =
        settings.get(PLACE_DICTIONARY_PATH, HanLP.Config.PlaceDictionaryPath);
    HanLP.Config.PlaceDictionaryTrPath =
        settings.get(PLACE_DICTIONARY_TR_PATH, HanLP.Config.PlaceDictionaryTrPath);
    HanLP.Config.OrganizationDictionaryPath =
        settings.get(ORGANIZATION_DICTIONARY_PATH, HanLP.Config.OrganizationDictionaryPath);
    HanLP.Config.OrganizationDictionaryTrPath =
        settings.get(ORGANIZATION_DICTIONARY_TR_PATH, HanLP.Config.OrganizationDictionaryTrPath);
    HanLP.Config.CharTypePath =
        settings.get(CHAR_TYPE_PATH, HanLP.Config.CharTypePath);
    HanLP.Config.CharTablePath =
        settings.get(CHAR_TABLE_PATH, HanLP.Config.CharTablePath);
    HanLP.Config.WordNatureModelPath =
        settings.get(WORD_NATURE_MODEL_PATH, HanLP.Config.WordNatureModelPath);
    HanLP.Config.MaxEntModelPath =
        settings.get(MAX_ENT_MODEL_PATH, HanLP.Config.MaxEntModelPath);
    HanLP.Config.CRFSegmentModelPath =
        settings.get(CRF_SEGMENT_MODEL_PATH, HanLP.Config.CRFSegmentModelPath);
    HanLP.Config.CRFDependencyModelPath =
        settings.get(CRF_DEPENDENCY_MODEL_PATH, HanLP.Config.CRFDependencyModelPath);
    HanLP.Config.HMMSegmentModelPath =
        settings.get(HMM_SEGMENT_MODEL_PATH, HanLP.Config.HMMSegmentModelPath);
    // Unlike the options above, these two use fixed defaults instead of the current config value.
    HanLP.Config.ShowTermNature = settings.getAsBoolean(SHOW_TERM_NATURE, true);
    HanLP.Config.Normalization = settings.getAsBoolean(NORMALIZATION, false);
}

Example 19

Source File: FulltextAnalyzerResolver.java From Elasticsearch with Apache License 2.0

4 votes

/**
 * Collects every setting needed to apply the custom analyzer {@code name} to an index.
 *
 * The result contains the analyzer's own settings plus the settings of each custom
 * tokenizer, token-filter and char-filter it references. Referenced components that
 * are not custom must be built-in, otherwise resolution fails.
 *
 * @param name the name of the analyzer to resolve
 * @return Settings ready for inclusion into a CreateIndexRequest
 * @throws AnalyzerInvalidException if a referenced tokenizer, token-filter or char-filter
 *         is neither custom nor built-in
 * @throws AnalyzerUnknownException if no custom analyzer with name {@code name} could be found
 */
public Settings resolveFullCustomAnalyzerSettings(String name) throws AnalyzerInvalidException {
    final Settings.Builder resolved = Settings.builder();
    final Settings analyzerSettings = getCustomAnalyzer(name);
    if (analyzerSettings == null) {
        throw new AnalyzerUnknownException(name);
    }
    resolved.put(analyzerSettings);

    // The tokenizer is optional; when named it must be custom or built-in.
    final String tokenizerName = analyzerSettings.get(String.format(Locale.ENGLISH, "index.analysis.analyzer.%s.tokenizer", name));
    if (tokenizerName != null) {
        final Settings tokenizerSettings = getCustomTokenizer(tokenizerName);
        if (tokenizerSettings != null) {
            resolved.put(tokenizerSettings);
        } else if (!hasBuiltInTokenizer(tokenizerName)) {
            throw new AnalyzerInvalidException(String.format(Locale.ENGLISH, "Invalid Analyzer: could not resolve tokenizer '%s'", tokenizerName));
        }
    }

    final String tokenFilterKey = String.format(Locale.ENGLISH, "index.analysis.analyzer.%s.filter", name);
    for (String tokenFilterName : analyzerSettings.getAsArray(tokenFilterKey)) {
        final Settings tokenFilterSettings = getCustomTokenFilter(tokenFilterName);
        if (tokenFilterSettings != null) {
            resolved.put(tokenFilterSettings);
        } else if (!hasBuiltInTokenFilter(tokenFilterName)) {
            throw new AnalyzerInvalidException(String.format(Locale.ENGLISH, "Invalid Analyzer: could not resolve token-filter '%s'", tokenFilterName));
        }
    }

    final String charFilterKey = String.format(Locale.ENGLISH, "index.analysis.analyzer.%s.char_filter", name);
    for (String charFilterName : analyzerSettings.getAsArray(charFilterKey)) {
        final Settings charFilterSettings = getCustomCharFilter(charFilterName);
        if (charFilterSettings != null) {
            resolved.put(charFilterSettings);
        } else if (!hasBuiltInCharFilter(charFilterName)) {
            throw new AnalyzerInvalidException(String.format(Locale.ENGLISH, "Invalid Analyzer: could not resolve char-filter '%s'", charFilterName));
        }
    }

    return resolved.build();
}

Example 20

Source File: Environment.java From Elasticsearch with Apache License 2.0

4 votes

/**
 * Resolves all well-known filesystem locations from the given settings.
 *
 * {@code path.home} is mandatory. The config, plugins, data and logs locations
 * default to sub-directories of the home directory, and scripts defaults to a
 * sub-directory of the config directory. {@code path.shared_data} and
 * {@code pidfile} stay {@code null} when not configured.
 * Each {@code path.data} entry may carry a limit after a {@code #} separator;
 * entries without one get the limit {@code "-1"}.
 *
 * @param settings the node settings
 * @throws IllegalStateException if {@code path.home} is not configured
 */
public Environment(Settings settings) {
    this.settings = settings;

    final String homeSetting = settings.get("path.home");
    if (homeSetting == null) {
        throw new IllegalStateException("path.home is not configured");
    }
    final Path homeFile = PathUtils.get(cleanPath(homeSetting));

    final String confSetting = settings.get("path.conf");
    configFile = confSetting != null ? PathUtils.get(cleanPath(confSetting)) : homeFile.resolve("config");

    final String scriptsSetting = settings.get("path.scripts");
    scriptsFile = scriptsSetting != null ? PathUtils.get(cleanPath(scriptsSetting)) : configFile.resolve("scripts");

    final String pluginsSetting = settings.get("path.plugins");
    pluginsFile = pluginsSetting != null ? PathUtils.get(cleanPath(pluginsSetting)) : homeFile.resolve("plugins");

    final String[] dataPaths = settings.getAsArray("path.data");
    if (dataPaths.length == 0) {
        // No explicit data paths: a single "data" directory under home, without a limit.
        dataFiles = new Path[]{homeFile.resolve("data")};
        dataWithClusterFiles = new Path[]{homeFile.resolve("data").resolve(ClusterName.clusterNameFromSettings(settings).value())};
        dataPathLimits = new String[]{"-1"};
    } else {
        final int dataPathCount = dataPaths.length;
        dataFiles = new Path[dataPathCount];
        dataWithClusterFiles = new Path[dataPathCount];
        dataPathLimits = new String[dataPathCount];
        for (int idx = 0; idx < dataPathCount; idx++) {
            // Entry format is "<path>" or "<path>#<limit>".
            final String[] pathAndLimit = dataPaths[idx].split("#");
            dataFiles[idx] = PathUtils.get(pathAndLimit[0]);
            dataWithClusterFiles[idx] = dataFiles[idx].resolve(ClusterName.clusterNameFromSettings(settings).value());
            dataPathLimits[idx] = pathAndLimit.length > 1 ? pathAndLimit[1] : "-1";
        }
    }

    final String sharedDataSetting = settings.get("path.shared_data");
    sharedDataFile = sharedDataSetting != null ? PathUtils.get(cleanPath(sharedDataSetting)) : null;

    // With no repo paths configured this yields an empty array.
    final String[] repoPaths = settings.getAsArray("path.repo");
    repoFiles = new Path[repoPaths.length];
    for (int idx = 0; idx < repoPaths.length; idx++) {
        repoFiles[idx] = PathUtils.get(repoPaths[idx]);
    }

    final String logsSetting = settings.get("path.logs");
    logsFile = logsSetting != null ? PathUtils.get(cleanPath(logsSetting)) : homeFile.resolve("logs");

    final String pidSetting = settings.get("pidfile");
    pidFile = pidSetting != null ? PathUtils.get(cleanPath(pidSetting)) : null;

    binFile = homeFile.resolve("bin");
    libFile = homeFile.resolve("lib");
    modulesFile = homeFile.resolve("modules");
}