Java Code Examples for org.apache.lucene.analysis.CharArraySet#addAll()

The following examples show how to use org.apache.lucene.analysis.CharArraySet#addAll(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out the related API usage on the sidebar.
Example 1
Source file: from the Indra project (MIT License), 6 votes
private TokenStream getStopFilter(String lang, Set<String> metadataStopWords, TokenStream stream) {

        if (metadataStopWords != null && !metadataStopWords.isEmpty()) {
            return new StopFilter(stream, new CharArraySet(metadataStopWords, false));

        } else {
            try {
                Set<String> sws = getDefaultStopWordSet(lang);

                if (sws != null) {
                    CharArraySet stopWords = new CharArraySet(30, true);
                    return new StopFilter(stream, stopWords);
            } catch (IndraException e) {
                throw new IndraRuntimeException(String.format("Error creating stop filter for lang '%s'", lang), e);
        return stream;
Example 2
Source file: from the lucene-solr project (Apache License 2.0), 5 votes
/**
 * Creates a new CapitalizationFilterFactory.
 *
 * <p>Reads the {@code KEEP}, {@code KEEP_IGNORE_CASE}, {@code OK_PREFIX},
 * {@code MIN_WORD_LENGTH}, {@code MAX_WORD_COUNT}, {@code MAX_TOKEN_LENGTH},
 * {@code ONLY_FIRST_WORD} and {@code FORCE_FIRST_LETTER} options from
 * {@code args}.
 *
 * @param args factory configuration
 * @throws IllegalArgumentException if {@code args} is not empty after the known
 *     options have been read
 */
public CapitalizationFilterFactory(Map<String, String> args) {
  // NOTE(review): upstream Lucene factories hand the argument map to the
  // superclass first — confirm the parent class declares this constructor.
  super(args);
  boolean ignoreCase = getBoolean(args, KEEP_IGNORE_CASE, false);
  Set<String> k = getSet(args, KEEP);
  if (k != null) {
    keep = new CharArraySet(10, ignoreCase);
    // Copy the configured words in; otherwise the keep set stays empty.
    keep.addAll(k);
  }

  k = getSet(args, OK_PREFIX);
  if (k != null) {
    okPrefix = new ArrayList<>();
    for (String item : k) {
      // NOTE(review): assumes okPrefix holds char[] entries, as in Lucene's
      // CapitalizationFilter — confirm against the field declaration.
      okPrefix.add(item.toCharArray());
    }
  }

  minWordLength = getInt(args, MIN_WORD_LENGTH, 0);
  maxWordCount = getInt(args, MAX_WORD_COUNT, CapitalizationFilter.DEFAULT_MAX_WORD_COUNT);
  maxTokenLength = getInt(args, MAX_TOKEN_LENGTH, CapitalizationFilter.DEFAULT_MAX_TOKEN_LENGTH);
  onlyFirstWord = getBoolean(args, ONLY_FIRST_WORD, true);
  forceFirstLetter = getBoolean(args, FORCE_FIRST_LETTER, true);
  if (!args.isEmpty()) {
    throw new IllegalArgumentException("Unknown parameters: " + args);
  }
}
Example 3
Source file: from the lucene-solr project (Apache License 2.0), 5 votes
/**
 * Callback invoked by the {@link ManagedResource} instance to trigger this
 * class to create the CharArraySet used to create the StopFilter using the
 * wordset managed by {@link ManagedWordSetResource}. Keep in mind that
 * a schema.xml may reuse the same {@link ManagedStopFilterFactory} many
 * times for different field types; behind the scenes all instances of this
 * class/handle combination share the same managed data, hence the need for
 * a listener/callback scheme.
 */
public void onManagedResourceInitialized(NamedList<?> args, ManagedResource res) 
    throws SolrException {

  Set<String> managedWords = ((ManagedWordSetResource)res).getWordSet(); 
  // first thing is to rebuild the Lucene CharArraySet from our managedWords set
  // which is slightly inefficient to do for every instance of the managed filter
  // but ManagedResource's don't have access to the luceneMatchVersion
  boolean ignoreCase = args.getBooleanArg("ignoreCase");
  stopWords = new CharArraySet(managedWords.size(), ignoreCase);
Example 4
Source file: from the crate project (Apache License 2.0), 5 votes
private static CharArraySet resolveNamedWords(Collection<String> words, Map<String, Set<?>> namedWords, boolean ignoreCase) {
    if (namedWords == null) {
        return new CharArraySet(words, ignoreCase);
    CharArraySet setWords = new CharArraySet(words.size(), ignoreCase);
    for (String word : words) {
        if (namedWords.containsKey(word)) {
        } else {
    return setWords;