Java Code Examples for org.apache.lucene.analysis.CharArraySet#addAll()

The following examples show how to use org.apache.lucene.analysis.CharArraySet#addAll(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example 1

Source File: StandardPreProcessorIterator.java From Indra with MIT License

6 votes

/**
 * Wraps {@code stream} in a {@link StopFilter} when stop words are available.
 *
 * <p>Selection order:
 * <ol>
 *   <li>If {@code metadataStopWords} is non-null and non-empty, those words are used
 *       (the set is built with {@code ignoreCase = false}).</li>
 *   <li>Otherwise the result of {@code getDefaultStopWordSet(lang)} is used, if non-null
 *       (the set is built with {@code ignoreCase = true}).</li>
 *   <li>Otherwise {@code stream} is returned unchanged.</li>
 * </ol>
 *
 * @param lang              language passed to {@code getDefaultStopWordSet}
 * @param metadataStopWords explicit stop words; may be null or empty
 * @param stream            the token stream to filter
 * @return a stop-filtered stream, or {@code stream} itself when no stop words are found
 * @throws IndraRuntimeException wrapping any {@link IndraException} raised on the default path
 */
private TokenStream getStopFilter(String lang, Set<String> metadataStopWords, TokenStream stream) {
        boolean hasMetadataStopWords = metadataStopWords != null && !metadataStopWords.isEmpty();
        if (hasMetadataStopWords) {
            CharArraySet explicitStopWords = new CharArraySet(metadataStopWords, false);
            return new StopFilter(stream, explicitStopWords);
        }

        // No explicit stop words: fall back to the defaults for the language.
        try {
            Set<String> defaults = getDefaultStopWordSet(lang);
            if (defaults == null) {
                return stream;
            }

            CharArraySet defaultStopWords = new CharArraySet(30, true);
            defaultStopWords.addAll(defaults);
            return new StopFilter(stream, defaultStopWords);
        } catch (IndraException e) {
            String message = String.format("Error creating stop filter for lang '%s'", lang);
            throw new IndraRuntimeException(message, e);
        }
    }

Example 2

Source File: CapitalizationFilterFactory.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Creates a new CapitalizationFilterFactory from the given configuration arguments.
 *
 * <p>Reads the keep-word set (with its ignore-case flag), the list of accepted prefixes,
 * and the numeric/boolean tuning options. If {@code args} is not empty once every known
 * option has been read, the remaining entries are reported as unknown parameters
 * (presumably the {@code get*} helpers remove the keys they consume; verify in the superclass).
 *
 * @param args configuration arguments for this factory
 * @throws IllegalArgumentException if {@code args} still contains entries after all known
 *         parameters have been read
 */
public CapitalizationFilterFactory(Map<String, String> args) {
  super(args);

  // Words to keep, stored in a CharArraySet honoring the configured case sensitivity.
  final boolean keepIgnoreCase = getBoolean(args, KEEP_IGNORE_CASE, false);
  final Set<String> keepWords = getSet(args, KEEP);
  if (keepWords != null) {
    keep = new CharArraySet(10, keepIgnoreCase);
    keep.addAll(keepWords);
  }

  // Accepted prefixes are stored as char arrays.
  final Set<String> prefixes = getSet(args, OK_PREFIX);
  if (prefixes != null) {
    okPrefix = new ArrayList<>();
    for (final String prefix : prefixes) {
      final char[] prefixChars = prefix.toCharArray();
      okPrefix.add(prefixChars);
    }
  }

  minWordLength = getInt(args, MIN_WORD_LENGTH, 0);
  maxWordCount = getInt(args, MAX_WORD_COUNT, CapitalizationFilter.DEFAULT_MAX_WORD_COUNT);
  maxTokenLength = getInt(args, MAX_TOKEN_LENGTH, CapitalizationFilter.DEFAULT_MAX_TOKEN_LENGTH);
  onlyFirstWord = getBoolean(args, ONLY_FIRST_WORD, true);
  forceFirstLetter = getBoolean(args, FORCE_FIRST_LETTER, true);

  final boolean hasUnknownParameters = !args.isEmpty();
  if (hasUnknownParameters) {
    throw new IllegalArgumentException("Unknown parameters: " + args);
  }
}

Example 3

Source File: ManagedStopFilterFactory.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Callback invoked by the {@link ManagedResource} instance to trigger this
 * class to create the CharArraySet used to create the StopFilter using the
 * wordset managed by {@link ManagedWordSetResource}. Keep in mind that
 * a schema.xml may reuse the same {@link ManagedStopFilterFactory} many
 * times for different field types; behind the scenes all instances of this
 * class/handle combination share the same managed data, hence the need for
 * a listener/callback scheme.
 *
 * @param args init args; the boolean "ignoreCase" entry selects case-insensitive
 *             matching and is treated as {@code false} when absent
 * @param res  the managed resource; cast to {@link ManagedWordSetResource} to
 *             obtain the word set
 * @throws SolrException declared by the callback contract
 */
@Override
public void onManagedResourceInitialized(NamedList<?> args, ManagedResource res) 
    throws SolrException {

  Set<String> managedWords = ((ManagedWordSetResource)res).getWordSet(); 
      
  // first thing is to rebuild the Lucene CharArraySet from our managedWords set
  // which is slightly inefficient to do for every instance of the managed filter
  // but ManagedResource's don't have access to the luceneMatchVersion

  // getBooleanArg yields a boxed Boolean; assigning it straight to a primitive
  // would throw NullPointerException when "ignoreCase" is missing, so compare
  // against TRUE instead of auto-unboxing.
  Boolean ignoreCaseArg = args.getBooleanArg("ignoreCase");
  boolean ignoreCase = Boolean.TRUE.equals(ignoreCaseArg);
  stopWords = new CharArraySet(managedWords.size(), ignoreCase);
  stopWords.addAll(managedWords);
}

Example 4

Source File: Analysis.java From crate with Apache License 2.0

5 votes

/**
 * Builds a {@link CharArraySet} from {@code words}, expanding named entries.
 *
 * <p>When {@code namedWords} is null the set is created directly from {@code words}.
 * Otherwise each word that is a key of {@code namedWords} is replaced by every element
 * of the set mapped to it, and every other word is added as-is.
 *
 * @param words      the words (or named-set keys) to resolve
 * @param namedWords mapping from a name to the set it stands for; may be null
 * @param ignoreCase passed through to the {@link CharArraySet} constructor
 * @return the resolved set
 */
private static CharArraySet resolveNamedWords(Collection<String> words, Map<String, Set<?>> namedWords, boolean ignoreCase) {
    if (namedWords == null) {
        return new CharArraySet(words, ignoreCase);
    }

    final CharArraySet resolved = new CharArraySet(words.size(), ignoreCase);
    for (final String candidate : words) {
        if (!namedWords.containsKey(candidate)) {
            // Plain word: keep it unchanged.
            resolved.add(candidate);
            continue;
        }
        // Named entry: substitute the whole set it refers to.
        resolved.addAll(namedWords.get(candidate));
    }
    return resolved;
}