Java Code Examples for org.apache.lucene.analysis.CharArraySet#unmodifiableSet()

The following examples show how to use org.apache.lucene.analysis.CharArraySet#unmodifiableSet(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example 1

Source File: DutchAnalyzer.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Builds an analyzer from the given stop words, stem exclusion set and stem
 * override dictionary.
 * <p>
 * Both sets are copied and wrapped as unmodifiable before being stored. The
 * override dictionary is compiled through a {@link StemmerOverrideFilter.Builder};
 * when the dictionary is empty no override map is built and {@code stemdict}
 * is left {@code null}.
 *
 * @param stopwords a stopword set
 * @param stemExclusionTable a set of terms not to be stemmed
 * @param stemOverrideDict a mapping of terms to the stems that should replace them
 * @throws RuntimeException if building the stem override map fails with an {@link IOException}
 */
public DutchAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionTable, CharArrayMap<String> stemOverrideDict) {
  this.stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(stopwords));
  this.excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionTable));
  if (!stemOverrideDict.isEmpty()) {
    // Case-insensitive matching is not needed: this analyzer lowercases anyway.
    final StemmerOverrideFilter.Builder overrides = new StemmerOverrideFilter.Builder(false);
    final CharsRefBuilder scratch = new CharsRefBuilder();
    for (CharArrayMap<String>.EntryIterator entries = stemOverrideDict.entrySet().iterator(); entries.hasNext(); ) {
      final char[] key = entries.nextKey();
      // Reuse one scratch buffer for every key instead of allocating per entry.
      scratch.copyChars(key, 0, key.length);
      overrides.add(scratch.get(), entries.currentValue());
    }
    try {
      this.stemdict = overrides.build();
    } catch (IOException ex) {
      throw new RuntimeException("can not build stem dict", ex);
    }
  } else {
    this.stemdict = null;
  }
}

Example 2

Source File: StopwordAnnotator.java From coreNlp with Apache License 2.0

5 votes

/**
 * Builds an unmodifiable stopword set from a comma-separated list.
 * <p>
 * The list is split on {@code ","} and every resulting piece is added as-is;
 * pieces are not trimmed, so whitespace around a comma becomes part of the term.
 *
 * @param luceneVersion the Lucene version passed to the {@link CharArraySet} constructor
 * @param stopwordList comma-separated stopwords
 * @param ignoreCase passed to the {@link CharArraySet} constructor as its ignore-case flag
 * @return an unmodifiable view of the set holding the split terms
 */
public static CharArraySet getStopWordList(Version luceneVersion, String stopwordList, boolean ignoreCase) {
    final String[] pieces = stopwordList.split(",");
    final CharArraySet collected = new CharArraySet(luceneVersion, pieces.length, ignoreCase);
    for (int i = 0; i < pieces.length; i++) {
        collected.add(pieces[i]);
    }
    return CharArraySet.unmodifiableSet(collected);
}

Example 3

Source File: SnowballAnalyzer.java From crate with Apache License 2.0

4 votes

/** Builds the named analyzer with the given stop words. */
SnowballAnalyzer(String name, CharArraySet stopWords) {
    this(name);
    stopSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stopWords));
}

Example 4

Source File: ArabicAnalyzer.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Builds an analyzer with the given stop word. If a none-empty stem exclusion set is
 * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
 * {@link ArabicStemFilter}.
 * 
 * @param stopwords
 *          a stopword set
 * @param stemExclusionSet
 *          a set of terms not to be stemmed
 */
public ArabicAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet){
  super(stopwords);
  this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}

Example 5

Source File: EnglishAnalyzer.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
 * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
 * stemming.
 * 
 * @param stopwords a stopword set
 * @param stemExclusionSet a set of terms not to be stemmed
 */
public EnglishAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
  super(stopwords);
  this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}

Example 6

Source File: GalicianAnalyzer.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
 * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
 * stemming.
 * 
 * @param stopwords a stopword set
 * @param stemExclusionSet a set of terms not to be stemmed
 */
public GalicianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
  super(stopwords);
  this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}

Example 7

Source File: RomanianAnalyzer.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
 * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
 * stemming.
 * 
 * @param stopwords a stopword set
 * @param stemExclusionSet a set of terms not to be stemmed
 */
public RomanianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
  super(stopwords);
  this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}

Example 8

Source File: BasqueAnalyzer.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
 * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
 * stemming.
 * 
 * @param stopwords a stopword set
 * @param stemExclusionSet a set of terms not to be stemmed
 */
public BasqueAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
  super(stopwords);
  this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}

Example 9

Source File: TurkishAnalyzer.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
 * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
 * stemming.
 *
 * @param stopwords a stopword set
 * @param stemExclusionSet a set of terms not to be stemmed
 */
public TurkishAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
  super(stopwords);
  this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}

Example 10

Source File: CzechAnalyzer.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Builds an analyzer with the given stop words and a set of work to be
 * excluded from the {@link CzechStemFilter}.
 * 
 * @param stopwords a stopword set
 * @param stemExclusionTable a stemming exclusion set
 */
public CzechAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionTable) {
  super(stopwords);
  this.stemExclusionTable = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionTable));
}

Example 11

Source File: ItalianAnalyzer.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
 * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
 * stemming.
 * 
 * @param stopwords a stopword set
 * @param stemExclusionSet a set of terms not to be stemmed
 */
public ItalianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
  super(stopwords);
  this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}

Example 12

Source File: RussianAnalyzer.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Builds an analyzer with the given stop words
 * 
 * @param stopwords
 *          a stopword set
 * @param stemExclusionSet a set of words not to be stemmed
 */
public RussianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
  super(stopwords);
  this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}

Example 13

Source File: SpanishAnalyzer.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
 * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
 * stemming.
 * 
 * @param stopwords a stopword set
 * @param stemExclusionSet a set of terms not to be stemmed
 */
public SpanishAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
  super(stopwords);
  this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}

Example 14

Source File: LatvianAnalyzer.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
 * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
 * stemming.
 * 
 * @param stopwords a stopword set
 * @param stemExclusionSet a set of terms not to be stemmed
 */
public LatvianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
  super(stopwords);
  this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}

Example 15

Source File: ArmenianAnalyzer.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
 * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
 * stemming.
 * 
 * @param stopwords a stopword set
 * @param stemExclusionSet a set of terms not to be stemmed
 */
public ArmenianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
  super(stopwords);
  this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}

Example 16

Source File: BengaliAnalyzer.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Builds an analyzer with the given stop words
 * 
 * @param stopwords a stopword set
 * @param stemExclusionSet a stemming exclusion set
 */
public BengaliAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
  super(stopwords);
  this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}

Example 17

Source File: NorwegianAnalyzer.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
 * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
 * stemming.
 * 
 * @param stopwords a stopword set
 * @param stemExclusionSet a set of terms not to be stemmed
 */
public NorwegianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
  super(stopwords);
  this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}

Example 18

Source File: EstonianAnalyzer.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
 * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
 * stemming.
 *
 * @param stopwords a stopword set
 * @param stemExclusionSet a set of terms not to be stemmed
 */
public EstonianAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
    super(stopwords);
    this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}

Example 19

Source File: UkrainianMorfologikAnalyzer.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
 * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
 * stemming.
 * 
 * @param stopwords a stopword set
 * @param stemExclusionSet a set of terms not to be stemmed
 */
public UkrainianMorfologikAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
  super(stopwords);
  this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}

Example 20

Source File: CatalanAnalyzer.java From lucene-solr with Apache License 2.0

2 votes

/**
 * Builds an analyzer with the given stop words. If a non-empty stem exclusion set is
 * provided this analyzer will add a {@link SetKeywordMarkerFilter} before
 * stemming.
 * 
 * @param stopwords a stopword set
 * @param stemExclusionSet a set of terms not to be stemmed
 */
public CatalanAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionSet) {
  super(stopwords);
  this.stemExclusionSet = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionSet));
}