org.apache.lucene.analysis.CharArrayMap Java Examples

The following examples show how to use org.apache.lucene.analysis.CharArrayMap. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: DutchAnalyzer.java (from lucene-solr, Apache License 2.0), 6 votes
/**
 * Creates a Dutch analyzer from the given stop words, stem exclusions and stem overrides.
 *
 * <p>The stop-word and exclusion sets are copied and wrapped as unmodifiable sets.
 * When the override dictionary is non-empty, its entries are fed into a
 * {@link StemmerOverrideFilter.Builder}; otherwise no stem dictionary is kept.
 *
 * @param stopwords set of stop words, copied before being stored
 * @param stemExclusionTable set of stem exclusions, copied before being stored
 * @param stemOverrideDict mapping of terms to their overriding stems
 * @throws RuntimeException if building the stem dictionary fails with an {@link IOException}
 */
public DutchAnalyzer(CharArraySet stopwords, CharArraySet stemExclusionTable, CharArrayMap<String> stemOverrideDict) {
  this.stoptable = CharArraySet.unmodifiableSet(CharArraySet.copy(stopwords));
  this.excltable = CharArraySet.unmodifiableSet(CharArraySet.copy(stemExclusionTable));
  if (!stemOverrideDict.isEmpty()) {
    // case-insensitive matching is unnecessary: this analyzer lowercases anyway
    final StemmerOverrideFilter.Builder overrides = new StemmerOverrideFilter.Builder(false);
    final CharsRefBuilder scratch = new CharsRefBuilder();
    for (CharArrayMap<String>.EntryIterator entries = stemOverrideDict.entrySet().iterator();
        entries.hasNext(); ) {
      final char[] key = entries.nextKey();
      scratch.copyChars(key, 0, key.length);
      overrides.add(scratch.get(), entries.currentValue());
    }
    try {
      this.stemdict = overrides.build();
    } catch (IOException ex) {
      throw new RuntimeException("can not build stem dict", ex);
    }
  } else {
    this.stemdict = null;
  }
}
 
Example #2
Source File: TurkishDeASCIIfyFilter.java (from lucene-solr-analysis-turkish, Apache License 2.0), 6 votes
/**
 * Looks up substrings of the context around {@code point} in {@code dlist} and reports
 * whether the entry with the smallest absolute value found is positive.
 *
 * <p>The context is obtained from {@code turkish_get_context} with a context size of 10.
 * Every substring that starts at an index in {@code [0, 10]} and ends (exclusive) at an
 * index in {@code [11, context.length]} is looked up. The running rank starts at twice the
 * size of {@code dlist} and is replaced whenever a hit has a strictly smaller absolute value.
 *
 * @param dlist pattern table mapping character sequences to ranks
 * @param point position forwarded to {@code turkish_get_context}
 * @param turkish_string buffer forwarded to {@code turkish_get_context}
 * @param length length forwarded to {@code turkish_get_context}
 * @return {@code true} if the final rank is greater than zero
 */
private static boolean turkish_match_pattern(CharArrayMap<Integer> dlist, int point, char[] turkish_string, int length) {
    final int contextSize = 10;
    int bestRank = dlist.size() * 2;
    final char[] context = turkish_get_context(contextSize, point, turkish_string, length);
    final int contextLength = context.length;

    for (int from = 0; from <= contextSize; from++) {
        for (int to = contextSize + 1; to <= contextLength; to++) {
            final Integer candidate = dlist.get(context, from, to - from);
            if (candidate == null) {
                continue;
            }
            // keep the hit whose magnitude is strictly smallest so far
            if (Math.abs(candidate) < Math.abs(bestRank)) {
                bestRank = candidate;
            }
        }
    }
    return bestRank > 0;
}
 
Example #3
Source File: PatternTableFactory.java (from lucene-solr-analysis-turkish, Apache License 2.0), 6 votes
/**
 * Selects the pattern table associated with a character.
 *
 * @param c one of {@code 'c'}, {@code 'g'}, {@code 'i'}, {@code 'o'}, {@code 's'} or {@code 'u'}
 * @return the matching table, or {@code null} when {@code c} is any other character
 */
public static CharArrayMap<Integer> getMap(char c) {
    final CharArrayMap<Integer> table;
    switch (c) {
        case 'c':
            table = MapC.map;
            break;
        case 'g':
            table = MapG.map;
            break;
        case 'i':
            table = MapI.map;
            break;
        case 'o':
            table = MapO.map;
            break;
        case 's':
            table = MapS.map;
            break;
        case 'u':
            table = MapU.map;
            break;
        default:
            // no table exists for this character
            table = null;
            break;
    }
    return table;
}
 
Example #4
Source File: BritishUSFilter.java (from lumongo, Apache License 2.0), 6 votes
/**
 * Builds the lookup table from the {@code british.txt} resource resolved relative to
 * {@link BritishUSFilter}.
 *
 * <p>Each line is split on tab characters. The header line (the one starting with
 * "UK", a tab, then "US") and any line that does not yield exactly two columns are skipped.
 * For the remaining lines the first column becomes the key and the second the value.
 * Both LF and CRLF line endings are accepted, so a resource checked out with Windows
 * line endings does not leave a stray carriage return on the stored values.
 *
 * @return the populated map, created with an initial size of 2000 and {@code ignoreCase} set to {@code false}
 * @throws RuntimeException wrapping any exception raised while locating or reading the resource
 */
private static CharArrayMap<char[]> initializeDictHash() {
	CharArrayMap<char[]> charMap = new CharArrayMap<>(2000, false);

	try {
		URL url = Resources.getResource(BritishUSFilter.class, "british.txt");
		String text = Resources.toString(url, Charsets.UTF8_CHARSET);
		// Split on LF or CRLF; splitting on "\n" alone would keep the '\r' on the second column.
		String[] lines = text.split("\r?\n");
		for (String line : lines) {
			if (line.startsWith("UK\tUS")) {
				// header row, not a mapping
				continue;
			}
			String[] parts = line.split("\t");
			if (parts.length == 2) {
				charMap.put(parts[0].toCharArray(), parts[1].toCharArray());
			}
		}
	}
	catch (Exception e) {
		throw new RuntimeException(e);
	}
	return charMap;
}
 
Example #5
Source File: TurkishDeASCIIfyFilter.java (from lucene-solr-analysis-turkish, Apache License 2.0), 5 votes
/**
 * Determine if char at cursor needs correction.
 *
 * <p>The character is first mapped through {@code turkish_asciify_table} (falling back to
 * the character itself when the table has no entry). The pattern table for the lowercase
 * form of the mapped character is then consulted via {@code turkish_match_pattern}; when no
 * table exists the match result is {@code false}. The match result is returned as is or
 * negated depending on whether the mapped character equals {@code 'I'} and whether the
 * mapping left the character unchanged.
 *
 * @param c character at the cursor
 * @param point position forwarded to {@code turkish_match_pattern}
 * @param turkish_string buffer forwarded to {@code turkish_match_pattern}
 * @param length length forwarded to {@code turkish_match_pattern}
 * @return {@code true} if the character needs correction
 */
private static boolean turkish_need_correction(char c, int point, char[] turkish_string, int length) {

    final Character asciified;
    if (turkish_asciify_table.containsKey(c)) {
        asciified = turkish_asciify_table.get(c);
    } else {
        asciified = c;
    }

    final CharArrayMap<Integer> patterns = PatternTableFactory.getMap(Character.toLowerCase(asciified));
    final boolean matched = patterns != null && turkish_match_pattern(patterns, point, turkish_string, length);

    final boolean unchanged = (c == asciified);
    if (asciified.equals('I')) {
        // for 'I' the sense of the match is inverted
        return unchanged ? !matched : matched;
    }
    return unchanged ? matched : !matched;
}
 
Example #6
Source File: TestDutchAnalyzer.java (from lucene-solr, Apache License 2.0), 4 votes
/**
 * Builds a {@link DutchAnalyzer} with an empty stop-word set, an empty exclusion set and an
 * empty stem-override map, then runs {@code checkOneTerm} with input "fiets" and expected
 * output "fiet".
 *
 * @throws IOException if the check fails with an I/O error
 */
public void testEmptyStemDictionary() throws IOException {
  // try-with-resources closes the analyzer even when the check throws
  try (DutchAnalyzer a = new DutchAnalyzer(CharArraySet.EMPTY_SET,
      CharArraySet.EMPTY_SET, CharArrayMap.<String>emptyMap())) {
    checkOneTerm(a, "fiets", "fiet");
  }
}