Java Code Examples for org.tartarus.snowball.SnowballStemmer

The following examples show how to use org.tartarus.snowball.SnowballStemmer. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: lucene-solr   Source File: LuceneCarrot2StemmerFactory.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds an {@link IStemmer} that wraps the {@link SnowballStemmer}
 * registered for the supplied language code. The identity stemmer is
 * returned (and a warning is logged) when no stemmer class is registered
 * for the language, or when the registered class cannot be instantiated.
 */
public static IStemmer createStemmer(LanguageCode language) {
  final Class<? extends SnowballStemmer> stemmerType = snowballStemmerClasses.get(language);

  if (stemmerType != null) {
    try {
      // Instantiate through the public no-arg constructor and wrap the result.
      final SnowballStemmer delegate = stemmerType.getConstructor().newInstance();
      return new SnowballStemmerAdapter(delegate);
    } catch (Exception e) {
      log.warn("Could not instantiate snowball stemmer for language: {}"
          + ". Quality of clustering may be degraded.", language.name(), e);
      return IdentityStemmer.INSTANCE;
    }
  }

  // Nothing registered for this language: fall back to the no-op stemmer.
  log.warn("No Snowball stemmer class for: {}. "
      + "Quality of clustering may be degraded.", language.name());
  return IdentityStemmer.INSTANCE;
}
 
Example 2
Source Project: sasi   Source File: StemmerFactory.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a new {@link SnowballStemmer} for the language of the given locale.
 *
 * Only the first two characters of the locale's language code are used to
 * look up the stemmer class in {@code SUPPORTED_LANGUAGES}.
 *
 * @param locale the locale whose language selects the stemmer; may be {@code null}
 * @return a new stemmer instance, or {@code null} when the locale is
 *         {@code null}, its language code has fewer than two characters,
 *         the language is not supported, or instantiation fails
 */
public static SnowballStemmer getStemmer(Locale locale)
{
    if (locale == null)
        return null;

    // Locale.getLanguage() may return an empty string (e.g. Locale.ROOT);
    // guard the substring below so such locales yield "no stemmer" instead
    // of a StringIndexOutOfBoundsException.
    String language = locale.getLanguage();
    if (language.length() < 2)
        return null;

    String rootLang = language.substring(0, 2);
    try
    {
        Class clazz = SUPPORTED_LANGUAGES.get(rootLang);
        if(clazz == null)
            return null;
        Constructor<?> ctor = STEMMER_CONSTRUCTOR_CACHE.get(clazz);
        return (SnowballStemmer) ctor.newInstance();
    }
    catch (Exception e)
    {
        // Best effort: a failure to build the stemmer is reported at debug
        // level and signalled to the caller with a null return.
        logger.debug("Failed to create new SnowballStemmer instance " +
                "for language [{}]", locale.getLanguage(), e);
    }
    return null;
}
 
Example 3
Source Project: spark-stemming   Source File: StemmerTest.java    License: BSD 2-Clause "Simplified" License 5 votes vote down vote up
/** Checks that the English Snowball stemmer turns "Jumps" into "Jump". */
@Test
public void englishSanityCheck() {
    final String input = "Jumps";
    final String expected = "Jump";

    SnowballStemmer stemmer = new englishStemmer();
    stemmer.setCurrent(input);
    stemmer.stem();

    Assert.assertEquals(expected, stemmer.getCurrent());
}
 
Example 4
Source Project: lucene-solr   Source File: SnowballPorterFilterFactory.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Resolves the stemmer class for the configured language through the given
 * loader and, when word files are configured, loads the protected word set.
 *
 * @param loader the resource loader used to instantiate the stemmer and read word files
 * @throws IOException declared by this callback; propagated from the calls below
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  final String stemmerClassName = "org.tartarus.snowball.ext." + language + "Stemmer";
  // An instance is created only to obtain its runtime class.
  final SnowballStemmer prototype = loader.newInstance(stemmerClassName, SnowballStemmer.class);
  stemClass = prototype.getClass();

  if (wordFiles == null) {
    return;
  }
  protectedWords = getWordSet(loader, wordFiles, false);
}
 
Example 5
Source Project: lucene-solr   Source File: SnowballFilter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Construct the named stemming filter.
 *
 * Available stemmers are listed in {@link org.tartarus.snowball.ext}.
 * The name of a stemmer is the part of the class name before "Stemmer",
 * e.g., the stemmer in {@link org.tartarus.snowball.ext.EnglishStemmer} is named "English".
 *
 * @param in the input tokens to stem
 * @param name the name of a stemmer
 */
public SnowballFilter(TokenStream in, String name) {
  super(in);
  //Class.forName is frowned upon in place of the ResourceLoader but in this case,
  // the factory will use the other constructor so that the program is already loaded.
  try {
    Class<? extends SnowballStemmer> stemClass =
      Class.forName("org.tartarus.snowball.ext." + name + "Stemmer").asSubclass(SnowballStemmer.class);
    stemmer = stemClass.getConstructor().newInstance();
  } catch (Exception e) {
    throw new IllegalArgumentException("Invalid stemmer class specified: " + name, e);
  }
}
 
Example 6
Source Project: EDDI   Source File: StemmingCorrection.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reflectively instantiates the Snowball stemmer whose class name is
 * {@code org.tartarus.snowball.ext.<language>Stemmer}.
 *
 * @return a new stemmer instance
 * @throws RuntimeException wrapping any failure to load or instantiate the class
 */
private SnowballStemmer createNewStemmer() {
    try {
        final String stemmerClassName = "org.tartarus.snowball.ext." + language + "Stemmer";
        final Object instance = Class.forName(stemmerClassName)
                .getDeclaredConstructor()
                .newInstance();
        return (SnowballStemmer) instance;
    } catch (Exception e) {
        throw new RuntimeException(e.getMessage(), e);
    }
}
 
Example 7
Source Project: lesk-wsd-dsm   Source File: RevisedLesk.java    License: GNU General Public License v3.0 5 votes vote down vote up
/**
 * Selects a new Snowball stemmer for the given language.
 *
 * @param language the language to stem
 * @return a new stemmer for EN, ES, FR, DE or IT; {@code null} for any other language
 */
private SnowballStemmer getStemmer(Language language) {
    if (language.equals(Language.EN)) {
        return new porterStemmer();
    }
    if (language.equals(Language.ES)) {
        return new spanishStemmer();
    }
    if (language.equals(Language.FR)) {
        return new frenchStemmer();
    }
    if (language.equals(Language.DE)) {
        return new germanStemmer();
    }
    if (language.equals(Language.IT)) {
        return new italianStemmer();
    }
    // No stemmer is wired up for the remaining languages.
    return null;
}
 
Example 8
Source Project: lesk-wsd-dsm   Source File: RevisedLesk.java    License: GNU General Public License v3.0 5 votes vote down vote up
/**
 * Builds a bag of words from the given text: the text is tokenized with a
 * {@link StandardAnalyzer}, each term is optionally stemmed, and the number
 * of occurrences of every resulting term is counted.
 *
 * Stemming is applied only when the {@code stemming} flag is set and a
 * stemmer exists for the configured language; otherwise a warning is logged
 * (when stemming was requested) and terms are counted unstemmed.
 *
 * @param text the text to tokenize
 * @return a map from each (possibly stemmed) term to its occurrence count
 * @throws IOException if reading tokens from the analyzer fails
 */
public Map<String, Float> buildBag(String text) throws IOException {
    Map<String, Float> bag = new HashMap<>();
    SnowballStemmer stemmer = null;
    if (stemming) {
        stemmer = getStemmer(language);
        if (stemmer == null) {
            Logger.getLogger(RevisedLesk.class.getName()).log(Level.WARNING, "No stemmer for language {0}", language);
        }
    }
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
    try {
        TokenStream tokenStream = analyzer.tokenStream("gloss", new StringReader(text));
        try {
            // The attribute instance is the same for every token of a stream,
            // so it is looked up once instead of on every iteration.
            TermAttribute token = (TermAttribute) tokenStream.getAttribute(TermAttribute.class);
            while (tokenStream.incrementToken()) {
                String term = token.term();
                if (stemmer != null) {
                    stemmer.setCurrent(term);
                    // Keep the original term when the stemmer reports no result.
                    if (stemmer.stem()) {
                        term = stemmer.getCurrent();
                    }
                }
                Float c = bag.get(term);
                if (c == null) {
                    bag.put(term, 1f);
                } else {
                    bag.put(term, c + 1f);
                }
            }
        } finally {
            // Release the token stream (and its underlying reader) even on failure.
            tokenStream.close();
        }
    } finally {
        // Release resources held by the analyzer.
        analyzer.close();
    }
    return bag;
}
 
Example 9
Source Project: lucene-solr   Source File: SnowballFilter.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Creates a filter over the given token stream that uses the supplied stemmer.
 *
 * @param input the token stream passed to the superclass constructor
 * @param stemmer the Snowball stemmer stored in this filter's {@code stemmer} field
 */
public SnowballFilter(TokenStream input, SnowballStemmer stemmer) {
  super(input);
  this.stemmer = stemmer;
}
 
Example 10
Source Project: lucene-solr   Source File: LuceneCarrot2StemmerFactory.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Creates an adapter around the given Snowball stemmer.
 *
 * @param snowballStemmer the stemmer stored in this adapter's {@code snowballStemmer} field
 */
public SnowballStemmerAdapter(SnowballStemmer snowballStemmer) {
  this.snowballStemmer = snowballStemmer;
}