Java Code Examples for org.apache.lucene.analysis.StopAnalyzer

The following examples show how to use org.apache.lucene.analysis.StopAnalyzer. They are extracted from open source projects. You can vote up the examples you find useful or vote down the ones you do not, and you can go to the original project or source file by following the links above each example. Related API usage is listed in the sidebar.
Example 1
Source Project: coming   Source File: 387581_IndexTaskTest_0_t.java    License: MIT License 6 votes vote down vote up
/**
 * JUnit fixture setup.
 *
 * Runs an IndexTask (overwrite enabled) over the files under docsDir, writing
 * the index to indexDir, and then creates the searcher and analyzer that are
 * stored on this test instance.
 *
 * @throws Exception propagated from running the index task or from opening
 *                   the searcher
 */
public void setUp() throws Exception {
    // Describe the documents to be indexed.
    FileSet documents = new FileSet();
    documents.setDir(new File(docsDir));

    // Configure the indexing task and run it.
    IndexTask indexer = new IndexTask();
    indexer.addFileset(documents);
    indexer.setOverwrite(true);
    indexer.setDocumentHandler(docHandler);
    indexer.setIndex(new File(indexDir));
    indexer.setProject(new Project());
    indexer.execute();

    // The index now exists on disk; open it for the tests.
    searcher = new IndexSearcher(indexDir);
    analyzer = new StopAnalyzer();
}
 
Example 2
Source Project: coming   Source File: 387581_IndexTaskTest_0_s.java    License: MIT License 6 votes vote down vote up
/**
 * Prepares the test fixture before each test.
 *
 * An IndexTask is configured with the document directory (docsDir), the
 * document handler, the target index location (indexDir) and overwrite set to
 * true, and is then executed. Afterwards the searcher and analyzer fields are
 * initialised.
 *
 * @throws Exception propagated from executing the task or from constructing
 *                   the searcher
 */
public void setUp() throws Exception {
    final Project antProject = new Project();
    final IndexTask indexTask = new IndexTask();

    // Source documents for the index.
    final FileSet sourceFiles = new FileSet();
    sourceFiles.setDir(new File(docsDir));
    indexTask.addFileset(sourceFiles);

    // Task configuration, then run.
    indexTask.setOverwrite(true);
    indexTask.setDocumentHandler(docHandler);
    indexTask.setIndex(new File(indexDir));
    indexTask.setProject(antProject);
    indexTask.execute();

    // Open the index that the task has just written.
    searcher = new IndexSearcher(indexDir);
    analyzer = new StopAnalyzer();
}
 
Example 3
Source Project: coreNlp   Source File: StopwordAnnotator.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates the annotator from the supplied configuration.
 *
 * Reads the CHECK_LEMMA flag (default "false"). If a STOPWORDS_LIST property
 * is available, the stopword set is built from it via getStopWordList,
 * honouring the IGNORE_STOPWORD_CASE flag (default "false"); otherwise the
 * standard Lucene English stopword set is used.
 *
 * @param annotatorClass annotator name passed in by the caller; not read by
 *                       this constructor
 * @param props          configuration properties; must not be null
 */
public StopwordAnnotator(String annotatorClass, Properties props) {
    this.props = props;

    this.checkLemma = Boolean.parseBoolean(props.getProperty(CHECK_LEMMA, "false"));

    // Use getProperty rather than containsKey: containsKey (inherited from
    // Hashtable) does not look at the Properties defaults chain, and
    // getProperty returns null for a missing or non-String value. A single
    // null check therefore handles both "absent" and "unusable" consistently.
    String stopwordList = props.getProperty(STOPWORDS_LIST);
    if (stopwordList != null) {
        boolean ignoreCase = Boolean.parseBoolean(props.getProperty(IGNORE_STOPWORD_CASE, "false"));
        this.stopwords = getStopWordList(Version.LUCENE_36, stopwordList, ignoreCase);
    } else {
        this.stopwords = (CharArraySet) StopAnalyzer.ENGLISH_STOP_WORDS_SET;
    }
}
 
Example 4
Source Project: coreNlp   Source File: StopwordAnnotatorTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the stopword annotation attached to every token agrees with
 * Lucene's standard English stopword set.
 *
 * The pipeline is configured with the default stopword settings, so the first
 * element of each token's annotation pair is expected to match membership of
 * the lower-cased word in the Lucene set, and the second element (the lemma
 * check) is expected to be false for every token.
 *
 * @throws Exception if building or running the pipeline fails
 */
@org.junit.Test
public void testLuceneStopwordList() throws Exception {
    Properties pipelineConfig = new Properties();
    pipelineConfig.setProperty("annotators", "tokenize, ssplit, stopword");
    pipelineConfig.setProperty("customAnnotatorClass.stopword", "intoxicant.analytics.coreNlp.StopwordAnnotator");

    StanfordCoreNLP nlp = new StanfordCoreNLP(pipelineConfig);
    Annotation annotated = new Annotation(example);
    nlp.annotate(annotated);
    List<CoreLabel> labels = annotated.get(CoreAnnotations.TokensAnnotation.class);

    // Reference set: the standard Lucene English stopwords.
    Set<?> luceneStopwords = StopAnalyzer.ENGLISH_STOP_WORDS_SET;

    for (CoreLabel label : labels) {
        // Pair produced by the stopword annotator for this token.
        Pair<Boolean, Boolean> flags = label.get(StopwordAnnotator.class);

        boolean expectedStopword = luceneStopwords.contains(label.word().toLowerCase());
        if (!expectedStopword) {
            assertFalse(flags.first());
        } else {
            assertTrue(flags.first());
        }

        // Lemma checking is not enabled in this configuration, so the second flag stays false.
        assertFalse(flags.second());
    }
}