org.apache.lucene.analysis.StopAnalyzer Java Examples

The following examples show how to use org.apache.lucene.analysis.StopAnalyzer. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: 387581_IndexTaskTest_0_t.java    From coming with MIT License 6 votes vote down vote up
/**
 * JUnit fixture setup: builds a fresh index and prepares the search objects.
 *
 * <p>Runs an {@code IndexTask} over the files under {@code docsDir}, writing
 * the index to {@code indexDir} (overwriting any existing one), then assigns
 * the {@code searcher} and {@code analyzer} fields used by the tests.
 *
 * @throws Exception if any step of the index build or searcher creation fails
 */
public void setUp() throws Exception {
    final Project antProject = new Project();
    final IndexTask indexer = new IndexTask();

    // Feed the task every file found under the documents directory.
    final FileSet documents = new FileSet();
    documents.setDir(new File(docsDir));
    indexer.addFileset(documents);

    // Configure the task and build the index from scratch.
    indexer.setOverwrite(true);
    indexer.setDocumentHandler(docHandler);
    indexer.setIndex(new File(indexDir));
    indexer.setProject(antProject);
    indexer.execute();

    // Search-side objects operate on the index that was just written.
    searcher = new IndexSearcher(indexDir);
    analyzer = new StopAnalyzer();
}
 
Example #2
Source File: 387581_IndexTaskTest_0_s.java    From coming with MIT License 6 votes vote down vote up
/**
 * JUnit fixture setup: builds a fresh index and prepares the search objects.
 *
 * <p>Runs an {@code IndexTask} over the files under {@code docsDir}, writing
 * the index to {@code indexDir} (overwriting any existing one), then assigns
 * the {@code searcher} and {@code analyzer} fields used by the tests.
 *
 * @throws Exception if any step of the index build or searcher creation fails
 */
public void setUp() throws Exception {
    final Project antProject = new Project();
    final IndexTask indexer = new IndexTask();

    // Feed the task every file found under the documents directory.
    final FileSet documents = new FileSet();
    documents.setDir(new File(docsDir));
    indexer.addFileset(documents);

    // Configure the task and build the index from scratch.
    indexer.setOverwrite(true);
    indexer.setDocumentHandler(docHandler);
    indexer.setIndex(new File(indexDir));
    indexer.setProject(antProject);
    indexer.execute();

    // Search-side objects operate on the index that was just written.
    searcher = new IndexSearcher(indexDir);
    analyzer = new StopAnalyzer();
}
 
Example #3
Source File: StopwordAnnotator.java    From coreNlp with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the annotator from the pipeline properties.
 *
 * <p>The {@code CHECK_LEMMA} and {@code IGNORE_STOPWORD_CASE} properties are
 * parsed as booleans and default to {@code false}. When a
 * {@code STOPWORDS_LIST} property is present its value is handed to
 * {@code getStopWordList}; otherwise Lucene's
 * {@code StopAnalyzer.ENGLISH_STOP_WORDS_SET} is used.
 *
 * @param annotatorClass annotator name supplied by the caller; not used by this constructor
 * @param props          configuration properties; stored in {@code this.props}
 */
public StopwordAnnotator(String annotatorClass, Properties props) {
    this.props = props;

    this.checkLemma = Boolean.parseBoolean(props.getProperty(CHECK_LEMMA, "false"));

    // Read the value once and branch on it. getProperty consults the
    // Properties defaults chain and yields null for non-String values, whereas
    // containsKey does neither - so the old containsKey test could ignore a
    // list supplied through defaults, or pass null on to getStopWordList.
    String stopwordList = props.getProperty(STOPWORDS_LIST);
    if (stopwordList != null) {
        boolean ignoreCase = Boolean.parseBoolean(props.getProperty(IGNORE_STOPWORD_CASE, "false"));
        this.stopwords = getStopWordList(Version.LUCENE_36, stopwordList, ignoreCase);
    } else {
        this.stopwords = (CharArraySet) StopAnalyzer.ENGLISH_STOP_WORDS_SET;
    }
}
 
Example #4
Source File: StopwordAnnotatorTest.java    From coreNlp with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that the stopword annotation on each token agrees with Lucene's
 * standard English stopword set.
 *
 * <p>Every token's lower-cased word is looked up in
 * {@code StopAnalyzer.ENGLISH_STOP_WORDS_SET}; the first element of the
 * token's stopword pair must equal the result of that lookup, and the second
 * element must be false.
 *
 * @throws Exception if running the pipeline fails
 */
@org.junit.Test
public void testLuceneStopwordList() throws Exception {
    Properties config = new Properties();
    config.setProperty("annotators", "tokenize, ssplit, stopword");
    config.setProperty("customAnnotatorClass.stopword", "intoxicant.analytics.coreNlp.StopwordAnnotator");

    StanfordCoreNLP nlp = new StanfordCoreNLP(config);
    Annotation annotated = new Annotation(example);
    nlp.annotate(annotated);
    List<CoreLabel> tokenList = annotated.get(CoreAnnotations.TokensAnnotation.class);

    // Reference set: the stock Lucene English stopwords.
    Set<?> luceneStopwords = StopAnalyzer.ENGLISH_STOP_WORDS_SET;

    for (CoreLabel current : tokenList) {
        // Pair attached to the token under the StopwordAnnotator key.
        Pair<Boolean, Boolean> flags = current.get(StopwordAnnotator.class);

        // The word flag must match membership in the reference set.
        boolean listed = luceneStopwords.contains(current.word().toLowerCase());
        assertTrue(listed == flags.first());

        // Lemma checking is not enabled here, so the second flag stays false.
        assertFalse(flags.second());
    }
}