org.apache.lucene.analysis.StopAnalyzer Java Examples

The following examples show how to use org.apache.lucene.analysis.StopAnalyzer. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example #1

Source File: 387581_IndexTaskTest_0_t.java From coming with MIT License

6 votes

/**
 * JUnit fixture setup. Runs an {@code IndexTask} over the files under
 * {@code docsDir}, writing to {@code indexDir} with overwrite enabled, and
 * then creates the {@code searcher} and {@code analyzer} fields.
 *
 * @throws Exception if any step of the setup fails
 */
public void setUp() throws Exception {
    Project proj = new Project();

    // Configure the indexing task: input file set, handler, and target index.
    IndexTask indexer = new IndexTask();
    FileSet inputDocs = new FileSet();
    inputDocs.setDir(new File(docsDir));
    indexer.addFileset(inputDocs);
    indexer.setOverwrite(true);
    indexer.setDocumentHandler(docHandler);
    indexer.setIndex(new File(indexDir));
    indexer.setProject(proj);
    indexer.execute();

    // The searcher is opened only after the task has executed.
    searcher = new IndexSearcher(indexDir);
    analyzer = new StopAnalyzer();
}

Example #2

Source File: 387581_IndexTaskTest_0_s.java From coming with MIT License

6 votes

/**
 * Prepares the test fixture: executes an {@code IndexTask} that reads from
 * {@code docsDir} and writes to {@code indexDir} (overwrite turned on), then
 * assigns a new {@code IndexSearcher} and {@code StopAnalyzer} to the
 * {@code searcher} and {@code analyzer} fields.
 *
 * @throws Exception if the task execution or searcher creation fails
 */
public void setUp() throws Exception {
    Project owningProject = new Project();

    IndexTask indexTask = new IndexTask();

    // Source documents for the task.
    FileSet sources = new FileSet();
    sources.setDir(new File(docsDir));
    indexTask.addFileset(sources);

    // Remaining task options, then run it.
    indexTask.setOverwrite(true);
    indexTask.setDocumentHandler(docHandler);
    indexTask.setIndex(new File(indexDir));
    indexTask.setProject(owningProject);
    indexTask.execute();

    searcher = new IndexSearcher(indexDir);
    analyzer = new StopAnalyzer();
}

Example #3

Source File: StopwordAnnotator.java From coreNlp with Apache License 2.0

5 votes

/**
 * Creates the annotator and selects its stopword set from the given properties.
 *
 * <p>If {@code STOPWORDS_LIST} resolves to a value, that value is handed to
 * {@code getStopWordList} together with the {@code IGNORE_STOPWORD_CASE} flag
 * (default {@code "false"}). Otherwise Lucene's
 * {@code StopAnalyzer.ENGLISH_STOP_WORDS_SET} is used.
 *
 * @param annotatorClass not read by this constructor
 * @param props          annotator configuration; also stored on the instance
 */
public StopwordAnnotator(String annotatorClass, Properties props) {
    this.props = props;

    this.checkLemma = Boolean.parseBoolean(props.getProperty(CHECK_LEMMA, "false"));

    // Resolve the key with getProperty instead of containsKey: containsKey only
    // sees entries stored directly in this table, whereas getProperty also
    // consults the Properties defaults chain, like every other lookup here.
    String stopwordList = props.getProperty(STOPWORDS_LIST);
    if (stopwordList != null) {
        boolean ignoreCase = Boolean.parseBoolean(props.getProperty(IGNORE_STOPWORD_CASE, "false"));
        this.stopwords = getStopWordList(Version.LUCENE_36, stopwordList, ignoreCase);
    } else {
        this.stopwords = (CharArraySet) StopAnalyzer.ENGLISH_STOP_WORDS_SET;
    }
}

Example #4

Source File: StopwordAnnotatorTest.java From coreNlp with Apache License 2.0

5 votes

/**
 * Runs the pipeline with the custom stopword annotator and checks every token's
 * stopword flag against Lucene's standard English stopword set. The lemma flag
 * is expected to be false for every token.
 *
 * @throws Exception if the pipeline or an assertion fails
 */
@org.junit.Test
public void testLuceneStopwordList() throws Exception {
    Properties config = new Properties();
    config.put("annotators", "tokenize, ssplit, stopword");
    config.setProperty("customAnnotatorClass.stopword", "intoxicant.analytics.coreNlp.StopwordAnnotator");

    StanfordCoreNLP nlp = new StanfordCoreNLP(config);
    Annotation annotated = new Annotation(example);
    nlp.annotate(annotated);
    List<CoreLabel> labels = annotated.get(CoreAnnotations.TokensAnnotation.class);

    // Reference set: the stock Lucene English stopwords.
    Set<?> luceneStopWords = StopAnalyzer.ENGLISH_STOP_WORDS_SET;

    for (CoreLabel label : labels) {
        // Pair of flags stored on the token under the StopwordAnnotator key.
        Pair<Boolean, Boolean> flags = label.get(StopwordAnnotator.class);

        String lowered = label.word().toLowerCase();
        if (!luceneStopWords.contains(lowered)) {
            assertFalse(flags.first());
        } else {
            assertTrue(flags.first());
        }

        // Lemma checking is not enabled in this configuration, so the second flag stays false.
        assertFalse(flags.second());
    }
}