Java Code Examples for org.apache.lucene.analysis.core.StopFilter

The following examples show how to use org.apache.lucene.analysis.core.StopFilter. They are extracted from open-source projects. You can vote up the examples you find useful or vote down those you don't, and you can visit the original project or source file by following the link above each example. Related API usage is listed on the sidebar.
Example 1
Source Project: Indra   Source File: StandardPreProcessorIterator.java    License: MIT License 6 votes vote down vote up
/**
 * Wraps {@code stream} in a {@link StopFilter} for the given language.
 *
 * <p>Stopwords supplied via metadata take precedence; otherwise the default
 * stopword set for {@code lang} is used. If neither is available, the stream
 * is returned unfiltered.
 *
 * @param lang              language code used to look up default stopwords
 * @param metadataStopWords caller-supplied stopwords; may be null or empty
 * @param stream            the token stream to filter
 * @return a stop-filtered stream, or {@code stream} unchanged when no
 *         stopword set applies
 */
private TokenStream getStopFilter(String lang, Set<String> metadataStopWords, TokenStream stream) {
    // Metadata-provided stopwords win over language defaults.
    if (metadataStopWords != null && !metadataStopWords.isEmpty()) {
        return new StopFilter(stream, new CharArraySet(metadataStopWords, false));
    }

    try {
        Set<String> defaults = getDefaultStopWordSet(lang);
        if (defaults == null) {
            // No default list for this language: pass the stream through untouched.
            return stream;
        }
        // ignoreCase=true here, unlike the metadata branch above.
        CharArraySet defaultSet = new CharArraySet(30, true);
        defaultSet.addAll(defaults);
        return new StopFilter(stream, defaultSet);
    } catch (IndraException e) {
        throw new IndraRuntimeException(String.format("Error creating stop filter for lang '%s'", lang), e);
    }
}
 
Example 2
Source Project: AdSearch_Endpoints   Source File: QueryParserImpl.java    License: Apache License 2.0 6 votes vote down vote up
@Override
  public List<String> parseQuery(String queryStr) {
    // Tokenizes queryStr with a StandardTokenizer and removes English stopwords.
    // NOTE: no stemming is performed here despite the original comment claiming it.
    List<String> tokens = new ArrayList<String>();
    AttributeFactory factory = AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY;
    Tokenizer tokenizer = new StandardTokenizer(factory);
    tokenizer.setReader(new StringReader(queryStr));
    CharArraySet stopWords = EnglishAnalyzer.getDefaultStopSet();
    // try-with-resources guarantees the whole chain is closed even on error;
    // closing the StopFilter closes the wrapped tokenizer as well.
    try (TokenStream tokenStream = new StopFilter(tokenizer, stopWords)) {
        // Read the term attribute from the end of the chain being consumed.
        CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
        tokenStream.reset();
        while (tokenStream.incrementToken()) {
            tokens.add(charTermAttribute.toString());
        }
        tokenStream.end();
    } catch (IOException e) {
        // Previously this was swallowed with printStackTrace(), silently
        // returning a partial token list. Tokenizing an in-memory string
        // should never throw, so treat it as a programming error.
        throw new RuntimeException("Failed to tokenize query: " + queryStr, e);
    }
    return tokens;
  }
 
Example 3
Source Project: Elasticsearch   Source File: StopTokenFilterFactory.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Wraps the incoming stream in the appropriate stop filter variant.
 *
 * <p>When trailing stopwords should be kept (suggest use case), a
 * {@link SuggestStopFilter} is used; otherwise the version-appropriate
 * standard stop filter is chosen.
 */
@Override
public TokenStream create(TokenStream tokenStream) {
    if (!removeTrailing) {
        // Suggest mode: keep a trailing stopword so partial queries still match.
        return new SuggestStopFilter(tokenStream, stopWords);
    }
    return version.onOrAfter(Version.LUCENE_4_4)
            ? new StopFilter(tokenStream, stopWords)
            : new Lucene43StopFilter(enablePositionIncrements, tokenStream, stopWords);
}
 
Example 4
Source Project: lucene-solr   Source File: ManagedStopFilterFactory.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@link StopFilter} backed by the managed stop word set.
 *
 * @throws IllegalStateException if the managed stopwords were never loaded
 */
@Override
public TokenStream create(TokenStream input) {
  // Snapshot the field once so the null check and the use see the same value.
  final CharArraySet managed = stopWords;
  if (managed == null) {
    throw new IllegalStateException("Managed stopwords not initialized correctly!");
  }
  return new StopFilter(input, managed);
}
 
Example 5
Source Project: oodt   Source File: CASAnalyzer.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds an analyzer with the given stop words.
 *
 * <p>CharArraySet iteration yields raw {@code char[]} entries, so each one is
 * converted back to a String before rebuilding the stop set via
 * {@link StopFilter#makeStopSet}.
 *
 * @param stopWords the stop word set to copy into this analyzer's stop set
 */
public CASAnalyzer(CharArraySet stopWords) {
    // Presize the list; enhanced for-loop avoids the raw Iterator and
    // scattered unchecked casts of the original.
    List<String> sw = new ArrayList<>(stopWords.size());
    for (Object entry : stopWords) {
        sw.add(new String((char[]) entry));
    }
    stopSet = StopFilter.makeStopSet(sw);
}