org.apache.lucene.analysis.core.StopFilter Java Examples

The following examples show how to use org.apache.lucene.analysis.core.StopFilter. You can vote up the examples you find helpful or vote down the ones you don't, and you can go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example #1
Source File: StandardPreProcessorIterator.java    From Indra with MIT License    6 votes
/**
 * Wraps {@code stream} in a {@link StopFilter}.
 *
 * Explicit metadata stop words (matched case-sensitively) take precedence;
 * otherwise the per-language default set is used, matched case-insensitively.
 * If no default set exists for {@code lang}, the stream is returned untouched.
 *
 * @throws IndraRuntimeException if loading the default stop words fails
 */
private TokenStream getStopFilter(String lang, Set<String> metadataStopWords, TokenStream stream) {
    boolean hasMetadataWords = metadataStopWords != null && !metadataStopWords.isEmpty();
    if (hasMetadataWords) {
        return new StopFilter(stream, new CharArraySet(metadataStopWords, false));
    }
    try {
        Set<String> defaults = getDefaultStopWordSet(lang);
        if (defaults != null) {
            CharArraySet stopWords = new CharArraySet(30, true);
            stopWords.addAll(defaults);
            return new StopFilter(stream, stopWords);
        }
    } catch (IndraException e) {
        throw new IndraRuntimeException(String.format("Error creating stop filter for lang '%s'", lang), e);
    }
    // No stop words available for this language: pass the stream through unfiltered.
    return stream;
}
 
Example #2
Source File: QueryParserImpl.java    From AdSearch_Endpoints with Apache License 2.0    6 votes
@Override
  public List<String> parseQuery(String queryStr) {
    // Tokenize queryStr with a StandardTokenizer and drop English stop words.
    List<String> tokens = new ArrayList<String>();
    AttributeFactory factory = AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY;
    Tokenizer tokenizer = new StandardTokenizer(factory);
    tokenizer.setReader(new StringReader(queryStr));
    CharArraySet stopWords = EnglishAnalyzer.getDefaultStopSet();
    // try-with-resources guarantees the stream (and the wrapped tokenizer) is
    // closed even when incrementToken() throws; the original leaked on that path.
    try (TokenStream tokenStream = new StopFilter(tokenizer, stopWords)) {
      // Read the term attribute from the stream we actually consume. It is the
      // same instance as the tokenizer's (shared AttributeSource), but reading
      // it from tokenStream makes that intent explicit.
      CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
      tokenStream.reset();
      while (tokenStream.incrementToken()) {
        tokens.add(charTermAttribute.toString());
      }
      tokenStream.end();
    } catch (IOException e) {
      // Preserve the original best-effort contract: log and return what we have.
      e.printStackTrace();
    }
    return tokens;
  }
 
Example #3
Source File: StopTokenFilterFactory.java    From Elasticsearch with Apache License 2.0    5 votes
/**
 * Wraps the given stream in the configured stop-word filter.
 *
 * When {@code removeTrailing} is false the suggest-oriented variant is used;
 * otherwise the filter implementation is chosen by index version so that
 * pre-4.4 indices keep honoring {@code enablePositionIncrements}.
 */
@Override
public TokenStream create(TokenStream tokenStream) {
    if (!removeTrailing) {
        return new SuggestStopFilter(tokenStream, stopWords);
    }
    return version.onOrAfter(Version.LUCENE_4_4)
            ? new StopFilter(tokenStream, stopWords)
            : new Lucene43StopFilter(enablePositionIncrements, tokenStream, stopWords);
}
 
Example #4
Source File: ManagedStopFilterFactory.java    From lucene-solr with Apache License 2.0    5 votes
/**
 * Builds a StopFilter over the managed stop word set.
 *
 * @param input the token stream to filter
 * @return a {@link StopFilter} backed by the managed stop words
 * @throws IllegalStateException if the managed stop word set was never loaded
 */
@Override
public TokenStream create(TokenStream input) {    
  if (stopWords != null) {
    return new StopFilter(input, stopWords);
  }
  throw new IllegalStateException("Managed stopwords not initialized correctly!");
}
 
Example #5
Source File: CASAnalyzer.java    From oodt with Apache License 2.0    5 votes
/**
 * Builds an analyzer with the given stop words.
 *
 * CharArraySet iteration yields raw {@code char[]} entries, so each element is
 * converted back to a String before the internal stop set is built.
 *
 * @param stopWords the stop words to use
 */
public CASAnalyzer(CharArraySet stopWords) {
    // Presize to avoid resizing; typed for-each replaces the raw Iterator
    // (raw types defeat the compiler's generic checks).
    List<String> sw = new ArrayList<>(stopWords.size());
    for (Object entry : stopWords) {
        sw.add(new String((char[]) entry));
    }
    stopSet = StopFilter.makeStopSet(sw);
}