Java Code Examples for org.apache.lucene.analysis.core.StopFilterFactory

The following examples show how to use org.apache.lucene.analysis.core.StopFilterFactory. These examples are extracted from open-source projects. You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You can also check out the related API usage in the sidebar.
Example 1
Source Project: lucene-solr   Source File: TestCustomAnalyzer.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@link CustomAnalyzer} from a whitespace tokenizer plus a "stop" token filter whose
 * word list is given as {@code org/apache/lucene/analysis/custom/teststop.txt}, then checks the
 * analyzer's reported configuration and that analyzing {@code "foo Foo Bar"} yields no tokens.
 *
 * @throws Exception if building, inspecting, or closing the analyzer fails
 */
public void testStopWordsFromClasspath() throws Exception {
  // try-with-resources guarantees the analyzer is closed even when an assertion below fails;
  // a trailing a.close() would be skipped in that case.
  try (CustomAnalyzer a = CustomAnalyzer.builder()
      .withTokenizer(WhitespaceTokenizerFactory.class)
      .addTokenFilter("stop",
          "ignoreCase", "true",
          "words", "org/apache/lucene/analysis/custom/teststop.txt",
          "format", "wordset")
      .build()) {

    // The builder must expose exactly the components that were configured above.
    assertSame(WhitespaceTokenizerFactory.class, a.getTokenizerFactory().getClass());
    assertEquals(Collections.emptyList(), a.getCharFilterFactories());
    List<TokenFilterFactory> tokenFilters = a.getTokenFilterFactories();
    assertEquals(1, tokenFilters.size());
    // The "stop" name is expected to resolve to StopFilterFactory.
    assertSame(StopFilterFactory.class, tokenFilters.get(0).getClass());
    assertEquals(0, a.getPositionIncrementGap("dummy"));
    assertEquals(1, a.getOffsetGap("dummy"));
    assertSame(Version.LATEST, a.getVersion());

    // Every token of the input is expected to be removed (empty expected-token array).
    assertAnalyzesTo(a, "foo Foo Bar", new String[0]);
  }
}
 
Example 2
/**
 * Assembles the default analyzer builder: a standard tokenizer followed, in order, by the
 * CJK-width, ASCII-folding ({@code preserveOriginal=false}), lower-case, stop-word
 * ({@code STOP_WORDS}) and English-possessive token filters. The builder is then handed to
 * {@code addTokenFilterForUnderscoreRemovalAroundToken} before being returned.
 *
 * @return the configured builder
 * @throws IOException if one of the configuration steps fails with an I/O error
 */
private Builder createDefaultAnalyzerBuilder() throws IOException {
    // Each step reassigns the result so the sequence matches the fluent chain exactly.
    Builder pipeline = CustomAnalyzer.builder();
    pipeline = pipeline.withTokenizer(StandardTokenizerFactory.class);
    pipeline = pipeline.addTokenFilter(CJKWidthFilterFactory.class);
    pipeline = pipeline.addTokenFilter(ASCIIFoldingFilterFactory.class, "preserveOriginal", "false");
    pipeline = pipeline.addTokenFilter(LowerCaseFilterFactory.class);
    pipeline = pipeline.addTokenFilter(StopFilterFactory.class, "words", STOP_WORDS);
    pipeline = pipeline.addTokenFilter(EnglishPossessiveFilterFactory.class);

    addTokenFilterForUnderscoreRemovalAroundToken(pipeline);
    return pipeline;
}
 
Example 3
/**
 * Assembles the artist analyzer builder: a standard tokenizer followed, in order, by the
 * CJK-width, ASCII-folding ({@code preserveOriginal=false}), lower-case, stop-word
 * ({@code STOP_WORDS_ARTIST}) and English-possessive token filters. The builder is then handed
 * to {@code addTokenFilterForUnderscoreRemovalAroundToken} before being returned.
 *
 * @return the configured builder
 * @throws IOException if one of the configuration steps fails with an I/O error
 */
private Builder createArtistAnalyzerBuilder() throws IOException {
    // Each step reassigns the result so the sequence matches the fluent chain exactly.
    Builder pipeline = CustomAnalyzer.builder();
    pipeline = pipeline.withTokenizer(StandardTokenizerFactory.class);
    pipeline = pipeline.addTokenFilter(CJKWidthFilterFactory.class);
    pipeline = pipeline.addTokenFilter(ASCIIFoldingFilterFactory.class, "preserveOriginal", "false");
    pipeline = pipeline.addTokenFilter(LowerCaseFilterFactory.class);
    pipeline = pipeline.addTokenFilter(StopFilterFactory.class, "words", STOP_WORDS_ARTIST);
    pipeline = pipeline.addTokenFilter(EnglishPossessiveFilterFactory.class);

    addTokenFilterForUnderscoreRemovalAroundToken(pipeline);
    return pipeline;
}
 
Example 4
Source Project: airsonic   Source File: AnalyzerFactory.java    License: GNU General Public License v3.0 5 votes vote down vote up
/**
 * Creates the builder for the default analyzer.
 *
 * <p>Filter order after the standard tokenizer: CJK width, ASCII folding with
 * {@code preserveOriginal=false}, lower-casing, stop words taken from {@code STOP_WORDS},
 * English possessive. Finally the builder is passed to
 * {@code addTokenFilterForUnderscoreRemovalAroundToken}.
 *
 * @return the configured builder
 * @throws IOException if one of the configuration steps fails with an I/O error
 */
private Builder createDefaultAnalyzerBuilder() throws IOException {
    Builder analyzerBuilder = CustomAnalyzer.builder();
    analyzerBuilder = analyzerBuilder.withTokenizer(StandardTokenizerFactory.class);

    // Token filters, applied in the order they are registered.
    analyzerBuilder = analyzerBuilder.addTokenFilter(CJKWidthFilterFactory.class);
    analyzerBuilder = analyzerBuilder.addTokenFilter(
            ASCIIFoldingFilterFactory.class, "preserveOriginal", "false");
    analyzerBuilder = analyzerBuilder.addTokenFilter(LowerCaseFilterFactory.class);
    analyzerBuilder = analyzerBuilder.addTokenFilter(StopFilterFactory.class, "words", STOP_WORDS);
    analyzerBuilder = analyzerBuilder.addTokenFilter(EnglishPossessiveFilterFactory.class);

    addTokenFilterForUnderscoreRemovalAroundToken(analyzerBuilder);
    return analyzerBuilder;
}
 
Example 5
Source Project: airsonic   Source File: AnalyzerFactory.java    License: GNU General Public License v3.0 5 votes vote down vote up
/**
 * Creates the builder for the artist analyzer.
 *
 * <p>Filter order after the standard tokenizer: CJK width, ASCII folding with
 * {@code preserveOriginal=false}, lower-casing, stop words taken from
 * {@code STOP_WORDS_ARTIST}, English possessive. Finally the builder is passed to
 * {@code addTokenFilterForUnderscoreRemovalAroundToken}.
 *
 * @return the configured builder
 * @throws IOException if one of the configuration steps fails with an I/O error
 */
private Builder createArtistAnalyzerBuilder() throws IOException {
    Builder analyzerBuilder = CustomAnalyzer.builder();
    analyzerBuilder = analyzerBuilder.withTokenizer(StandardTokenizerFactory.class);

    // Token filters, applied in the order they are registered.
    analyzerBuilder = analyzerBuilder.addTokenFilter(CJKWidthFilterFactory.class);
    analyzerBuilder = analyzerBuilder.addTokenFilter(
            ASCIIFoldingFilterFactory.class, "preserveOriginal", "false");
    analyzerBuilder = analyzerBuilder.addTokenFilter(LowerCaseFilterFactory.class);
    analyzerBuilder = analyzerBuilder.addTokenFilter(StopFilterFactory.class, "words", STOP_WORDS_ARTIST);
    analyzerBuilder = analyzerBuilder.addTokenFilter(EnglishPossessiveFilterFactory.class);

    addTokenFilterForUnderscoreRemovalAroundToken(analyzerBuilder);
    return analyzerBuilder;
}
 
Example 6
Source Project: lucene-solr   Source File: TestCustomAnalyzer.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Configures the "stop" token filter through a {@code Map} of arguments, once by SPI name and
 * once by factory class, and checks that the builder empties the supplied map and that the
 * resulting analyzer removes every token of {@code "foo Foo Bar"}. Also checks that passing an
 * unmodifiable map is rejected with {@link UnsupportedOperationException}.
 *
 * @throws Exception if building, using, or closing an analyzer fails
 */
public void testStopWordsFromClasspathWithMap() throws Exception {
  Map<String,String> stopConfig1 = new HashMap<>();
  stopConfig1.put("ignoreCase", "true");
  stopConfig1.put("words", "org/apache/lucene/analysis/custom/teststop.txt");
  stopConfig1.put("format", "wordset");

  // Independent copies: each build below is asserted to empty the map it was given.
  Map<String,String> stopConfig2 = new HashMap<>(stopConfig1);
  Map<String,String> stopConfigImmutable = Collections.unmodifiableMap(new HashMap<>(stopConfig1));

  // Each analyzer gets its own try-with-resources scope so that both are closed, including
  // when an assertion fails. Reusing a single variable would leave the first one unclosed.
  try (CustomAnalyzer a = CustomAnalyzer.builder()
      .withTokenizer("whitespace")
      .addTokenFilter("stop", stopConfig1)
      .build()) {
    assertTrue(stopConfig1.isEmpty());
    assertAnalyzesTo(a, "foo Foo Bar", new String[0]);
  }

  try (CustomAnalyzer a = CustomAnalyzer.builder()
      .withTokenizer(WhitespaceTokenizerFactory.class)
      .addTokenFilter(StopFilterFactory.class, stopConfig2)
      .build()) {
    assertTrue(stopConfig2.isEmpty());
    assertAnalyzesTo(a, "foo Foo Bar", new String[0]);
  }

  // try with unmodifiableMap, should fail
  expectThrows(UnsupportedOperationException.class, () -> {
    CustomAnalyzer.builder()
        .withTokenizer("whitespace")
        .addTokenFilter("stop", stopConfigImmutable)
        .build();
  });
}
 
Example 7
/**
 * Returns the stop-word sets registered for {@code fieldName}.
 *
 * <p>On the first request for a field, an empty list is stored in {@code solrStopWords} and
 * then filled from the field's index analyzer: when that analyzer is a {@link TokenizerChain},
 * the stop words of every {@link StopFilterFactory} and the common words of every
 * {@link CommonGramsFilterFactory} in the chain are appended. Later requests return the stored
 * list without consulting the schema again.
 *
 * @param fieldName name of the schema field to look up
 * @return the list stored in {@code solrStopWords} for the field
 */
private List<CharArraySet> getSolrStopWordsForField(String fieldName) {
  // Lookup and population both run while holding the monitor of solrStopWords.
  // NOTE(review): an earlier comment here stated that synchronization is unnecessary because
  // Carrot2 does not use an instance from several threads at once -- confirm before removing
  // the lock.
  synchronized (solrStopWords) {
    if (solrStopWords.containsKey(fieldName)) {
      return solrStopWords.get(fieldName);
    }

    // Register the (initially empty) entry before inspecting the analyzer chain.
    solrStopWords.put(fieldName, new ArrayList<>());

    final Analyzer indexAnalyzer =
        core.getLatestSchema().getFieldType(fieldName).getIndexAnalyzer();
    if (indexAnalyzer instanceof TokenizerChain) {
      for (TokenFilterFactory candidate :
          ((TokenizerChain) indexAnalyzer).getTokenFilterFactories()) {
        // The two checks are independent; a factory matching both contributes twice.
        if (candidate instanceof StopFilterFactory) {
          solrStopWords.get(fieldName).add(((StopFilterFactory) candidate).getStopWords());
        }
        if (candidate instanceof CommonGramsFilterFactory) {
          solrStopWords.get(fieldName)
              .add(((CommonGramsFilterFactory) candidate).getCommonWords());
        }
      }
    }
    return solrStopWords.get(fieldName);
  }
}
 
Example 8
Source Project: lucene-solr   Source File: TaggerRequestHandler.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reports whether the index-time analyzer of {@code field} contains a
 * {@link StopFilterFactory}.
 *
 * @param field name of the schema field to inspect
 * @param req request whose schema is consulted
 * @return {@code true} if the index analyzer is a {@link TokenizerChain} with at least one
 *     {@link StopFilterFactory} among its token filter factories; {@code false} otherwise
 */
private boolean fieldHasIndexedStopFilter(String field, SolrQueryRequest req) {
  final Analyzer indexAnalyzer = req.getSchema().getFieldType(field).getIndexAnalyzer();

  // Only a TokenizerChain exposes its filter factories; any other analyzer counts as "no".
  if (!(indexAnalyzer instanceof TokenizerChain)) {
    return false;
  }

  for (TokenFilterFactory candidate : ((TokenizerChain) indexAnalyzer).getTokenFilterFactories()) {
    if (candidate instanceof StopFilterFactory) {
      return true;
    }
  }
  return false;
}
 
Example 9
Source Project: SolrTextTagger   Source File: TaggerRequestHandler.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks the index analyzer of {@code field} for a {@link StopFilterFactory}.
 *
 * @param field name of the schema field to inspect
 * @param req request whose schema is consulted
 * @return {@code true} when the field's index analyzer is a {@link TokenizerChain} and one of
 *     its token filter factories is a {@link StopFilterFactory}; {@code false} otherwise
 */
private boolean fieldHasIndexedStopFilter(String field, SolrQueryRequest req) {
  final FieldType type = req.getSchema().getFieldType(field);
  final Analyzer indexAnalyzer = type.getIndexAnalyzer();

  boolean stopFilterFound = false;
  if (indexAnalyzer instanceof TokenizerChain) {
    final TokenFilterFactory[] factories =
        ((TokenizerChain) indexAnalyzer).getTokenFilterFactories();
    // Stop scanning at the first match; one stop filter is enough.
    for (int i = 0; i < factories.length && !stopFilterFound; i++) {
      stopFilterFound = factories[i] instanceof StopFilterFactory;
    }
  }
  return stopFilterFound;
}