Java Code Examples for org.apache.lucene.analysis.StopFilter#makeStopSet()

The following examples show how to use org.apache.lucene.analysis.StopFilter#makeStopSet() . These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
public void testEndingHole() throws Exception {
  // Just deletes "of"
  Analyzer a = new Analyzer() {
      @Override
      public TokenStreamComponents createComponents(String field) {
        Tokenizer tokenizer = new MockTokenizer();
        CharArraySet stopSet = StopFilter.makeStopSet("of");
        return new TokenStreamComponents(tokenizer, new StopFilter(tokenizer, stopSet));
      }
    };

  Iterable<Input> keys = AnalyzingSuggesterTest.shuffle(
      new Input("wizard of oz", 50)
  );
  FreeTextSuggester sug = new FreeTextSuggester(a, a, 3, (byte) 0x20);
  sug.build(new InputArrayIterator(keys));
  assertEquals("wizard _ oz/1.00",
               toString(sug.lookup("wizard of", 10)));

  // Falls back to unigram model, with backoff 0.4 times
  // prop 0.5:
  assertEquals("oz/0.20",
               toString(sug.lookup("wizard o", 10)));
  a.close();
}
 
Example 2
public void testTwoEndingHoles() throws Exception {
  // Just deletes "of"
  Analyzer a = new Analyzer() {
      @Override
      public TokenStreamComponents createComponents(String field) {
        Tokenizer tokenizer = new MockTokenizer();
        CharArraySet stopSet = StopFilter.makeStopSet("of");
        return new TokenStreamComponents(tokenizer, new StopFilter(tokenizer, stopSet));
      }
    };

  Iterable<Input> keys = AnalyzingSuggesterTest.shuffle(
      new Input("wizard of of oz", 50)
  );
  FreeTextSuggester sug = new FreeTextSuggester(a, a, 3, (byte) 0x20);
  sug.build(new InputArrayIterator(keys));
  assertEquals("",
               toString(sug.lookup("wizard of of", 10)));
  a.close();
}
 
Example 3
public void testEndNotStopWord() throws Exception {
  CharArraySet stopWords = StopFilter.makeStopSet("to");
  Tokenizer stream = new MockTokenizer();
  stream.setReader(new StringReader("go to"));
  TokenStream filter = new SuggestStopFilter(stream, stopWords);
  assertTokenStreamContents(filter,
                            new String[] {"go", "to"},
                            new int[] {0, 3},
                            new int[] {2, 5},
                            null,
                            new int[] {1, 1},
                            null,
                            5,
                            new boolean[] {false, true},
                            true);
}
 
Example 4
public void testEndIsStopWord() throws Exception {
                            
  CharArraySet stopWords = StopFilter.makeStopSet("to");
  Tokenizer stream = new MockTokenizer();
  stream.setReader(new StringReader("go to "));
  TokenStream filter = new SuggestStopFilter(stream, stopWords);
  assertTokenStreamContents(filter,
                            new String[] {"go"},
                            new int[] {0},
                            new int[] {2},
                            null,
                            new int[] {1},
                            null,
                            6,
                            new boolean[] {false},
                            true);
}
 
Example 5
public void testMidStopWord() throws Exception {
                            
  CharArraySet stopWords = StopFilter.makeStopSet("to");
  Tokenizer stream = new MockTokenizer();
  stream.setReader(new StringReader("go to school"));
  TokenStream filter = new SuggestStopFilter(stream, stopWords);
  assertTokenStreamContents(filter,
                            new String[] {"go", "school"},
                            new int[] {0, 6},
                            new int[] {2, 12},
                            null,
                            new int[] {1, 2},
                            null,
                            12,
                            new boolean[] {false, false},
                            true);
}
 
Example 6
public void testMultipleStopWords() throws Exception {
                            
  CharArraySet stopWords = StopFilter.makeStopSet("to", "the", "a");
  Tokenizer stream = new MockTokenizer();
  stream.setReader(new StringReader("go to a the school"));
  TokenStream filter = new SuggestStopFilter(stream, stopWords);
  assertTokenStreamContents(filter,
                            new String[] { "go", "school" },
                            new int[] {0, 12},
                            new int[] {2, 18},
                            null,
                            new int[] {1, 4},
                            null,
                            18,
                            new boolean[] {false, false},
                            true);
}
 
Example 7
public void testMultipleStopWordsEnd() throws Exception {
                            
  CharArraySet stopWords = StopFilter.makeStopSet("to", "the", "a");
  Tokenizer stream = new MockTokenizer();
  stream.setReader(new StringReader("go to a the"));
  TokenStream filter = new SuggestStopFilter(stream, stopWords);
  assertTokenStreamContents(filter,
                            new String[] { "go", "the"},
                            new int[] {0, 8},
                            new int[] {2, 11},
                            null,
                            new int[] {1, 3},
                            null,
                            11,
                            new boolean[] {false, true},
                            true);
}
 
Example 8
public void testMultipleStopWordsEnd2() throws Exception {
                            
  CharArraySet stopWords = StopFilter.makeStopSet("to", "the", "a");
  Tokenizer stream = new MockTokenizer();
  stream.setReader(new StringReader("go to a the "));
  TokenStream filter = new SuggestStopFilter(stream, stopWords);
  assertTokenStreamContents(filter,
                            new String[] { "go"},
                            new int[] {0},
                            new int[] {2},
                            null,
                            new int[] {1},
                            null,
                            12,
                            new boolean[] {false},
                            true);
}
 
Example 9
@Test
  public void testWithStopword() throws Exception {
    for (boolean preservePosInc : new boolean[]{true, false}) {
      Tokenizer tokenStream = new MockTokenizer(MockTokenizer.WHITESPACE, true);
      String input = "a mykeyword a keyword"; //LUCENE-8344 add "a"
      tokenStream.setReader(new StringReader(input));
      TokenFilter tokenFilter = new StopFilter(tokenStream, StopFilter.makeStopSet("a"));
      ConcatenateGraphFilter concatStream = new ConcatenateGraphFilter(tokenFilter, SEP_LABEL, preservePosInc, 10);
      CharsRefBuilder builder = new CharsRefBuilder();
      if (preservePosInc) {
        builder.append(SEP_LABEL);
      }
      builder.append("mykeyword");
      builder.append(SEP_LABEL);
      if (preservePosInc) {
        builder.append(SEP_LABEL);
      }
      builder.append("keyword");
//      if (preservePosInc) { LUCENE-8344 uncomment
//        builder.append(SEP_LABEL);
//      }
      assertTokenStreamContents(concatStream, new String[]{builder.toCharsRef().toString()});
    }
  }
 
Example 10
@Test
public void testSeparatorWithStopWords() throws IOException {
  Tokenizer tokenStream = new MockTokenizer(MockTokenizer.WHITESPACE, false);
  String input = "A B C D E F J H";
  tokenStream.setReader(new StringReader(input));
  TokenStream tokenFilter = new StopFilter(tokenStream, StopFilter.makeStopSet("A", "D", "E", "J"));
  ConcatenateGraphFilter stream = new ConcatenateGraphFilter(tokenFilter, '-', false, 100);

  assertTokenStreamContents(stream, new String[] {"B-C-F-H"}, null, null, new int[] { 1 });
}
 
Example 11
@Test
public void testSeparatorWithStopWordsAndPreservePositionIncrements() throws IOException {
  Tokenizer tokenStream = new MockTokenizer(MockTokenizer.WHITESPACE, false);
  String input = "A B C D E F J H";
  tokenStream.setReader(new StringReader(input));
  TokenStream tokenFilter = new StopFilter(tokenStream, StopFilter.makeStopSet("A", "D", "E", "J"));
  ConcatenateGraphFilter stream = new ConcatenateGraphFilter(tokenFilter, '-', true, 100);

  assertTokenStreamContents(stream, new String[] {"-B-C---F--H"}, null, null, new int[] { 1 });
}