Java Code Examples for org.apache.lucene.analysis.core.KeywordTokenizer

The following examples show how to use org.apache.lucene.analysis.core.KeywordTokenizer. These examples are extracted from open source projects.
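Before looking at the project examples, here is a minimal, self-contained sketch of what the class does: KeywordTokenizer emits its entire input as a single token, which is why the test analyzers below pair it with exactly one TokenFilter under test. The class name and input string in this sketch are illustrative only and do not come from any of the listed projects.

import java.io.StringReader;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class KeywordTokenizerSketch {
  public static void main(String[] args) throws Exception {
    // KeywordTokenizer treats the whole input as one token.
    try (KeywordTokenizer tokenizer = new KeywordTokenizer()) {
      tokenizer.setReader(new StringReader("Hello Keyword Tokenizer"));
      CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
      tokenizer.reset();
      while (tokenizer.incrementToken()) {
        System.out.println(term.toString()); // prints the full input string once
      }
      tokenizer.end();
    }
  }
}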
Example 1
Source Project: lucene-solr   Source File: TestPhoneticFilter.java   License: Apache License 2.0
public void testEmptyTerm() throws IOException {
  Encoder encoders[] = new Encoder[] {
      new Metaphone(), new DoubleMetaphone(), new Soundex(), new RefinedSoundex(), new Caverphone2()
  };
  for (final Encoder e : encoders) {
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new KeywordTokenizer();
        return new TokenStreamComponents(tokenizer, new PhoneticFilter(tokenizer, e, random().nextBoolean()));
      }
    };
    checkOneTerm(a, "", "");
    a.close();
  }
}
 
Example 2
Source Project: lucene-solr   Source File: TestSnowballVocab.java   License: Apache License 2.0
/**
 * For the supplied language, run the stemmer against all strings in voc.txt.
 * The output should be the same as the strings in output.txt.
 */
private void assertCorrectOutput(final String snowballLanguage, String zipfile)
    throws IOException {
  if (VERBOSE) System.out.println("checking snowball language: " + snowballLanguage);
  
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer t = new KeywordTokenizer();
      return new TokenStreamComponents(t, new SnowballFilter(t, snowballLanguage));
    }  
  };
  
  assertVocabulary(a, getDataPath(zipfile), "voc.txt", "output.txt");
  a.close();
}
 
Example 3
Source Project: lucene-solr   Source File: TestSynonymMapFilter.java   License: Apache License 2.0
public void testEmptyTerm() throws IOException {
  Random random = random();
  final int numIters = atLeast(10);
  for (int i = 0; i < numIters; i++) {
    b = new SynonymMap.Builder(random.nextBoolean());
    final int numEntries = atLeast(10);
    for (int j = 0; j < numEntries; j++) {
      add(randomNonEmptyString(), randomNonEmptyString(), random.nextBoolean());
    }
    final SynonymMap map = b.build();
    final boolean ignoreCase = random.nextBoolean();
    
    final Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new KeywordTokenizer();
        return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, ignoreCase));
      }
    };

    checkAnalysisConsistency(random, analyzer, random.nextBoolean(), "");
    analyzer.close();
  }
}
 
Example 4
Source Project: lucene-solr   Source File: TestHunspellStemFilter.java   License: Apache License 2.0
public void testIgnoreCaseNoSideEffects() throws Exception {
  final Dictionary d;
  // not using a multi-resource try-with-resources here, to work around a bogus VerifyError
  InputStream affixStream = TestStemmer.class.getResourceAsStream("simple.aff");
  InputStream dictStream = TestStemmer.class.getResourceAsStream("simple.dic");
  Directory tempDir = getDirectory();
  try {
    d = new Dictionary(tempDir, "dictionary", affixStream, Collections.singletonList(dictStream), true);
  } finally {
    IOUtils.closeWhileHandlingException(affixStream, dictStream);
  }
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new HunspellStemFilter(tokenizer, d));
    }
  };
  checkOneTerm(a, "NoChAnGy", "NoChAnGy");
  a.close();
  tempDir.close();
}
 
Example 5
Source Project: lucene-solr   Source File: TestJapaneseBaseFormFilter.java   License: Apache License 2.0
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new JapaneseBaseFormFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example 6
Source Project: lucene-solr   Source File: TestJapaneseReadingFormFilter.java   License: Apache License 2.0
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new JapaneseReadingFormFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example 7
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new JapaneseKatakanaStemFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example 8
Source Project: lucene-solr   Source File: TestICUNormalizer2Filter.java   License: Apache License 2.0
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new ICUNormalizer2Filter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example 9
Source Project: lucene-solr   Source File: TestICUTransformFilter.java   License: Apache License 2.0
public void testOptimizer() throws Exception {
  String rules = "a > b; b > c;"; // convert a's to b's and b's to c's
  Transliterator custom = Transliterator.createFromRules("test", rules, Transliterator.FORWARD);
  assertTrue(custom.getFilter() == null);
  final KeywordTokenizer input = new KeywordTokenizer();
  input.setReader(new StringReader(""));
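  // Constructing the filter has the side effect under test: it installs a UnicodeSet
  // filter on the transliterator, restricted to the characters it can transform.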
  new ICUTransformFilter(input, custom);
  assertTrue(custom.getFilter().equals(new UnicodeSet("[ab]")));
}
 
Example 10
Source Project: lucene-solr   Source File: TestICUTransformFilter.java   License: Apache License 2.0
public void testOptimizerSurrogate() throws Exception {
  String rules = "\\U00020087 > x;"; // convert CJK UNIFIED IDEOGRAPH-20087 to an x
  Transliterator custom = Transliterator.createFromRules("test", rules, Transliterator.FORWARD);
  assertTrue(custom.getFilter() == null);
  final KeywordTokenizer input = new KeywordTokenizer();
  input.setReader(new StringReader(""));
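  // As above, constructing the filter should install a UnicodeSet filter containing
  // only the supplementary character from the rule.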
  new ICUTransformFilter(input, custom);
  assertTrue(custom.getFilter().equals(new UnicodeSet("[\\U00020087]")));
}
 
Example 11
Source Project: lucene-solr   Source File: TestCzechStemmer.java   License: Apache License 2.0
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new CzechStemFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example 12
Source Project: lucene-solr   Source File: TestICUTransformFilter.java   License: Apache License 2.0
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new ICUTransformFilter(tokenizer, Transliterator.getInstance("Any-Latin")));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example 13
Source Project: lucene-solr   Source File: TestICUFoldingFilter.java   License: Apache License 2.0
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new ICUFoldingFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example 14
Source Project: lucene-solr   Source File: TestKoreanReadingFormFilter.java   License: Apache License 2.0
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new KoreanReadingFormFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example 15
Source Project: lucene-solr   Source File: DoubleMetaphoneFilterTest.java   License: Apache License 2.0
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new DoubleMetaphoneFilter(tokenizer, 8, random().nextBoolean()));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example 16
Source Project: lucene-solr   Source File: TestBeiderMorseFilter.java   License: Apache License 2.0
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new BeiderMorseFilter(tokenizer, new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true)));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example 17
Source Project: lucene-solr   Source File: TestIndicNormalizer.java   License: Apache License 2.0
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new IndicNormalizationFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example 18
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new SerbianNormalizationRegularFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example 19
Source Project: lucene-solr   Source File: TestCJKAnalyzer.java   License: Apache License 2.0
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new CJKBigramFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example 20
Source Project: lucene-solr   Source File: TestCJKWidthFilter.java   License: Apache License 2.0
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new CJKWidthFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example 21
Source Project: lucene-solr   Source File: TestGermanStemFilter.java   License: Apache License 2.0
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new GermanStemFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example 22
Source Project: lucene-solr   Source File: TestGermanMinimalStemFilter.java   License: Apache License 2.0
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new GermanMinimalStemFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example 23
Source Project: lucene-solr   Source File: TestGermanNormalizationFilter.java   License: Apache License 2.0
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new GermanNormalizationFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example 24
Source Project: lucene-solr   Source File: ShingleFilterTest.java   License: Apache License 2.0
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new ShingleFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example 25
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new NorwegianMinimalStemFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example 26
Source Project: lucene-solr   Source File: TestNorwegianLightStemFilter.java   License: Apache License 2.0
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new NorwegianLightStemFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example 27
Source Project: lucene-solr   Source File: TestFinnishLightStemFilter.java   License: Apache License 2.0
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new FinnishLightStemFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example 28
Source Project: lucene-solr   Source File: TestLatvianStemmer.java   License: Apache License 2.0
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new LatvianStemFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example 29
public NaturalSortKeyAnalyzerProvider(IndexSettings indexSettings, Environment environment, String name,
                                      Settings settings) {
    super(indexSettings, name, settings);
    this.collator = createCollator(settings);
    this.digits = settings.getAsInt("digits", 1);
    this.maxTokens = settings.getAsInt("maxTokens", 2);
    this.bufferSize = settings.getAsInt("bufferSize", KeywordTokenizer.DEFAULT_BUFFER_SIZE);
}
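The KeywordTokenizer.DEFAULT_BUFFER_SIZE constant referenced above is the tokenizer's initial character buffer size. As a minimal sketch (assuming the KeywordTokenizer(int bufferSize) constructor available in recent Lucene releases; the class and method names here are illustrative, not from the project above), a configured value like this.bufferSize would typically be forwarded to the tokenizer when the analyzer's components are built:

import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;

public class BufferSizedKeywordTokenizerSketch {
  // Illustrative only: create a KeywordTokenizer with an explicitly configured buffer
  // size instead of relying on KeywordTokenizer.DEFAULT_BUFFER_SIZE.
  static Tokenizer create(int bufferSize) {
    return new KeywordTokenizer(bufferSize);
  }
}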
 
Example 30
Source Project: lucene-solr   Source File: TestArabicStemFilter.java   License: Apache License 2.0
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new ArabicStemFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}