org.apache.lucene.analysis.core.KeywordTokenizer Java Examples

The following examples show how to use org.apache.lucene.analysis.core.KeywordTokenizer. They are taken from open-source projects; the original project, author, source file, and license are noted above each example.
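Before the project examples, here is a minimal standalone sketch (assumed usage, not taken from any of the projects below) of the basic contract: KeywordTokenizer emits the entire input as a single token, so a consumer sets a Reader, calls reset(), and reads the lone term through CharTermAttribute. The class name and sample string are illustrative only.

import java.io.StringReader;

import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class KeywordTokenizerDemo {
  public static void main(String[] args) throws Exception {
    // Minimal sketch: KeywordTokenizer treats the whole input as one token.
    try (KeywordTokenizer tokenizer = new KeywordTokenizer()) {
      tokenizer.setReader(new StringReader("Hello Keyword Tokenizer"));
      CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
      tokenizer.reset();
      while (tokenizer.incrementToken()) {
        System.out.println(term.toString()); // prints the untouched input once
      }
      tokenizer.end();
    }
  }
}

The same property is what the tests below rely on: because the tokenizer never splits its input, wrapping it with a single TokenFilter isolates that filter's behavior on exactly one term.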
Example #1
Source Project: lucene-solr   Author: apache   File: TestPhoneticFilter.java   License: Apache License 2.0
public void testEmptyTerm() throws IOException {
  Encoder encoders[] = new Encoder[] {
      new Metaphone(), new DoubleMetaphone(), new Soundex(), new RefinedSoundex(), new Caverphone2()
  };
  for (final Encoder e : encoders) {
    Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new KeywordTokenizer();
        return new TokenStreamComponents(tokenizer, new PhoneticFilter(tokenizer, e, random().nextBoolean()));
      }
    };
    checkOneTerm(a, "", "");
    a.close();
  }
}
 
Example #2
Source Project: lucene-solr   Author: apache   File: TestSnowballVocab.java   License: Apache License 2.0
/**
 * For the supplied language, run the stemmer against all strings in voc.txt.
 * The output should be the same as the strings in output.txt.
 */
private void assertCorrectOutput(final String snowballLanguage, String zipfile)
    throws IOException {
  if (VERBOSE) System.out.println("checking snowball language: " + snowballLanguage);
  
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer t = new KeywordTokenizer();
      return new TokenStreamComponents(t, new SnowballFilter(t, snowballLanguage));
    }  
  };
  
  assertVocabulary(a, getDataPath(zipfile), "voc.txt", "output.txt");
  a.close();
}
 
Example #3
Source Project: lucene-solr   Author: apache   File: TestSynonymMapFilter.java   License: Apache License 2.0
public void testEmptyTerm() throws IOException {
  Random random = random();
  final int numIters = atLeast(10);
  for (int i = 0; i < numIters; i++) {
    b = new SynonymMap.Builder(random.nextBoolean());
    final int numEntries = atLeast(10);
    for (int j = 0; j < numEntries; j++) {
      add(randomNonEmptyString(), randomNonEmptyString(), random.nextBoolean());
    }
    final SynonymMap map = b.build();
    final boolean ignoreCase = random.nextBoolean();
    
    final Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new KeywordTokenizer();
        return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, ignoreCase));
      }
    };

    checkAnalysisConsistency(random, analyzer, random.nextBoolean(), "");
    analyzer.close();
  }
}
 
Example #4
Source Project: lucene-solr   Author: apache   File: TestHunspellStemFilter.java   License: Apache License 2.0
public void testIgnoreCaseNoSideEffects() throws Exception {
  final Dictionary d;
  // no multiple try-with to workaround bogus VerifyError
  InputStream affixStream = TestStemmer.class.getResourceAsStream("simple.aff");
  InputStream dictStream = TestStemmer.class.getResourceAsStream("simple.dic");
  Directory tempDir = getDirectory();
  try {
    d = new Dictionary(tempDir, "dictionary", affixStream, Collections.singletonList(dictStream), true);
  } finally {
    IOUtils.closeWhileHandlingException(affixStream, dictStream);
  }
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new HunspellStemFilter(tokenizer, d));
    }
  };
  checkOneTerm(a, "NoChAnGy", "NoChAnGy");
  a.close();
  tempDir.close();
}
 
Example #5
Source Project: lucene-solr   Author: apache   File: TestJapaneseBaseFormFilter.java   License: Apache License 2.0
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new JapaneseBaseFormFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example #6
Source Project: lucene-solr   Author: apache   File: TestJapaneseReadingFormFilter.java   License: Apache License 2.0
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new JapaneseReadingFormFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example #7
Source Project: lucene-solr   Author: apache   File: TestJapaneseKatakanaStemFilter.java   License: Apache License 2.0
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new JapaneseKatakanaStemFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example #8
Source Project: lucene-solr   Author: apache   File: TestICUNormalizer2Filter.java   License: Apache License 2.0
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new ICUNormalizer2Filter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example #9
Source Project: lucene-solr   Author: apache   File: TestICUTransformFilter.java   License: Apache License 2.0
public void testOptimizer() throws Exception {
  String rules = "a > b; b > c;"; // convert a's to b's and b's to c's
  Transliterator custom = Transliterator.createFromRules("test", rules, Transliterator.FORWARD);
  assertTrue(custom.getFilter() == null);
  final KeywordTokenizer input = new KeywordTokenizer();
  input.setReader(new StringReader(""));
  new ICUTransformFilter(input, custom);
  assertTrue(custom.getFilter().equals(new UnicodeSet("[ab]")));
}
 
Example #10
Source Project: lucene-solr   Author: apache   File: TestICUTransformFilter.java   License: Apache License 2.0
public void testOptimizerSurrogate() throws Exception {
  String rules = "\\U00020087 > x;"; // convert CJK UNIFIED IDEOGRAPH-20087 to an x
  Transliterator custom = Transliterator.createFromRules("test", rules, Transliterator.FORWARD);
  assertTrue(custom.getFilter() == null);
  final KeywordTokenizer input = new KeywordTokenizer();
  input.setReader(new StringReader(""));
  new ICUTransformFilter(input, custom);
  assertTrue(custom.getFilter().equals(new UnicodeSet("[\\U00020087]")));
}
 
Example #11
Source Project: lucene-solr   Author: apache   File: TestCzechStemmer.java   License: Apache License 2.0
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new CzechStemFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example #12
Source Project: lucene-solr   Author: apache   File: TestICUTransformFilter.java   License: Apache License 2.0
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new ICUTransformFilter(tokenizer, Transliterator.getInstance("Any-Latin")));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example #13
Source Project: lucene-solr   Author: apache   File: TestICUFoldingFilter.java   License: Apache License 2.0
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new ICUFoldingFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example #14
Source Project: lucene-solr   Author: apache   File: TestKoreanReadingFormFilter.java   License: Apache License 2.0
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new KoreanReadingFormFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example #15
Source Project: lucene-solr   Author: apache   File: DoubleMetaphoneFilterTest.java   License: Apache License 2.0
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new DoubleMetaphoneFilter(tokenizer, 8, random().nextBoolean()));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example #16
Source Project: lucene-solr   Author: apache   File: TestBeiderMorseFilter.java   License: Apache License 2.0
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new BeiderMorseFilter(tokenizer, new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true)));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example #17
Source Project: lucene-solr   Author: apache   File: TestIndicNormalizer.java   License: Apache License 2.0
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new IndicNormalizationFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example #18
Source Project: lucene-solr   Author: apache   File: TestSerbianNormalizationRegularFilter.java   License: Apache License 2.0
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new SerbianNormalizationRegularFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example #19
Source Project: lucene-solr   Author: apache   File: TestCJKAnalyzer.java   License: Apache License 2.0
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new CJKBigramFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example #20
Source Project: lucene-solr   Author: apache   File: TestCJKWidthFilter.java   License: Apache License 2.0
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new CJKWidthFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example #21
Source Project: lucene-solr   Author: apache   File: TestGermanStemFilter.java   License: Apache License 2.0
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new GermanStemFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example #22
Source Project: lucene-solr   Author: apache   File: TestGermanMinimalStemFilter.java   License: Apache License 2.0
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new GermanMinimalStemFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example #23
Source Project: lucene-solr   Author: apache   File: TestGermanNormalizationFilter.java   License: Apache License 2.0
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new GermanNormalizationFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example #24
Source Project: lucene-solr   Author: apache   File: ShingleFilterTest.java   License: Apache License 2.0
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new ShingleFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example #25
Source Project: lucene-solr   Author: apache   File: TestNorwegianMinimalStemFilter.java   License: Apache License 2.0
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new NorwegianMinimalStemFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example #26
Source Project: lucene-solr   Author: apache   File: TestNorwegianLightStemFilter.java   License: Apache License 2.0
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new NorwegianLightStemFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example #27
Source Project: lucene-solr   Author: apache   File: TestFinnishLightStemFilter.java   License: Apache License 2.0
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new FinnishLightStemFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example #28
Source Project: lucene-solr   Author: apache   File: TestLatvianStemmer.java   License: Apache License 2.0
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new LatvianStemFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example #29
public NaturalSortKeyAnalyzerProvider(IndexSettings indexSettings, Environment environment, String name,
                                      Settings settings) {
    super(indexSettings, name, settings);
    this.collator = createCollator(settings);
    this.digits = settings.getAsInt("digits", 1);
    this.maxTokens = settings.getAsInt("maxTokens", 2);
    this.bufferSize = settings.getAsInt("bufferSize", KeywordTokenizer.DEFAULT_BUFFER_SIZE);
}
 
Example #30
Source Project: lucene-solr   Author: apache   File: TestArabicStemFilter.java   License: Apache License 2.0
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      return new TokenStreamComponents(tokenizer, new ArabicStemFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}