org.apache.lucene.analysis.core.KeywordTokenizer Java Examples

The following examples show how to use org.apache.lucene.analysis.core.KeywordTokenizer. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestSynonymMapFilter.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Randomized check of a {@code SynonymFilter} sitting on a {@code KeywordTokenizer}
 * when the analyzed text is the empty string.
 *
 * <p>Each iteration builds a fresh synonym map from random non-empty strings (through
 * the {@code b} builder and the {@code add} helper, both declared outside this method),
 * wraps it in an analyzer and hands that analyzer to {@code checkAnalysisConsistency}
 * with {@code ""} as the text.
 *
 * @throws IOException propagated from the analysis helpers
 */
public void testEmptyTerm() throws IOException {
  Random random = random();
  final int numIters = atLeast(10);
  for (int i = 0; i < numIters; i++) {
    b = new SynonymMap.Builder(random.nextBoolean());
    final int numEntries = atLeast(10);
    for (int j = 0; j < numEntries; j++) {
      add(randomNonEmptyString(), randomNonEmptyString(), random.nextBoolean());
    }
    final SynonymMap map = b.build();
    final boolean ignoreCase = random.nextBoolean();

    // try-with-resources: the analyzer is closed even when the consistency check
    // throws, instead of only on the success path.
    try (Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new KeywordTokenizer();
        return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, ignoreCase));
      }
    }) {
      checkAnalysisConsistency(random, analyzer, random.nextBoolean(), "");
    }
  }
}
 
Example #2
Source File: TestPhoneticFilter.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * For each phonetic encoder listed below, runs the empty string through a
 * {@code KeywordTokenizer} + {@code PhoneticFilter} chain; {@code checkOneTerm} is given
 * {@code ""} as both the input and the expected term.
 *
 * @throws IOException propagated from {@code checkOneTerm}
 */
public void testEmptyTerm() throws IOException {
  Encoder[] encoders = new Encoder[] {
      new Metaphone(), new DoubleMetaphone(), new Soundex(), new RefinedSoundex(), new Caverphone2()
  };
  for (final Encoder e : encoders) {
    // try-with-resources: a failing check for one encoder no longer leaves its
    // analyzer unclosed.
    try (Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new KeywordTokenizer();
        // The boolean handed to the filter is drawn at random each time components are built.
        return new TokenStreamComponents(tokenizer, new PhoneticFilter(tokenizer, e, random().nextBoolean()));
      }
    }) {
      checkOneTerm(a, "", "");
    }
  }
}
 
Example #3
Source File: TestSnowballVocab.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * For the supplied language, run the stemmer against all strings in voc.txt.
 * The output should be the same as the string in output.txt.
 *
 * @param snowballLanguage language name handed to {@code SnowballFilter}
 * @param zipfile name resolved through {@code getDataPath} to locate the vocabulary data
 * @throws IOException propagated from {@code assertVocabulary}
 */
private void assertCorrectOutput(final String snowballLanguage, String zipfile)
    throws IOException {
  if (VERBOSE) {
    System.out.println("checking snowball language: " + snowballLanguage);
  }

  // try-with-resources: the analyzer is closed even if the vocabulary comparison fails.
  try (Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer t = new KeywordTokenizer();
      return new TokenStreamComponents(t, new SnowballFilter(t, snowballLanguage));
    }
  }) {
    assertVocabulary(a, getDataPath(zipfile), "voc.txt", "output.txt");
  }
}
 
Example #4
Source File: TestHunspellStemFilter.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a dictionary from {@code simple.aff} / {@code simple.dic} with the trailing
 * constructor flag set to {@code true}, then checks that {@code "NoChAnGy"} comes out of
 * a {@code KeywordTokenizer} + {@code HunspellStemFilter} chain unchanged.
 *
 * @throws Exception propagated from dictionary construction, analysis or resource closing
 */
public void testIgnoreCaseNoSideEffects() throws Exception {
  // The directory is opened first and owned by a try-with-resources so that it is
  // released when dictionary construction or the term check fails, not only on success.
  try (Directory tempDir = getDirectory()) {
    final Dictionary d;
    // no multiple try-with to workaround bogus VerifyError
    InputStream affixStream = TestStemmer.class.getResourceAsStream("simple.aff");
    InputStream dictStream = TestStemmer.class.getResourceAsStream("simple.dic");
    try {
      d = new Dictionary(tempDir, "dictionary", affixStream, Collections.singletonList(dictStream), true);
    } finally {
      IOUtils.closeWhileHandlingException(affixStream, dictStream);
    }
    // Single-resource try-with: the analyzer is closed even if checkOneTerm throws,
    // and it is closed before the directory, as in the original ordering.
    try (Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new KeywordTokenizer();
        return new TokenStreamComponents(tokenizer, new HunspellStemFilter(tokenizer, d));
      }
    }) {
      checkOneTerm(a, "NoChAnGy", "NoChAnGy");
    }
  }
}
 
Example #5
Source File: TestBengaliNormalizer.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the empty string through a {@code KeywordTokenizer} followed by a
 * {@code BengaliNormalizationFilter}; {@code checkOneTerm} receives {@code ""} as both
 * the input and the expected term.
 */
public void testEmptyTerm() throws IOException {
  final Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      final Tokenizer source = new KeywordTokenizer();
      final BengaliNormalizationFilter normalized = new BengaliNormalizationFilter(source);
      return new TokenStreamComponents(source, normalized);
    }
  };
  checkOneTerm(analyzer, "", "");
  analyzer.close();
}
 
Example #6
Source File: TestKStemmer.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Empty-input check for {@code KStemFilter}: the analyzer below chains it after a
 * {@code KeywordTokenizer}, and {@code checkOneTerm} is called with {@code ""} for both
 * input and expected output.
 */
public void testEmptyTerm() throws IOException {
  final Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      final Tokenizer source = new KeywordTokenizer();
      final KStemFilter stemmed = new KStemFilter(source);
      return new TokenStreamComponents(source, stemmed);
    }
  };
  checkOneTerm(analyzer, "", "");
  analyzer.close();
}
 
Example #7
Source File: TestASCIIFoldingFilter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Empty-input check for {@code ASCIIFoldingFilter}. The filter's boolean argument is
 * drawn from {@code random()} whenever the components are created; {@code checkOneTerm}
 * is called with {@code ""} for both input and expected output.
 */
public void testEmptyTerm() throws IOException {
  final Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      final Tokenizer source = new KeywordTokenizer();
      // Drawn after the tokenizer is created, matching the original evaluation order.
      final boolean randomFlag = random().nextBoolean();
      final ASCIIFoldingFilter folded = new ASCIIFoldingFilter(source, randomFlag);
      return new TokenStreamComponents(source, folded);
    }
  };
  checkOneTerm(analyzer, "", "");
  analyzer.close();
}
 
Example #8
Source File: TestLengthFilter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Empty-input check for a {@code LengthFilter} constructed with the bounds 0 and 5,
 * chained after a {@code KeywordTokenizer}; {@code checkOneTerm} is called with
 * {@code ""} for both input and expected output.
 */
public void testEmptyTerm() throws IOException {
  final Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      final Tokenizer source = new KeywordTokenizer();
      final LengthFilter bounded = new LengthFilter(source, 0, 5);
      return new TokenStreamComponents(source, bounded);
    }
  };
  checkOneTerm(analyzer, "", "");
  analyzer.close();
}
 
Example #9
Source File: TestCodepointCountFilter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Empty-input check for a {@code CodepointCountFilter} constructed with the bounds 0
 * and 5, chained after a {@code KeywordTokenizer}; {@code checkOneTerm} is called with
 * {@code ""} for both input and expected output.
 */
public void testEmptyTerm() throws IOException {
  final Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      final Tokenizer source = new KeywordTokenizer();
      final CodepointCountFilter bounded = new CodepointCountFilter(source, 0, 5);
      return new TokenStreamComponents(source, bounded);
    }
  };
  checkOneTerm(analyzer, "", "");
  analyzer.close();
}
 
Example #10
Source File: TestPortugueseLightStemFilter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Empty-input check for {@code PortugueseLightStemFilter} chained after a
 * {@code KeywordTokenizer}; {@code checkOneTerm} is called with {@code ""} for both
 * input and expected output.
 */
public void testEmptyTerm() throws IOException {
  final Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      final Tokenizer source = new KeywordTokenizer();
      final PortugueseLightStemFilter stemmed = new PortugueseLightStemFilter(source);
      return new TokenStreamComponents(source, stemmed);
    }
  };
  checkOneTerm(analyzer, "", "");
  analyzer.close();
}
 
Example #11
Source File: TestHyphenatedWordsFilter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Empty-input check for {@code HyphenatedWordsFilter} chained after a
 * {@code KeywordTokenizer}; {@code checkOneTerm} is called with {@code ""} for both
 * input and expected output.
 */
public void testEmptyTerm() throws IOException {
  final Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      final Tokenizer source = new KeywordTokenizer();
      final HyphenatedWordsFilter joined = new HyphenatedWordsFilter(source);
      return new TokenStreamComponents(source, joined);
    }
  };
  checkOneTerm(analyzer, "", "");
  analyzer.close();
}
 
Example #12
Source File: TestScandinavianNormalizationFilter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Check that the empty string doesn't cause issues: it is analyzed with a
 * {@code KeywordTokenizer} followed by a {@code ScandinavianNormalizationFilter}, and
 * {@code checkOneTerm} is called with {@code ""} for both input and expected output.
 */
public void testEmptyTerm() throws Exception {
  final Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      final Tokenizer source = new KeywordTokenizer();
      final ScandinavianNormalizationFilter normalized = new ScandinavianNormalizationFilter(source);
      return new TokenStreamComponents(source, normalized);
    }
  };
  checkOneTerm(analyzer, "", "");
  analyzer.close();
}
 
Example #13
Source File: TestSpanishMinimalStemFilter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Empty-input check for {@code SpanishMinimalStemFilter} chained after a
 * {@code KeywordTokenizer}; {@code checkOneTerm} is called with {@code ""} for both
 * input and expected output.
 *
 * @throws IOException propagated from {@code checkOneTerm}
 */
public void testEmptyTerm() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new KeywordTokenizer();
      // Use the Spanish filter this test is named for. The previous code built an
      // EnglishMinimalStemFilter here, which left the Spanish stemmer's empty-term
      // path unexercised.
      // NOTE(review): assumes SpanishMinimalStemFilter is visible from this test
      // class (same package or already imported) - confirm.
      return new TokenStreamComponents(tokenizer, new SpanishMinimalStemFilter(tokenizer));
    }
  };
  checkOneTerm(a, "", "");
  a.close();
}
 
Example #14
Source File: SynonymLoader.java    From elasticsearch-analysis-synonym with Apache License 2.0 5 votes vote down vote up
/**
 * Creates an analyzer whose token source is a {@code KeywordTokenizer}.
 *
 * @param ignoreCase when {@code true} the tokenizer is wrapped in a {@code LowerCaseFilter};
 *                   when {@code false} the tokenizer itself is used as the resulting stream
 * @return a new analyzer built as described above
 */
protected static Analyzer getAnalyzer(final boolean ignoreCase) {
    return new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(final String fieldName) {
            final Tokenizer source = new KeywordTokenizer();
            if (!ignoreCase) {
                // Case-sensitive: the tokenizer doubles as the end of the chain.
                return new TokenStreamComponents(source, source);
            }
            return new TokenStreamComponents(source, new LowerCaseFilter(source));
        }
    };
}
 
Example #15
Source File: TestTrimFilter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Empty-input check for {@code TrimFilter} chained after a {@code KeywordTokenizer};
 * {@code checkOneTerm} is called with {@code ""} for both input and expected output.
 */
public void testEmptyTerm() throws IOException {
  final Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      final Tokenizer source = new KeywordTokenizer();
      final TrimFilter trimmed = new TrimFilter(source);
      return new TokenStreamComponents(source, trimmed);
    }
  };
  checkOneTerm(analyzer, "", "");
  analyzer.close();
}
 
Example #16
Source File: TestElision.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Empty-input check for an {@code ElisionFilter} configured with
 * {@code FrenchAnalyzer.DEFAULT_ARTICLES}, chained after a {@code KeywordTokenizer};
 * {@code checkOneTerm} is called with {@code ""} for both input and expected output.
 */
public void testEmptyTerm() throws IOException {
  final Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      final Tokenizer source = new KeywordTokenizer();
      final ElisionFilter elided = new ElisionFilter(source, FrenchAnalyzer.DEFAULT_ARTICLES);
      return new TokenStreamComponents(source, elided);
    }
  };
  checkOneTerm(analyzer, "", "");
  analyzer.close();
}
 
Example #17
Source File: TestTurkishLowerCaseFilter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Empty-input check for {@code TurkishLowerCaseFilter} chained after a
 * {@code KeywordTokenizer}; {@code checkOneTerm} is called with {@code ""} for both
 * input and expected output.
 */
public void testEmptyTerm() throws IOException {
  final Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      final Tokenizer source = new KeywordTokenizer();
      final TurkishLowerCaseFilter lowered = new TurkishLowerCaseFilter(source);
      return new TokenStreamComponents(source, lowered);
    }
  };
  checkOneTerm(analyzer, "", "");
  analyzer.close();
}
 
Example #18
Source File: TestItalianLightStemFilter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Empty-input check for {@code ItalianLightStemFilter} chained after a
 * {@code KeywordTokenizer}; {@code checkOneTerm} is called with {@code ""} for both
 * input and expected output.
 */
public void testEmptyTerm() throws IOException {
  final Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      final Tokenizer source = new KeywordTokenizer();
      final ItalianLightStemFilter stemmed = new ItalianLightStemFilter(source);
      return new TokenStreamComponents(source, stemmed);
    }
  };
  checkOneTerm(analyzer, "", "");
  analyzer.close();
}
 
Example #19
Source File: TestIndonesianStemmer.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Empty-input check for {@code IndonesianStemFilter} chained after a
 * {@code KeywordTokenizer}; {@code checkOneTerm} is called with {@code ""} for both
 * input and expected output.
 */
public void testEmptyTerm() throws IOException {
  final Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      final Tokenizer source = new KeywordTokenizer();
      final IndonesianStemFilter stemmed = new IndonesianStemFilter(source);
      return new TokenStreamComponents(source, stemmed);
    }
  };
  checkOneTerm(analyzer, "", "");
  analyzer.close();
}
 
Example #20
Source File: TestFrenchMinimalStemFilter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Empty-input check for {@code FrenchMinimalStemFilter} chained after a
 * {@code KeywordTokenizer}; {@code checkOneTerm} is called with {@code ""} for both
 * input and expected output.
 */
public void testEmptyTerm() throws IOException {
  final Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      final Tokenizer source = new KeywordTokenizer();
      final FrenchMinimalStemFilter stemmed = new FrenchMinimalStemFilter(source);
      return new TokenStreamComponents(source, stemmed);
    }
  };
  checkOneTerm(analyzer, "", "");
  analyzer.close();
}
 
Example #21
Source File: TestFrenchLightStemFilter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Empty-input check for {@code FrenchLightStemFilter} chained after a
 * {@code KeywordTokenizer}; {@code checkOneTerm} is called with {@code ""} for both
 * input and expected output.
 */
public void testEmptyTerm() throws IOException {
  final Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      final Tokenizer source = new KeywordTokenizer();
      final FrenchLightStemFilter stemmed = new FrenchLightStemFilter(source);
      return new TokenStreamComponents(source, stemmed);
    }
  };
  checkOneTerm(analyzer, "", "");
  analyzer.close();
}
 
Example #22
Source File: TestSoraniNormalizationFilter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Empty-input check for {@code SoraniNormalizationFilter} chained after a
 * {@code KeywordTokenizer}; {@code checkOneTerm} is called with {@code ""} for both
 * input and expected output.
 */
public void testEmptyTerm() throws IOException {
  final Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      final Tokenizer source = new KeywordTokenizer();
      final SoraniNormalizationFilter normalized = new SoraniNormalizationFilter(source);
      return new TokenStreamComponents(source, normalized);
    }
  };
  checkOneTerm(analyzer, "", "");
  analyzer.close();
}
 
Example #23
Source File: TestSoraniStemFilter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Empty-input check for {@code SoraniStemFilter} chained after a
 * {@code KeywordTokenizer}; {@code checkOneTerm} is called with {@code ""} for both
 * input and expected output.
 */
public void testEmptyTerm() throws IOException {
  final Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      final Tokenizer source = new KeywordTokenizer();
      final SoraniStemFilter stemmed = new SoraniStemFilter(source);
      return new TokenStreamComponents(source, stemmed);
    }
  };
  checkOneTerm(analyzer, "", "");
  analyzer.close();
}
 
Example #24
Source File: TestBrazilianAnalyzer.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Empty-input check for {@code BrazilianStemFilter} chained after a
 * {@code KeywordTokenizer}; {@code checkOneTerm} is called with {@code ""} for both
 * input and expected output.
 */
public void testEmptyTerm() throws IOException {
  final Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      final Tokenizer source = new KeywordTokenizer();
      final BrazilianStemFilter stemmed = new BrazilianStemFilter(source);
      return new TokenStreamComponents(source, stemmed);
    }
  };
  checkOneTerm(analyzer, "", "");
  analyzer.close();
}
 
Example #25
Source File: TestGalicianStemFilter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Empty-input check for {@code GalicianStemFilter} chained after a
 * {@code KeywordTokenizer}; {@code checkOneTerm} is called with {@code ""} for both
 * input and expected output.
 */
public void testEmptyTerm() throws IOException {
  final Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      final Tokenizer source = new KeywordTokenizer();
      final GalicianStemFilter stemmed = new GalicianStemFilter(source);
      return new TokenStreamComponents(source, stemmed);
    }
  };
  checkOneTerm(analyzer, "", "");
  analyzer.close();
}
 
Example #26
Source File: TestHindiNormalizer.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Empty-input check for {@code HindiNormalizationFilter} chained after a
 * {@code KeywordTokenizer}; {@code checkOneTerm} is called with {@code ""} for both
 * input and expected output.
 */
public void testEmptyTerm() throws IOException {
  final Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      final Tokenizer source = new KeywordTokenizer();
      final HindiNormalizationFilter normalized = new HindiNormalizationFilter(source);
      return new TokenStreamComponents(source, normalized);
    }
  };
  checkOneTerm(analyzer, "", "");
  analyzer.close();
}
 
Example #27
Source File: TestHindiStemmer.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Empty-input check for {@code HindiStemFilter} chained after a
 * {@code KeywordTokenizer}; {@code checkOneTerm} is called with {@code ""} for both
 * input and expected output.
 */
public void testEmptyTerm() throws IOException {
  final Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      final Tokenizer source = new KeywordTokenizer();
      final HindiStemFilter stemmed = new HindiStemFilter(source);
      return new TokenStreamComponents(source, stemmed);
    }
  };
  checkOneTerm(analyzer, "", "");
  analyzer.close();
}
 
Example #28
Source File: TestBulgarianStemmer.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Empty-input check for {@code BulgarianStemFilter} chained after a
 * {@code KeywordTokenizer}; {@code checkOneTerm} is called with {@code ""} for both
 * input and expected output.
 */
public void testEmptyTerm() throws IOException {
  final Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      final Tokenizer source = new KeywordTokenizer();
      final BulgarianStemFilter stemmed = new BulgarianStemFilter(source);
      return new TokenStreamComponents(source, stemmed);
    }
  };
  checkOneTerm(analyzer, "", "");
  analyzer.close();
}
 
Example #29
Source File: TestSwedishLightStemFilter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Empty-input check for {@code SwedishLightStemFilter} chained after a
 * {@code KeywordTokenizer}; {@code checkOneTerm} is called with {@code ""} for both
 * input and expected output.
 */
public void testEmptyTerm() throws IOException {
  final Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      final Tokenizer source = new KeywordTokenizer();
      final SwedishLightStemFilter stemmed = new SwedishLightStemFilter(source);
      return new TokenStreamComponents(source, stemmed);
    }
  };
  checkOneTerm(analyzer, "", "");
  analyzer.close();
}
 
Example #30
Source File: TestPersianNormalizationFilter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Empty-input check for {@code PersianNormalizationFilter} chained after a
 * {@code KeywordTokenizer}; {@code checkOneTerm} is called with {@code ""} for both
 * input and expected output.
 */
public void testEmptyTerm() throws IOException {
  final Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      final Tokenizer source = new KeywordTokenizer();
      final PersianNormalizationFilter normalized = new PersianNormalizationFilter(source);
      return new TokenStreamComponents(source, normalized);
    }
  };
  checkOneTerm(analyzer, "", "");
  analyzer.close();
}