Java Code Examples for org.apache.lucene.analysis.Analyzer#TokenStreamComponents

The following examples show how to use org.apache.lucene.analysis.Analyzer#TokenStreamComponents. Each example notes the project and source file it comes from.
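
Before the project examples, here is a minimal sketch of the pattern they all share: an Analyzer subclass overrides createComponents(String) and returns an Analyzer.TokenStreamComponents that pairs a Tokenizer (the source that consumes the raw field text) with the end of its token filter chain (the stream callers read from). The analyzer class below is purely illustrative; StandardTokenizer and LowerCaseFilter are standard Lucene types, though their package locations vary slightly between Lucene versions.

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer;

public class LowercaseStandardAnalyzer extends Analyzer {

  @Override
  protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
    // The Tokenizer breaks the raw field text into terms.
    Tokenizer source = new StandardTokenizer();
    // Filters wrap the tokenizer; the outermost filter is what callers consume.
    TokenStream result = new LowerCaseFilter(source);
    // First argument: the source Tokenizer; second: the final TokenStream of the chain.
    return new Analyzer.TokenStreamComponents(source, result);
  }
}
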
Example 1
Source File: TestKoreanTokenizer.java    From lucene-solr (Apache License 2.0)
public void testCustomDictionary() throws Exception {
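  // Build a KoreanTokenizer backed by the classpath system dictionaries plus the user dictionary returned by readDict().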
  Tokenizer tokenizer = new KoreanTokenizer(newAttributeFactory(),
      new TokenInfoDictionary(ResourceScheme.CLASSPATH, "org/apache/lucene/analysis/ko/dict/TokenInfoDictionary"),
      new UnknownDictionary(ResourceScheme.CLASSPATH, "org/apache/lucene/analysis/ko/dict/UnknownDictionary"),
      new ConnectionCosts(ResourceScheme.CLASSPATH, "org/apache/lucene/analysis/ko/dict/ConnectionCosts"),
      readDict(), DecompoundMode.NONE, false, false);
  try (Analyzer a = new Analyzer() {
    @Override
    protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
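      // No token filters are applied, so the tokenizer serves as both the source and the resulting stream.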
      return new Analyzer.TokenStreamComponents(tokenizer, tokenizer);
    }
  }) {
    assertTokenStreamContents(a.tokenStream("foo", "커스텀사전검사"),
        new String[] { "커스텀", "사전", "검사"  },
        new int[] { 0, 3, 5 },
        new int[] { 3, 5, 7 },
        7
    );
  }
}
 
Example 2
Source File: HanLPCRFAnalyzer.java    From elasticsearch-analysis-hanlp (Apache License 2.0)
@Override
protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
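    // The segmenter is built inside AccessController.doPrivileged so HanLP can load its CRF model and dictionaries under the Elasticsearch plugin security manager.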
    return new Analyzer.TokenStreamComponents(
        TokenizerBuilder.tokenizer(AccessController.doPrivileged((PrivilegedAction<Segment>)() -> {
            try {
                return new CRFLexicalAnalyzer();
            } catch (IOException e) {
                logger.error("can not use crf analyzer, provider default", e);
                return HanLP.newSegment();
            }
        }), configuration));
}
 
Example 3
Source File: HanLPNLPAnalyzer.java    From elasticsearch-analysis-hanlp (Apache License 2.0)
@Override
protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
    return new Analyzer.TokenStreamComponents(
        TokenizerBuilder.tokenizer(AccessController.doPrivileged((PrivilegedAction<Segment>)() -> {
            try {
                return new PerceptronLexicalAnalyzer();
            } catch (IOException e) {
                logger.error("can not use nlp analyzer, provider default", e);
                return HanLP.newSegment();
            }
        }), configuration));
}
 
Example 4
Source File: HanLPDijkstraAnalyzer.java    From elasticsearch-analysis-hanlp (Apache License 2.0)
@Override
protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
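    // DijkstraSegment: HanLP's shortest-path segmenter, here with the custom dictionary disabled and place/organization recognition enabled.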
    return new Analyzer.TokenStreamComponents(TokenizerBuilder.tokenizer(
        AccessController.doPrivileged((PrivilegedAction<Segment>)() -> new DijkstraSegment().enableCustomDictionary(
            false).enablePlaceRecognize(true).enableOrganizationRecognize(true)), configuration));
}
 
Example 5
Source File: HanLPSpeedAnalyzer.java    From elasticsearch-analysis-hanlp (Apache License 2.0)
@Override
protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
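    // DoubleArrayTrieSegment: HanLP's fastest, purely dictionary-based segmenter; the custom dictionary is disabled here.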
    return new Analyzer.TokenStreamComponents(TokenizerBuilder.tokenizer(AccessController
            .doPrivileged((PrivilegedAction<Segment>)() -> new DoubleArrayTrieSegment().enableCustomDictionary(false)),
        configuration));
}
 
Example 6
Source File: HanLPIndexAnalyzer.java    From elasticsearch-analysis-hanlp (Apache License 2.0)
@Override
protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
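    // Index mode makes HanLP emit finer-grained, overlapping terms, which is better suited to building a search index.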
    return new Analyzer.TokenStreamComponents(
        TokenizerBuilder.tokenizer(AccessController.doPrivileged((PrivilegedAction<Segment>)() ->
            HanLP.newSegment().enableIndexMode(true)), configuration));
}
 
Example 7
Source File: HanLPStandardAnalyzer.java    From elasticsearch-analysis-hanlp (Apache License 2.0)
@Override
protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
    return new Analyzer.TokenStreamComponents(TokenizerBuilder.tokenizer(AccessController.doPrivileged((PrivilegedAction<Segment>)HanLP::newSegment), configuration));
}
 
Example 8
Source File: HanLPNShortAnalyzer.java    From elasticsearch-analysis-hanlp (Apache License 2.0)
@Override
protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
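    // NShortSegment: HanLP's N-shortest-path segmenter, generally a bit more accurate but slower than the shortest-path (Dijkstra) segmenter.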
    return new Analyzer.TokenStreamComponents(TokenizerBuilder.tokenizer(
        AccessController.doPrivileged((PrivilegedAction<Segment>)() -> new NShortSegment().enableCustomDictionary(
            false).enablePlaceRecognize(true).enableOrganizationRecognize(true)), configuration));
}
 
Example 9
Source File: HanLPAnalyzer.java    From elasticsearch-analysis-hanlp (Apache License 2.0)
@Override
protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
    return new Analyzer.TokenStreamComponents(TokenizerBuilder.tokenizer(AccessController.doPrivileged((PrivilegedAction<Segment>)HanLP::newSegment), configuration));
}
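
The examples above only show the producer side of the API. For completeness, here is a minimal consumer-side sketch (not taken from any of the projects above) using the illustrative LowercaseStandardAnalyzer defined near the top of this page; reset(), incrementToken(), end() and close() form the standard TokenStream consumption contract.

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class PrintTokens {
  public static void main(String[] args) throws IOException {
    try (Analyzer analyzer = new LowercaseStandardAnalyzer();
         TokenStream stream = analyzer.tokenStream("body", "Some Text To Analyze")) {
      CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
      stream.reset();                          // must be called before the first incrementToken()
      while (stream.incrementToken()) {
        System.out.println(term.toString());   // prints the lowercased terms: some, text, to, analyze
      }
      stream.end();                            // records the final offset state
    }                                          // try-with-resources closes the stream and the analyzer
  }
}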