org.apache.lucene.analysis.custom.CustomAnalyzer Java Examples

The following examples show how to use org.apache.lucene.analysis.custom.CustomAnalyzer. They are drawn from open source projects; the source file, project, and license are noted above each example.
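Before the project examples, here is a minimal, self-contained sketch of the builder pattern they all share: pick a tokenizer by its SPI name or factory class, chain token filters in the order they should run, and build. The SPI names used here ("standard", "lowercase", "stop") are standard Lucene names (they also appear in Example #21); the class name is hypothetical.

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.custom.CustomAnalyzer;

public class CustomAnalyzerSketch {
    public static void main(String[] args) throws IOException {
        // Tokenize on standard word boundaries, lowercase each token,
        // then drop English stop words. Filters run in the order added.
        Analyzer analyzer = CustomAnalyzer.builder()
                .withTokenizer("standard")
                .addTokenFilter("lowercase")
                .addTokenFilter("stop")
                .build();
        // Pass the analyzer to an IndexWriterConfig, or consume tokens
        // directly via analyzer.tokenStream(...) as in Example #2.
        analyzer.close();
    }
}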
Example #1
Source File: TestOpenNLPPOSFilterFactory.java    From lucene-solr with Apache License 2.0
public void testPOS() throws Exception {
  CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
      .build();
  assertAnalyzesTo(analyzer, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets,
      SENTENCES_posTags, null, null, true);

  analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
      .addTokenFilter(TypeAsPayloadTokenFilterFactory.class)
      .build();
  assertAnalyzesTo(analyzer, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets,
      null, null, null, true, toPayloads(SENTENCES_posTags));
}
 
Example #2
Source File: DemoTest.java    From HongsCORE with MIT License
public static void main(String[] args) throws IOException {
    Analyzer az = CustomAnalyzer.builder()
        //.withTokenizer("Standard")
        .withTokenizer("Name")
        .addTokenFilter("EdgeNGram", "minGramSize", "1", "maxGramSize", "20")
        //.addTokenFilter("ICUTransform", "id", "Han-Latin;NFD;[[:NonspacingMark:][:Space:]] Remove")
        //.addTokenFilter("EdgeNGram", "minGramSize", "1", "maxGramSize", "20")
        .build();

    StringReader      sr = new StringReader(args[0]);
    TokenStream       ts = az.tokenStream  ("" , sr);
    OffsetAttribute   oa = ts.addAttribute (OffsetAttribute.class);
    CharTermAttribute ta = ts.addAttribute (CharTermAttribute.class);

    try {
        ts.reset(); // Resets this stream to the beginning. (Required)
        while (ts.incrementToken()) {
            System.out.println(ta.toString() + "|" + ta.length()
                    + "[" + oa.startOffset() + "," + oa.endOffset() + "]");
        }
        ts.end(  ); // Perform end-of-stream operations, e.g. set the final offset.
    } finally {
        ts.close(); // Release resources associated with this stream.
    }

}
 
Example #3
Source File: NestPathField.java    From lucene-solr with Apache License 2.0
@Override
public void setArgs(IndexSchema schema, Map<String, String> args) {
  args.putIfAbsent("stored", "false");
  args.putIfAbsent("omitTermFreqAndPositions", "true");
  args.putIfAbsent("omitNorms", "true");
  args.putIfAbsent("maxCharsForDocValues", "-1");
  super.setArgs(schema, args);

  // CustomAnalyzer is easy to use
  CustomAnalyzer customAnalyzer;
  try {
    customAnalyzer = CustomAnalyzer.builder(schema.getResourceLoader())
        .withDefaultMatchVersion(schema.getDefaultLuceneMatchVersion())
        .withTokenizer(KeywordTokenizerFactory.class)
        .addTokenFilter(PatternReplaceFilterFactory.class,
            "pattern", "#\\d*",
            "replace", "all")
        .build();
  } catch (IOException e) {
    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e); // impossible?
  }
  // Solr HTTP Schema APIs don't know about CustomAnalyzer so use TokenizerChain instead
  setIndexAnalyzer(new TokenizerChain(customAnalyzer));
  // leave queryAnalyzer as literal
}
 
Example #4
Source File: AnalysisImplTest.java    From lucene-solr with Apache License 2.0
@Test
public void testAnalyze_custom() {
  AnalysisImpl analysis = new AnalysisImpl();
  Map<String, String> tkParams = new HashMap<>();
  tkParams.put("maxTokenLen", "128");
  CustomAnalyzerConfig.Builder builder = new CustomAnalyzerConfig.Builder(
      "keyword", tkParams)
      .addTokenFilterConfig("lowercase", Collections.emptyMap());
  CustomAnalyzer analyzer = (CustomAnalyzer) analysis.buildCustomAnalyzer(builder.build());
  assertEquals("org.apache.lucene.analysis.custom.CustomAnalyzer", analyzer.getClass().getName());
  assertEquals("org.apache.lucene.analysis.core.KeywordTokenizerFactory", analyzer.getTokenizerFactory().getClass().getName());
  assertEquals("org.apache.lucene.analysis.core.LowerCaseFilterFactory", analyzer.getTokenFilterFactories().get(0).getClass().getName());

  String text = "Apache Lucene";
  List<Analysis.Token> tokens = analysis.analyze(text);
  assertNotNull(tokens);
}
 
Example #5
Source File: TestOpenNLPLemmatizerFilterFactory.java    From lucene-solr with Apache License 2.0
public void test1SentenceDictionaryAndMaxEnt() throws Exception {
  CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", "en-test-pos-maxent.bin")
      .addTokenFilter("opennlplemmatizer", "dictionary", "en-test-lemmas.dict", "lemmatizerModel", lemmatizerModelFile)
      .build();
  assertAnalyzesTo(analyzer, SENTENCE_both, SENTENCE_both_punc, null, null,
      SENTENCE_both_posTags, null, null, true);
}
 
Example #6
Source File: TestOpenNLPPOSFilterFactory.java    From lucene-solr with Apache License 2.0
public void testNoBreak() throws Exception {
  CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
      .build();
  assertAnalyzesTo(analyzer, NO_BREAK, NO_BREAK_terms, NO_BREAK_startOffsets, NO_BREAK_endOffsets,
      null, null, null, true);
}
 
Example #7
Source File: TestOpenNLPTokenizerFactory.java    From lucene-solr with Apache License 2.0
@Test
public void testTokenizer() throws IOException {
  CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "sentenceModel", "en-test-sent.bin", "tokenizerModel", "en-test-tokenizer.bin")
      .build();
  assertAnalyzesTo(analyzer, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets);
  assertAnalyzesTo(analyzer, SENTENCE1, SENTENCE1_punc);
}
 
Example #8
Source File: TestOpenNLPTokenizerFactory.java    From lucene-solr with Apache License 2.0
@Test
public void testTokenizerNoSentenceDetector() throws IOException {
  IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
    CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
        .withTokenizer("opennlp", "tokenizerModel", "en-test-tokenizer.bin")
        .build();
  });
  assertTrue(expected.getMessage().contains("Configuration Error: missing parameter 'sentenceModel'"));
}
 
Example #9
Source File: TestOpenNLPTokenizerFactory.java    From lucene-solr with Apache License 2.0
@Test
public void testTokenizerNoTokenizer() throws IOException {
  IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
    CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
        .withTokenizer("opennlp", "sentenceModel", "en-test-sent.bin")
        .build();
  });
  assertTrue(expected.getMessage().contains("Configuration Error: missing parameter 'tokenizerModel'"));
}
 
Example #10
Source File: TestOpenNLPLemmatizerFilterFactory.java    From lucene-solr with Apache License 2.0
public void test1SentenceDictionaryOnly() throws Exception {
  CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", "en-test-pos-maxent.bin")
      .addTokenFilter("opennlplemmatizer", "dictionary", "en-test-lemmas.dict")
      .build();
  assertAnalyzesTo(analyzer, SENTENCE, SENTENCE_dict_punc, null, null,
      SENTENCE_posTags, null, null, true);
}
 
Example #11
Source File: TestOpenNLPLemmatizerFilterFactory.java    From lucene-solr with Apache License 2.0
public void test2SentencesDictionaryOnly() throws Exception {
  CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
      .addTokenFilter("opennlplemmatizer", "dictionary", lemmatizerDictFile)
      .build();
  assertAnalyzesTo(analyzer, SENTENCES, SENTENCES_dict_punc, null, null,
      SENTENCES_posTags, null, null, true);
}
 
Example #12
Source File: TestOpenNLPLemmatizerFilterFactory.java    From lucene-solr with Apache License 2.0
public void test1SentenceMaxEntOnly() throws Exception {
  CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
      .addTokenFilter("opennlplemmatizer", "lemmatizerModel", lemmatizerModelFile)
      .build();
  assertAnalyzesTo(analyzer, SENTENCE, SENTENCE_maxent_punc, null, null,
      SENTENCE_posTags, null, null, true);
}
 
Example #13
Source File: TestOpenNLPLemmatizerFilterFactory.java    From lucene-solr with Apache License 2.0
public void test2SentencesMaxEntOnly() throws Exception {
  CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
      .addTokenFilter("OpenNLPLemmatizer", "lemmatizerModel", lemmatizerModelFile)
      .build();
  assertAnalyzesTo(analyzer, SENTENCES, SENTENCES_maxent_punc, null, null,
      SENTENCES_posTags, null, null, true);
}
 
Example #14
Source File: TestOpenNLPPOSFilterFactory.java    From lucene-solr with Apache License 2.0
public void testBasic() throws IOException {
  CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
      .build();
  assertAnalyzesTo(analyzer, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets);
}
 
Example #15
Source File: TestOpenNLPLemmatizerFilterFactory.java    From lucene-solr with Apache License 2.0
public void test2SentencesDictionaryAndMaxEnt() throws Exception {
  CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
      .addTokenFilter("opennlplemmatizer", "dictionary", lemmatizerDictFile, "lemmatizerModel", lemmatizerModelFile)
      .build();
  assertAnalyzesTo(analyzer, SENTENCES_both, SENTENCES_both_punc, null, null,
      SENTENCES_both_posTags, null, null, true);
}
 
Example #16
Source File: TestOpenNLPLemmatizerFilterFactory.java    From lucene-solr with Apache License 2.0
public void testKeywordAttributeAwarenessDictionaryOnly() throws Exception {
  CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
      .addTokenFilter(KeywordRepeatFilterFactory.class)
      .addTokenFilter("opennlplemmatizer", "dictionary", lemmatizerDictFile)
      .addTokenFilter(RemoveDuplicatesTokenFilterFactory.class)
      .build();
  assertAnalyzesTo(analyzer, SENTENCES, SENTENCES_dict_keep_orig_punc, null, null,
      SENTENCES_keep_orig_posTags, null, null, true);
}
 
Example #17
Source File: TestOpenNLPLemmatizerFilterFactory.java    From lucene-solr with Apache License 2.0
public void testKeywordAttributeAwarenessMaxEntOnly() throws Exception {
  CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
      .addTokenFilter(KeywordRepeatFilterFactory.class)
      .addTokenFilter("opennlplemmatizer", "lemmatizerModel", lemmatizerModelFile)
      .addTokenFilter(RemoveDuplicatesTokenFilterFactory.class)
      .build();
  assertAnalyzesTo(analyzer, SENTENCES, SENTENCES_max_ent_keep_orig_punc, null, null,
      SENTENCES_keep_orig_posTags, null, null, true);
}
 
Example #18
Source File: TestOpenNLPLemmatizerFilterFactory.java    From lucene-solr with Apache License 2.0
public void testKeywordAttributeAwarenessDictionaryAndMaxEnt() throws Exception {
  CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
      .addTokenFilter(KeywordRepeatFilterFactory.class)
      .addTokenFilter("opennlplemmatizer", "dictionary", lemmatizerDictFile, "lemmatizerModel", lemmatizerModelFile)
      .addTokenFilter(RemoveDuplicatesTokenFilterFactory.class)
      .build();
  assertAnalyzesTo(analyzer, SENTENCES_both, SENTENCES_both_keep_orig_punc, null, null,
      SENTENCES_both_keep_orig_posTags, null, null, true);
}
 
Example #19
Source File: TokenizerChain.java    From lucene-solr with Apache License 2.0
/** Copies the analysis chain (char filters, tokenizer, token filters) from a CustomAnalyzer. */
public TokenizerChain(CustomAnalyzer customAnalyzer) {
  this(
      customAnalyzer.getCharFilterFactories().toArray(new CharFilterFactory[0]),
      customAnalyzer.getTokenizerFactory(),
      customAnalyzer.getTokenFilterFactories().toArray(new TokenFilterFactory[0]));
  setPositionIncrementGap(customAnalyzer.getPositionIncrementGap(null));
  setVersion(customAnalyzer.getVersion());
  assert customAnalyzer.getOffsetGap(null) == 1; // note: we don't support setting the offset gap
}
 
Example #20
Source File: TokenAnalyzerMaker.java    From lucene4ir with Apache License 2.0
public Analyzer createAnalyzer(String tokenFilterFile) {
    Analyzer analyzer = null;
    try {
        lucene4ir.utils.TokenFilters tokenFilters = JAXB.unmarshal(new File(tokenFilterFile), lucene4ir.utils.TokenFilters.class);
        CustomAnalyzer.Builder builder;
        if (tokenFilters.getResourceDir() != null) {
            builder = CustomAnalyzer.builder(Paths.get(tokenFilters.getResourceDir()));
        } else {
            builder = CustomAnalyzer.builder();
        }

        builder.withTokenizer(tokenFilters.getTokenizer());
        for (lucene4ir.utils.TokenFilter filter : tokenFilters.getTokenFilters()) {
            System.out.println("Token filter: " + filter.getName());
            List<lucene4ir.utils.Param> params = filter.getParams();
            if (params.size() > 0) {
                Map<String, String> paramMap = new HashMap<>();
                for (lucene4ir.utils.Param param : params) {
                    paramMap.put(param.getKey(), param.getValue());
                }
                builder.addTokenFilter(filter.getName(), paramMap);
            } else {
                builder.addTokenFilter(filter.getName());
            }
        }
        analyzer = builder.build();
    } catch (IOException ioe) {
        System.out.println(" caught a " + ioe.getClass() +
                "\n with message: " + ioe.getMessage());
    }
    return analyzer;
}
 
Example #21
Source File: LuceneAnalyzerIntegrationTest.java    From tutorials with MIT License
@Test
public void whenUseCustomAnalyzerBuilder_thenAnalyzed() throws IOException {
    Analyzer analyzer = CustomAnalyzer.builder()
        .withTokenizer("standard")
        .addTokenFilter("lowercase")
        .addTokenFilter("stop")
        .addTokenFilter("porterstem")
        .addTokenFilter("capitalization")
        .build();
    List<String> result = analyze(SAMPLE_TEXT, analyzer);

    assertThat(result, contains("Baeldung.com", "Lucen", "Analyz", "Test"));
}
 
Example #22
Source File: TestOpenNLPChunkerFilterFactory.java    From lucene-solr with Apache License 2.0
public void testPayloads() throws Exception {
  CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
      .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
      .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
      .addTokenFilter("opennlpChunker", "chunkerModel", chunkerModelFile)
      .addTokenFilter(TypeAsPayloadTokenFilterFactory.class)
      .build();
  assertAnalyzesTo(analyzer, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets,
      null, null, null, true, toPayloads(SENTENCES_chunks));
}
 
Example #23
Source File: AnalyzerFactory.java    From airsonic-advanced with GNU General Public License v3.0
private Builder createArtistAnalyzerBuilder() throws IOException {
    Builder builder = CustomAnalyzer.builder()
            .withTokenizer(StandardTokenizerFactory.class)
            .addTokenFilter(CJKWidthFilterFactory.class)
            .addTokenFilter(ASCIIFoldingFilterFactory.class, "preserveOriginal", "false")
            .addTokenFilter(LowerCaseFilterFactory.class)
            .addTokenFilter(StopFilterFactory.class, "words", STOP_WORDS_ARTIST)
            .addTokenFilter(EnglishPossessiveFilterFactory.class);
    addTokenFilterForUnderscoreRemovalAroundToken(builder);
    return builder;
}
 
Example #24
Source File: BibleSearchIndex.java    From Quelea with GNU General Public License v3.0
/**
 * Create a new empty search index.
 */
public BibleSearchIndex() {
    chapters = new HashMap<>();
    try {
        analyzer = CustomAnalyzer.builder()
                .withTokenizer(StandardTokenizerFactory.class)
                .addTokenFilter(LowerCaseFilterFactory.class)
                .addTokenFilter(ASCIIFoldingFilterFactory.class)
                .build();
        index = new MMapDirectory(Files.createTempDirectory("quelea-mmap-bible").toAbsolutePath());
    } catch (IOException ex) {
        LOGGER.log(Level.SEVERE, "Couldn't create bible search index");
        throw new RuntimeException("Couldn't create bible search index", ex);
    }
}
 
Example #25
Source File: SongSearchIndex.java    From Quelea with GNU General Public License v3.0
/**
 * Create a new empty search index.
 */
public SongSearchIndex() {
    songs = new HashMap<>();
    try {
        analyzer = CustomAnalyzer.builder()
                .withTokenizer(StandardTokenizerFactory.class)
                .addTokenFilter(LowerCaseFilterFactory.class)
                .addTokenFilter(ASCIIFoldingFilterFactory.class)
                .build();
        index = new MMapDirectory(Files.createTempDirectory("quelea-mmap-song").toAbsolutePath());
    }
    catch(IOException ex) {
        LOGGER.log(Level.SEVERE, "Couldn't create song search index");
        throw new RuntimeException("Couldn't create song search index", ex);
    }
}
 
Example #26
Source File: AnalyzerFactory.java    From airsonic with GNU General Public License v3.0
private Builder createDefaultAnalyzerBuilder() throws IOException {
    Builder builder = CustomAnalyzer.builder()
            .withTokenizer(StandardTokenizerFactory.class)
            .addTokenFilter(CJKWidthFilterFactory.class)
            .addTokenFilter(ASCIIFoldingFilterFactory.class, "preserveOriginal", "false")
            .addTokenFilter(LowerCaseFilterFactory.class)
            .addTokenFilter(StopFilterFactory.class, "words", STOP_WORDS)
            .addTokenFilter(EnglishPossessiveFilterFactory.class);
    addTokenFilterForUnderscoreRemovalAroundToken(builder);
    return builder;
}
 
Example #27
Source File: AnalyzerFactory.java    From airsonic with GNU General Public License v3.0
private Builder createArtistAnalyzerBuilder() throws IOException {
    Builder builder = CustomAnalyzer.builder()
            .withTokenizer(StandardTokenizerFactory.class)
            .addTokenFilter(CJKWidthFilterFactory.class)
            .addTokenFilter(ASCIIFoldingFilterFactory.class, "preserveOriginal", "false")
            .addTokenFilter(LowerCaseFilterFactory.class)
            .addTokenFilter(StopFilterFactory.class, "words", STOP_WORDS_ARTIST)
            .addTokenFilter(EnglishPossessiveFilterFactory.class);
    addTokenFilterForUnderscoreRemovalAroundToken(builder);
    return builder;
}
 
Example #28
Source File: AnalyzerPaneProvider.java    From lucene-solr with Apache License 2.0
@Override
public void setAnalyzer(Analyzer analyzer) {
  analyzerNameLbl.setText(analyzer.getClass().getName());

  if (analyzer instanceof CustomAnalyzer) {
    CustomAnalyzer customAnalyzer = (CustomAnalyzer) analyzer;

    DefaultListModel<String> charFilterListModel = new DefaultListModel<>();
    customAnalyzer.getCharFilterFactories().stream()
        .map(f -> f.getClass().getSimpleName())
        .forEach(charFilterListModel::addElement);
    charFilterList.setModel(charFilterListModel);

    tokenizerTF.setText(customAnalyzer.getTokenizerFactory().getClass().getSimpleName());

    DefaultListModel<String> tokenFilterListModel = new DefaultListModel<>();
    customAnalyzer.getTokenFilterFactories().stream()
        .map(f -> f.getClass().getSimpleName())
        .forEach(tokenFilterListModel::addElement);
    tokenFilterList.setModel(tokenFilterListModel);

    charFilterList.setBackground(Color.white);
    tokenizerTF.setBackground(Color.white);
    tokenFilterList.setBackground(Color.white);
  } else {
    charFilterList.setModel(new DefaultListModel<>());
    tokenizerTF.setText("");
    tokenFilterList.setModel(new DefaultListModel<>());

    charFilterList.setBackground(Color.lightGray);
    tokenizerTF.setBackground(Color.lightGray);
    tokenFilterList.setBackground(Color.lightGray);
  }
}
 
Example #29
Source File: AnalysisPanelProvider.java    From lucene-solr with Apache License 2.0
void showAnalysisChainDialog() {
  if (getCurrentAnalyzer() instanceof CustomAnalyzer) {
    CustomAnalyzer analyzer = (CustomAnalyzer) getCurrentAnalyzer();
    new DialogOpener<>(analysisChainDialogFactory).open("Analysis chain", 600, 320,
        (factory) -> {
          factory.setAnalyzer(analyzer);
        });
  }
}
 
Example #30
Source File: AnalysisImpl.java    From lucene-solr with Apache License 2.0
@Override
public Analyzer buildCustomAnalyzer(CustomAnalyzerConfig config) {
  Objects.requireNonNull(config);
  try {
    // create builder
    CustomAnalyzer.Builder builder = config.getConfigDir()
        .map(path -> CustomAnalyzer.builder(FileSystems.getDefault().getPath(path)))
        .orElse(CustomAnalyzer.builder());

    // set tokenizer
    builder.withTokenizer(config.getTokenizerConfig().getName(), config.getTokenizerConfig().getParams());

    // add char filters
    for (CustomAnalyzerConfig.ComponentConfig cfConf : config.getCharFilterConfigs()) {
      builder.addCharFilter(cfConf.getName(), cfConf.getParams());
    }

    // add token filters
    for (CustomAnalyzerConfig.ComponentConfig tfConf : config.getTokenFilterConfigs()) {
      builder.addTokenFilter(tfConf.getName(), tfConf.getParams());
    }

    // build analyzer
    this.analyzer = builder.build();
    return analyzer;
  } catch (Exception e) {
    throw new LukeException("Failed to build custom analyzer.", e);
  }
}