org.apache.lucene.analysis.util.TokenizerFactory Java Examples

The following examples show how to use org.apache.lucene.analysis.util.TokenizerFactory. You can vote up the examples you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out the related API usage examples in the sidebar.
Example #1
Source File: HanLpTokenizerFactoryTestCase.java    From jstarcraft-nlp with Apache License 2.0 6 votes vote down vote up
@Test
public void testCreate() throws Exception {
    // Build a HanLP tokenizer factory with traditional-Chinese mode enabled,
    // then tokenize a traditional-Chinese passage and print each token.
    Map<String, String> args = new TreeMap<>();
    args.put("enableTraditionalChineseMode", "true");
    TokenizerFactory factory = new HanLpTokenizerFactory(args);
    Tokenizer tokenizer = factory.create(null);

    tokenizer.setReader(new StringReader("大衛貝克漢不僅僅是名著名球員,球場以外,其妻為前" + "辣妹合唱團成員維多利亞·碧咸,亦由於他擁有" + "突出外表、百變髮型及正面的形象,以至自己" + "品牌的男士香水等商品,及長期擔任運動品牌" + "Adidas的代言人,因此對大眾傳播媒介和時尚界" + "等方面都具很大的影響力,在足球圈外所獲得的" + "認受程度可謂前所未見。"));
    tokenizer.reset();
    while (tokenizer.incrementToken()) {
        CharTermAttribute attribute = tokenizer.getAttribute(CharTermAttribute.class);
        // start/end offset of the token within the input text
        OffsetAttribute offsetAtt = tokenizer.getAttribute(OffsetAttribute.class);
        // position increment relative to the previous token
        PositionIncrementAttribute positionAttr = tokenizer.getAttribute(PositionIncrementAttribute.class);
        // token type (part of speech)
        TypeAttribute typeAttr = tokenizer.getAttribute(TypeAttribute.class);
        System.out.printf("[%d:%d %d] %s/%s\n", offsetAtt.startOffset(), offsetAtt.endOffset(), positionAttr.getPositionIncrement(), attribute, typeAttr.type());
    }
}
 
Example #2
Source File: ResourceLoaderTest.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
public void testCacheWrongType() throws Exception {
  clearCache();

  SolrResourceLoader loader = new SolrResourceLoader();
  // Reflection signature for the single-arg (Map) factory constructor.
  @SuppressWarnings({"rawtypes"})
  Class[] params = { Map.class };
  Map<String,String> args = Map.of("minGramSize", "1", "maxGramSize", "2");
  final String className = "solr.NGramTokenizerFactory";

  // We could fail here since the class name and expected type don't match, but instead we try to infer what the user actually meant
  TokenFilterFactory tff = loader.newInstance(className, TokenFilterFactory.class, new String[0], params, new Object[]{new HashMap<>(args)});
  assertNotNull("Did not load TokenFilter when asking for corresponding Tokenizer", tff);

  // This should still work, but won't if the earlier call succeeded and corrupted the cache
  TokenizerFactory tf = loader.newInstance(className, TokenizerFactory.class, new String[0], params, new Object[]{new HashMap<>(args)});
  assertNotNull("Did not load Tokenizer after bad call earlier", tf);
  loader.close();
}
 
Example #3
Source File: AnalysisImpl.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
@Override
public void addExternalJars(List<String> jarFiles) {
  // Validate every given path and convert it to a URL up front.
  List<URL> jarUrls = new ArrayList<>();

  for (String jarFile : jarFiles) {
    Path jarPath = FileSystems.getDefault().getPath(jarFile);
    boolean looksValid = Files.exists(jarPath) && jarFile.endsWith(".jar");
    if (!looksValid) {
      throw new LukeException(String.format(Locale.ENGLISH, "Invalid jar file path: %s", jarFile));
    }
    try {
      jarUrls.add(jarPath.toUri().toURL());
    } catch (IOException e) {
      throw new LukeException(e.getMessage(), e);
    }
  }

  // reload available tokenizers, charfilters, and tokenfilters against a
  // classloader that can see the newly added jars
  URLClassLoader classLoader = new URLClassLoader(
      jarUrls.toArray(new URL[0]), this.getClass().getClassLoader());
  CharFilterFactory.reloadCharFilters(classLoader);
  TokenizerFactory.reloadTokenizers(classLoader);
  TokenFilterFactory.reloadTokenFilters(classLoader);
}
 
Example #4
Source File: SolrUtil.java    From ambari-logsearch with Apache License 2.0 5 votes vote down vote up
@SuppressWarnings("unchecked")
private static boolean checkTokenizer(Class<? extends TokenizerFactory> tokenizerFactoryClass, Map<String, Object> fieldTypeInfoMap) {
  // Dig the tokenizer definition out of the field type's analyzer config.
  HashMap<String, Object> analyzer = (HashMap<String, Object>) fieldTypeInfoMap.get("analyzer");
  HashMap<String, Object> tokenizerMap = (HashMap<String, Object>) MapUtils.getObject(analyzer, "tokenizer");
  if (tokenizerMap == null) {
    return false;
  }

  String tokenizerClass = (String) tokenizerMap.get("class");
  if (StringUtils.isEmpty(tokenizerClass)) {
    return false;
  }

  // Solr configs reference classes via the "solr." shorthand prefix; strip it
  // before comparing against the factory's simple class name.
  String simpleName = tokenizerClass.replace("solr.", "");
  return simpleName.equalsIgnoreCase(tokenizerFactoryClass.getSimpleName());
}
 
Example #5
Source File: AnalysisFactoryTestCase.java    From crate with Apache License 2.0 5 votes vote down vote up
public void testTokenizers() {
    // Collect every tokenizer the SPI knows about (lower-cased), then remove
    // those covered by getTokenizers(); anything left over is new and unknown.
    Set<String> missing = org.apache.lucene.analysis.util.TokenizerFactory.availableTokenizers()
                              .stream()
                              .map(key -> key.toLowerCase(Locale.ROOT))
                              .collect(Collectors.toCollection(TreeSet::new));
    missing.removeAll(getTokenizers().keySet());
    assertTrue("new tokenizers found, please update KNOWN_TOKENIZERS: " + missing.toString(), missing.isEmpty());
}
 
Example #6
Source File: PluginsService.java    From crate with Apache License 2.0 5 votes vote down vote up
/**
 * Reloads all Lucene SPI implementations using the new classloader.
 * This method must be called after the new classloader has been created to
 * register the services for use.
 */
static void reloadLuceneSPI(ClassLoader loader) {
    // do NOT change the order of these method calls!
    // NOTE(review): the ordering presumably matters because Codec composes the
    // postings/doc-values formats reloaded just before it — confirm before touching.

    // Codecs:
    PostingsFormat.reloadPostingsFormats(loader);
    DocValuesFormat.reloadDocValuesFormats(loader);
    Codec.reloadCodecs(loader);
    // Analysis:
    CharFilterFactory.reloadCharFilters(loader);
    TokenFilterFactory.reloadTokenFilters(loader);
    TokenizerFactory.reloadTokenizers(loader);
}
 
Example #7
Source File: MMSegTokenizerFactoryTest.java    From mmseg4j-solr with Apache License 2.0 5 votes vote down vote up
private Dictionary getDictionaryByFieldType(String fieldTypeName) {
	// Resolve the field type from the latest schema and verify its index
	// analyzer is a TokenizerChain whose tokenizer is an MMSegTokenizerFactory.
	FieldType ft = h.getCore().getLatestSchema().getFieldTypeByName(fieldTypeName);
	Analyzer a = ft.getIndexAnalyzer();
	// JUnit convention is assertEquals(expected, actual); the original had the
	// arguments reversed, which produces misleading failure messages.
	Assert.assertEquals(TokenizerChain.class, a.getClass());

	TokenizerChain tc = (TokenizerChain) a;
	TokenizerFactory tf = tc.getTokenizerFactory();
	Assert.assertEquals(MMSegTokenizerFactory.class, tf.getClass());

	MMSegTokenizerFactory mtf = (MMSegTokenizerFactory) tf;

	// The factory must have loaded its dictionary before we hand it out.
	Assert.assertNotNull(mtf.dic);
	return mtf.dic;
}
 
Example #8
Source File: HanLPTokenizerFactoryTest.java    From hanlp-lucene-plugin with Apache License 2.0 5 votes vote down vote up
public void testCreate() throws Exception
{
    // Build a HanLP tokenizer factory with traditional-Chinese mode enabled,
    // then tokenize a traditional-Chinese passage and print each token.
    Map<String, String> args = new TreeMap<>();
    args.put("enableTraditionalChineseMode", "true");
    TokenizerFactory factory = new HanLPTokenizerFactory(args);
    Tokenizer tokenizer = factory.create(null);

    tokenizer.setReader(new StringReader("大衛貝克漢不僅僅是名著名球員,球場以外,其妻為前" +
                                                 "辣妹合唱團成員維多利亞·碧咸,亦由於他擁有" +
                                                 "突出外表、百變髮型及正面的形象,以至自己" +
                                                 "品牌的男士香水等商品,及長期擔任運動品牌" +
                                                 "Adidas的代言人,因此對大眾傳播媒介和時尚界" +
                                                 "等方面都具很大的影響力,在足球圈外所獲得的" +
                                                 "認受程度可謂前所未見。"));
    tokenizer.reset();
    while (tokenizer.incrementToken())
    {
        CharTermAttribute attribute = tokenizer.getAttribute(CharTermAttribute.class);
        // start/end offset of the token within the input text
        OffsetAttribute offsetAtt = tokenizer.getAttribute(OffsetAttribute.class);
        // position increment relative to the previous token
        PositionIncrementAttribute positionAttr = tokenizer.getAttribute(PositionIncrementAttribute.class);
        // token type (part of speech)
        TypeAttribute typeAttr = tokenizer.getAttribute(TypeAttribute.class);
        System.out.printf("[%d:%d %d] %s/%s\n", offsetAtt.startOffset(), offsetAtt.endOffset(), positionAttr.getPositionIncrement(), attribute, typeAttr.type());
    }
}
 
Example #9
Source File: FieldAnalysisRequestHandlerTest.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
@Test //See SOLR-8460
public void testCustomAttribute() throws Exception {
  // Analyze a value through a hand-built chain (custom tokenizer + custom
  // filter) and verify that a custom attribute value survives into the
  // analysis response.
  FieldAnalysisRequest request = new FieldAnalysisRequest();
  request.addFieldType("skutype1");
  request.setFieldValue("hi, 3456-12 a Test");
  request.setShowMatch(false);
  FieldType fieldType = new TextField();
  Analyzer analyzer = new TokenizerChain(
      // Anonymous factories with no configuration args, wrapping the custom impls.
      new TokenizerFactory(Collections.emptyMap()) {
        @Override
        public Tokenizer create(AttributeFactory factory) {
          return new CustomTokenizer(factory);
        }
      },
      new TokenFilterFactory[] {
          new TokenFilterFactory(Collections.emptyMap()) {
            @Override
            public TokenStream create(TokenStream input) {
              return new CustomTokenFilter(input);
            }
          }
      }
  );
  fieldType.setIndexAnalyzer(analyzer);

  @SuppressWarnings({"rawtypes"})
  NamedList<NamedList> result = handler.analyzeValues(request, fieldType, "fieldNameUnused");
  // just test that we see "900" in the flags attribute here
  @SuppressWarnings({"unchecked", "rawtypes"})
  List<NamedList> tokenInfoList = (List<NamedList>) result.findRecursive("index", CustomTokenFilter.class.getName());
  // '1' from CustomTokenFilter plus 900 from CustomFlagsAttributeImpl.
  assertEquals(901, tokenInfoList.get(0).get("org.apache.lucene.analysis.tokenattributes.FlagsAttribute#flags"));
}
 
Example #10
Source File: TokenizerChain.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a new TokenizerChain.
 *
 * @param charFilters factories for the CharFilters to apply; null is treated as empty.
 * @param tokenizer factory for the Tokenizer; required, must not be null.
 * @param filters factories for the TokenFilters to apply; null is treated as empty.
 */
public TokenizerChain(CharFilterFactory[] charFilters, TokenizerFactory tokenizer, TokenFilterFactory[] filters) {
  if (null == tokenizer) {
    throw new NullPointerException("TokenizerFactory must not be null");
  }

  // Normalize null arrays to the shared empty-array constants.
  this.charFilters = (charFilters != null) ? charFilters : EMPTY_CHAR_FITLERS;
  this.tokenizer = tokenizer;
  this.filters = (filters != null) ? filters : EMPTY_TOKEN_FITLERS;
}
 
Example #11
Source File: TestFactories.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
private void doTestTokenizer(String tokenizer) throws IOException {
  // Resolve the SPI name to its factory class and attempt to build an instance.
  Class<? extends TokenizerFactory> clazz = TokenizerFactory.lookupClass(tokenizer);
  TokenizerFactory factory = (TokenizerFactory) initialize(clazz);
  if (factory == null) {
    // initialize() could not fully construct this factory; nothing more to check.
    return;
  }
  // we managed to fully create an instance. check a few more things:
  if (!EXCLUDE_FACTORIES_RANDOM_DATA.contains(factory.getClass())) {
    // beast it just a little, it shouldnt throw exceptions:
    // (it should have thrown them in initialize)
    Analyzer a = new FactoryAnalyzer(factory, null, null);
    checkRandomData(random(), a, 3, 20, false, false);
    a.close();
  }
}
 
Example #12
Source File: TestFactories.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
public void test() throws IOException {
  // Exercise every analysis factory the SPI can discover, one family at a time.
  for (String name : TokenizerFactory.availableTokenizers()) {
    doTestTokenizer(name);
  }

  for (String name : TokenFilterFactory.availableTokenFilters()) {
    doTestTokenFilter(name);
  }

  for (String name : CharFilterFactory.availableCharFilters()) {
    doTestCharFilter(name);
  }
}
 
Example #13
Source File: TestCustomAnalyzer.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
public void testNullTokenizerFactory() throws Exception {
  // Passing a null tokenizer factory class to the builder must fail fast.
  expectThrows(NullPointerException.class,
      () -> CustomAnalyzer.builder().withTokenizer((Class<TokenizerFactory>) null).build());
}
 
Example #14
Source File: CustomAnalyzer.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/** Uses the tokenizer registered under the given SPI name.
 * @param name is used to look up the factory with {@link TokenizerFactory#forName(String, Map)}.
 *  The list of possible names can be looked up with {@link TokenizerFactory#availableTokenizers()}.
 * @param params the map of parameters to be passed to factory. The map must be modifiable.
 * @return this builder, for chaining.
 */
public Builder withTokenizer(String name, Map<String,String> params) throws IOException {
  Objects.requireNonNull(name, "Tokenizer name may not be null");
  TokenizerFactory factory = TokenizerFactory.forName(name, applyDefaultParams(params));
  tokenizer.set(applyResourceLoader(factory));
  componentsAdded = true;
  return this;
}
 
Example #15
Source File: CustomAnalyzer.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/** Uses the given tokenizer factory class.
 * @param factory class that is used to create the tokenizer.
 * @param params the map of parameters to be passed to factory. The map must be modifiable.
 * @return this builder, for chaining.
 */
public Builder withTokenizer(Class<? extends TokenizerFactory> factory, Map<String,String> params) throws IOException {
  Objects.requireNonNull(factory, "Tokenizer factory may not be null");
  TokenizerFactory instance = newFactoryClassInstance(factory, applyDefaultParams(params));
  tokenizer.set(applyResourceLoader(instance));
  componentsAdded = true;
  return this;
}
 
Example #16
Source File: CustomAnalyzer.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
// Package-private constructor; instances are presumably built via the Builder
// (see withTokenizer) — confirm before constructing directly.
CustomAnalyzer(Version defaultMatchVersion, CharFilterFactory[] charFilters, TokenizerFactory tokenizer, TokenFilterFactory[] tokenFilters, Integer posIncGap, Integer offsetGap) {
  this.charFilters = charFilters;
  this.tokenizer = tokenizer;
  this.tokenFilters = tokenFilters;
  // Nullable gaps: null means "fall back to the Analyzer defaults".
  this.posIncGap = posIncGap;
  this.offsetGap = offsetGap;
  if (defaultMatchVersion != null) {
    setVersion(defaultMatchVersion);
  }
}
 
Example #17
Source File: PluginsService.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Reloads all Lucene SPI implementations using the new classloader.
 * This method must be called after the new classloader has been created to
 * register the services for use.
 */
static void reloadLuceneSPI(ClassLoader loader) {
    // do NOT change the order of these method calls!
    // NOTE(review): the ordering presumably matters because Codec composes the
    // postings/doc-values formats reloaded just before it — confirm before touching.

    // Codecs:
    PostingsFormat.reloadPostingsFormats(loader);
    DocValuesFormat.reloadDocValuesFormats(loader);
    Codec.reloadCodecs(loader);
    // Analysis:
    CharFilterFactory.reloadCharFilters(loader);
    TokenFilterFactory.reloadTokenFilters(loader);
    TokenizerFactory.reloadTokenizers(loader);
}
 
Example #18
Source File: SynonymFilterFactory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
private TokenizerFactory loadTokenizerFactory(ResourceLoader loader, String cname) throws IOException {
  // Look up the factory class by name, then instantiate it reflectively
  // through its (Map) constructor with the configured tokenizer arguments.
  Class<? extends TokenizerFactory> factoryClass = loader.findClass(cname, TokenizerFactory.class);
  try {
    TokenizerFactory factory = factoryClass.getConstructor(Map.class).newInstance(tokArgs);
    // Let the factory load any resources of its own (dictionaries etc.).
    if (factory instanceof ResourceLoaderAware) {
      ((ResourceLoaderAware) factory).inform(loader);
    }
    return factory;
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
 
Example #19
Source File: SynonymGraphFilterFactory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
private TokenizerFactory loadTokenizerFactory(ResourceLoader loader, String cname) throws IOException {
  // Resolve the named factory class, then build it via its (Map) constructor.
  Class<? extends TokenizerFactory> factoryClass = loader.findClass(cname, TokenizerFactory.class);
  try {
    TokenizerFactory factory = factoryClass.getConstructor(Map.class).newInstance(tokArgs);
    // Resource-loader-aware factories get a chance to load their own data.
    if (factory instanceof ResourceLoaderAware) {
      ((ResourceLoaderAware) factory).inform(loader);
    }
    return factory;
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
 
Example #20
Source File: AlfrescoFieldType.java    From SearchServices with GNU Lesser General Public License v3.0 5 votes vote down vote up
public void add(Object current)
{
    // Only components that expose a multi-term-aware variant participate.
    if (!(current instanceof MultiTermAwareComponent))
    {
        return;
    }
    // Route the multi-term variant into the matching slot by its concrete type.
    AbstractAnalysisFactory component = ((MultiTermAwareComponent) current).getMultiTermComponent();
    if (component instanceof TokenFilterFactory)
    {
        if (filters == null)
        {
            filters = new ArrayList<TokenFilterFactory>(2);
        }
        filters.add((TokenFilterFactory) component);
    }
    else if (component instanceof TokenizerFactory)
    {
        tokenizer = (TokenizerFactory) component;
    }
    else if (component instanceof CharFilterFactory)
    {
        if (charFilters == null)
        {
            charFilters = new ArrayList<CharFilterFactory>(1);
        }
        charFilters.add((CharFilterFactory) component);
    }
    else
    {
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown analysis component from MultiTermAwareComponent: " + component);
    }
}
 
Example #21
Source File: TestFactories.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
private void doTestTokenizer(String tokenizer) throws IOException {
  // Map the SPI name to a factory class and try to construct it.
  Class<? extends TokenizerFactory> clazz = TokenizerFactory.lookupClass(tokenizer);
  TokenizerFactory factory = (TokenizerFactory) initialize(clazz);
  if (factory == null) {
    // Could not fully construct this factory; initialize() surfaced the reason.
    return;
  }
  // we managed to fully create an instance. check a few more things:
  if (!EXCLUDE_FACTORIES_RANDOM_DATA.contains(factory.getClass())) {
    // beast it just a little, it shouldnt throw exceptions:
    // (it should have thrown them in initialize)
    Analyzer a = new FactoryAnalyzer(factory, null, null);
    checkRandomData(random(), a, 3, 20, false, false);
    a.close();
  }
}
 
Example #22
Source File: TestFactories.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
public void test() throws IOException {
  // Walk every SPI-registered analysis component, family by family.
  for (String name : TokenizerFactory.availableTokenizers()) {
    doTestTokenizer(name);
  }

  for (String name : TokenFilterFactory.availableTokenFilters()) {
    doTestTokenFilter(name);
  }

  for (String name : CharFilterFactory.availableCharFilters()) {
    doTestCharFilter(name);
  }
}
 
Example #23
Source File: AnalyzerFactoryTask.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * This method looks up a class with its fully qualified name (FQN), or a short-name
 * class-simplename, or with a package suffix, assuming "org.apache.lucene.analysis."
 * as the package prefix (e.g. "standard.ClassicTokenizerFactory" -&gt;
 * "org.apache.lucene.analysis.standard.ClassicTokenizerFactory").
 *
 * If className contains a period, the class is first looked up as-is, assuming that it
 * is an FQN.  If this fails, lookup is retried after prepending the Lucene analysis
 * package prefix to the class name.
 *
 * If className does not contain a period, the analysis SPI *Factory.lookupClass()
 * methods are used to find the class.
 *
 * @param className The name or the short name of the class.
 * @param expectedType The superclass className is expected to extend
 * @return the loaded class.
 * @throws ClassNotFoundException if lookup fails
 */
public <T> Class<? extends T> lookupAnalysisClass(String className, Class<T> expectedType)
    throws ClassNotFoundException {
  if (className.contains(".")) {
    try {
      // First, try className == FQN
      return Class.forName(className).asSubclass(expectedType);
    } catch (ClassNotFoundException e) {
      try {
        // Second, retry lookup after prepending the Lucene analysis package prefix
        return Class.forName(LUCENE_ANALYSIS_PACKAGE_PREFIX + className).asSubclass(expectedType);
      } catch (ClassNotFoundException e1) {
        // Report both names we tried, so the caller can see the full search path.
        throw new ClassNotFoundException("Can't find class '" + className
                                         + "' or '" + LUCENE_ANALYSIS_PACKAGE_PREFIX + className + "'");
      }
    }
  }
  // No dot - use analysis SPI lookup; strip the factory suffix to get the SPI name
  final String analysisComponentName = ANALYSIS_COMPONENT_SUFFIX_PATTERN.matcher(className).replaceFirst("");
  // Dispatch to the SPI registry matching the expected component family.
  if (CharFilterFactory.class.isAssignableFrom(expectedType)) {
    return CharFilterFactory.lookupClass(analysisComponentName).asSubclass(expectedType);
  } else if (TokenizerFactory.class.isAssignableFrom(expectedType)) {
    return TokenizerFactory.lookupClass(analysisComponentName).asSubclass(expectedType);
  } else if (TokenFilterFactory.class.isAssignableFrom(expectedType)) {
    return TokenFilterFactory.lookupClass(analysisComponentName).asSubclass(expectedType);
  }

  throw new ClassNotFoundException("Can't find class '" + className + "'");
}
 
Example #24
Source File: AnalyzerFactory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
// Builds an analyzer factory from its three component-factory lists.
// The char-filter and token-filter lists are stored as given (not copied).
public AnalyzerFactory(List<CharFilterFactory> charFilterFactories,
                       TokenizerFactory tokenizerFactory,
                       List<TokenFilterFactory> tokenFilterFactories) {
  this.charFilterFactories = charFilterFactories;
  // NOTE(review): this null check only fires with assertions enabled (-ea);
  // without -ea a null tokenizerFactory is stored and will fail later.
  assert null != tokenizerFactory;
  this.tokenizerFactory = tokenizerFactory;
  this.tokenFilterFactories = tokenFilterFactories;
}
 
Example #25
Source File: TestHMMChineseTokenizerFactory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/** Test showing the behavior */
public void testSimple() throws Exception {
  // Build the factory with no configuration and tokenize a short
  // Simplified-Chinese sentence.
  TokenizerFactory factory = new HMMChineseTokenizerFactory(new HashMap<String,String>());
  Tokenizer tokenizer = factory.create(newAttributeFactory());
  tokenizer.setReader(new StringReader("我购买了道具和服装。"));
  // TODO: fix smart chinese to not emit punctuation tokens
  // at the moment: you have to clean up with WDF, or use the stoplist, etc
  assertTokenStreamContents(tokenizer,
     new String[] { "我", "购买", "了", "道具", "和", "服装", "," });
}
 
Example #26
Source File: CustomAnalyzer.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/** Returns the tokenizer factory that this analyzer was configured with. */
public TokenizerFactory getTokenizerFactory() {
  return tokenizer;
}
 
Example #27
Source File: AnalysisChainDialogFactory.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
// Builds the panel showing the analyzer's chain: char filters, tokenizer,
// and token filters, each labeled and laid out in a three-row grid.
private JPanel analysisChain() {
  JPanel panel = new JPanel(new GridBagLayout());
  panel.setOpaque(false);

  GridBagConstraints c = new GridBagConstraints();
  c.fill = GridBagConstraints.HORIZONTAL;
  c.insets = new Insets(5, 5, 5, 5);

  // Row 0: char filters (label + scrollable list of SPI names).
  c.gridx = 0;
  c.gridy = 0;
  c.weightx = 0.1;
  c.weighty = 0.5;
  panel.add(new JLabel(MessageUtils.getLocalizedMessage("analysis.dialog.chain.label.charfilters")), c);

  String[] charFilters = analyzer.getCharFilterFactories().stream().map(f -> CharFilterFactory.findSPIName(f.getClass())).toArray(String[]::new);
  JList<String> charFilterList = new JList<>(charFilters);
  // Show at most 5 rows; keep one row visible even when the list is empty.
  charFilterList.setVisibleRowCount(charFilters.length == 0 ? 1 : Math.min(charFilters.length, 5));
  c.gridx = 1;
  c.gridy = 0;
  c.weightx = 0.5;
  c.weighty = 0.5;
  panel.add(new JScrollPane(charFilterList), c);

  // Row 1: the single tokenizer (label + read-only text field).
  c.gridx = 0;
  c.gridy = 1;
  c.weightx = 0.1;
  c.weighty = 0.1;
  panel.add(new JLabel(MessageUtils.getLocalizedMessage("analysis.dialog.chain.label.tokenizer")), c);

  String tokenizer = TokenizerFactory.findSPIName(analyzer.getTokenizerFactory().getClass());
  JTextField tokenizerTF = new JTextField(tokenizer);
  tokenizerTF.setColumns(30);
  tokenizerTF.setEditable(false);
  tokenizerTF.setPreferredSize(new Dimension(300, 25));
  tokenizerTF.setBorder(BorderFactory.createLineBorder(Color.gray));
  c.gridx = 1;
  c.gridy = 1;
  c.weightx = 0.5;
  c.weighty = 0.1;
  panel.add(tokenizerTF, c);

  // Row 2: token filters (label + scrollable list of SPI names).
  c.gridx = 0;
  c.gridy = 2;
  c.weightx = 0.1;
  c.weighty = 0.5;
  panel.add(new JLabel(MessageUtils.getLocalizedMessage("analysis.dialog.chain.label.tokenfilters")), c);

  String[] tokenFilters = analyzer.getTokenFilterFactories().stream().map(f -> TokenFilterFactory.findSPIName(f.getClass())).toArray(String[]::new);
  JList<String> tokenFilterList = new JList<>(tokenFilters);
  tokenFilterList.setVisibleRowCount(tokenFilters.length == 0 ? 1 : Math.min(tokenFilters.length, 5));
  tokenFilterList.setMinimumSize(new Dimension(300, 25));
  c.gridx = 1;
  c.gridy = 2;
  c.weightx = 0.5;
  c.weighty = 0.5;
  panel.add(new JScrollPane(tokenFilterList), c);

  return panel;
}
 
Example #28
Source File: AnalysisImpl.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
@Override
public Collection<String> getAvailableTokenizers() {
  // All SPI-registered tokenizer names, sorted alphabetically into a new list.
  return TokenizerFactory.availableTokenizers().stream().sorted().collect(Collectors.toList());
}
 
Example #29
Source File: TokenizerChain.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/** @return the TokenizerFactory in use, will never be null (the constructor rejects null) */
public TokenizerFactory getTokenizerFactory() { return tokenizer; }
 
Example #30
Source File: TestFactories.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
// Wires the three component factories into the analyzer under test.
FactoryAnalyzer(TokenizerFactory tokenizer, TokenFilterFactory tokenfilter, CharFilterFactory charFilter) {
  // tokenizer is required; note this assert only fires when run with -ea.
  assert tokenizer != null;
  this.tokenizer = tokenizer;
  this.charFilter = charFilter;
  this.tokenfilter = tokenfilter;
}