Java Code Examples for org.apache.lucene.analysis.util.TokenFilterFactory

The following examples show how to use org.apache.lucene.analysis.util.TokenFilterFactory. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: lucene-solr   Source File: AnalysisImpl.java    License: Apache License 2.0 6 votes vote down vote up
@Override
public void addExternalJars(List<String> jarFiles) {
  // Validate each jar path and collect its URL form for the classloader.
  List<URL> jarUrls = new ArrayList<>();

  for (String jarPath : jarFiles) {
    Path path = FileSystems.getDefault().getPath(jarPath);
    if (!Files.exists(path) || !jarPath.endsWith(".jar")) {
      throw new LukeException(String.format(Locale.ENGLISH, "Invalid jar file path: %s", jarPath));
    }
    try {
      jarUrls.add(path.toUri().toURL());
    } catch (IOException e) {
      throw new LukeException(e.getMessage(), e);
    }
  }

  // reload available tokenizers, charfilters, and tokenfilters
  URLClassLoader classLoader = new URLClassLoader(
      jarUrls.toArray(new URL[0]), this.getClass().getClassLoader());
  CharFilterFactory.reloadCharFilters(classLoader);
  TokenizerFactory.reloadTokenizers(classLoader);
  TokenFilterFactory.reloadTokenFilters(classLoader);
}
 
Example 2
Source Project: lucene-solr   Source File: TestSynonymFilterFactory.java    License: Apache License 2.0 6 votes vote down vote up
/** Test that specifying both an analyzer and a tokenizerFactory is rejected */
public void testAnalyzer() throws Exception {
  final String analyzer = CJKAnalyzer.class.getName();
  final String tokenizerFactory = PatternTokenizerFactory.class.getName();

  // Supplying only an analyzer works fine.
  TokenFilterFactory factory = tokenFilterFactory("Synonym",
      "synonyms", "synonyms2.txt",
      "analyzer", analyzer);
  assertNotNull(factory);

  // Supplying a tokenizerFactory in addition must fail with a clear message.
  IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
    tokenFilterFactory("Synonym",
        "synonyms", "synonyms.txt",
        "analyzer", analyzer,
        "tokenizerFactory", tokenizerFactory);
  });
  assertTrue(expected.getMessage().contains("Analyzer and TokenizerFactory can't be specified both"));
}
 
Example 3
Source Project: lucene-solr   Source File: TestCustomAnalyzer.java    License: Apache License 2.0 6 votes vote down vote up
public void testWhitespaceFactoryWithFolding() throws Exception {
  // Build the chain from factory classes (as opposed to SPI names).
  CustomAnalyzer analyzer = CustomAnalyzer.builder()
      .withTokenizer(WhitespaceTokenizerFactory.class)
      .addTokenFilter(ASCIIFoldingFilterFactory.class, "preserveOriginal", "true")
      .addTokenFilter(LowerCaseFilterFactory.class)
      .build();

  // Verify chain structure: whitespace tokenizer, no char filters, two token filters.
  assertSame(WhitespaceTokenizerFactory.class, analyzer.getTokenizerFactory().getClass());
  assertEquals(Collections.emptyList(), analyzer.getCharFilterFactories());
  List<TokenFilterFactory> filters = analyzer.getTokenFilterFactories();
  assertEquals(2, filters.size());
  assertSame(ASCIIFoldingFilterFactory.class, filters.get(0).getClass());
  assertSame(LowerCaseFilterFactory.class, filters.get(1).getClass());
  assertEquals(0, analyzer.getPositionIncrementGap("dummy"));
  assertEquals(1, analyzer.getOffsetGap("dummy"));
  assertSame(Version.LATEST, analyzer.getVersion());

  // ASCII input: just lowercased. Accented input: folded token plus the
  // preserved original at the same position (increment 0).
  assertAnalyzesTo(analyzer, "foo bar FOO BAR",
      new String[] { "foo", "bar", "foo", "bar" },
      new int[] { 1, 1, 1, 1 });
  assertAnalyzesTo(analyzer, "föó bär FÖÖ BAR",
      new String[] { "foo", "föó", "bar", "bär", "foo", "föö", "bar" },
      new int[] { 1, 0, 1, 0, 1, 0, 1 });
  analyzer.close();
}
 
Example 4
Source Project: lucene-solr   Source File: TestCustomAnalyzer.java    License: Apache License 2.0 6 votes vote down vote up
public void testWhitespaceWithFolding() throws Exception {
  // Build the same chain as the factory-class variant, but via SPI names.
  CustomAnalyzer analyzer = CustomAnalyzer.builder()
      .withTokenizer("whitespace")
      .addTokenFilter("asciifolding", "preserveOriginal", "true")
      .addTokenFilter("lowercase")
      .build();

  // SPI names must resolve to the expected factory classes.
  assertSame(WhitespaceTokenizerFactory.class, analyzer.getTokenizerFactory().getClass());
  assertEquals(Collections.emptyList(), analyzer.getCharFilterFactories());
  List<TokenFilterFactory> filters = analyzer.getTokenFilterFactories();
  assertEquals(2, filters.size());
  assertSame(ASCIIFoldingFilterFactory.class, filters.get(0).getClass());
  assertSame(LowerCaseFilterFactory.class, filters.get(1).getClass());
  assertEquals(0, analyzer.getPositionIncrementGap("dummy"));
  assertEquals(1, analyzer.getOffsetGap("dummy"));
  assertSame(Version.LATEST, analyzer.getVersion());

  // Folding keeps originals of accented tokens at position increment 0.
  assertAnalyzesTo(analyzer, "foo bar FOO BAR",
      new String[] { "foo", "bar", "foo", "bar" },
      new int[] { 1, 1, 1, 1 });
  assertAnalyzesTo(analyzer, "föó bär FÖÖ BAR",
      new String[] { "foo", "föó", "bar", "bär", "foo", "föö", "bar" },
      new int[] { 1, 0, 1, 0, 1, 0, 1 });
  analyzer.close();
}
 
Example 5
Source Project: lucene-solr   Source File: TestCustomAnalyzer.java    License: Apache License 2.0 6 votes vote down vote up
public void testStopWordsFromClasspath() throws Exception {
  // The "words" parameter is resolved as a classpath resource here.
  CustomAnalyzer analyzer = CustomAnalyzer.builder()
      .withTokenizer(WhitespaceTokenizerFactory.class)
      .addTokenFilter("stop",
          "ignoreCase", "true",
          "words", "org/apache/lucene/analysis/custom/teststop.txt",
          "format", "wordset")
      .build();

  // Chain: whitespace tokenizer, no char filters, a single stop filter.
  assertSame(WhitespaceTokenizerFactory.class, analyzer.getTokenizerFactory().getClass());
  assertEquals(Collections.emptyList(), analyzer.getCharFilterFactories());
  List<TokenFilterFactory> filters = analyzer.getTokenFilterFactories();
  assertEquals(1, filters.size());
  assertSame(StopFilterFactory.class, filters.get(0).getClass());
  assertEquals(0, analyzer.getPositionIncrementGap("dummy"));
  assertEquals(1, analyzer.getOffsetGap("dummy"));
  assertSame(Version.LATEST, analyzer.getVersion());

  // Every input token is a stop word, so analysis yields no tokens at all.
  assertAnalyzesTo(analyzer, "foo Foo Bar", new String[0]);
  analyzer.close();
}
 
Example 6
Source Project: lucene-solr   Source File: TestAsciiFoldingFilterFactory.java    License: Apache License 2.0 6 votes vote down vote up
public void testMultiTermAnalysis() throws IOException {
  // Default factory: accents are folded on both create() and normalize() paths.
  TokenFilterFactory factory = new ASCIIFoldingFilterFactory(Collections.emptyMap());
  TokenStream ts = factory.create(new CannedTokenStream(new Token("Été", 0, 3)));
  assertTokenStreamContents(ts, new String[] { "Ete" });

  ts = factory.normalize(new CannedTokenStream(new Token("Été", 0, 3)));
  assertTokenStreamContents(ts, new String[] { "Ete" });

  // With preserveOriginal=true, create() emits the folded token plus the
  // original, while normalize() still emits only the folded form.
  factory = new ASCIIFoldingFilterFactory(new HashMap<>(Collections.singletonMap("preserveOriginal", "true")));
  ts = factory.create(new CannedTokenStream(new Token("Été", 0, 3)));
  assertTokenStreamContents(ts, new String[] { "Ete", "Été" });

  ts = factory.normalize(new CannedTokenStream(new Token("Été", 0, 3)));
  assertTokenStreamContents(ts, new String[] { "Ete" });
}
 
Example 7
Source Project: lucene-solr   Source File: SolrQueryParserBase.java    License: Apache License 2.0 6 votes vote down vote up
protected ReversedWildcardFilterFactory getReversedWildcardFilterFactory(FieldType fieldType) {
  if (leadingWildcards == null) leadingWildcards = new HashMap<>();
  // A cached null is meaningful: it records "no such factory" for this field
  // type, so containsKey distinguishes a negative cache entry from a miss.
  ReversedWildcardFilterFactory cached = leadingWildcards.get(fieldType);
  if (cached != null || leadingWildcards.containsKey(fieldType)) {
    return cached;
  }

  ReversedWildcardFilterFactory found = null;
  Analyzer indexAnalyzer = fieldType.getIndexAnalyzer();
  if (indexAnalyzer instanceof TokenizerChain) {
    // examine the indexing analysis chain if it supports leading wildcards
    TokenizerChain chain = (TokenizerChain) indexAnalyzer;
    for (TokenFilterFactory filterFactory : chain.getTokenFilterFactories()) {
      if (filterFactory instanceof ReversedWildcardFilterFactory) {
        found = (ReversedWildcardFilterFactory) filterFactory;
        break;
      }
    }
  }

  // Cache the result (possibly null) for subsequent lookups.
  leadingWildcards.put(fieldType, found);
  return found;
}
 
Example 8
/**
 * Helper method, public for testing purposes only.
 * <p>
 * Given an analyzer, inspects it to determine if:
 * <ul>
 *  <li>it is a {@link TokenizerChain}</li>
 *  <li>it contains exactly one instance of {@link ShingleFilterFactory}</li>
 * </ul>
 * <p>
 * If these conditions are met, then this method returns the <code>maxShingleSize</code>
 * in effect for this analyzer, otherwise returns -1.
 * </p>
 *
 * @param analyzer An analyzer to inspect
 * @return <code>maxShingleSize</code> if available
 * @lucene.internal
 */
public static int getMaxShingleSize(Analyzer analyzer) {
  if (!(analyzer instanceof TokenizerChain)) {
    return -1;
  }

  final TokenFilterFactory[] factories = ((TokenizerChain) analyzer).getTokenFilterFactories();
  if (factories.length == 0) {
    return -1;
  }
  int result = -1;
  for (TokenFilterFactory tff : factories) {
    if (tff instanceof ShingleFilterFactory) {
      if (result > 0) {
        // more than one shingle factory in our analyzer, which is weird, so make no assumptions...
        return -1;
      }
      // would be nice if there was an easy way to just ask a factory for the effective value
      // of an argument...
      final Map<String,String> args = tff.getOriginalArgs();
      result = args.containsKey("maxShingleSize")
        ? Integer.parseInt(args.get("maxShingleSize")) : ShingleFilter.DEFAULT_MAX_SHINGLE_SIZE;
    }
  }
  return result;
}
 
Example 9
Source Project: lucene-solr   Source File: PayloadUtils.java    License: Apache License 2.0 6 votes vote down vote up
public static String getPayloadEncoder(FieldType fieldType) {
  // TODO: support custom payload encoding fields too somehow - maybe someone has a custom component that encodes payloads as floats
  Analyzer indexAnalyzer = fieldType.getIndexAnalyzer();
  if (!(indexAnalyzer instanceof TokenizerChain)) {
    return null;
  }
  // examine the indexing analysis chain for DelimitedPayloadTokenFilterFactory or NumericPayloadTokenFilterFactory
  for (TokenFilterFactory factory : ((TokenizerChain) indexAnalyzer).getTokenFilterFactories()) {
    if (factory instanceof DelimitedPayloadTokenFilterFactory) {
      // may be null if the factory was configured without an encoder attribute
      return factory.getOriginalArgs().get(DelimitedPayloadTokenFilterFactory.ENCODER_ATTR);
    }
    if (factory instanceof NumericPayloadTokenFilterFactory) {
      // encodes using `PayloadHelper.encodeFloat(payload)`
      return "float";
    }
  }
  return null;
}
 
Example 10
Source Project: lucene-solr   Source File: CustomAnalyzerStrField.java    License: Apache License 2.0 6 votes vote down vote up
public CustomAnalyzerStrField() {
  Random rand = LuceneTestCase.random();

  // two arg constructor
  Analyzer twoArg = new TokenizerChain
    (new KeywordTokenizerFactory(new HashMap<>()),
     rand.nextBoolean() ? null : new TokenFilterFactory[0]);

  // three arg constructor
  Analyzer threeArg = new TokenizerChain
    (rand.nextBoolean() ? null : new CharFilterFactory[0],
     new KeywordTokenizerFactory(new HashMap<>()),
     rand.nextBoolean() ? null : new TokenFilterFactory[0]);

  // Randomly decide which analyzer serves indexing and which serves querying.
  if (rand.nextBoolean()) {
    indexAnalyzer = twoArg;
    queryAnalyzer = threeArg;
  } else {
    queryAnalyzer = twoArg;
    indexAnalyzer = threeArg;
  }
}
 
Example 11
Source Project: lucene-solr   Source File: ResourceLoaderTest.java    License: Apache License 2.0 6 votes vote down vote up
public void testCacheWrongType() throws Exception {
  clearCache();

  SolrResourceLoader resourceLoader = new SolrResourceLoader();
  @SuppressWarnings({"rawtypes"})
  Class[] params = { Map.class };
  Map<String,String> args = Map.of("minGramSize", "1", "maxGramSize", "2");
  final String className = "solr.NGramTokenizerFactory";

  // We could fail here since the class name and expected type don't match,
  // but instead we try to infer what the user actually meant.
  TokenFilterFactory tff = resourceLoader.newInstance(className, TokenFilterFactory.class, new String[0], params, new Object[]{new HashMap<>(args)});
  assertNotNull("Did not load TokenFilter when asking for corresponding Tokenizer", tff);

  // This should work, but won't if the earlier call succeeded and corrupted the cache.
  TokenizerFactory tf = resourceLoader.newInstance(className, TokenizerFactory.class, new String[0], params, new Object[]{new HashMap<>(args)});
  assertNotNull("Did not load Tokenizer after bad call earlier", tf);
  resourceLoader.close();
}
 
Example 12
Source Project: Elasticsearch   Source File: PluginsService.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reloads all Lucene SPI implementations using the new classloader.
 * This method must be called after the new classloader has been created to
 * register the services for use.
 */
static void reloadLuceneSPI(ClassLoader loader) {
    // do NOT change the order of these method calls!
    // Each reload* call re-runs Lucene's ServiceLoader-based SPI discovery
    // against the supplied classloader.

    // Codecs: the per-format reloads run before Codec.reloadCodecs so the
    // codec layer sees the refreshed formats.
    PostingsFormat.reloadPostingsFormats(loader);
    DocValuesFormat.reloadDocValuesFormats(loader);
    Codec.reloadCodecs(loader);
    // Analysis: refresh char filters, token filters, and tokenizers.
    CharFilterFactory.reloadCharFilters(loader);
    TokenFilterFactory.reloadTokenFilters(loader);
    TokenizerFactory.reloadTokenizers(loader);
}
 
Example 13
/**
 * Adds the multi-term variant of {@code current} to the appropriate component
 * list; non-{@link MultiTermAwareComponent} inputs are silently ignored.
 */
public void add(Object current) {
  if (!(current instanceof MultiTermAwareComponent)) {
    return;
  }
  AbstractAnalysisFactory newComponent = ((MultiTermAwareComponent) current).getMultiTermComponent();
  if (newComponent instanceof TokenFilterFactory) {
    // Lists are created lazily; most analyzers have few multi-term components.
    if (filters == null) {
      filters = new ArrayList<>(2);
    }
    filters.add((TokenFilterFactory) newComponent);
  } else if (newComponent instanceof TokenizerFactory) {
    tokenizer = (TokenizerFactory) newComponent;
  } else if (newComponent instanceof CharFilterFactory) {
    if (charFilters == null) {
      charFilters = new ArrayList<>(1);
    }
    charFilters.add((CharFilterFactory) newComponent);
  } else {
    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Unknown analysis component from MultiTermAwareComponent: " + newComponent);
  }
}
 
Example 14
Source Project: lucene-solr   Source File: AnalyzerFactoryTask.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * This method looks up a class with its fully qualified name (FQN), or a short-name
 * class-simplename, or with a package suffix, assuming "org.apache.lucene.analysis."
 * as the package prefix (e.g. "standard.ClassicTokenizerFactory" -&gt;
 * "org.apache.lucene.analysis.standard.ClassicTokenizerFactory").
 *
 * If className contains a period, the class is first looked up as-is, assuming that it
 * is an FQN.  If this fails, lookup is retried after prepending the Lucene analysis
 * package prefix to the class name.
 *
 * If className does not contain a period, the analysis SPI *Factory.lookupClass()
 * methods are used to find the class.
 *
 * @param className The name or the short name of the class.
 * @param expectedType The superclass className is expected to extend
 * @return the loaded class.
 * @throws ClassNotFoundException if lookup fails
 */
public <T> Class<? extends T> lookupAnalysisClass(String className, Class<T> expectedType)
    throws ClassNotFoundException {
  if (className.contains(".")) {
    try {
      // First, try className == FQN
      return Class.forName(className).asSubclass(expectedType);
    } catch (ClassNotFoundException e) {
      try {
        // Second, retry lookup after prepending the Lucene analysis package prefix
        return Class.forName(LUCENE_ANALYSIS_PACKAGE_PREFIX + className).asSubclass(expectedType);
      } catch (ClassNotFoundException e1) {
        // Fix: preserve the underlying failures instead of discarding them —
        // the prefixed-lookup failure becomes the cause, the FQN failure is
        // attached as a suppressed exception for diagnosis.
        ClassNotFoundException failure = new ClassNotFoundException("Can't find class '" + className
                                         + "' or '" + LUCENE_ANALYSIS_PACKAGE_PREFIX + className + "'", e1);
        failure.addSuppressed(e);
        throw failure;
      }
    }
  }
  // No dot - use analysis SPI lookup
  final String analysisComponentName = ANALYSIS_COMPONENT_SUFFIX_PATTERN.matcher(className).replaceFirst("");
  if (CharFilterFactory.class.isAssignableFrom(expectedType)) {
    return CharFilterFactory.lookupClass(analysisComponentName).asSubclass(expectedType);
  } else if (TokenizerFactory.class.isAssignableFrom(expectedType)) {
    return TokenizerFactory.lookupClass(analysisComponentName).asSubclass(expectedType);
  } else if (TokenFilterFactory.class.isAssignableFrom(expectedType)) {
    return TokenFilterFactory.lookupClass(analysisComponentName).asSubclass(expectedType);
  }

  throw new ClassNotFoundException("Can't find class '" + className + "'");
}
 
Example 15
Source Project: lucene-solr   Source File: AnalyzerFactory.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds a factory describing a full analysis chain. The tokenizer is the
 * only mandatory component; char/token filter lists are stored as given.
 */
public AnalyzerFactory(List<CharFilterFactory> charFilterFactories,
                       TokenizerFactory tokenizerFactory,
                       List<TokenFilterFactory> tokenFilterFactories) {
  assert null != tokenizerFactory;
  this.charFilterFactories = charFilterFactories;
  this.tokenizerFactory = tokenizerFactory;
  this.tokenFilterFactories = tokenFilterFactories;
}
 
Example 16
Source Project: lucene-solr   Source File: AnalyzerFactory.java    License: Apache License 2.0 5 votes vote down vote up
/** Renders the full chain as {@code AnalyzerFactory(name:…, …, tokenizer, filters…)}. */
@Override
public String toString() {
  StringBuilder sb = new StringBuilder("AnalyzerFactory(");
  // Optional scalar attributes come first, each with a trailing separator.
  if (name != null) {
    sb.append("name:").append(name).append(", ");
  }
  if (positionIncrementGap != null) {
    sb.append("positionIncrementGap:").append(positionIncrementGap).append(", ");
  }
  if (offsetGap != null) {
    sb.append("offsetGap:").append(offsetGap).append(", ");
  }
  // Char filters precede the tokenizer; token filters follow it.
  for (CharFilterFactory charFilterFactory : charFilterFactories) {
    sb.append(charFilterFactory).append(", ");
  }
  sb.append(tokenizerFactory);
  for (TokenFilterFactory tokenFilterFactory : tokenFilterFactories) {
    sb.append(", ").append(tokenFilterFactory);
  }
  return sb.append(')').toString();
}
 
Example 17
Source Project: lucene-solr   Source File: TestFactories.java    License: Apache License 2.0 5 votes vote down vote up
/** Exercises every analysis factory registered via SPI. */
public void test() throws IOException {
  for (String name : TokenizerFactory.availableTokenizers()) {
    doTestTokenizer(name);
  }

  for (String name : TokenFilterFactory.availableTokenFilters()) {
    doTestTokenFilter(name);
  }

  for (String name : CharFilterFactory.availableCharFilters()) {
    doTestCharFilter(name);
  }
}
 
Example 18
Source Project: lucene-solr   Source File: TestFactories.java    License: Apache License 2.0 5 votes vote down vote up
private void doTestTokenFilter(String tokenfilter) throws IOException {
  Class<? extends TokenFilterFactory> factoryClazz = TokenFilterFactory.lookupClass(tokenfilter);
  TokenFilterFactory factory = (TokenFilterFactory) initialize(factoryClazz);
  if (factory == null) {
    // could not be instantiated with the generated args; nothing more to check
    return;
  }
  // we managed to fully create an instance. check a few more things:
  if (!EXCLUDE_FACTORIES_RANDOM_DATA.contains(factory.getClass())) {
    // beast it just a little, it shouldnt throw exceptions:
    // (it should have thrown them in initialize)
    Analyzer a = new FactoryAnalyzer(assertingTokenizer, factory, null);
    checkRandomData(random(), a, 3, 20, false, false);
    a.close();
  }
}
 
Example 19
Source Project: lucene-solr   Source File: CustomAnalyzer.java    License: Apache License 2.0 5 votes vote down vote up
// Package-private: instances are built via CustomAnalyzer.Builder.
// Wires the analysis chain verbatim; posIncGap/offsetGap may be null,
// in which case the Analyzer defaults apply.
CustomAnalyzer(Version defaultMatchVersion, CharFilterFactory[] charFilters, TokenizerFactory tokenizer, TokenFilterFactory[] tokenFilters, Integer posIncGap, Integer offsetGap) {
  this.charFilters = charFilters;
  this.tokenizer = tokenizer;
  this.tokenFilters = tokenFilters;
  this.posIncGap = posIncGap;
  this.offsetGap = offsetGap;
  // Only override the analyzer version when one was explicitly requested.
  if (defaultMatchVersion != null) {
    setVersion(defaultMatchVersion);
  }
}
 
Example 20
Source Project: lucene-solr   Source File: CustomAnalyzer.java    License: Apache License 2.0 5 votes vote down vote up
/** Builds the token stream: tokenizer first, then each filter in order. */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
  final Tokenizer source = tokenizer.create(attributeFactory(fieldName));
  TokenStream sink = source;
  for (final TokenFilterFactory filterFactory : tokenFilters) {
    sink = filterFactory.create(sink);
  }
  return new TokenStreamComponents(source, sink);
}
 
Example 21
Source Project: lucene-solr   Source File: CustomAnalyzer.java    License: Apache License 2.0 5 votes vote down vote up
/** Applies each token filter's normalization pass to {@code in}, in chain order. */
@Override
protected TokenStream normalize(String fieldName, TokenStream in) {
  TokenStream ts = in;
  for (final TokenFilterFactory filterFactory : tokenFilters) {
    ts = filterFactory.normalize(ts);
  }
  return ts;
}
 
Example 22
Source Project: lucene-solr   Source File: CustomAnalyzer.java    License: Apache License 2.0 5 votes vote down vote up
/** Adds the given token filter.
 * @param factory class that is used to create the token filter.
 * @param params the map of parameters to be passed to factory. The map must be modifiable.
 */
public Builder addTokenFilter(Class<? extends TokenFilterFactory> factory, Map<String,String> params) throws IOException {
  // Fix: the message previously read "TokenFilter name may not be null" —
  // copy-pasted from the String-keyed overload — but this overload takes a
  // factory class, not a name.
  Objects.requireNonNull(factory, "TokenFilter factory may not be null");
  tokenFilters.add(applyResourceLoader(newFactoryClassInstance(factory, applyDefaultParams(params))));
  componentsAdded = true;
  return this;
}
 
Example 23
Source Project: lucene-solr   Source File: CustomAnalyzer.java    License: Apache License 2.0 5 votes vote down vote up
/** Adds the given token filter.
 * @param name is used to look up the factory with {@link TokenFilterFactory#forName(String, Map)}.
 *  The list of possible names can be looked up with {@link TokenFilterFactory#availableTokenFilters()}.
 * @param params the map of parameters to be passed to factory. The map must be modifiable.
 */
public Builder addTokenFilter(String name, Map<String,String> params) throws IOException {
  Objects.requireNonNull(name, "TokenFilter name may not be null");
  // Resolve via SPI, then give the factory a chance to load its resources.
  TokenFilterFactory factory = TokenFilterFactory.forName(name, applyDefaultParams(params));
  tokenFilters.add(applyResourceLoader(factory));
  componentsAdded = true;
  return this;
}
 
Example 24
Source Project: lucene-solr   Source File: CustomAnalyzer.java    License: Apache License 2.0 5 votes vote down vote up
/** Builds the analyzer. */
public CustomAnalyzer build() {
  // A tokenizer is the one mandatory component of the chain.
  if (tokenizer.get() == null) {
    throw new IllegalStateException("You have to set at least a tokenizer.");
  }
  return new CustomAnalyzer(
      defaultMatchVersion.get(),
      charFilters.toArray(new CharFilterFactory[0]),
      tokenizer.get(),
      tokenFilters.toArray(new TokenFilterFactory[0]),
      posIncGap.get(),
      offsetGap.get());
}
 
Example 25
Source Project: lucene-solr   Source File: ProtectedTermFilterFactory.java    License: Apache License 2.0 5 votes vote down vote up
private void populateInnerFilters(LinkedHashMap<String, Map<String, String>> wrappedFilterArgs) {
  final List<TokenFilterFactory> innerFilters = new ArrayList<>();
  wrappedFilterArgs.forEach((filterName, filterArgs) -> {
    // Keys are formatted as SPIname[-id]; strip the "-id" suffix, if any,
    // before the SPI lookup.
    final int idSuffixPos = filterName.indexOf(FILTER_NAME_ID_SEPARATOR);
    final String spiName = (idSuffixPos == -1) ? filterName : filterName.substring(0, idSuffixPos);
    innerFilters.add(TokenFilterFactory.forName(spiName, filterArgs));
  });
  setInnerFilters(innerFilters);
}
 
Example 26
Source Project: lucene-solr   Source File: ConditionalTokenFilterFactory.java    License: Apache License 2.0 5 votes vote down vote up
/** Wraps {@code input} with the inner filters, applied conditionally. */
@Override
public TokenStream create(TokenStream input) {
  // With no inner filters there is nothing to wrap.
  if (innerFilters == null || innerFilters.isEmpty()) {
    return input;
  }
  Function<TokenStream, TokenStream> innerStream = ts -> {
    for (TokenFilterFactory innerFactory : innerFilters) {
      ts = innerFactory.create(ts);
    }
    return ts;
  };
  return create(input, innerStream);
}
 
Example 27
Source Project: lucene-solr   Source File: ConditionalTokenFilterFactory.java    License: Apache License 2.0 5 votes vote down vote up
/** Forwards the resource loader to loader-aware inner filters, then to subclasses. */
@Override
public final void inform(ResourceLoader loader) throws IOException {
  if (innerFilters != null) {
    for (TokenFilterFactory innerFactory : innerFilters) {
      if (innerFactory instanceof ResourceLoaderAware) {
        ((ResourceLoaderAware) innerFactory).inform(loader);
      }
    }
    // Let the concrete subclass load its own resources last.
    doInform(loader);
  }
}
 
Example 28
Source Project: lucene-solr   Source File: TestSynonymFilterFactory.java    License: Apache License 2.0 5 votes vote down vote up
/** checks for synonyms of "GB" in synonyms.txt */
private void checkSolrSynonyms(TokenFilterFactory factory) throws Exception {
  TokenStream ts = factory.create(whitespaceMockTokenizer(new StringReader("GB")));
  assertTrue(ts instanceof SynonymFilter);
  // All synonyms are stacked at the same position (increment 0).
  assertTokenStreamContents(ts,
      new String[] { "GB", "gib", "gigabyte", "gigabytes" },
      new int[] { 1, 0, 0, 0 });
}
 
Example 29
Source Project: lucene-solr   Source File: TestSynonymFilterFactory.java    License: Apache License 2.0 5 votes vote down vote up
/** checks for synonyms of "second" in synonyms-wordnet.txt */
private void checkWordnetSynonyms(TokenFilterFactory factory) throws Exception {
  TokenStream ts = factory.create(whitespaceMockTokenizer(new StringReader("second")));
  assertTrue(ts instanceof SynonymFilter);
  // All synonyms are stacked at the same position (increment 0).
  assertTokenStreamContents(ts,
      new String[] { "second", "2nd", "two" },
      new int[] { 1, 0, 0 });
}
 
Example 30
Source Project: lucene-solr   Source File: TestSynonymFilterFactory.java    License: Apache License 2.0 5 votes vote down vote up
/** Test that we can parse TokenizerFactory's arguments */
public void testTokenizerFactoryArguments() throws Exception {
  final String clazz = PatternTokenizerFactory.class.getName();

  // simple arg form
  TokenFilterFactory factory = tokenFilterFactory("Synonym", 
      "synonyms", "synonyms.txt", 
      "tokenizerFactory", clazz,
      "pattern", "(.*)",
      "group", "0");
  assertNotNull(factory);

  // "tokenizerFactory."-prefixed arg form
  factory = tokenFilterFactory("Synonym", 
      "synonyms", "synonyms.txt", 
      "tokenizerFactory", clazz,
      "tokenizerFactory.pattern", "(.*)",
      "tokenizerFactory.group", "0");
  assertNotNull(factory);

  // sanity check that sub-PatternTokenizerFactory fails w/o pattern
  expectThrows(Exception.class, () -> {
    tokenFilterFactory("Synonym", 
        "synonyms", "synonyms.txt", 
        "tokenizerFactory", clazz);
  });

  // sanity check that sub-PatternTokenizerFactory fails on unexpected args
  expectThrows(Exception.class, () -> {
    tokenFilterFactory("Synonym", 
        "synonyms", "synonyms.txt", 
        "tokenizerFactory", clazz,
        "tokenizerFactory.pattern", "(.*)",
        "tokenizerFactory.bogusbogusbogus", "bogus",
        "tokenizerFactory.group", "0");
  });
}