org.apache.lucene.analysis.util.ResourceLoader Java Examples

The following examples show how to use org.apache.lucene.analysis.util.ResourceLoader. The originating project, source file, and license are noted above each example.
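Before the individual examples, here is a minimal sketch of the pattern they all share: a component implements ResourceLoaderAware and receives a ResourceLoader in inform(...), which it uses to open resources by name. The class name and resource file below are hypothetical and not taken from any of the examples.

import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;

import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;

public class ExampleSettingsLoader implements ResourceLoaderAware {

    private static final String RESOURCE_PATH = "example-settings.properties"; // hypothetical resource name

    private Properties settings;

    @Override
    public void inform(ResourceLoader loader) throws IOException {
        // openResource resolves the name against whatever the loader wraps
        // (classpath, Solr conf directory, ...) and returns an InputStream.
        try (InputStream in = loader.openResource(RESOURCE_PATH)) {
            settings = new Properties();
            settings.load(in);
        }
    }

    public Properties getSettings() {
        return settings;
    }
}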
Example #1
Source File: EnglishLemmatisationFilterFactory.java    From jate with GNU Lesser General Public License v3.0
@Override
public void inform(ResourceLoader loader) throws IOException {
    if (lemmatiserResourceDir != null) {
        try {
            String path = ((SolrResourceLoader) loader).getConfigDir();
            if (!path.endsWith(File.separator)) {
                path = path + File.separator;
            }
            lemmatiser = new EngLemmatiser(path + lemmatiserResourceDir,
                    false, false);
        } catch (Exception e) {
            StringBuilder sb = new StringBuilder("Initiating ");
            sb.append(this.getClass().getName()).append(" failed due to:\n");
            sb.append(ExceptionUtils.getFullStackTrace(e));
            throw new IllegalArgumentException(sb.toString());
        }
    }
}
 
Example #2
Source File: ICUTokenizerFactory.java    From lucene-solr with Apache License 2.0
@Override
public void inform(ResourceLoader loader) throws IOException {
  assert tailored != null : "init must be called first!";
  if (tailored.isEmpty()) {
    config = new DefaultICUTokenizerConfig(cjkAsWords, myanmarAsWords);
  } else {
    final BreakIterator breakers[] = new BreakIterator[1 + UCharacter.getIntPropertyMaxValue(UProperty.SCRIPT)];
    for (Map.Entry<Integer,String> entry : tailored.entrySet()) {
      int code = entry.getKey();
      String resourcePath = entry.getValue();
      breakers[code] = parseRules(resourcePath, loader);
    }
    config = new DefaultICUTokenizerConfig(cjkAsWords, myanmarAsWords) {
      
      @Override
      public RuleBasedBreakIterator getBreakIterator(int script) {
        if (breakers[script] != null) {
          return (RuleBasedBreakIterator) breakers[script].clone();
        } else {
          return super.getBreakIterator(script);
        }
      }
      // TODO: we could also allow codes->types mapping
    };
  }
}
 
Example #3
Source File: KoreanTokenizerFactory.java    From lucene-solr with Apache License 2.0
@Override
public void inform(ResourceLoader loader) throws IOException {
  if (userDictionaryPath != null) {
    try (InputStream stream = loader.openResource(userDictionaryPath)) {
      String encoding = userDictionaryEncoding;
      if (encoding == null) {
        encoding = IOUtils.UTF_8;
      }
      CharsetDecoder decoder = Charset.forName(encoding).newDecoder()
        .onMalformedInput(CodingErrorAction.REPORT)
        .onUnmappableCharacter(CodingErrorAction.REPORT);
      Reader reader = new InputStreamReader(stream, decoder);
      userDictionary = UserDictionary.open(reader);
    }
  } else {
    userDictionary = null;
  }
}
 
Example #4
Source File: PhoneticFilterFactory.java    From lucene-solr with Apache License 2.0
@Override
public void inform(ResourceLoader loader) throws IOException {
  clazz = registry.get(name.toUpperCase(Locale.ROOT));
  if( clazz == null ) {
    clazz = resolveEncoder(name, loader);
  }

  if (maxCodeLength != null) {
    try {
      setMaxCodeLenMethod = clazz.getMethod("setMaxCodeLen", int.class);
    } catch (Exception e) {
      throw new IllegalArgumentException("Encoder " + name + " / " + clazz + " does not support " + MAX_CODE_LENGTH, e);
    }
  }

  getEncoder();//trigger initialization for potential problems to be thrown now
}
 
Example #5
Source File: SynonymGraphFilterFactory.java    From lucene-solr with Apache License 2.0
/**
 * Load synonyms with the given {@link SynonymMap.Parser} class.
 */
protected SynonymMap loadSynonyms(ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer) throws IOException, ParseException {
  CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder()
      .onMalformedInput(CodingErrorAction.REPORT)
      .onUnmappableCharacter(CodingErrorAction.REPORT);

  SynonymMap.Parser parser;
  Class<? extends SynonymMap.Parser> clazz = loader.findClass(cname, SynonymMap.Parser.class);
  try {
    parser = clazz.getConstructor(boolean.class, boolean.class, Analyzer.class).newInstance(dedup, expand, analyzer);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }

  List<String> files = splitFileNames(synonyms);
  for (String file : files) {
    decoder.reset();
    parser.parse(new InputStreamReader(loader.openResource(file), decoder));
  }
  return parser.build();
}
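For context, a rough sketch of how inform(...) might call loadSynonyms(...) with the built-in SolrSynonymParser. The field names (map) and the analyzer choice are assumptions for illustration, not taken from the source above.

@Override
public void inform(ResourceLoader loader) throws IOException {
  final Analyzer analyzer = new WhitespaceAnalyzer(); // assumed analyzer for tokenizing the synonyms file
  try {
    // dedup=true; the parser class is resolved by name via loader.findClass(...)
    map = loadSynonyms(loader, SolrSynonymParser.class.getName(), true, analyzer);
  } catch (ParseException e) {
    throw new IOException("Error parsing synonyms file:", e);
  }
}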
 
Example #6
Source File: SynonymFilterFactory.java    From lucene-solr with Apache License 2.0
/**
 * Load synonyms with the given {@link SynonymMap.Parser} class.
 */
protected SynonymMap loadSynonyms(ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer) throws IOException, ParseException {
  CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder()
      .onMalformedInput(CodingErrorAction.REPORT)
      .onUnmappableCharacter(CodingErrorAction.REPORT);

  SynonymMap.Parser parser;
  Class<? extends SynonymMap.Parser> clazz = loader.findClass(cname, SynonymMap.Parser.class);
  try {
    parser = clazz.getConstructor(boolean.class, boolean.class, Analyzer.class).newInstance(dedup, expand, analyzer);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }

  List<String> files = splitFileNames(synonyms);
  for (String file : files) {
    decoder.reset();
    try (final Reader isr = new InputStreamReader(loader.openResource(file), decoder)) {
      parser.parse(isr);
    }
  }
  return parser.build();
}
 
Example #7
Source File: HyphenationCompoundWordTokenFilterFactory.java    From lucene-solr with Apache License 2.0
@Override
public void inform(ResourceLoader loader) throws IOException {
  InputStream stream = null;
  try {
    if (dictFile != null) // the dictionary can be empty.
      dictionary = getWordSet(loader, dictFile, false);
    // TODO: Broken, because we cannot resolve real system id
    // ResourceLoader should also supply method like ClassLoader to get resource URL
    stream = loader.openResource(hypFile);
    final InputSource is = new InputSource(stream);
    is.setEncoding(encoding); // if it's null let xml parser decide
    is.setSystemId(hypFile);
    hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(is);
  } finally {
    IOUtils.closeWhileHandlingException(stream);
  }
}
 
Example #8
Source File: MappingCharFilterFactory.java    From lucene-solr with Apache License 2.0
@Override
public void inform(ResourceLoader loader) throws IOException {
  if (mapping != null) {
    List<String> wlist = null;
    List<String> files = splitFileNames(mapping);
    wlist = new ArrayList<>();
    for (String file : files) {
      List<String> lines = getLines(loader, file.trim());
      wlist.addAll(lines);
    }
    final NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
    parseRules(wlist, builder);
    normMap = builder.build();
    if (normMap.map == null) {
      // if the inner FST is null, it means it accepts nothing (e.g. the file is empty)
      // so just set the whole map to null
      normMap = null;
    }
  }
}
 
Example #9
Source File: AbstractQuerqyDismaxQParserPluginTest.java    From querqy with Apache License 2.0
@Test
public void testThatASimpleQuerqyQParserFactoryIsCreatedIfOnlyTheParserClassIsConfigured() throws Exception {

    NamedList<NamedList<String>> args = mock(NamedList.class);
    when(args.get("parser")).thenReturn(parserConfig);

    when(parserConfig.get("factory")).thenReturn(null);
    when(parserConfig.get("class")).thenReturn("querqy.parser.WhiteSpaceQuerqyParser");
    ResourceLoader resourceLoader = new ClasspathResourceLoader(getClass().getClassLoader());

    final SolrQuerqyParserFactory factory = plugin.loadSolrQuerqyParserFactory(resourceLoader, args);

    assertNotNull(factory);
    assertTrue(factory instanceof SimpleQuerqyQParserFactory);
    SimpleQuerqyQParserFactory qParserFactory = (SimpleQuerqyQParserFactory) factory;
    assertEquals(WhiteSpaceQuerqyParser.class, qParserFactory.querqyParserClass);

}
 
Example #10
Source File: OpenNLPTokenizerFactory.java    From jate with GNU Lesser General Public License v3.0
@Override
public void inform(ResourceLoader loader) throws IOException {
    if (sentenceModelFile != null) {
        sentenceOp = new SentenceDetectorME(new SentenceModel(
                loader.openResource(sentenceModelFile)));
    }

    if (tokenizerModelFile == null)
        throw new IOException("Parameter 'tokenizerModel' is required, but is invalid: " + tokenizerModelFile);
    tokenizerOp = new TokenizerME(new TokenizerModel(
            loader.openResource(tokenizerModelFile)
    ));

    if (parChunkingClass != null) {
        try {
            Class<?> c = Class.forName(parChunkingClass);
            Object o = c.getDeclaredConstructor().newInstance();
            paragraphChunker = (ParagraphChunker) o;
        } catch (Exception e) {
            throw new IOException(e);
        }
    }

}
 
Example #11
Source File: StemmerOverrideFilterFactory.java    From lucene-solr with Apache License 2.0
@Override
public void inform(ResourceLoader loader) throws IOException {
  if (dictionaryFiles != null) {
    List<String> files = splitFileNames(dictionaryFiles);
    if (files.size() > 0) {
      StemmerOverrideFilter.Builder builder = new StemmerOverrideFilter.Builder(ignoreCase);
      for (String file : files) {
        List<String> list = getLines(loader, file.trim());
        for (String line : list) {
          String[] mapping = line.split("\t", 2);
          builder.add(mapping[0], mapping[1]);
        }
      }
      dictionary = builder.build();
    }
  }
}
 
Example #12
Source File: TestKeepFilterFactory.java    From lucene-solr with Apache License 2.0
public void testInform() throws Exception {
  ResourceLoader loader = new ClasspathResourceLoader(getClass());
  assertTrue("loader is null and it shouldn't be", loader != null);
  KeepWordFilterFactory factory = (KeepWordFilterFactory) tokenFilterFactory("KeepWord",
      "words", "keep-1.txt",
      "ignoreCase", "true");
  CharArraySet words = factory.getWords();
  assertTrue("words is null and it shouldn't be", words != null);
  assertTrue("words Size: " + words.size() + " is not: " + 2, words.size() == 2);

  factory = (KeepWordFilterFactory) tokenFilterFactory("KeepWord",
      "words", "keep-1.txt, keep-2.txt",
      "ignoreCase", "true");
  words = factory.getWords();
  assertTrue("words is null and it shouldn't be", words != null);
  assertTrue("words Size: " + words.size() + " is not: " + 4, words.size() == 4);
}
 
Example #13
Source File: MtasConfiguration.java    From mtas with Apache License 2.0
/**
 * Read mtas tokenizer configurations.
 *
 * @param resourceLoader
 *          the resource loader
 * @param configFile
 *          the config file
 * @return the hash map
 * @throws IOException
 *           Signals that an I/O exception has occurred.
 */
public static HashMap<String, MtasConfiguration> readMtasTokenizerConfigurations(
    ResourceLoader resourceLoader, String configFile) throws IOException {
  HashMap<String, HashMap<String, String>> configs = readConfigurations(
      resourceLoader, configFile, MtasTokenizerFactory.class.getName());
  if (configs == null) {
    throw new IOException("no configurations");
  } else {
    HashMap<String, MtasConfiguration> result = new HashMap<String, MtasConfiguration>();
    for (Entry<String, HashMap<String, String>> entry : configs.entrySet()) {
      HashMap<String, String> config = entry.getValue();
      if (config.containsKey(TOKENIZER_CONFIGURATION_FILE)) {
        result.put(entry.getKey(), readConfiguration(resourceLoader
            .openResource(config.get(TOKENIZER_CONFIGURATION_FILE))));
      } else {
        throw new IOException("configuration " + entry.getKey() + " has no "
            + TOKENIZER_CONFIGURATION_FILE);
      }
    }
    return result;
  }
}
 
Example #14
Source File: DefaultQuerqyDismaxQParserTest.java    From querqy with Apache License 2.0
@Override
public RewriterFactory createFactory(final String rewriterId, NamedList<?> args, ResourceLoader resourceLoader) {
    return new RewriterFactory(rewriterId) {
        @Override
        public QueryRewriter createRewriter(ExpandedQuery input, SearchEngineRequestAdapter searchEngineRequestAdapter) {
            return query -> {
                query.setUserQuery(new MatchAllQuery());
                query.addFilterQuery(WhiteSpaceQuerqyParser.parseString("a"));
                return query;
            };
        }

        @Override
        public Set<Term> getGenerableTerms() {
            return Collections.emptySet();
        }
    };
}
 
Example #15
Source File: SafeXMLParsing.java    From lucene-solr with Apache License 2.0
/** Parses a config file from ResourceLoader. Xinclude and external entities are enabled, but cannot escape the resource loader. */
public static Document parseConfigXML(Logger log, ResourceLoader loader, String file) throws SAXException, IOException {
  try (InputStream in = loader.openResource(file)) {
    final DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
    dbf.setValidating(false);
    dbf.setNamespaceAware(true);
    trySetDOMFeature(dbf, XMLConstants.FEATURE_SECURE_PROCESSING, true);
    try {
      dbf.setXIncludeAware(true);
    } catch (UnsupportedOperationException e) {
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "XML parser doesn't support XInclude option", e);
    }
    
    final DocumentBuilder db = dbf.newDocumentBuilder();
    db.setEntityResolver(new SystemIdResolver(loader));
    db.setErrorHandler(new XMLErrorLogger(log));
    return db.parse(in, SystemIdResolver.createSystemIdFromResourceName(file));
  } catch (ParserConfigurationException pce) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "XML parser cannot be configured", pce);
  }
}
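A rough usage sketch (assumed, not part of the class above) of calling this helper from a component that has been handed a ResourceLoader; the log field and config file name are hypothetical:

try {
  Document config = SafeXMLParsing.parseConfigXML(log, loader, "my-component-config.xml");
  // read settings from the parsed DOM ...
} catch (SAXException e) {
  throw new IOException("Cannot parse configuration", e);
}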
 
Example #16
Source File: MtasTokenizerFactory.java    From mtas with Apache License 2.0
/**
 * Instantiates a new mtas tokenizer factory.
 *
 * @param args the args
 * @param resourceLoader the resource loader
 * @throws IOException Signals that an I/O exception has occurred.
 */
public MtasTokenizerFactory(Map<String, String> args,
    ResourceLoader resourceLoader) throws IOException {
  super(args);
  configFileArgument = get(args, ARGUMENT_CONFIGFILE);
  configArgument = get(args, ARGUMENT_CONFIG);
  analyzerArgument = get(args, ARGUMENT_PARSER);
  analyzerArgumentParserArgs = get(args, ARGUMENT_PARSER_ARGS);
  defaultArgument = get(args, ARGUMENT_DEFAULT);
  int numberOfArgs = 0;
  numberOfArgs = (configFileArgument == null) ? numberOfArgs : numberOfArgs + 1;
  numberOfArgs = (configArgument == null) ? numberOfArgs : numberOfArgs + 1;
  numberOfArgs = (analyzerArgument == null) ? numberOfArgs : numberOfArgs + 1;

  if (numberOfArgs > 1) {
    throw new IOException(this.getClass().getName() + " can't have multiple of "
        + ARGUMENT_CONFIGFILE + ", " + ARGUMENT_CONFIG + " and " + ARGUMENT_PARSER);
  } else if (configArgument == null && defaultArgument != null) {
    throw new IOException(this.getClass().getName() + " can't have "
        + ARGUMENT_DEFAULT + " without " + ARGUMENT_CONFIG);
  } else if (numberOfArgs == 0) {
    throw new IOException(this.getClass().getName() + " should have "
        + ARGUMENT_CONFIGFILE + " or " + ARGUMENT_CONFIG + " or " + ARGUMENT_PARSER);
  }
  init(resourceLoader);
}
 
Example #17
Source File: MMSegTokenizerFactory.java    From jstarcraft-nlp with Apache License 2.0
@Override
public void inform(ResourceLoader loader) {
    String dicPath = getOriginalArgs().get("dicPath");

    dic = Utils.getDict(dicPath, loader);

    logger.info("dic load... in={}", dic.getDicPath().toURI());
}
 
Example #18
Source File: Utils.java    From jstarcraft-nlp with Apache License 2.0
public static Dictionary getDict(String dicPath, ResourceLoader loader) {
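    // Note: the ResourceLoader argument is not used here; the dictionary is
    // resolved directly from the file-system path (or from the default location).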
    Dictionary dic = null;
    if (dicPath != null) {
        File f = new File(dicPath);
        dic = Dictionary.getInstance(f);
    } else {
        dic = Dictionary.getInstance();
    }
    return dic;
}
 
Example #19
Source File: IkTokenizerFactory.java    From jstarcraft-nlp with Apache License 2.0
/**
 * Callback that supplies the resources the factory needs (such as resource file paths);
 * implements {@link ResourceLoaderAware#inform(ResourceLoader)}. When this method is
 * called, the current instance is registered with the update task.
 *
 * @param resourceLoader the classpath resource loader instance
 * @throws IOException if an I/O error occurs while reading resources
 */
@Override
public void inform(ResourceLoader resourceLoader) throws IOException {
    System.out.println(String.format("IKTokenizerFactory " + this.hashCode() + " inform conf: %s", getConf()));
    this.loader = resourceLoader;
    update();
    if ((getConf() != null) && (!getConf().trim().isEmpty())) {
        UpdateThread.getInstance().register(this);
    }
}
 
Example #20
Source File: TestSuggestStopFilterFactory.java    From lucene-solr with Apache License 2.0
public void testInform() throws Exception {
  ResourceLoader loader = new ClasspathResourceLoader(getClass());
  assertTrue("loader is null and it shouldn't be", loader != null);
  SuggestStopFilterFactory factory = createFactory(
      "words", "stop-1.txt",
      "ignoreCase", "true");
  CharArraySet words = factory.getStopWords();
  assertTrue("words is null and it shouldn't be", words != null);
  assertTrue("words Size: " + words.size() + " is not: " + 2, words.size() == 2);
  assertTrue(factory.isIgnoreCase() + " does not equal: " + true, factory.isIgnoreCase() == true);

  factory = createFactory("words", "stop-1.txt, stop-2.txt",
      "ignoreCase", "true");
  words = factory.getStopWords();
  assertTrue("words is null and it shouldn't be", words != null);
  assertTrue("words Size: " + words.size() + " is not: " + 4, words.size() == 4);
  assertTrue(factory.isIgnoreCase() + " does not equal: " + true, factory.isIgnoreCase() == true);

  factory = createFactory("words", "stop-snowball.txt",
      "format", "snowball",
      "ignoreCase", "true");
  words = factory.getStopWords();
  assertEquals(8, words.size());
  assertTrue(words.contains("he"));
  assertTrue(words.contains("him"));
  assertTrue(words.contains("his"));
  assertTrue(words.contains("himself"));
  assertTrue(words.contains("she"));
  assertTrue(words.contains("her"));
  assertTrue(words.contains("hers"));
  assertTrue(words.contains("herself"));

  // defaults
  factory = createFactory();
  assertEquals(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET, factory.getStopWords());
  assertEquals(false, factory.isIgnoreCase());
}
 
Example #21
Source File: OpenNLPPOSTaggerFactory.java    From jate with GNU Lesser General Public License v3.0
@Override
public void inform(ResourceLoader loader) throws IOException {
    if (posTaggerModelFile != null && posTaggerClass != null) {
        try {
            tagger = InstanceCreator.createPOSTagger(posTaggerClass, loader.openResource(posTaggerModelFile));
        } catch (Exception e) {
            StringBuilder sb = new StringBuilder("Initiating ");
            sb.append(this.getClass().getName()).append(" failed due to:\n");
            sb.append(ExceptionUtils.getFullStackTrace(e));
            throw new IllegalArgumentException(sb.toString());
        }
    }
}
 
Example #22
Source File: OpenNLPTokenizerFactory.java    From lucene-solr with Apache License 2.0
@Override
public void inform(ResourceLoader loader) throws IOException {
  // register models in cache with file/resource names
  if (sentenceModelFile != null) {
    OpenNLPOpsFactory.getSentenceModel(sentenceModelFile, loader);
  }
  if (tokenizerModelFile != null) {
    OpenNLPOpsFactory.getTokenizerModel(tokenizerModelFile, loader);
  }
}
 
Example #23
Source File: OpenNLPChunkerFilterFactory.java    From lucene-solr with Apache License 2.0
@Override
public void inform(ResourceLoader loader) {
  try {
    // load and register read-only models in cache with file/resource names
    if (chunkerModelFile != null) {
      OpenNLPOpsFactory.getChunkerModel(chunkerModelFile, loader);
    }
  } catch (IOException e) {
    throw new IllegalArgumentException(e);
  }
}
 
Example #24
Source File: SimpleQuerqyQParserFactory.java    From querqy with Apache License 2.0
@Override
public void init(@SuppressWarnings("rawtypes") NamedList parserConfig, ResourceLoader loader) throws IOException,
      SolrException {

   String className = (String) parserConfig.get("class");
   if (className == null) {
      throw new IOException("Missing attribute 'class' in querqy parser configuration");
   }

   init(className, loader);

}
 
Example #25
Source File: OpenNLPOpsFactory.java    From lucene-solr with Apache License 2.0
public static LemmatizerModel getLemmatizerModel(String modelName, ResourceLoader loader) throws IOException {
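  // Look the model up in the cache first; on a miss, load it via the
  // ResourceLoader and cache it under its resource name.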
  LemmatizerModel model = lemmatizerModels.get(modelName);
  if (model == null) {
    try (InputStream resource = loader.openResource(modelName)) {
      model = new LemmatizerModel(resource);
    }
    lemmatizerModels.put(modelName, model);
  }
  return model;
}
 
Example #26
Source File: OpenNLPPOSFilterFactory.java    From lucene-solr with Apache License 2.0
@Override
public void inform(ResourceLoader loader) {
  try { // load and register the read-only model in cache with file/resource name
    OpenNLPOpsFactory.getPOSTaggerModel(posTaggerModelFile, loader);
  } catch (IOException e) {
    throw new IllegalArgumentException(e);
  }
}
 
Example #27
Source File: ICUTokenizerFactory.java    From lucene-solr with Apache License 2.0
private BreakIterator parseRules(String filename, ResourceLoader loader) throws IOException {
  StringBuilder rules = new StringBuilder();
  InputStream rulesStream = loader.openResource(filename);
  BufferedReader reader = new BufferedReader
      (IOUtils.getDecodingReader(rulesStream, StandardCharsets.UTF_8));
  String line = null;
  while ((line = reader.readLine()) != null) {
    if ( ! line.startsWith("#"))
      rules.append(line);
    rules.append('\n');
  }
  reader.close();
  return new RuleBasedBreakIterator(rules.toString());
}
 
Example #28
Source File: PhoneticFilterFactory.java    From lucene-solr with Apache License 2.0
private Class<? extends Encoder> resolveEncoder(String name, ResourceLoader loader) {
  String lookupName = name;
  if (name.indexOf('.') == -1) {
    lookupName = PACKAGE_CONTAINING_ENCODERS + name;
  }
  try {
    return loader.newInstance(lookupName, Encoder.class).getClass();
  } catch (RuntimeException e) {
    throw new IllegalArgumentException("Error loading encoder '" + name + "': must be full class name or one of " + registry.keySet(), e);
  }
}
 
Example #29
Source File: MorfologikFilterFactory.java    From lucene-solr with Apache License 2.0
@Override
public void inform(ResourceLoader loader) throws IOException {
  if (resourceName == null) {
    // Get the dictionary lazily, does not hold up memory.
    this.dictionary = new PolishStemmer().getDictionary();
  } else {
    try (InputStream dict = loader.openResource(resourceName);
         InputStream meta = loader.openResource(DictionaryMetadata.getExpectedMetadataFileName(resourceName))) {
      this.dictionary = Dictionary.read(dict, meta);
    }
  }
}
 
Example #30
Source File: TestMorfologikFilterFactory.java    From lucene-solr with Apache License 2.0
public void testExplicitDictionary() throws Exception {
  final ResourceLoader loader = new ClasspathResourceLoader(TestMorfologikFilterFactory.class);

  StringReader reader = new StringReader("inflected1 inflected2");
  Map<String,String> params = new HashMap<>();
  params.put(MorfologikFilterFactory.DICTIONARY_ATTRIBUTE, "custom-dictionary.dict");
  MorfologikFilterFactory factory = new MorfologikFilterFactory(params);
  factory.inform(loader);
  TokenStream stream = whitespaceMockTokenizer(reader);
  stream = factory.create(stream);
  assertTokenStreamContents(stream, new String[] {"lemma1", "lemma2"});
}