Java Code Examples for org.apache.lucene.analysis.util.ResourceLoader#openResource()

The following examples show how to use org.apache.lucene.analysis.util.ResourceLoader#openResource(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: OpenNLPOpsFactory.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the text of a lemmatizer dictionary resource.
 *
 * <p>The {@code lemmaDictionaries} cache is consulted first. On a miss the resource is read
 * through the loader as UTF-8, stored in the cache under {@code dictionaryFile} and returned.
 *
 * @param dictionaryFile name of the dictionary resource, also used as the cache key
 * @param loader loader used to open the resource
 * @return the full contents of the dictionary
 * @throws IOException if the resource cannot be opened or read
 */
public static String getLemmatizerDictionary(String dictionaryFile, ResourceLoader loader) throws IOException {
  final String cached = lemmaDictionaries.get(dictionaryFile);
  if (cached != null) {
    return cached;
  }

  String text;
  try (Reader in = new InputStreamReader(loader.openResource(dictionaryFile), StandardCharsets.UTF_8)) {
    final StringBuilder contents = new StringBuilder();
    final char[] buffer = new char[8092];
    int count;
    // Stop as soon as a read yields nothing (end of stream is reported as -1).
    while ((count = in.read(buffer, 0, buffer.length)) > 0) {
      contents.append(buffer, 0, count);
    }
    text = contents.toString();
    lemmaDictionaries.put(dictionaryFile, text);
  }
  return text;
}
 
Example 2
Source File: OpenNLPTokenizerFactory.java    From jate with GNU Lesser General Public License v3.0 6 votes vote down vote up
/**
 * Loads the OpenNLP models and the optional paragraph chunker configured for this factory.
 *
 * <p>The sentence model is optional; the tokenizer model is mandatory. Each model stream is
 * closed as soon as the model has been built from it, so no resource handle is leaked.
 *
 * @param loader loader used to open the model resources
 * @throws IOException if the tokenizer model is not configured, a model cannot be read, or the
 *     paragraph chunker class cannot be instantiated
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
    if (sentenceModelFile != null) {
        try (java.io.InputStream in = loader.openResource(sentenceModelFile)) {
            sentenceOp = new SentenceDetectorME(new SentenceModel(in));
        }
    }

    if (tokenizerModelFile == null) {
        throw new IOException("Parameter 'tokenizerModle' is required, but is invalid:"+tokenizerModelFile);
    }
    try (java.io.InputStream in = loader.openResource(tokenizerModelFile)) {
        tokenizerOp = new TokenizerME(new TokenizerModel(in));
    }

    if (parChunkingClass != null) {
        try {
            // Class.newInstance() is deprecated; go through the no-arg constructor instead.
            final Class<?> chunkerClass = Class.forName(parChunkingClass);
            paragraphChunker = (ParagraphChunker) chunkerClass.getDeclaredConstructor().newInstance();
        } catch (Exception e) {
            throw new IOException(e);
        }
    }
}
 
Example 3
Source File: SafeXMLParsing.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Parses a config file obtained from a ResourceLoader into a DOM document.
 * XInclude and external entities are enabled, but cannot escape the resource loader.
 *
 * @param log logger that receives parser warnings and errors
 * @param loader loader used to open {@code file} and to resolve referenced entities
 * @param file name of the resource to parse
 * @return the parsed document
 * @throws SAXException if the XML cannot be parsed
 * @throws IOException if the resource cannot be opened or read
 */
public static Document parseConfigXML(Logger log, ResourceLoader loader, String file) throws SAXException, IOException {
  try (InputStream xml = loader.openResource(file)) {
    final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setNamespaceAware(true);
    factory.setValidating(false);
    trySetDOMFeature(factory, XMLConstants.FEATURE_SECURE_PROCESSING, true);
    try {
      factory.setXIncludeAware(true);
    } catch (UnsupportedOperationException unsupported) {
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "XML parser doesn't support XInclude option", unsupported);
    }

    final DocumentBuilder builder = factory.newDocumentBuilder();
    builder.setErrorHandler(new XMLErrorLogger(log));
    builder.setEntityResolver(new SystemIdResolver(loader));
    final String systemId = SystemIdResolver.createSystemIdFromResourceName(file);
    return builder.parse(xml, systemId);
  } catch (ParserConfigurationException misconfigured) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "XML parser cannot be configured", misconfigured);
  }
}
 
Example 4
Source File: HyphenationCompoundWordTokenFilterFactory.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Loads the optional word dictionary and the hyphenation tree for this factory.
 *
 * @param loader loader used to open the dictionary and hyphenation resources
 * @throws IOException if a resource cannot be opened or read
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  InputStream hyphenationStream = null;
  try {
    // The dictionary is optional and may be left unset.
    if (dictFile != null) {
      dictionary = getWordSet(loader, dictFile, false);
    }
    // TODO: Broken, because we cannot resolve real system id
    // ResourceLoader should also supply method like ClassLoader to get resource URL
    hyphenationStream = loader.openResource(hypFile);
    final InputSource source = new InputSource(hyphenationStream);
    source.setSystemId(hypFile);
    // A null encoding leaves the choice to the XML parser.
    source.setEncoding(encoding);
    hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(source);
  } finally {
    IOUtils.closeWhileHandlingException(hyphenationStream);
  }
}
 
Example 5
Source File: SynonymFilterFactory.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Loads synonyms using the {@link SynonymMap.Parser} implementation named by {@code cname}.
 *
 * <p>The parser class is instantiated through its {@code (boolean, boolean, Analyzer)}
 * constructor. Every file listed in {@code synonyms} is then decoded as strict UTF-8
 * (malformed or unmappable input is reported as an error) and fed to the parser.
 *
 * @param loader loader used to find the parser class and open the synonym files
 * @param cname name of the parser class
 * @param dedup first constructor argument passed to the parser
 * @param analyzer analyzer handed to the parser
 * @return the synonym map built by the parser
 * @throws IOException if a synonym file cannot be opened or read
 * @throws ParseException if a synonym file cannot be parsed
 */
protected SynonymMap loadSynonyms(ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer) throws IOException, ParseException {
  final Class<? extends SynonymMap.Parser> parserClass = loader.findClass(cname, SynonymMap.Parser.class);
  SynonymMap.Parser synonymParser;
  try {
    synonymParser = parserClass
        .getConstructor(boolean.class, boolean.class, Analyzer.class)
        .newInstance(dedup, expand, analyzer);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }

  final CharsetDecoder strictUtf8 = StandardCharsets.UTF_8.newDecoder()
      .onMalformedInput(CodingErrorAction.REPORT)
      .onUnmappableCharacter(CodingErrorAction.REPORT);

  for (String synonymFile : splitFileNames(synonyms)) {
    // The decoder is reused across files, so clear its state before each one.
    strictUtf8.reset();
    try (final Reader in = new InputStreamReader(loader.openResource(synonymFile), strictUtf8)) {
      synonymParser.parse(in);
    }
  }
  return synonymParser.build();
}
 
Example 6
Source File: KoreanTokenizerFactory.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Loads the user dictionary, if one is configured.
 *
 * <p>With no {@code userDictionaryPath} the dictionary is cleared. Otherwise the resource is
 * decoded with {@code userDictionaryEncoding} (falling back to {@code IOUtils.UTF_8}) in strict
 * mode, so malformed or unmappable input is reported as an error.
 *
 * @param loader loader used to open the user dictionary resource
 * @throws IOException if the resource cannot be opened, decoded or read
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  if (userDictionaryPath == null) {
    userDictionary = null;
    return;
  }

  try (InputStream in = loader.openResource(userDictionaryPath)) {
    final String charsetName = userDictionaryEncoding != null ? userDictionaryEncoding : IOUtils.UTF_8;
    final CharsetDecoder strictDecoder = Charset.forName(charsetName).newDecoder()
      .onMalformedInput(CodingErrorAction.REPORT)
      .onUnmappableCharacter(CodingErrorAction.REPORT);
    userDictionary = UserDictionary.open(new InputStreamReader(in, strictDecoder));
  }
}
 
Example 7
Source File: JapaneseTokenizerFactory.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Reads the configured user dictionary, or clears it when no path is set.
 *
 * <p>The resource is decoded using {@code userDictionaryEncoding}, defaulting to
 * {@code IOUtils.UTF_8}; malformed or unmappable bytes cause an error rather than being replaced.
 *
 * @param loader loader used to open the user dictionary resource
 * @throws IOException if the resource cannot be opened, decoded or read
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  final boolean hasUserDictionary = userDictionaryPath != null;
  if (!hasUserDictionary) {
    userDictionary = null;
  } else {
    try (InputStream dictionaryStream = loader.openResource(userDictionaryPath)) {
      String charsetName = userDictionaryEncoding;
      if (charsetName == null) {
        charsetName = IOUtils.UTF_8;
      }
      final CharsetDecoder reportingDecoder = Charset.forName(charsetName).newDecoder();
      reportingDecoder.onMalformedInput(CodingErrorAction.REPORT);
      reportingDecoder.onUnmappableCharacter(CodingErrorAction.REPORT);
      final Reader dictionaryReader = new InputStreamReader(dictionaryStream, reportingDecoder);
      userDictionary = UserDictionary.open(dictionaryReader);
    }
  }
}
 
Example 8
Source File: MorfologikFilterFactory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Selects the dictionary used by this factory.
 *
 * <p>If {@code resourceName} is set, the dictionary and its expected metadata file are read
 * through the loader; otherwise the dictionary of a {@code PolishStemmer} is used.
 *
 * @param loader loader used to open the dictionary and metadata resources
 * @throws IOException if a resource cannot be opened or read
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  if (resourceName != null) {
    try (InputStream dictStream = loader.openResource(resourceName);
         InputStream metaStream = loader.openResource(DictionaryMetadata.getExpectedMetadataFileName(resourceName))) {
      this.dictionary = Dictionary.read(dictStream, metaStream);
    }
    return;
  }
  // Get the dictionary lazily, does not hold up memory.
  this.dictionary = new PolishStemmer().getDictionary();
}
 
Example 9
Source File: ICUTokenizerFactory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a break iterator from a rules resource.
 *
 * <p>The resource is read as UTF-8 line by line. Lines starting with {@code #} are dropped, but
 * their line break is kept so the rule text keeps the same line count as the resource.
 * The stream and reader are closed even if reading fails part-way through.
 *
 * @param filename name of the rules resource
 * @param loader loader used to open the resource
 * @return a {@code RuleBasedBreakIterator} compiled from the remaining rule text
 * @throws IOException if the resource cannot be opened or read
 */
private BreakIterator parseRules(String filename, ResourceLoader loader) throws IOException {
  StringBuilder rules = new StringBuilder();
  try (InputStream rulesStream = loader.openResource(filename);
       BufferedReader reader = new BufferedReader(
           IOUtils.getDecodingReader(rulesStream, StandardCharsets.UTF_8))) {
    String line;
    while ((line = reader.readLine()) != null) {
      if (!line.startsWith("#")) {
        rules.append(line);
      }
      rules.append('\n');
    }
  }
  return new RuleBasedBreakIterator(rules.toString());
}
 
Example 10
Source File: HunspellStemFilterFactory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Loads the Hunspell dictionary from the configured affix file and dictionary files.
 *
 * <p>{@code dictionaryFiles} is a comma-separated list of resource names. All streams are
 * closed when loading finishes, and the temporary directory used while building the
 * dictionary is removed afterwards.
 *
 * @param loader loader used to open the affix and dictionary resources
 * @throws IOException if a resource cannot be opened or read, or the Hunspell data cannot be
 *     parsed
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  final String[] dicts = dictionaryFiles.split(",");

  InputStream affix = null;
  final List<InputStream> dictionaries = new ArrayList<>(dicts.length);

  try {
    for (String file : dicts) {
      dictionaries.add(loader.openResource(file));
    }
    affix = loader.openResource(affixFile);

    Path tempPath = Files.createTempDirectory(Dictionary.getDefaultTempDir(), "Hunspell");
    try (Directory tempDir = FSDirectory.open(tempPath)) {
      this.dictionary = new Dictionary(tempDir, "hunspell", affix, dictionaries, ignoreCase);
    } finally {
      IOUtils.rm(tempPath);
    }
  } catch (ParseException e) {
    // Report the configured file names; the stream objects themselves carry no useful information.
    throw new IOException("Unable to load hunspell data! [dictionary=" + dictionaryFiles + ",affix=" + affixFile + "]", e);
  } finally {
    IOUtils.closeWhileHandlingException(affix);
    IOUtils.closeWhileHandlingException(dictionaries);
  }
}
 
Example 11
Source File: ICUCollationField.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a RuleBasedCollator from custom rules stored in a resource.
 * The whole resource is read as UTF-8 and used as the rule text; comments are not
 * supported because {@code #} may legitimately appear in the rules.
 *
 * @param fileName name of the rules resource
 * @param loader loader used to open the resource
 * @return a collator built from the rules
 * @throws RuntimeException wrapping any I/O failure or invalid-rules error
 */
static Collator createFromRules(String fileName, ResourceLoader loader) {
  InputStream rulesStream = null;
  try {
    rulesStream = loader.openResource(fileName);
    return new RuleBasedCollator(IOUtils.toString(rulesStream, "UTF-8"));
  } catch (Exception failure) {
    // Either the resource could not be read or the rules were rejected.
    throw new RuntimeException(failure);
  } finally {
    IOUtils.closeQuietly(rulesStream);
  }
}
 
Example 12
Source File: OpenNLPOpsFactory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the lemmatizer model registered under {@code modelName}.
 *
 * <p>The {@code lemmatizerModels} cache is consulted first. On a miss the model is read from
 * the resource of the same name, stored in the cache and returned.
 *
 * @param modelName name of the model resource, also used as the cache key
 * @param loader loader used to open the model resource
 * @return the cached or newly loaded model
 * @throws IOException if the resource cannot be opened or read
 */
public static LemmatizerModel getLemmatizerModel(String modelName, ResourceLoader loader) throws IOException {
  LemmatizerModel lemmatizer = lemmatizerModels.get(modelName);
  if (lemmatizer != null) {
    return lemmatizer;
  }
  try (InputStream modelStream = loader.openResource(modelName)) {
    lemmatizer = new LemmatizerModel(modelStream);
  }
  lemmatizerModels.put(modelName, lemmatizer);
  return lemmatizer;
}
 
Example 13
Source File: TransformerProvider.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Returns a Templates object for the given filename.
 *
 * <p>The stylesheet is read from {@code "xslt/" + filename} through the loader and compiled.
 * On success the result is stored as the last compiled template together with a new cache
 * expiry. On failure the last filename stays cleared.
 *
 * @param loader loader used to open the stylesheet and resolve URIs it references
 * @param filename stylesheet name, relative to the {@code xslt/} resource directory
 * @param cacheLifetimeSeconds lifetime of the cached result, in seconds
 * @return the compiled templates
 * @throws IOException if the stylesheet cannot be opened or compiled
 */
private Templates getTemplates(ResourceLoader loader, String filename,int cacheLifetimeSeconds) throws IOException {

  Templates result = null;
  // Invalidate the cached entry first so a failed compile never leaves a stale name behind.
  lastFilename = null;
  try {
    if(log.isDebugEnabled()) {
      log.debug("compiling XSLT templates:{}", filename);
    }
    final String fn = "xslt/" + filename;
    final TransformerFactory tFactory = TransformerFactory.newInstance();
    tFactory.setURIResolver(new SystemIdResolver(loader).asURIResolver());
    tFactory.setErrorListener(xmllog);
    final StreamSource src = new StreamSource(loader.openResource(fn),
      SystemIdResolver.createSystemIdFromResourceName(fn));
    try {
      result = tFactory.newTemplates(src);
    } finally {
      // some XML parsers are broken and don't close the byte stream (but they should according to spec)
      IOUtils.closeQuietly(src.getInputStream());
    }
  } catch (Exception e) {
    // The format string needs a placeholder; without one the extra argument is silently dropped.
    log.error("{} newTemplates", getClass().getName(), e);
    throw new IOException("Unable to initialize Templates '" + filename + "'", e);
  }

  lastFilename = filename;
  lastTemplates = result;
  cacheExpiresTimeout = new TimeOut(cacheLifetimeSeconds, TimeUnit.SECONDS, TimeSource.NANO_TIME);

  return result;
}
 
Example 14
Source File: CollationField.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a RuleBasedCollator from custom rules read from a resource.
 * The resource is read in full as UTF-8. It cannot contain comments, because
 * {@code #} may be part of the rules themselves.
 *
 * @param fileName name of the rules resource
 * @param loader loader used to open the resource
 * @return a collator built from the rules
 * @throws RuntimeException wrapping an I/O failure or a rule parse error
 */
private Collator createFromRules(String fileName, ResourceLoader loader) {
  InputStream rulesIn = null;
  try {
    rulesIn = loader.openResource(fileName);
    final String ruleText = IOUtils.toString(rulesIn, "UTF-8");
    final Collator collator = new RuleBasedCollator(ruleText);
    return collator;
  } catch (IOException | ParseException problem) {
    // Reading failed or the rules were invalid.
    throw new RuntimeException(problem);
  } finally {
    IOUtils.closeQuietly(rulesIn);
  }
}
 
Example 15
Source File: ReplaceRewriterFactory.java    From querqy with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@code querqy.rewrite.contrib.ReplaceRewriterFactory} from the given arguments.
 *
 * <p>Arguments read: {@code rules} (required resource name, read as UTF-8), {@code ignoreCase},
 * {@code inputDelimiter} and {@code querqyParser} (class name of a parser factory). A missing or
 * blank {@code querqyParser} falls back to {@code WhiteSpaceQuerqyParserFactory}.
 *
 * @param id id of the rewriter
 * @param args configuration arguments
 * @param resourceLoader loader used to open the rules and instantiate the parser factory
 * @return the configured rewriter factory
 * @throws IOException if the rules resource cannot be opened or read
 * @throws IllegalArgumentException if {@code rules} is not configured
 */
@Override
public RewriterFactory createFactory(String id, NamedList<?> args, ResourceLoader resourceLoader) throws IOException {

    final String rulesResourceName = (String) args.get("rules");
    if (rulesResourceName == null) {
        throw new IllegalArgumentException("Property 'rules' not configured");
    }

    final InputStreamReader reader = new InputStreamReader(
            resourceLoader.openResource(rulesResourceName), StandardCharsets.UTF_8);

    final Boolean ignoreCase = args.getBooleanArg("ignoreCase");
    final String inputDelimiter = (String) args.get("inputDelimiter");

    // querqy parser for queries that are part of the instructions in the rules
    QuerqyParserFactory parserFactory = null;
    final String configuredParser = (String) args.get("querqyParser");
    if (configuredParser != null) {
        final String parserClassName = configuredParser.trim();
        if (parserClassName.length() > 0) {
            parserFactory = resourceLoader.newInstance(parserClassName, QuerqyParserFactory.class);
        }
    }
    if (parserFactory == null) {
        parserFactory = new WhiteSpaceQuerqyParserFactory();
    }

    return new querqy.rewrite.contrib.ReplaceRewriterFactory(id, reader,
            ignoreCase != null ? ignoreCase : DEFAULT_IGNORE_CASE,
            inputDelimiter != null ? inputDelimiter : DEFAULT_INPUT_DELIMITER,
            parserFactory.createParser());
}
 
Example 16
Source File: Word2VecFilterFactory.java    From word2vec-query-expansion with Apache License 2.0 5 votes vote down vote up
/**
 * Loads the word vectors and creates the query expander.
 *
 * <p>{@code vectorsFile} is first tried as a file-system path; if no such file exists it is
 * opened as a resource through the loader. The stream is closed once the vectors have been
 * constructed.
 *
 * @param loader loader used when {@code vectorsFile} is not an existing file
 * @throws IOException if the vectors cannot be opened or read
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
    File vectorsFile = new File(this.vectorsFile);
    final Vectors vectors;
    // NOTE(review): assumes Vectors reads the stream fully in its constructor - confirm.
    try (java.io.InputStream in = vectorsFile.exists()
            ? new FileInputStream(vectorsFile)
            : loader.openResource(this.vectorsFile)) {
        vectors = new Vectors(in);
    }
    this.expander = new QueryExpander(vectors, true, termSelectionStrategy);
}
 
Example 17
Source File: SimpleCommonRulesRewriterFactory.java    From querqy with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a {@code querqy.rewrite.commonrules.SimpleCommonRulesRewriterFactory} from the given
 * arguments.
 *
 * <p>Arguments read: {@code rules} (required resource name, read as UTF-8),
 * {@code rules.selectionStrategy} (optional list of {@code strategy} entries, each with a
 * {@code class} and an {@code id}), {@code ignoreCase} (treated as true when absent) and
 * {@code querqyParser} (class name of a parser factory; a missing or blank value falls back to
 * {@code WhiteSpaceQuerqyParserFactory}).
 *
 * @param id id of the rewriter
 * @param args configuration arguments
 * @param resourceLoader loader used to open the rules and instantiate configured classes
 * @return the configured rewriter factory
 * @throws IOException if the rules cannot be read or two selection strategies share an id
 * @throws IllegalArgumentException if {@code rules} is not configured
 */
@Override
public RewriterFactory createFactory(final String id, final NamedList<?> args,
                                     final ResourceLoader resourceLoader) throws IOException {

    final String rulesResourceName = (String) args.get("rules");
    if (rulesResourceName == null) {
        throw new IllegalArgumentException("Property 'rules' not configured");
    }

    final Map<String, SelectionStrategyFactory> selectionStrategyFactories = new HashMap<>();

    final NamedList<?> selectionStrategyConfiguration = (NamedList<?>) args.get("rules.selectionStrategy");

    if (selectionStrategyConfiguration != null) {

        @SuppressWarnings("unchecked")
        final List<NamedList<?>> strategyConfigs = (List<NamedList<?>>) selectionStrategyConfiguration
                .getAll("strategy");

        if (strategyConfigs != null) {
            for (NamedList<?> config : strategyConfigs) {
                @SuppressWarnings("unchecked")
                final FactoryAdapter<SelectionStrategyFactory> factory = resourceLoader
                        .newInstance((String) config.get("class"), FactoryAdapter.class);
                final String strategyId = (String) config.get("id");
                if (selectionStrategyFactories.put(strategyId,
                        factory.createFactory(strategyId, config, resourceLoader)) != null) {
                    // Report the duplicated strategy id, not the id of the enclosing rewriter.
                    throw new IOException("Duplicate id in rules.selectionStrategy: " + strategyId);
                }
            }
        }
    }


    final Boolean ignoreCase = args.getBooleanArg("ignoreCase");

    // querqy parser for queries that are part of the instructions in the
    // rules
    String rulesQuerqyParser = (String) args.get("querqyParser");
    QuerqyParserFactory querqyParser = null;
    if (rulesQuerqyParser != null) {
        rulesQuerqyParser = rulesQuerqyParser.trim();
        if (rulesQuerqyParser.length() > 0) {
            querqyParser = resourceLoader.newInstance(rulesQuerqyParser, QuerqyParserFactory.class);
        }
    }

    if (querqyParser == null) {
        querqyParser = new WhiteSpaceQuerqyParserFactory();
    }

    return new querqy.rewrite.commonrules.SimpleCommonRulesRewriterFactory(id,
            new InputStreamReader(resourceLoader.openResource(rulesResourceName), "UTF-8"), querqyParser,
            ignoreCase == null || ignoreCase, selectionStrategyFactories, DEFAULT_SELECTION_STRATEGY_FACTORY);
}
 
Example 18
Source File: SynonymFormatCommonRulesRewriterFactory.java    From querqy with Apache License 2.0 4 votes vote down vote up
/**
 * Reads boost rules from a resource and adds them to the builder.
 *
 * <p>Each non-blank line has the form {@code inputs => term, term, ...}. Anything from the
 * first {@code #} onwards is treated as a comment. For every comma-separated term that yields a
 * query with at least one clause, one {@link BoostInstruction} rule is added per input, each
 * with a consecutive ordinal starting at 1. Lines without {@code =>}, without inputs or
 * without instructions are skipped.
 *
 * <p>The resource is decoded as UTF-8 rather than with the platform default charset, so the
 * result does not depend on the JVM's locale settings.
 *
 * @param builder builder that receives the rules
 * @param direction boost direction applied to every instruction
 * @param boost boost factor applied to every instruction
 * @param resourceLoader loader used to open the resource
 * @param resourceName name of the rules resource
 * @throws IOException if the resource cannot be opened or read
 */
void addBoostInstructions(RulesCollectionBuilder builder, BoostDirection direction, float boost,
                           ResourceLoader resourceLoader, String resourceName) throws IOException {

   try (
         BufferedReader reader = new BufferedReader(new InputStreamReader(
               resourceLoader.openResource(resourceName), java.nio.charset.StandardCharsets.UTF_8))) {

      String line;

      int ord = 0;

      while ((line = reader.readLine()) != null) {

         line = line.trim();
         if (line.isEmpty()) {
            continue;
         }

         // Strip a trailing comment; a line consisting of a lone '#' is skipped outright.
         int pos = line.indexOf("#");
         if (pos > -1) {
            if (line.length() == 1) {
               continue;
            }
            line = line.substring(0, pos).trim();
         }

         // A rule needs a non-empty left-hand side and something after the arrow.
         pos = line.indexOf("=>");
         if (pos <= 0 || pos >= line.length() - 2) {
            continue;
         }

         String instructionStr = line.substring(pos + 2).trim();
         if (instructionStr.isEmpty()) {
            continue;
         }

         String inputsStr = line.substring(0, pos).trim();
         List<Input> inputs = makeInputs(inputsStr);
         if (inputs.isEmpty()) {
            continue;
         }

         for (String t : instructionStr.split(",")) {
            t = t.trim();
            if (t.isEmpty()) {
               continue;
            }
            Query query = termsToQuery(t);
            if (query.getClauses().isEmpty()) {
               continue;
            }
            for (Input input : inputs) {
               BoostInstruction bi = new BoostInstruction(query, direction, boost);
               ord++;
               builder.addRule(input, new Instructions(ord, Integer.toString(ord),
                       Collections.singletonList(bi)));
            }
         }
      }
   }
}