org.apache.lucene.analysis.util.ResourceLoader Java Examples

The following examples show how to use org.apache.lucene.analysis.util.ResourceLoader. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: SynonymFilterFactory.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@link SynonymMap} by instantiating the {@link SynonymMap.Parser}
 * implementation named by {@code cname} and feeding it every configured synonyms file.
 * <p>
 * Each file is decoded as strict UTF-8: malformed or unmappable input is reported
 * instead of being silently replaced.
 */
protected SynonymMap loadSynonyms(ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer) throws IOException, ParseException {
  final Class<? extends SynonymMap.Parser> parserClass = loader.findClass(cname, SynonymMap.Parser.class);

  // The parser must expose a (boolean, boolean, Analyzer) constructor; any
  // reflective failure is surfaced as an unchecked exception.
  final SynonymMap.Parser synonymParser;
  try {
    synonymParser = parserClass
        .getConstructor(boolean.class, boolean.class, Analyzer.class)
        .newInstance(dedup, expand, analyzer);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }

  final CharsetDecoder strictUtf8 = StandardCharsets.UTF_8.newDecoder()
      .onMalformedInput(CodingErrorAction.REPORT)
      .onUnmappableCharacter(CodingErrorAction.REPORT);

  for (String fileName : splitFileNames(synonyms)) {
    // The decoder is shared between files, so clear its state before each one.
    strictUtf8.reset();
    try (final Reader fileReader = new InputStreamReader(loader.openResource(fileName), strictUtf8)) {
      synonymParser.parse(fileReader);
    }
  }
  return synonymParser.build();
}
 
Example #2
Source File: PhoneticFilterFactory.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Resolves the encoder class for the configured name, validates that it can
 * accept a maximum code length when one was requested, and creates an encoder
 * right away so configuration problems are reported immediately.
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  // Registry lookup uses the name upper-cased with the root locale; anything
  // not found there is resolved through the resource loader.
  clazz = registry.get(name.toUpperCase(Locale.ROOT));
  if (clazz == null) {
    clazz = resolveEncoder(name, loader);
  }

  if (maxCodeLength != null) {
    try {
      setMaxCodeLenMethod = clazz.getMethod("setMaxCodeLen", int.class);
    } catch (Exception e) {
      final String unsupported = "Encoder " + name + " / " + clazz + " does not support " + MAX_CODE_LENGTH;
      throw new IllegalArgumentException(unsupported, e);
    }
  }

  // Trigger initialization now so that potential problems are thrown here.
  getEncoder();
}
 
Example #3
Source File: KoreanTokenizerFactory.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Loads the user dictionary from {@code userDictionaryPath}, or clears it when
 * no path is configured. The dictionary is decoded strictly (malformed and
 * unmappable input is reported) using the configured encoding, falling back to
 * UTF-8 when none is set.
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  if (userDictionaryPath == null) {
    userDictionary = null;
    return;
  }

  try (InputStream dictStream = loader.openResource(userDictionaryPath)) {
    final String charsetName = (userDictionaryEncoding != null) ? userDictionaryEncoding : IOUtils.UTF_8;
    final CharsetDecoder strictDecoder = Charset.forName(charsetName).newDecoder()
        .onMalformedInput(CodingErrorAction.REPORT)
        .onUnmappableCharacter(CodingErrorAction.REPORT);
    userDictionary = UserDictionary.open(new InputStreamReader(dictStream, strictDecoder));
  }
}
 
Example #4
Source File: HyphenationCompoundWordTokenFilterFactory.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Loads the optional word dictionary and the hyphenation grammar through the
 * given resource loader. The hyphenation stream is always closed afterwards,
 * with close failures suppressed by {@code IOUtils.closeWhileHandlingException}.
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  InputStream hyphenationStream = null;
  try {
    // The word dictionary is optional; load it only when a file was configured.
    if (dictFile != null) {
      dictionary = getWordSet(loader, dictFile, false);
    }
    // TODO: Broken, because we cannot resolve real system id
    // ResourceLoader should also supply method like ClassLoader to get resource URL
    hyphenationStream = loader.openResource(hypFile);
    final InputSource hyphenationSource = new InputSource(hyphenationStream);
    // A null encoding leaves the decision to the XML parser.
    hyphenationSource.setEncoding(encoding);
    hyphenationSource.setSystemId(hypFile);
    hyphenator = HyphenationCompoundWordTokenFilter.getHyphenationTree(hyphenationSource);
  } finally {
    IOUtils.closeWhileHandlingException(hyphenationStream);
  }
}
 
Example #5
Source File: ICUTokenizerFactory.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the tokenizer configuration. Without tailored rules the default
 * configuration is used as-is; otherwise each tailored rule file is parsed into
 * a break iterator that overrides the default for its script code.
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  assert tailored != null : "init must be called first!";
  if (tailored.isEmpty()) {
    config = new DefaultICUTokenizerConfig(cjkAsWords, myanmarAsWords);
    return;
  }

  // One slot per script code, indexed directly by the code.
  final int slotCount = 1 + UCharacter.getIntPropertyMaxValue(UProperty.SCRIPT);
  final BreakIterator[] tailoredBreakers = new BreakIterator[slotCount];
  for (Map.Entry<Integer,String> rule : tailored.entrySet()) {
    tailoredBreakers[rule.getKey()] = parseRules(rule.getValue(), loader);
  }

  config = new DefaultICUTokenizerConfig(cjkAsWords, myanmarAsWords) {

    @Override
    public RuleBasedBreakIterator getBreakIterator(int script) {
      final BreakIterator tailoredBreaker = tailoredBreakers[script];
      if (tailoredBreaker == null) {
        return super.getBreakIterator(script);
      }
      // Hand out a clone so the stored iterator is never shared with callers.
      return (RuleBasedBreakIterator) tailoredBreaker.clone();
    }
    // TODO: we could also allow codes->types mapping
  };
}
 
Example #6
Source File: SafeXMLParsing.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Parses a config file obtained from the given ResourceLoader into a DOM document.
 * XInclude is enabled and entities are resolved through a {@code SystemIdResolver}
 * bound to the same loader, so they cannot escape the resource loader.
 * Parse errors are reported through an {@code XMLErrorLogger} writing to {@code log}.
 */
public static Document parseConfigXML(Logger log, ResourceLoader loader, String file) throws SAXException, IOException {
  try (InputStream configStream = loader.openResource(file)) {
    final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setValidating(false);
    factory.setNamespaceAware(true);
    trySetDOMFeature(factory, XMLConstants.FEATURE_SECURE_PROCESSING, true);
    try {
      factory.setXIncludeAware(true);
    } catch (UnsupportedOperationException e) {
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "XML parser doesn't support XInclude option", e);
    }

    final DocumentBuilder builder = factory.newDocumentBuilder();
    builder.setEntityResolver(new SystemIdResolver(loader));
    builder.setErrorHandler(new XMLErrorLogger(log));

    final String systemId = SystemIdResolver.createSystemIdFromResourceName(file);
    return builder.parse(configStream, systemId);
  } catch (ParserConfigurationException pce) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "XML parser cannot be configured", pce);
  }
}
 
Example #7
Source File: TestKeepFilterFactory.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that the KeepWord factory loads its word set from one file and from a
 * comma-separated list of files.
 */
public void testInform() throws Exception {
  // Note: the loader is only null-checked here; it is not handed to tokenFilterFactory.
  ResourceLoader loader = new ClasspathResourceLoader(getClass());
  assertTrue("loader is null and it shouldn't be", loader != null);

  // Single word file.
  KeepWordFilterFactory singleFile = (KeepWordFilterFactory) tokenFilterFactory("KeepWord",
      "words", "keep-1.txt",
      "ignoreCase", "true");
  CharArraySet singleFileWords = singleFile.getWords();
  assertTrue("words is null and it shouldn't be", singleFileWords != null);
  assertTrue("words Size: " + singleFileWords.size() + " is not: " + 2, singleFileWords.size() == 2);

  // Two word files given as one comma-separated argument.
  KeepWordFilterFactory twoFiles = (KeepWordFilterFactory) tokenFilterFactory("KeepWord",
      "words", "keep-1.txt, keep-2.txt",
      "ignoreCase", "true");
  CharArraySet twoFileWords = twoFiles.getWords();
  assertTrue("words is null and it shouldn't be", twoFileWords != null);
  assertTrue("words Size: " + twoFileWords.size() + " is not: " + 4, twoFileWords.size() == 4);
}
 
Example #8
Source File: MappingCharFilterFactory.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Reads every configured mapping file, parses the collected rule lines and
 * builds the normalization map. Does nothing when no mapping is configured.
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  if (mapping == null) {
    return;
  }

  // Gather the rule lines of all mapping files into one list.
  final List<String> ruleLines = new ArrayList<>();
  for (String fileName : splitFileNames(mapping)) {
    ruleLines.addAll(getLines(loader, fileName.trim()));
  }

  final NormalizeCharMap.Builder mapBuilder = new NormalizeCharMap.Builder();
  parseRules(ruleLines, mapBuilder);
  final NormalizeCharMap builtMap = mapBuilder.build();

  // if the inner FST is null, it means it accepts nothing (e.g. the file is empty)
  // so just set the whole map to null
  normMap = (builtMap.map == null) ? null : builtMap;
}
 
Example #9
Source File: DefaultQuerqyDismaxQParserTest.java    From querqy with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a stub rewriter factory for tests: its rewriter replaces the user
 * query with a match-all query and adds a filter query parsed from "a".
 * The factory reports no generable terms.
 */
@Override
public RewriterFactory createFactory(final String rewriterId, NamedList<?> args, ResourceLoader resourceLoader) {
    return new RewriterFactory(rewriterId) {

        @Override
        public Set<Term> getGenerableTerms() {
            return Collections.emptySet();
        }

        @Override
        public QueryRewriter createRewriter(ExpandedQuery input, SearchEngineRequestAdapter searchEngineRequestAdapter) {
            final QueryRewriter matchAllWithFilter = query -> {
                query.setUserQuery(new MatchAllQuery());
                query.addFilterQuery(WhiteSpaceQuerqyParser.parseString("a"));
                return query;
            };
            return matchAllWithFilter;
        }
    };
}
 
Example #10
Source File: AbstractQuerqyDismaxQParserPluginTest.java    From querqy with Apache License 2.0 6 votes vote down vote up
/**
 * When the parser configuration names only a parser class (no factory), the
 * plugin is expected to return a SimpleQuerqyQParserFactory for that class.
 */
@Test
public void testThatASimpleQuerqyQParserFactoryIsCreatedIfOnlyTheParserClassIsConfigured() throws Exception {

    // Arrange: "parser" section with a class but no factory.
    final NamedList<NamedList<String>> pluginArgs = mock(NamedList.class);
    when(pluginArgs.get("parser")).thenReturn(parserConfig);

    when(parserConfig.get("factory")).thenReturn(null);
    when(parserConfig.get("class")).thenReturn("querqy.parser.WhiteSpaceQuerqyParser");
    final ResourceLoader classpathLoader = new ClasspathResourceLoader(getClass().getClassLoader());

    // Act
    final SolrQuerqyParserFactory factory = plugin.loadSolrQuerqyParserFactory(classpathLoader, pluginArgs);

    // Assert
    assertNotNull(factory);
    assertTrue(factory instanceof SimpleQuerqyQParserFactory);
    assertEquals(WhiteSpaceQuerqyParser.class, ((SimpleQuerqyQParserFactory) factory).querqyParserClass);

}
 
Example #11
Source File: EnglishLemmatisationFilterFactory.java    From jate with GNU Lesser General Public License v3.0 6 votes vote down vote up
/**
 * Initialises the lemmatiser from {@code lemmatiserResourceDir}, resolved
 * relative to the Solr config directory. Does nothing when no resource
 * directory is configured.
 *
 * @param loader the resource loader; it is cast to {@code SolrResourceLoader}
 *               to obtain the config directory
 * @throws IllegalArgumentException if initialisation fails for any reason; the
 *         message contains the full stack trace text and the original
 *         exception is attached as the cause
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
    if (lemmatiserResourceDir == null) {
        return;
    }
    try {
        String path = ((SolrResourceLoader) loader).getConfigDir();
        // Make sure the directory ends with a separator before appending.
        if (!path.endsWith(File.separator)) {
            path = path + File.separator;
        }
        lemmatiser = new EngLemmatiser(path + lemmatiserResourceDir,
                false, false);
    } catch (Exception e) {
        StringBuilder sb = new StringBuilder("Initiating ");
        sb.append(this.getClass().getName()).append(" failed due to:\n");
        sb.append(ExceptionUtils.getFullStackTrace(e));
        // Keep the original exception as the cause so callers and logging
        // frameworks can inspect it instead of parsing the message text.
        throw new IllegalArgumentException(sb.toString(), e);
    }
}
 
Example #12
Source File: OpenNLPTokenizerFactory.java    From jate with GNU Lesser General Public License v3.0 6 votes vote down vote up
/**
 * Loads the OpenNLP models and the optional paragraph chunker.
 * <ul>
 *   <li>The sentence model is optional and is loaded only when
 *       {@code sentenceModelFile} is set.</li>
 *   <li>The tokenizer model is mandatory.</li>
 *   <li>The paragraph chunker class, when configured, is instantiated
 *       reflectively through its no-argument constructor.</li>
 * </ul>
 * Model streams are closed as soon as the model has been read.
 *
 * @param loader the resource loader used to open the model files
 * @throws IOException if the tokenizer model file is not configured, a model
 *         cannot be read, or the paragraph chunker cannot be instantiated
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
    if (sentenceModelFile != null) {
        // Close the stream once the model is built; it was previously left open.
        try (java.io.InputStream sentenceModelStream = loader.openResource(sentenceModelFile)) {
            sentenceOp = new SentenceDetectorME(new SentenceModel(sentenceModelStream));
        }
    }

    if (tokenizerModelFile == null) {
        // NOTE(review): 'tokenizerModle' looks like a typo of the parameter name - confirm before changing.
        throw new IOException("Parameter 'tokenizerModle' is required, but is invalid:" + tokenizerModelFile);
    }
    try (java.io.InputStream tokenizerModelStream = loader.openResource(tokenizerModelFile)) {
        tokenizerOp = new TokenizerME(new TokenizerModel(tokenizerModelStream));
    }

    if (parChunkingClass != null) {
        try {
            Class<?> chunkerClass = Class.forName(parChunkingClass);
            Object chunker = chunkerClass.newInstance();
            paragraphChunker = (ParagraphChunker) chunker;
        } catch (Exception e) {
            throw new IOException(e);
        }
    }

}
 
Example #13
Source File: MtasTokenizerFactory.java    From mtas with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a tokenizer factory from its arguments and validates them before
 * initialising with the resource loader. Exactly one of the config-file,
 * config and parser arguments must be given, and the default argument is only
 * allowed together with the config argument.
 *
 * @param args the args
 * @param resourceLoader the resource loader
 * @throws IOException if the argument combination is invalid, or from
 *         {@code init(resourceLoader)}
 */
public MtasTokenizerFactory(Map<String, String> args,
    ResourceLoader resourceLoader) throws IOException {
  super(args);
  configFileArgument = get(args, ARGUMENT_CONFIGFILE);
  configArgument = get(args, ARGUMENT_CONFIG);
  analyzerArgument = get(args, ARGUMENT_PARSER);
  analyzerArgumentParserArgs = get(args, ARGUMENT_PARSER_ARGS);
  defaultArgument = get(args, ARGUMENT_DEFAULT);

  // Count how many of the three mutually exclusive sources were supplied.
  int suppliedSources = 0;
  if (configFileArgument != null) {
    suppliedSources++;
  }
  if (configArgument != null) {
    suppliedSources++;
  }
  if (analyzerArgument != null) {
    suppliedSources++;
  }

  if (suppliedSources > 1) {
    throw new IOException(this.getClass().getName() + " can't have multiple of "
        + ARGUMENT_CONFIGFILE + ", " + ARGUMENT_CONFIG+" AND "+ARGUMENT_PARSER);
  }
  if (configArgument == null && defaultArgument != null) {
    throw new IOException(this.getClass().getName() + " can't have "
        + ARGUMENT_DEFAULT + " without " + ARGUMENT_CONFIG);
  }
  if (suppliedSources == 0) {
    throw new IOException(this.getClass().getName() + " should have "
        + ARGUMENT_CONFIGFILE + " or " + ARGUMENT_CONFIG+" or "+ARGUMENT_PARSER);
  }
  init(resourceLoader);
}
 
Example #14
Source File: SynonymGraphFilterFactory.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Load synonyms with the given {@link SynonymMap.Parser} class.
 * <p>
 * The parser class named by {@code cname} is located through the resource
 * loader and instantiated via its {@code (boolean, boolean, Analyzer)}
 * constructor. Every configured synonyms file is then decoded as strict UTF-8
 * (malformed or unmappable input is reported) and handed to the parser.
 *
 * @param loader   resource loader used to find the parser class and open the files
 * @param cname    name of the {@link SynonymMap.Parser} implementation
 * @param dedup    passed as the first constructor argument of the parser
 * @param analyzer passed as the third constructor argument of the parser
 * @return the synonym map built by the parser
 * @throws IOException      if a synonyms file cannot be opened or read
 * @throws ParseException   if the parser rejects a file
 * @throws RuntimeException if the parser cannot be instantiated
 */
protected SynonymMap loadSynonyms(ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer) throws IOException, ParseException {
  CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder()
      .onMalformedInput(CodingErrorAction.REPORT)
      .onUnmappableCharacter(CodingErrorAction.REPORT);

  SynonymMap.Parser parser;
  Class<? extends SynonymMap.Parser> clazz = loader.findClass(cname, SynonymMap.Parser.class);
  try {
    parser = clazz.getConstructor(boolean.class, boolean.class, Analyzer.class).newInstance(dedup, expand, analyzer);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }

  List<String> files = splitFileNames(synonyms);
  for (String file : files) {
    // The decoder is reused across files, so reset its state first.
    decoder.reset();
    // Close the reader (and the underlying resource stream) here rather than
    // relying on the parser implementation to do so.
    try (final InputStreamReader isr = new InputStreamReader(loader.openResource(file), decoder)) {
      parser.parse(isr);
    }
  }
  return parser.build();
}
 
Example #15
Source File: StemmerOverrideFilterFactory.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the stemmer override dictionary from the configured files. Each line
 * is split at the first tab into an input and its override. Nothing happens
 * when no dictionary files are configured or the file list is empty.
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  if (dictionaryFiles == null) {
    return;
  }
  final List<String> fileNames = splitFileNames(dictionaryFiles);
  if (fileNames.isEmpty()) {
    return;
  }

  final StemmerOverrideFilter.Builder overrides = new StemmerOverrideFilter.Builder(ignoreCase);
  for (String fileName : fileNames) {
    for (String entry : getLines(loader, fileName.trim())) {
      // Limit of 2: everything after the first tab belongs to the override.
      final String[] parts = entry.split("\t", 2);
      overrides.add(parts[0], parts[1]);
    }
  }
  dictionary = overrides.build();
}
 
Example #16
Source File: MtasConfiguration.java    From mtas with Apache License 2.0 6 votes vote down vote up
/**
 * Read mtas tokenizer configurations.
 * <p>
 * The named configurations for {@code MtasTokenizerFactory} are read from
 * {@code configFile}; each of them must reference a tokenizer configuration
 * file, which is then opened through the resource loader and parsed.
 *
 * @param resourceLoader
 *          the resource loader
 * @param configFile
 *          the config file
 * @return a map from configuration name to its parsed tokenizer configuration
 * @throws IOException
 *           if no configurations are available, a configuration lacks the
 *           tokenizer configuration file entry, or a resource cannot be read
 */
public static HashMap<String, MtasConfiguration> readMtasTokenizerConfigurations(
    ResourceLoader resourceLoader, String configFile) throws IOException {
  HashMap<String, HashMap<String, String>> configs = readConfigurations(
      resourceLoader, configFile, MtasTokenizerFactory.class.getName());
  if (configs == null) {
    throw new IOException("no configurations");
  }

  HashMap<String, MtasConfiguration> result = new HashMap<>();
  for (Entry<String, HashMap<String, String>> entry : configs.entrySet()) {
    HashMap<String, String> config = entry.getValue();
    if (!config.containsKey(TOKENIZER_CONFIGURATION_FILE)) {
      throw new IOException("configuration " + entry.getKey() + " has no "
          + TOKENIZER_CONFIGURATION_FILE);
    }
    // Close the resource stream once it has been parsed; it was previously
    // left open. A second close, should readConfiguration already close it,
    // is harmless per the Closeable contract.
    try (java.io.InputStream configStream = resourceLoader
        .openResource(config.get(TOKENIZER_CONFIGURATION_FILE))) {
      result.put(entry.getKey(), readConfiguration(configStream));
    }
  }
  return result;
}
 
Example #17
Source File: ReplaceRewriterFactory.java    From querqy with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the replace rewriter factory from the Solr configuration arguments.
 * The mandatory {@code rules} resource is opened as UTF-8; {@code ignoreCase}
 * and {@code inputDelimiter} fall back to their defaults when absent, and a
 * whitespace parser factory is used when no {@code querqyParser} is named.
 */
@Override
public RewriterFactory createFactory(String id, NamedList<?> args, ResourceLoader resourceLoader) throws IOException {

    final String rulesResourceName = (String) args.get("rules");
    if (rulesResourceName == null) {
        throw new IllegalArgumentException("Property 'rules' not configured");
    }

    final InputStreamReader rulesReader = new InputStreamReader(
            resourceLoader.openResource(rulesResourceName), StandardCharsets.UTF_8);

    final Boolean ignoreCaseArg = args.getBooleanArg("ignoreCase");
    final String inputDelimiterArg = (String) args.get("inputDelimiter");

    // querqy parser for queries that are part of the instructions in the rules
    final String configuredParser = (String) args.get("querqyParser");
    final String parserClassName = (configuredParser == null) ? "" : configuredParser.trim();

    QuerqyParserFactory parserFactory = parserClassName.isEmpty()
            ? null
            : resourceLoader.newInstance(parserClassName, QuerqyParserFactory.class);
    if (parserFactory == null) {
        parserFactory = new WhiteSpaceQuerqyParserFactory();
    }

    return new querqy.rewrite.contrib.ReplaceRewriterFactory(id, rulesReader,
            ignoreCaseArg != null ? ignoreCaseArg : DEFAULT_IGNORE_CASE,
            inputDelimiterArg != null ? inputDelimiterArg : DEFAULT_INPUT_DELIMITER,
            parserFactory.createParser());
}
 
Example #18
Source File: CollationField.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a RuleBasedCollator from custom rules stored in a file that is read
 * as UTF-8 through the resource loader.
 * The file cannot support comments, as # might be in the rules!
 * I/O failures and invalid rules are both rethrown as RuntimeException.
 */
private Collator createFromRules(String fileName, ResourceLoader loader) {
  InputStream rulesStream = null;
  try {
    rulesStream = loader.openResource(fileName);
    final String ruleText = IOUtils.toString(rulesStream, "UTF-8");
    return new RuleBasedCollator(ruleText);
  } catch (IOException | ParseException e) {
    // io error or invalid rules
    throw new RuntimeException(e);
  } finally {
    IOUtils.closeQuietly(rulesStream);
  }
}
 
Example #19
Source File: MMSegTokenizerFactory.java    From jstarcraft-nlp with Apache License 2.0 5 votes vote down vote up
/**
 * Loads the dictionary named by the {@code dicPath} argument through the
 * resource loader and logs where it was loaded from.
 */
@Override
public void inform(ResourceLoader loader) {
    final String configuredDicPath = getOriginalArgs().get("dicPath");
    dic = Utils.getDict(configuredDicPath, loader);

    logger.info("dic load... in={}", dic.getDicPath().toURI());
}
 
Example #20
Source File: TestSystemIdResolver.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * With unsafe resource loading enabled via system property, verifies entity
 * resolving for system ids built from paths under the test home directory.
 */
public void testUnsafeResolving() throws Exception {
  System.setProperty("solr.allow.unsafe.resourceloading", "true");

  final Path testHome = SolrTestCaseJ4.getFile("solr/collection1").getParentFile().toPath();
  final ResourceLoader loader = new SolrResourceLoader(testHome.resolve("collection1"), this.getClass().getClassLoader());
  final SystemIdResolver resolver = new SystemIdResolver(loader);

  final String schemaSystemId = SystemIdResolver.createSystemIdFromResourceName(testHome+"/crazy-path-to-schema.xml");
  final String configSystemId = SystemIdResolver.createSystemIdFromResourceName(testHome+"/crazy-path-to-config.xml");
  assertEntityResolving(resolver, schemaSystemId, configSystemId, "crazy-path-to-schema.xml");
}
 
Example #21
Source File: OpenNLPRegexChunkerFactory.java    From jate with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Delegates to the superclass and then, when a pattern file is configured,
 * reads its lines through the resource loader and initialises the patterns.
 *
 * @param loader the resource loader used to read the pattern file
 * @throws IOException from {@code super.inform(loader)}
 * @throws IllegalArgumentException if the pattern file cannot be read or
 *         processed; the message contains the full stack trace text and the
 *         original IOException is attached as the cause
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
    super.inform(loader);
    if (patternFile == null) {
        return;
    }
    try {
        List<String> lines = getLines(loader, patternFile.trim());
        initPatterns(lines, patterns);
    } catch (IOException ioe) {
        StringBuilder sb = new StringBuilder("Initiating ");
        sb.append(this.getClass().getName()).append(" failed due to patterns. Details:\n");
        sb.append(ExceptionUtils.getFullStackTrace(ioe));
        // Keep the IOException as the cause so it is not lost to callers.
        throw new IllegalArgumentException(sb.toString(), ioe);
    }
}
 
Example #22
Source File: NumberUnitRewriterFactory.java    From querqy with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the number-unit rewriter factory from a JSON config resource. The
 * resource name must be configured as a String; the parsed definitions are
 * rejected if any of them defines a unit more than once.
 */
@Override
public RewriterFactory createFactory(final String id, final NamedList<?> args, final ResourceLoader resourceLoader)
        throws IOException {

    final Object configuredResource = args.get(KEY_CONFIG_FILE);
    if (!(configuredResource instanceof String)) {
        throw new IllegalArgumentException("Property 'config' not or not properly configured");
    }
    final String rulesResourceName = (String) configuredResource;

    // resource InputStream will be closed by Jackson Json Parser
    final NumberUnitConfigObject configObject = JSON_DEFAULT_OBJECT_MAPPER.readValue(
            resourceLoader.openResource(rulesResourceName), NumberUnitConfigObject.class);

    final int scale = getOrDefaultInt(configObject::getScaleForLinearFunctions,
            DEFAULT_SCALE_FOR_LINEAR_FUNCTIONS);
    final List<NumberUnitDefinition> definitions = parseConfig(configObject);

    // Fail on the first definition that declares a unit twice.
    for (final NumberUnitDefinition definition : definitions) {
        if (numberUnitDefinitionHasDuplicateUnitDefinition(definition)) {
            throw new IllegalArgumentException("Units must only defined once per NumberUnitDefinition");
        }
    }

    return new querqy.rewrite.contrib.NumberUnitRewriterFactory(id, definitions,
            new NumberUnitQueryCreatorSolr(scale));
}
 
Example #23
Source File: ManagedSynonymGraphFilterFactory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Called when the managed resource has been initialized. Builds the delegate
 * {@code SynonymGraphFilterFactory} whose synonyms come from the managed
 * resource instead of a file, then informs it with the resource's loader.
 * <p>
 * The given init args are extended with the {@code synonyms}, {@code expand}
 * and {@code format} entries before being converted to a string map.
 * An IOException from the delegate is rethrown as a server-error SolrException.
 */
@SuppressWarnings("unchecked")
@Override
public void onManagedResourceInitialized(NamedList<?> initArgs, final ManagedResource res)
    throws SolrException
{
  final NamedList<Object> factoryArgs = (NamedList<Object>)initArgs;
  factoryArgs.add("synonyms", getResourceId());
  factoryArgs.add("expand", "false");
  factoryArgs.add("format", "solr");

  // The filter factory expects plain String values.
  final Map<String,String> stringArgs = new HashMap<>();
  for (Map.Entry<String,?> arg : factoryArgs) {
    stringArgs.put(arg.getKey(), arg.getValue().toString());
  }

  // create the actual filter factory that pulls the synonym mappings
  // from synonymMappings using a custom parser implementation
  delegate = new SynonymGraphFilterFactory(stringArgs) {
    @Override
    protected SynonymMap loadSynonyms
        (ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer)
        throws IOException, ParseException {

      final ManagedSynonymParser managedParser =
          new ManagedSynonymParser((SynonymManager)res, dedup, analyzer);
      // null is safe here because there's no actual parsing done against a input Reader
      managedParser.parse(null);
      return managedParser.build();
    }
  };

  try {
    delegate.inform(res.getResourceLoader());
  } catch (IOException e) {
    throw new SolrException(ErrorCode.SERVER_ERROR, e);
  }
}
 
Example #24
Source File: MtasCharFilterFactory.java    From mtas with Apache License 2.0 5 votes vote down vote up
/**
 * Instantiates a new mtas char filter factory.
 *
 * @param args the args
 * @param resourceLoader the resource loader
 * @throws IOException Signals that an I/O exception has occurred.
 */
public MtasCharFilterFactory(Map<String, String> args,
    ResourceLoader resourceLoader) throws IOException {
  super(args);
  typeArgument = get(args, ARGUMENT_TYPE);
  prefixArgument = get(args, ARGUMENT_PREFIX);
  postfixArgument = get(args, ARGUMENT_POSTFIX);
  configArgument = get(args, ARGUMENT_CONFIG);
  defaultArgument = get(args, ARGUMENT_DEFAULT);
  if (typeArgument != null && configArgument != null) {
    throw new IOException(this.getClass().getName() + " can't have both "
        + ARGUMENT_TYPE + " and " + ARGUMENT_CONFIG);
  } else if (typeArgument == null && prefixArgument != null) {
    throw new IOException(this.getClass().getName() + " can't have "
        + ARGUMENT_PREFIX + " without " + ARGUMENT_TYPE);
  } else if (typeArgument == null && postfixArgument != null) {
    throw new IOException(this.getClass().getName() + " can't have "
        + ARGUMENT_POSTFIX + " without " + ARGUMENT_TYPE);
  } else if (configArgument == null && defaultArgument != null) {
    throw new IOException(this.getClass().getName() + " can't have "
        + ARGUMENT_DEFAULT + " without " + ARGUMENT_CONFIG);
  } else if (typeArgument == null && configArgument == null) {
    throw new IOException(this.getClass().getName() + " should have "
        + ARGUMENT_TYPE + " or " + ARGUMENT_CONFIG);
  }
  init(resourceLoader);
}
 
Example #25
Source File: TestCommonGramsQueryFilterFactory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the CommonGramsQuery factory loads its common words from a
 * single file, from a comma-separated file list, and from a snowball-format file.
 */
public void testInform() throws Exception {
  ResourceLoader loader = new ClasspathResourceLoader(TestStopFilterFactory.class);
  assertTrue("loader is null and it shouldn't be", loader != null);

  // Single stop-word file.
  CommonGramsQueryFilterFactory singleFile = (CommonGramsQueryFilterFactory) tokenFilterFactory(
      "CommonGramsQuery", Version.LATEST, loader,
      "words", "stop-1.txt",
      "ignoreCase", "true");
  CharArraySet singleFileWords = singleFile.getCommonWords();
  assertTrue("words is null and it shouldn't be", singleFileWords != null);
  assertTrue("words Size: " + singleFileWords.size() + " is not: " + 2, singleFileWords.size() == 2);
  assertTrue(singleFile.isIgnoreCase() + " does not equal: " + true, singleFile.isIgnoreCase() == true);

  // Two stop-word files in one comma-separated argument.
  CommonGramsQueryFilterFactory twoFiles = (CommonGramsQueryFilterFactory) tokenFilterFactory(
      "CommonGramsQuery", Version.LATEST, loader,
      "words", "stop-1.txt, stop-2.txt",
      "ignoreCase", "true");
  CharArraySet twoFileWords = twoFiles.getCommonWords();
  assertTrue("words is null and it shouldn't be", twoFileWords != null);
  assertTrue("words Size: " + twoFileWords.size() + " is not: " + 4, twoFileWords.size() == 4);
  assertTrue(twoFiles.isIgnoreCase() + " does not equal: " + true, twoFiles.isIgnoreCase() == true);

  // Snowball-format stop-word file.
  CommonGramsQueryFilterFactory snowball = (CommonGramsQueryFilterFactory) tokenFilterFactory(
      "CommonGramsQuery", Version.LATEST, loader,
      "words", "stop-snowball.txt",
      "format", "snowball",
      "ignoreCase", "true");
  CharArraySet snowballWords = snowball.getCommonWords();
  assertEquals(8, snowballWords.size());
  for (String expectedWord : new String[] {"he", "him", "his", "himself", "she", "her", "hers", "herself"}) {
    assertTrue(snowballWords.contains(expectedWord));
  }
}
 
Example #26
Source File: TestCommonGramsFilterFactory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the CommonGrams factory loads its common words from a single
 * file, from a comma-separated file list, and from a snowball-format file.
 */
public void testInform() throws Exception {
  ResourceLoader loader = new ClasspathResourceLoader(TestStopFilterFactory.class);
  assertTrue("loader is null and it shouldn't be", loader != null);

  // Single stop-word file.
  CommonGramsFilterFactory singleFile = (CommonGramsFilterFactory) tokenFilterFactory(
      "CommonGrams", Version.LATEST, loader,
      "words", "stop-1.txt",
      "ignoreCase", "true");
  CharArraySet singleFileWords = singleFile.getCommonWords();
  assertTrue("words is null and it shouldn't be", singleFileWords != null);
  assertTrue("words Size: " + singleFileWords.size() + " is not: " + 2, singleFileWords.size() == 2);
  assertTrue(singleFile.isIgnoreCase() + " does not equal: " + true, singleFile.isIgnoreCase() == true);

  // Two stop-word files in one comma-separated argument.
  CommonGramsFilterFactory twoFiles = (CommonGramsFilterFactory) tokenFilterFactory(
      "CommonGrams", Version.LATEST, loader,
      "words", "stop-1.txt, stop-2.txt",
      "ignoreCase", "true");
  CharArraySet twoFileWords = twoFiles.getCommonWords();
  assertTrue("words is null and it shouldn't be", twoFileWords != null);
  assertTrue("words Size: " + twoFileWords.size() + " is not: " + 4, twoFileWords.size() == 4);
  assertTrue(twoFiles.isIgnoreCase() + " does not equal: " + true, twoFiles.isIgnoreCase() == true);

  // Snowball-format stop-word file.
  CommonGramsFilterFactory snowball = (CommonGramsFilterFactory) tokenFilterFactory(
      "CommonGrams", Version.LATEST, loader,
      "words", "stop-snowball.txt",
      "format", "snowball",
      "ignoreCase", "true");
  CharArraySet snowballWords = snowball.getCommonWords();
  assertEquals(8, snowballWords.size());
  for (String expectedWord : new String[] {"he", "him", "his", "himself", "she", "her", "hers", "herself"}) {
    assertTrue(snowballWords.contains(expectedWord));
  }
}
 
Example #27
Source File: SnowballPorterFilterFactory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Resolves the Snowball stemmer class for the configured language by
 * instantiating it once through the resource loader, and loads the optional
 * set of protected words.
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  final String stemmerClassName = "org.tartarus.snowball.ext." + language + "Stemmer";
  // Only the class of the created instance is kept.
  final SnowballStemmer stemmerInstance = loader.newInstance(stemmerClassName, SnowballStemmer.class);
  stemClass = stemmerInstance.getClass();

  if (wordFiles == null) {
    return;
  }
  protectedWords = getWordSet(loader, wordFiles, false);
}
 
Example #28
Source File: TypeTokenFilterFactory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Collects the token types listed in the configured files into
 * {@code stopTypes}. The set is left untouched when the file list is empty.
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  final List<String> typeFiles = splitFileNames(stopTypesFiles);
  if (typeFiles.isEmpty()) {
    return;
  }

  stopTypes = new HashSet<>();
  for (String typeFile : typeFiles) {
    stopTypes.addAll(getLines(loader, typeFile.trim()));
  }
}
 
Example #29
Source File: WordDelimiterGraphFilterFactory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Loads the optional protected-words set and, when type files are configured,
 * parses the lines of all of them into the type table.
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  if (wordFiles != null) {
    protectedWords = getWordSet(loader, wordFiles, false);
  }

  if (types == null) {
    return;
  }
  // Merge the lines of every type file before parsing them in one go.
  final List<String> typeLines = new ArrayList<>();
  for (String typeFile : splitFileNames(types)) {
    typeLines.addAll(getLines(loader, typeFile.trim()));
  }
  typeTable = parseTypes(typeLines);
}
 
Example #30
Source File: ConditionalTokenFilterFactory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Forwards the resource loader to every inner filter factory that is
 * ResourceLoaderAware and then calls {@code doInform}. Nothing happens,
 * including {@code doInform}, when there are no inner filters.
 */
@Override
public final void inform(ResourceLoader loader) throws IOException {
  if (innerFilters != null) {
    for (TokenFilterFactory innerFactory : innerFilters) {
      if (innerFactory instanceof ResourceLoaderAware) {
        final ResourceLoaderAware loaderAware = (ResourceLoaderAware) innerFactory;
        loaderAware.inform(loader);
      }
    }
    doInform(loader);
  }
}