org.apache.lucene.analysis.util.ClasspathResourceLoader Java Examples

The following examples show how to use org.apache.lucene.analysis.util.ClasspathResourceLoader. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: AbstractQuerqyDismaxQParserPluginTest.java    From querqy with Apache License 2.0 6 votes vote down vote up
/**
 * When the "parser" section supplies no factory but names a parser class, the plugin is
 * expected to return a {@code SimpleQuerqyQParserFactory} bound to that class.
 */
@Test
public void testThatASimpleQuerqyQParserFactoryIsCreatedIfOnlyTheParserClassIsConfigured() throws Exception {

    // Inner "parser" section: no factory entry, only a parser class name.
    when(parserConfig.get("factory")).thenReturn(null);
    when(parserConfig.get("class")).thenReturn("querqy.parser.WhiteSpaceQuerqyParser");

    // Outer plugin arguments expose the section above under the "parser" key.
    NamedList<NamedList<String>> pluginArgs = mock(NamedList.class);
    when(pluginArgs.get("parser")).thenReturn(parserConfig);

    ResourceLoader classpathLoader = new ClasspathResourceLoader(getClass().getClassLoader());
    final SolrQuerqyParserFactory created = plugin.loadSolrQuerqyParserFactory(classpathLoader, pluginArgs);

    assertNotNull(created);
    assertTrue(created instanceof SimpleQuerqyQParserFactory);
    assertEquals(WhiteSpaceQuerqyParser.class, ((SimpleQuerqyQParserFactory) created).querqyParserClass);

}
 
Example #2
Source File: TestOpenNLPPOSFilterFactory.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the opennlp tokenizer + opennlpPOS filter chain twice: first asserting against
 * SENTENCES_posTags directly, then with an extra TypeAsPayloadTokenFilterFactory stage
 * asserting against toPayloads(SENTENCES_posTags).
 */
public void testPOS() throws Exception {
  // Chain without the payload stage.
  CustomAnalyzer.Builder plainChain = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()));
  plainChain = plainChain.withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile);
  plainChain = plainChain.addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile);
  CustomAnalyzer taggingAnalyzer = plainChain.build();
  assertAnalyzesTo(taggingAnalyzer, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets,
      SENTENCES_posTags, null, null, true);

  // Same chain, followed by the type-as-payload filter.
  CustomAnalyzer.Builder payloadChain = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()));
  payloadChain = payloadChain.withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile);
  payloadChain = payloadChain.addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile);
  payloadChain = payloadChain.addTokenFilter(TypeAsPayloadTokenFilterFactory.class);
  CustomAnalyzer payloadAnalyzer = payloadChain.build();
  assertAnalyzesTo(payloadAnalyzer, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets,
      null, null, null, true, toPayloads(SENTENCES_posTags));
}
 
Example #3
Source File: TestKeepFilterFactory.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that the KeepWord filter factory builds its word set from a single file and from a
 * comma-separated list of files.
 */
public void testInform() throws Exception {
  // NOTE(review): this loader is never passed to tokenFilterFactory(...) below; presumably the
  // helper creates its own loader -- confirm before removing.
  ResourceLoader loader = new ClasspathResourceLoader(getClass());
  assertNotNull("loader is null and it shouldn't be", loader);

  // Single word file.
  KeepWordFilterFactory factory = (KeepWordFilterFactory) tokenFilterFactory("KeepWord",
      "words", "keep-1.txt",
      "ignoreCase", "true");
  CharArraySet words = factory.getWords();
  assertNotNull("words is null and it shouldn't be", words);
  // assertEquals reports both the expected and the actual size when it fails.
  assertEquals("unexpected number of words", 2, words.size());

  // Two word files in one comma-separated value.
  factory = (KeepWordFilterFactory) tokenFilterFactory("KeepWord",
      "words", "keep-1.txt, keep-2.txt",
      "ignoreCase", "true");
  words = factory.getWords();
  assertNotNull("words is null and it shouldn't be", words);
  assertEquals("unexpected number of words", 4, words.size());
}
 
Example #4
Source File: TestOpenNLPTokenizerFactory.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Drives one tokenizer instance through repeated reset/close/setReader cycles and checks
 * that it still yields SENTENCES_punc for SENTENCES after each close.
 */
@Test
public void testClose() throws IOException {
  // A plain HashMap instead of double-brace initialization, which would create an
  // anonymous HashMap subclass just to run two put() calls.
  Map<String,String> args = new HashMap<>();
  args.put("sentenceModel", "en-test-sent.bin");
  args.put("tokenizerModel", "en-test-tokenizer.bin");
  OpenNLPTokenizerFactory factory = new OpenNLPTokenizerFactory(args);
  factory.inform(new ClasspathResourceLoader(getClass()));

  Tokenizer ts = factory.create(newAttributeFactory());
  ts.setReader(new StringReader(SENTENCES));

  // The exact call order below is the subject of the test; do not reorder.
  ts.reset();
  ts.close();
  ts.reset();
  ts.setReader(new StringReader(SENTENCES));
  assertTokenStreamContents(ts, SENTENCES_punc);
  ts.close();
  ts.reset();
  ts.setReader(new StringReader(SENTENCES));
  assertTokenStreamContents(ts, SENTENCES_punc);
}
 
Example #5
Source File: TestICUTokenizerFactory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Configures the ICU tokenizer with the "Latin-dont-break-on-hyphens.rbbi" rule file for the
 * Latn script and checks the tokens produced for a sentence containing hyphens.
 */
public void testTokenizeLatinDontBreakOnHyphens() throws Exception {
  final String text =
      "One-two punch.  Brang-, not brung-it.  This one--not that one--is the right one, -ish.";
  Reader input = new StringReader(text);

  final Map<String,String> settings = new HashMap<>();
  settings.put(ICUTokenizerFactory.RULEFILES, "Latn:Latin-dont-break-on-hyphens.rbbi");
  ICUTokenizerFactory tokenizerFactory = new ICUTokenizerFactory(settings);
  tokenizerFactory.inform(new ClasspathResourceLoader(getClass()));

  Tokenizer tokenizer = tokenizerFactory.create(newAttributeFactory());
  tokenizer.setReader(input);

  String[] expectedTokens = {
      "One-two", "punch",
      "Brang", "not", "brung-it",
      "This", "one", "not", "that", "one", "is", "the", "right", "one", "ish" };
  assertTokenStreamContents(tokenizer, expectedTokens);
}
 
Example #6
Source File: TestSuggestStopFilterFactory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a SuggestStopFilterFactory from alternating key/value strings, adds
 * "luceneMatchVersion" set to Version.LATEST, and informs the factory with a classpath
 * loader rooted at this test class.
 *
 * @param params alternating keys and values; the count must be even and keys must be unique
 * @return the informed factory
 * @throws IllegalArgumentException if an odd number of strings is supplied
 */
private SuggestStopFilterFactory createFactory(String ... params) throws IOException {
  final boolean oddCount = params.length % 2 == 1;
  if (oddCount) {
    throw new IllegalArgumentException("invalid keysAndValues map");
  }

  final int pairCount = params.length / 2;
  Map<String, String> settings = new HashMap<>(pairCount);
  for (int pair = 0; pair < pairCount; pair++) {
    final String key = params[2 * pair];
    final String value = params[2 * pair + 1];
    // put() returns the previous mapping; non-null means the key was given twice.
    assertNull("duplicate values for key: " + key, settings.put(key, value));
  }
  settings.put("luceneMatchVersion", Version.LATEST.toString());

  SuggestStopFilterFactory created = new SuggestStopFilterFactory(settings);
  ClasspathResourceLoader resources = new ClasspathResourceLoader(getClass());
  created.inform(resources);
  return created;
}
 
Example #7
Source File: TestICUTokenizerFactory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Supplies two script/rule-file pairs (Cyrl and Thai) that both point at the same
 * "KeywordTokenizer.rbbi" rule file, overriding the default DefaultICUTokenizerConfig
 * tokenization for Thai, and checks the resulting tokens.
 */
public void testKeywordTokenizeCyrillicAndThai() throws Exception {
  final String text =
      "Some English.  Немного русский.  ข้อความภาษาไทยเล็ก ๆ น้อย ๆ  More English.";
  Reader input = new StringReader(text);

  final Map<String,String> settings = new HashMap<>();
  settings.put(ICUTokenizerFactory.RULEFILES, "Cyrl:KeywordTokenizer.rbbi,Thai:KeywordTokenizer.rbbi");
  ICUTokenizerFactory tokenizerFactory = new ICUTokenizerFactory(settings);
  tokenizerFactory.inform(new ClasspathResourceLoader(getClass()));

  Tokenizer tokenizer = tokenizerFactory.create(newAttributeFactory());
  tokenizer.setReader(input);

  // The Cyrillic and Thai runs are each expected as one token, trailing spaces included.
  String[] expectedTokens = { "Some", "English",
      "Немного русский.  ",
      "ข้อความภาษาไทยเล็ก ๆ น้อย ๆ  ",
      "More", "English" };
  assertTokenStreamContents(tokenizer, expectedTokens);
}
 
Example #8
Source File: TestPhoneticFilterFactory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Default case: with only the encoder set to "Metaphone", the factory must expose a
 * Metaphone encoder and leave inject enabled.
 */
public void testFactoryDefaults() throws IOException {
  final Map<String,String> settings = new HashMap<>();
  settings.put(PhoneticFilterFactory.ENCODER, "Metaphone");

  PhoneticFilterFactory phonetic = new PhoneticFilterFactory(settings);
  ClasspathResourceLoader resources = new ClasspathResourceLoader(phonetic.getClass());
  phonetic.inform(resources);

  assertTrue(phonetic.getEncoder() instanceof Metaphone);
  // inject was not configured, so this is the default value
  assertTrue(phonetic.inject);
}
 
Example #9
Source File: TestPhoneticFilterFactory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Setting the inject argument to "false" must leave the factory's inject flag disabled.
 */
public void testInjectFalse() throws IOException {
  final Map<String,String> settings = new HashMap<>();
  settings.put(PhoneticFilterFactory.ENCODER, "Metaphone");
  settings.put(PhoneticFilterFactory.INJECT, "false");

  PhoneticFilterFactory phonetic = new PhoneticFilterFactory(settings);
  ClasspathResourceLoader resources = new ClasspathResourceLoader(phonetic.getClass());
  phonetic.inform(resources);

  assertFalse(phonetic.inject);
}
 
Example #10
Source File: TestPhoneticFilterFactory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * A max code length of "2" in the arguments must be reflected by the Metaphone encoder's
 * getMaxCodeLen().
 */
public void testMaxCodeLength() throws IOException {
  final Map<String,String> settings = new HashMap<>();
  settings.put(PhoneticFilterFactory.ENCODER, "Metaphone");
  settings.put(PhoneticFilterFactory.MAX_CODE_LENGTH, "2");

  PhoneticFilterFactory phonetic = new PhoneticFilterFactory(settings);
  ClasspathResourceLoader resources = new ClasspathResourceLoader(phonetic.getClass());
  phonetic.inform(resources);

  Metaphone metaphone = (Metaphone) phonetic.getEncoder();
  assertEquals(2, metaphone.getMaxCodeLen());
}
 
Example #11
Source File: TestPhoneticFilterFactory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * An unrecognised encoder name ("XXX") must produce an IllegalArgumentException whose
 * message contains "Error loading encoder".
 */
public void testUnknownEncoder() throws IOException {
  final Map<String,String> settings = new HashMap<>();
  settings.put("encoder", "XXX");

  IllegalArgumentException failure = expectThrows(IllegalArgumentException.class, () -> {
    PhoneticFilterFactory phonetic = new PhoneticFilterFactory(settings);
    ClasspathResourceLoader resources = new ClasspathResourceLoader(phonetic.getClass());
    phonetic.inform(resources);
  });

  String message = failure.getMessage();
  assertTrue(message.contains("Error loading encoder"));
}
 
Example #12
Source File: TestPhoneticFilterFactory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * A fully qualified encoder class name that does not exist must produce an
 * IllegalArgumentException whose message contains "Error loading encoder".
 */
public void testUnknownEncoderReflection() throws IOException {
  final Map<String,String> settings = new HashMap<>();
  settings.put("encoder", "org.apache.commons.codec.language.NonExistence");

  IllegalArgumentException failure = expectThrows(IllegalArgumentException.class, () -> {
    PhoneticFilterFactory phonetic = new PhoneticFilterFactory(settings);
    ClasspathResourceLoader resources = new ClasspathResourceLoader(phonetic.getClass());
    phonetic.inform(resources);
  });

  String message = failure.getMessage();
  assertTrue(message.contains("Error loading encoder"));
}
 
Example #13
Source File: TestPhoneticFilterFactory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Reflection case: the encoder is given by its fully qualified class name and must come
 * back as a Metaphone instance, with inject left enabled.
 */
public void testFactoryReflection() throws IOException {
  final Map<String,String> settings = new HashMap<>();
  settings.put(PhoneticFilterFactory.ENCODER, "org.apache.commons.codec.language.Metaphone");

  PhoneticFilterFactory phonetic = new PhoneticFilterFactory(settings);
  ClasspathResourceLoader resources = new ClasspathResourceLoader(phonetic.getClass());
  phonetic.inform(resources);

  assertTrue(phonetic.getEncoder() instanceof Metaphone);
  // inject was not configured, so this is the default value
  assertTrue(phonetic.inject);
}
 
Example #14
Source File: TestPhoneticFilterFactory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Uses the name "Caverphone2" because the REGISTRY lists this encoder under "Caverphone";
 * the lookup therefore exercises reflection with a class name that has no package prefix.
 */
public void testFactoryReflectionCaverphone2() throws IOException {
  final Map<String,String> settings = new HashMap<>();
  settings.put(PhoneticFilterFactory.ENCODER, "Caverphone2");

  PhoneticFilterFactory phonetic = new PhoneticFilterFactory(settings);
  ClasspathResourceLoader resources = new ClasspathResourceLoader(phonetic.getClass());
  phonetic.inform(resources);

  assertTrue(phonetic.getEncoder() instanceof Caverphone2);
  // inject was not configured, so this is the default value
  assertTrue(phonetic.inject);
}
 
Example #15
Source File: TestPhoneticFilterFactory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * The encoder name "Caverphone" must yield a Caverphone2 instance, with inject left enabled.
 */
public void testFactoryReflectionCaverphone() throws IOException {
  final Map<String,String> settings = new HashMap<>();
  settings.put(PhoneticFilterFactory.ENCODER, "Caverphone");

  PhoneticFilterFactory phonetic = new PhoneticFilterFactory(settings);
  ClasspathResourceLoader resources = new ClasspathResourceLoader(phonetic.getClass());
  phonetic.inform(resources);

  assertTrue(phonetic.getEncoder() instanceof Caverphone2);
  // inject was not configured, so this is the default value
  assertTrue(phonetic.inject);
}
 
Example #16
Source File: TestPhoneticFilterFactory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Runs {@code input} through a whitespace mock tokenizer followed by a phonetic filter
 * configured with the given encoder and inject setting, and asserts the emitted tokens.
 *
 * @param algName  value for the "encoder" argument
 * @param inject   value for the "inject" argument
 * @param input    text to tokenize
 * @param expected tokens the filtered stream must produce
 */
static void assertAlgorithm(String algName, String inject, String input,
    String[] expected) throws Exception {
  Tokenizer source = whitespaceMockTokenizer(input);

  final Map<String,String> settings = new HashMap<>();
  settings.put("encoder", algName);
  settings.put("inject", inject);

  PhoneticFilterFactory phonetic = new PhoneticFilterFactory(settings);
  ClasspathResourceLoader resources = new ClasspathResourceLoader(phonetic.getClass());
  phonetic.inform(resources);

  assertTokenStreamContents(phonetic.create(source), expected);
}
 
Example #17
Source File: TestMorfologikFilterFactory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Points the Morfologik filter at "custom-dictionary.dict" and checks that the two input
 * tokens are emitted as "lemma1" and "lemma2".
 */
public void testExplicitDictionary() throws Exception {
  final ResourceLoader resources = new ClasspathResourceLoader(TestMorfologikFilterFactory.class);

  StringReader input = new StringReader("inflected1 inflected2");
  final Map<String,String> settings = new HashMap<>();
  settings.put(MorfologikFilterFactory.DICTIONARY_ATTRIBUTE, "custom-dictionary.dict");

  MorfologikFilterFactory morfologik = new MorfologikFilterFactory(settings);
  morfologik.inform(resources);

  TokenStream source = whitespaceMockTokenizer(input);
  TokenStream filtered = morfologik.create(source);
  String[] expectedLemmas = {"lemma1", "lemma2"};
  assertTokenStreamContents(filtered, expectedLemmas);
}
 
Example #18
Source File: TestMorfologikFilterFactory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * A dictionary resource that cannot be found must produce an IOException whose message
 * contains "Resource not found".
 */
public void testMissingDictionary() throws Exception {
  final ResourceLoader resources = new ClasspathResourceLoader(TestMorfologikFilterFactory.class);

  final Map<String,String> settings = new HashMap<>();
  settings.put(MorfologikFilterFactory.DICTIONARY_ATTRIBUTE, "missing-dictionary-resource.dict");

  IOException failure = expectThrows(IOException.class, () -> {
    MorfologikFilterFactory morfologik = new MorfologikFilterFactory(settings);
    morfologik.inform(resources);
  });

  String message = failure.getMessage();
  assertTrue(message.contains("Resource not found"));
}
 
Example #19
Source File: TestCommonGramsFilterFactory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the CommonGrams filter factory builds its common-word set from a single file,
 * from a comma-separated list of files, and from a snowball-format file, and that
 * ignoreCase is reported as true in the first two configurations.
 */
public void testInform() throws Exception {
  ResourceLoader loader = new ClasspathResourceLoader(TestStopFilterFactory.class);
  assertNotNull("loader is null and it shouldn't be", loader);

  // Single word file.
  CommonGramsFilterFactory factory = (CommonGramsFilterFactory) tokenFilterFactory("CommonGrams", Version.LATEST, loader,
      "words", "stop-1.txt",
      "ignoreCase", "true");
  CharArraySet words = factory.getCommonWords();
  assertNotNull("words is null and it shouldn't be", words);
  // assertEquals reports both the expected and the actual size when it fails.
  assertEquals("unexpected number of words", 2, words.size());
  assertTrue("ignoreCase should be true", factory.isIgnoreCase());

  // Two word files in one comma-separated value.
  factory = (CommonGramsFilterFactory) tokenFilterFactory("CommonGrams", Version.LATEST, loader,
      "words", "stop-1.txt, stop-2.txt",
      "ignoreCase", "true");
  words = factory.getCommonWords();
  assertNotNull("words is null and it shouldn't be", words);
  assertEquals("unexpected number of words", 4, words.size());
  assertTrue("ignoreCase should be true", factory.isIgnoreCase());

  // Snowball-format word file.
  factory = (CommonGramsFilterFactory) tokenFilterFactory("CommonGrams", Version.LATEST, loader,
      "words", "stop-snowball.txt",
      "format", "snowball",
      "ignoreCase", "true");
  words = factory.getCommonWords();
  assertEquals(8, words.size());
  // One loop with a per-word message, so a failure names the missing entry.
  for (String expectedWord : new String[] {"he", "him", "his", "himself", "she", "her", "hers", "herself"}) {
    assertTrue("missing word: " + expectedWord, words.contains(expectedWord));
  }
}
 
Example #20
Source File: TestCommonGramsQueryFilterFactory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the CommonGramsQuery filter factory builds its common-word set from a single
 * file, from a comma-separated list of files, and from a snowball-format file, and that
 * ignoreCase is reported as true in the first two configurations.
 */
public void testInform() throws Exception {
  ResourceLoader loader = new ClasspathResourceLoader(TestStopFilterFactory.class);
  assertNotNull("loader is null and it shouldn't be", loader);

  // Single word file.
  CommonGramsQueryFilterFactory factory = (CommonGramsQueryFilterFactory) tokenFilterFactory("CommonGramsQuery", Version.LATEST, loader,
      "words", "stop-1.txt",
      "ignoreCase", "true");
  CharArraySet words = factory.getCommonWords();
  assertNotNull("words is null and it shouldn't be", words);
  // assertEquals reports both the expected and the actual size when it fails.
  assertEquals("unexpected number of words", 2, words.size());
  assertTrue("ignoreCase should be true", factory.isIgnoreCase());

  // Two word files in one comma-separated value.
  factory = (CommonGramsQueryFilterFactory) tokenFilterFactory("CommonGramsQuery", Version.LATEST, loader,
      "words", "stop-1.txt, stop-2.txt",
      "ignoreCase", "true");
  words = factory.getCommonWords();
  assertNotNull("words is null and it shouldn't be", words);
  assertEquals("unexpected number of words", 4, words.size());
  assertTrue("ignoreCase should be true", factory.isIgnoreCase());

  // Snowball-format word file.
  factory = (CommonGramsQueryFilterFactory) tokenFilterFactory("CommonGramsQuery", Version.LATEST, loader,
      "words", "stop-snowball.txt",
      "format", "snowball",
      "ignoreCase", "true");
  words = factory.getCommonWords();
  assertEquals(8, words.size());
  // One loop with a per-word message, so a failure names the missing entry.
  for (String expectedWord : new String[] {"he", "him", "his", "himself", "she", "her", "hers", "herself"}) {
    assertTrue("missing word: " + expectedWord, words.contains(expectedWord));
  }
}
 
Example #21
Source File: TestStopFilterFactory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the Stop filter factory builds its stop-word set from a single file, from a
 * comma-separated list of files, and from a snowball-format file, and that with no
 * arguments it falls back to EnglishAnalyzer.ENGLISH_STOP_WORDS_SET with ignoreCase false.
 */
public void testInform() throws Exception {
  // NOTE(review): this loader is never passed to tokenFilterFactory(...) below; presumably the
  // helper creates its own loader -- confirm before removing.
  ResourceLoader loader = new ClasspathResourceLoader(getClass());
  assertNotNull("loader is null and it shouldn't be", loader);

  // Single word file.
  StopFilterFactory factory = (StopFilterFactory) tokenFilterFactory("Stop",
      "words", "stop-1.txt",
      "ignoreCase", "true");
  CharArraySet words = factory.getStopWords();
  assertNotNull("words is null and it shouldn't be", words);
  // assertEquals reports both the expected and the actual size when it fails.
  assertEquals("unexpected number of words", 2, words.size());
  assertTrue("ignoreCase should be true", factory.isIgnoreCase());

  // Two word files in one comma-separated value.
  factory = (StopFilterFactory) tokenFilterFactory("Stop",
      "words", "stop-1.txt, stop-2.txt",
      "ignoreCase", "true");
  words = factory.getStopWords();
  assertNotNull("words is null and it shouldn't be", words);
  assertEquals("unexpected number of words", 4, words.size());
  assertTrue("ignoreCase should be true", factory.isIgnoreCase());

  // Snowball-format word file.
  factory = (StopFilterFactory) tokenFilterFactory("Stop",
      "words", "stop-snowball.txt",
      "format", "snowball",
      "ignoreCase", "true");
  words = factory.getStopWords();
  assertEquals(8, words.size());
  // One loop with a per-word message, so a failure names the missing entry.
  for (String expectedWord : new String[] {"he", "him", "his", "himself", "she", "her", "hers", "herself"}) {
    assertTrue("missing word: " + expectedWord, words.contains(expectedWord));
  }

  // defaults
  factory = (StopFilterFactory) tokenFilterFactory("Stop");
  assertEquals(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET, factory.getStopWords());
  assertEquals(false, factory.isIgnoreCase());
}
 
Example #22
Source File: ElasticsearchConfig.java    From spring-boot-practice with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the classpath resource "import.json" line by line, deserializes each line into an
 * {@code Account}, and saves it through {@code accountRepository}.
 *
 * @throws IOException if the resource cannot be opened or read, or a line cannot be parsed
 */
@PostConstruct
public void init() throws IOException {
    log.info("Start loading accounts");
    ClasspathResourceLoader loader = new ClasspathResourceLoader();
    ObjectMapper mapper = new ObjectMapper();
    ObjectReader objectReader = mapper.readerFor(Account.class);
    // Decode explicitly as UTF-8: the single-argument InputStreamReader constructor uses the
    // platform default charset, which can corrupt non-ASCII data on some machines.
    // NOTE(review): assumes import.json is UTF-8 encoded -- confirm against the resource.
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(
            loader.openResource("import.json"), java.nio.charset.StandardCharsets.UTF_8))) {
        String line;
        // Each line is parsed as one standalone JSON document.
        while ((line = reader.readLine()) != null) {
            Account account = objectReader.readValue(line);
            accountRepository.save(account);
        }
    }
    log.info("Finished loading");
}
 
Example #23
Source File: AbstractQuerqyDismaxQParserPluginTest.java    From querqy with Apache License 2.0 5 votes vote down vote up
/**
 * When the "parser" argument is a plain class-name string, the plugin is expected to return
 * a {@code SimpleQuerqyQParserFactory} bound to that class.
 */
@Test
public void testThatASimpleQuerqyQParserFactoryIsCreatedIfTheParserClassIsConfiguredAsAString() throws Exception {

    // The "parser" entry holds the class name directly.
    NamedList<String> pluginArgs = mock(NamedList.class);
    final String parserClassName = DummyQuerqyParser.class.getName();
    when(pluginArgs.get("parser")).thenReturn(parserClassName);

    ResourceLoader classpathLoader = new ClasspathResourceLoader(getClass().getClassLoader());
    final SolrQuerqyParserFactory created = plugin.loadSolrQuerqyParserFactory(classpathLoader, pluginArgs);

    assertNotNull(created);
    assertTrue(created instanceof SimpleQuerqyQParserFactory);
    assertEquals(DummyQuerqyParser.class, ((SimpleQuerqyQParserFactory) created).querqyParserClass);

}
 
Example #24
Source File: TestICUTokenizerFactory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Tokenizes a string mixing Thai, English and Lao text with an ICU tokenizer factory that
 * was given no arguments, and checks the resulting tokens.
 */
public void testMixedText() throws Exception {
  final String text = "การที่ได้ต้องแสดงว่างานดี  This is a test ກວ່າດອກ";
  Reader input = new StringReader(text);

  Map<String,String> noSettings = new HashMap<String,String>();
  ICUTokenizerFactory tokenizerFactory = new ICUTokenizerFactory(noSettings);
  tokenizerFactory.inform(new ClasspathResourceLoader(getClass()));

  Tokenizer tokenizer = tokenizerFactory.create(newAttributeFactory());
  tokenizer.setReader(input);

  String[] expectedTokens = { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี",
      "This", "is", "a", "test", "ກວ່າ", "ດອກ"};
  assertTokenStreamContents(tokenizer, expectedTokens);
}
 
Example #25
Source File: TestSuggestStopFilterFactory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the SuggestStop filter factory builds its stop-word set from a single file,
 * from a comma-separated list of files, and from a snowball-format file, and that with no
 * arguments it falls back to EnglishAnalyzer.ENGLISH_STOP_WORDS_SET with ignoreCase false.
 */
public void testInform() throws Exception {
  // NOTE(review): this loader is never passed to createFactory(...) below; the factories
  // under test are built by that helper -- confirm before removing.
  ResourceLoader loader = new ClasspathResourceLoader(getClass());
  assertNotNull("loader is null and it shouldn't be", loader);

  // Single word file.
  SuggestStopFilterFactory factory = createFactory(
      "words", "stop-1.txt",
      "ignoreCase", "true");
  CharArraySet words = factory.getStopWords();
  assertNotNull("words is null and it shouldn't be", words);
  // assertEquals reports both the expected and the actual size when it fails.
  assertEquals("unexpected number of words", 2, words.size());
  assertTrue("ignoreCase should be true", factory.isIgnoreCase());

  // Two word files in one comma-separated value.
  factory = createFactory("words", "stop-1.txt, stop-2.txt",
      "ignoreCase", "true");
  words = factory.getStopWords();
  assertNotNull("words is null and it shouldn't be", words);
  assertEquals("unexpected number of words", 4, words.size());
  assertTrue("ignoreCase should be true", factory.isIgnoreCase());

  // Snowball-format word file.
  factory = createFactory("words", "stop-snowball.txt",
      "format", "snowball",
      "ignoreCase", "true");
  words = factory.getStopWords();
  assertEquals(8, words.size());
  // One loop with a per-word message, so a failure names the missing entry.
  for (String expectedWord : new String[] {"he", "him", "his", "himself", "she", "her", "hers", "herself"}) {
    assertTrue("missing word: " + expectedWord, words.contains(expectedWord));
  }

  // defaults
  factory = createFactory();
  assertEquals(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET, factory.getStopWords());
  assertEquals(false, factory.isIgnoreCase());
}
 
Example #26
Source File: TestOpenNLPChunkerFilterFactory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the opennlp tokenizer + opennlpPOS + opennlpChunker chain and asserts its output
 * for SENTENCES against SENTENCES_chunks.
 */
public void testBasic() throws Exception {
  ClasspathResourceLoader resources = new ClasspathResourceLoader(getClass());

  CustomAnalyzer.Builder chain = CustomAnalyzer.builder(resources);
  chain = chain.withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile);
  chain = chain.addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile);
  chain = chain.addTokenFilter("opennlpChunker", "chunkerModel", chunkerModelFile);
  CustomAnalyzer chunkingAnalyzer = chain.build();

  assertAnalyzesTo(chunkingAnalyzer, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets,
      SENTENCES_chunks, null, null, true);
}
 
Example #27
Source File: TestOpenNLPChunkerFilterFactory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the opennlp tokenizer + opennlpPOS + opennlpChunker chain followed by
 * TypeAsPayloadTokenFilterFactory and asserts its output for SENTENCES against
 * toPayloads(SENTENCES_chunks).
 */
public void testPayloads() throws Exception {
  ClasspathResourceLoader resources = new ClasspathResourceLoader(getClass());

  CustomAnalyzer.Builder chain = CustomAnalyzer.builder(resources);
  chain = chain.withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile);
  chain = chain.addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile);
  chain = chain.addTokenFilter("opennlpChunker", "chunkerModel", chunkerModelFile);
  chain = chain.addTokenFilter(TypeAsPayloadTokenFilterFactory.class);
  CustomAnalyzer payloadAnalyzer = chain.build();

  assertAnalyzesTo(payloadAnalyzer, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets,
      null, null, null, true, toPayloads(SENTENCES_chunks));
}
 
Example #28
Source File: TestOpenNLPPOSFilterFactory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the opennlp tokenizer + opennlpPOS chain and asserts the terms and offsets it
 * produces for SENTENCES.
 */
public void testBasic() throws IOException {
  ClasspathResourceLoader resources = new ClasspathResourceLoader(getClass());

  CustomAnalyzer.Builder chain = CustomAnalyzer.builder(resources);
  chain = chain.withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile);
  chain = chain.addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile);
  CustomAnalyzer taggingAnalyzer = chain.build();

  assertAnalyzesTo(taggingAnalyzer, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets);
}
 
Example #29
Source File: TestOpenNLPPOSFilterFactory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the opennlp tokenizer + opennlpPOS chain and asserts the terms and offsets it
 * produces for the NO_BREAK input.
 */
public void testNoBreak() throws Exception {
  ClasspathResourceLoader resources = new ClasspathResourceLoader(getClass());

  CustomAnalyzer.Builder chain = CustomAnalyzer.builder(resources);
  chain = chain.withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile);
  chain = chain.addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile);
  CustomAnalyzer taggingAnalyzer = chain.build();

  assertAnalyzesTo(taggingAnalyzer, NO_BREAK, NO_BREAK_terms, NO_BREAK_startOffsets, NO_BREAK_endOffsets,
      null, null, null, true);
}
 
Example #30
Source File: TestOpenNLPTokenizerFactory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds an analyzer consisting only of the opennlp tokenizer (models "en-test-sent.bin"
 * and "en-test-tokenizer.bin") and asserts its output for SENTENCES and SENTENCE1.
 */
@Test
public void testTokenizer() throws IOException {
  ClasspathResourceLoader resources = new ClasspathResourceLoader(getClass());

  CustomAnalyzer.Builder chain = CustomAnalyzer.builder(resources);
  chain = chain.withTokenizer("opennlp", "sentenceModel", "en-test-sent.bin", "tokenizerModel", "en-test-tokenizer.bin");
  CustomAnalyzer tokenizingAnalyzer = chain.build();

  // Terms and offsets for the multi-sentence input, then terms only for the single sentence.
  assertAnalyzesTo(tokenizingAnalyzer, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets);
  assertAnalyzesTo(tokenizingAnalyzer, SENTENCE1, SENTENCE1_punc);
}