org.apache.lucene.analysis.MockTokenizer Java Examples

The following examples show how to use org.apache.lucene.analysis.MockTokenizer, a Tokenizer from Lucene's test framework that splits input by a configurable pattern (KEYWORD, SIMPLE, or WHITESPACE) and can optionally lowercase tokens; it also verifies that consumers follow the TokenStream API contract (see setEnableChecks in the examples below). All examples come from the lucene-solr project under the Apache License 2.0; the source file is noted above each example.
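For orientation, the examples below share a common pattern: construct a MockTokenizer with a token pattern and a lowercasing flag, attach input with setReader, then consume the stream under the usual reset/incrementToken/end/close contract. Here is a minimal illustrative sketch (not taken from any file below; it assumes the lucene-test-framework artifact, which provides MockTokenizer, is on the classpath):

import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public void printTokens() throws IOException {
  // whitespace tokenization, no lowercasing
  MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
  tokenizer.setReader(new StringReader("Hello Lucene World"));
  CharTermAttribute termAtt = tokenizer.addAttribute(CharTermAttribute.class);
  tokenizer.reset();                        // required before the first incrementToken()
  while (tokenizer.incrementToken()) {
    System.out.println(termAtt.toString()); // prints Hello, Lucene, World
  }
  tokenizer.end();                          // finalizes offsets
  tokenizer.close();
}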
Example #1
Source File: TestSynonymMapFilter.java    From lucene-solr with Apache License 2.0
public void testDontKeepOrig() throws Exception {
  b = new SynonymMap.Builder(true);
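  // map "a b" -> "foo"; keepOrig=false, so the original tokens are replaced rather than kept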
  add("a b", "foo", false);

  final SynonymMap map = b.build();

  final Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
      return new TokenStreamComponents(tokenizer, new SynonymFilter(tokenizer, map, false));
    }
  };

  assertAnalyzesTo(analyzer, "a b c",
                   new String[] {"foo", "c"},
                   new int[] {0, 4},
                   new int[] {3, 5},
                   null,
                   new int[] {1, 1},
                   new int[] {1, 1},
                   true);
  checkAnalysisConsistency(random(), analyzer, false, "a b c");
  analyzer.close();
}
 
Example #2
Source File: TestICUNormalizer2CharFilter.java    From lucene-solr with Apache License 2.0
public void testMassiveLigature() throws IOException {
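  // U+FDFA is the single-codepoint Arabic ligature that NFKC normalization expands into the four-word phrase asserted below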
  String input = "\uFDFA";

  CharFilter reader = new ICUNormalizer2CharFilter(new StringReader(input),
    Normalizer2.getInstance(null, "nfkc_cf", Normalizer2.Mode.COMPOSE));

  Tokenizer tokenStream = new MockTokenizer(MockTokenizer.WHITESPACE, false);
  tokenStream.setReader(reader);

  assertTokenStreamContents(tokenStream,
    new String[] {"صلى", "الله", "عليه", "وسلم"},
    new int[]{0, 0, 0, 0},
    new int[]{0, 0, 0, 1},
    input.length()
  );
}
 
Example #3
Source File: TestPayloadCheckQuery.java    From lucene-solr with Apache License 2.0
@BeforeClass
public static void beforeClass() throws Exception {
  Analyzer simplePayloadAnalyzer = new Analyzer() {
      @Override
      public TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
        return new TokenStreamComponents(tokenizer, new SimplePayloadFilter(tokenizer));
      }
  };

  directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
      newIndexWriterConfig(simplePayloadAnalyzer)
          .setMaxBufferedDocs(TestUtil.nextInt(random(), 100, 1000)).setMergePolicy(newLogMergePolicy()));
  //writer.infoStream = System.out;
  for (int i = 0; i < 2000; i++) {
    Document doc = new Document();
    doc.add(newTextField("field", English.intToEnglish(i), Field.Store.YES));
    writer.addDocument(doc);
  }
  reader = writer.getReader();
  searcher = newSearcher(reader);
  writer.close();
}
 
Example #4
Source File: TestCompoundWordTokenFilter.java    From lucene-solr with Apache License 2.0
public void testReset() throws Exception {
  CharArraySet dict = makeDictionary("Rind", "Fleisch", "Draht", "Schere", "Gesetz",
      "Aufgabe", "Überwachung");

  MockTokenizer wsTokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
  wsTokenizer.setEnableChecks(false); // we will reset in a strange place
  wsTokenizer.setReader(new StringReader("Rindfleischüberwachungsgesetz"));
  DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(
      wsTokenizer, dict,
      CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
      CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
      CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
  
  CharTermAttribute termAtt = tf.getAttribute(CharTermAttribute.class);
  tf.reset();
  assertTrue(tf.incrementToken());
  assertEquals("Rindfleischüberwachungsgesetz", termAtt.toString());
  assertTrue(tf.incrementToken());
  assertEquals("Rind", termAtt.toString());
  tf.end();
  tf.close();
  wsTokenizer.setReader(new StringReader("Rindfleischüberwachungsgesetz"));
  tf.reset();
  assertTrue(tf.incrementToken());
  assertEquals("Rindfleischüberwachungsgesetz", termAtt.toString());
}
 
Example #5
Source File: TestPhraseQuery.java    From lucene-solr with Apache License 2.0
public void testPhraseQueryWithStopAnalyzer() throws Exception {
  Directory directory = newDirectory();
  Analyzer stopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET);
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory, 
      newIndexWriterConfig(stopAnalyzer));
  Document doc = new Document();
  doc.add(newTextField("field", "the stop words are here", Field.Store.YES));
  writer.addDocument(doc);
  IndexReader reader = writer.getReader();
  writer.close();

  IndexSearcher searcher = newSearcher(reader);

  // valid exact phrase query
  PhraseQuery query = new PhraseQuery("field", "stop", "words");
  ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;
  assertEquals(1, hits.length);
  QueryUtils.check(random(), query, searcher);

  reader.close();
  directory.close();
}
 
Example #6
Source File: TestRemoveDuplicatesTokenFilter.java    From lucene-solr with Apache License 2.0
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
  final int numIters = atLeast(3);
  for (int i = 0; i < numIters; i++) {
    SynonymMap.Builder b = new SynonymMap.Builder(random().nextBoolean());
    final int numEntries = atLeast(10);
    for (int j = 0; j < numEntries; j++) {
      add(b, randomNonEmptyString(), randomNonEmptyString(), random().nextBoolean());
    }
    final SynonymMap map = b.build();
    final boolean ignoreCase = random().nextBoolean();
    
    final Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
        TokenStream stream = new SynonymGraphFilter(tokenizer, map, ignoreCase);
        return new TokenStreamComponents(tokenizer, new RemoveDuplicatesTokenFilter(stream));
      }
    };

    checkRandomData(random(), analyzer, 200);
    analyzer.close();
  }
}
 
Example #7
Source File: TestUniqueTermCount.java    From lucene-solr with Apache License 2.0
@Override
public void setUp() throws Exception {
  super.setUp();
  dir = newDirectory();
  MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
  IndexWriterConfig config = newIndexWriterConfig(analyzer);
  config.setMergePolicy(newLogMergePolicy());
  config.setSimilarity(new TestSimilarity());
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);
  Document doc = new Document();
  Field foo = newTextField("foo", "", Field.Store.NO);
  doc.add(foo);
  for (int i = 0; i < 100; i++) {
    foo.setStringValue(addValue());
    writer.addDocument(doc);
  }
  reader = writer.getReader();
  writer.close();
}
 
Example #8
Source File: TestMaxTermFrequency.java    From lucene-solr with Apache License 2.0
@Override
public void setUp() throws Exception {
  super.setUp();
  dir = newDirectory();
  IndexWriterConfig config = newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true))
                               .setMergePolicy(newLogMergePolicy());
  config.setSimilarity(new TestSimilarity());
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);
  Document doc = new Document();
  Field foo = newTextField("foo", "", Field.Store.NO);
  doc.add(foo);
  for (int i = 0; i < 100; i++) {
    foo.setStringValue(addValue());
    writer.addDocument(doc);
  }
  reader = writer.getReader();
  writer.close();
}
 
Example #9
Source File: QueryParserTestBase.java    From lucene-solr with Apache License 2.0
public void testStopwords() throws Exception {
  CharacterRunAutomaton stopSet = new CharacterRunAutomaton(new RegExp("the|foo").toAutomaton());
  CommonQueryParserConfiguration qp = getParserConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet));
  Query result = getQuery("field:the OR field:foo",qp);
  assertNotNull("result is null and it shouldn't be", result);
  assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery || result instanceof MatchNoDocsQuery);
  if (result instanceof BooleanQuery) {
    assertEquals(0, ((BooleanQuery) result).clauses().size());
  }
  result = getQuery("field:woo OR field:the",qp);
  assertNotNull("result is null and it shouldn't be", result);
  assertTrue("result is not a TermQuery", result instanceof TermQuery);
  result = getQuery("(fieldX:xxxxx OR fieldy:xxxxxxxx)^2 AND (fieldx:the OR fieldy:foo)",qp);
  assertNotNull("result is null and it shouldn't be", result);
  assertTrue("result is not a BoostQuery", result instanceof BoostQuery);
  result = ((BoostQuery) result).getQuery();
  assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery);
  if (VERBOSE) System.out.println("Result: " + result);
  assertEquals(2, ((BooleanQuery) result).clauses().size());
}
 
Example #10
Source File: TestBeiderMorseFilter.java    From lucene-solr with Apache License 2.0
public void testCustomAttribute() throws IOException {
  Tokenizer tokenizer = new MockTokenizer(MockTokenizer.KEYWORD, false);
  tokenizer.setReader(new StringReader("D'Angelo"));
  TokenStream stream = tokenizer;
  stream = new PatternKeywordMarkerFilter(stream, Pattern.compile(".*"));
  stream = new BeiderMorseFilter(stream, new PhoneticEngine(NameType.GENERIC, RuleType.EXACT, true));
  KeywordAttribute keyAtt = stream.addAttribute(KeywordAttribute.class);
  stream.reset();
  int i = 0;
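  // every token BeiderMorseFilter emits should still carry the keyword flag set by PatternKeywordMarkerFilter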
  while(stream.incrementToken()) {
    assertTrue(keyAtt.isKeyword());
    i++;
  }
  assertEquals(12, i);
  stream.end();
  stream.close();
}
 
Example #11
Source File: TestQueryParser.java    From lucene-solr with Apache License 2.0
public void testFuzzySlopeExtendability() throws ParseException {
  QueryParser qp = new QueryParser("a",  new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)) {

    @Override
    Query handleBareFuzzy(String qfield, Token fuzzySlop, String termImage)
        throws ParseException {
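      // treat a trailing "€" on the fuzzy slop as a numeric tolerance and rewrite the bare fuzzy term into a range query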
      
      if(fuzzySlop.image.endsWith("€")) {
        float fms = fuzzyMinSim;
        try {
          fms = Float.parseFloat(fuzzySlop.image.substring(1, fuzzySlop.image.length()-1));
        } catch (Exception ignored) { }
        float value = Float.parseFloat(termImage);
        return getRangeQuery(qfield, Float.toString(value-fms/2.f), Float.toString(value+fms/2.f), true, true);
      }
      return super.handleBareFuzzy(qfield, fuzzySlop, termImage);
    }
    
  };
  assertEquals(qp.parse("a:[11.95 TO 12.95]"), qp.parse("12.45~1€"));
}
 
Example #12
Source File: TestDirectSpellChecker.java    From lucene-solr with Apache License 2.0
public void testBogusField() throws Exception {
  DirectSpellChecker spellChecker = new DirectSpellChecker();
  Directory dir = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, analyzer);

  for (int i = 0; i < 20; i++) {
    Document doc = new Document();
    doc.add(newTextField("numbers", English.intToEnglish(i), Field.Store.NO));
    writer.addDocument(doc);
  }

  IndexReader ir = writer.getReader();

  SuggestWord[] similar = spellChecker.suggestSimilar(new Term(
      "bogusFieldBogusField", "fvie"), 2, ir,
      SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
  assertEquals(0, similar.length);
  
  IOUtils.close(ir, writer, dir, analyzer);
}
 
Example #13
Source File: MockTokenizerFactory.java    From lucene-solr with Apache License 2.0
/** Creates a new MockTokenizerFactory */
public MockTokenizerFactory(Map<String,String> args) {
  super(args);
  String patternArg = get(args, "pattern", Arrays.asList("keyword", "simple", "whitespace"));
  if ("keyword".equalsIgnoreCase(patternArg)) {
    pattern = MockTokenizer.KEYWORD;
  } else if ("simple".equalsIgnoreCase(patternArg)) {
    pattern = MockTokenizer.SIMPLE;
  } else {
    pattern = MockTokenizer.WHITESPACE;
  }
  
  enableChecks = getBoolean(args, "enableChecks", true);
  if (!args.isEmpty()) {
    throw new IllegalArgumentException("Unknown parameters: " + args);
  }
}
 
Example #14
Source File: TestICUNormalizer2Filter.java    From lucene-solr with Apache License 2.0
public void testAlternate() throws IOException {
  Analyzer a = new Analyzer() {
    @Override
    public TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
      return new TokenStreamComponents(tokenizer, new ICUNormalizer2Filter(
          tokenizer,
          /* specify nfc with decompose to get nfd */
          Normalizer2.getInstance(null, "nfc", Normalizer2.Mode.DECOMPOSE)));
    }
  };
  
  // decompose EAcute into E + combining Acute
  assertAnalyzesTo(a, "\u00E9", new String[] { "\u0065\u0301" });
  a.close();
}
 
Example #15
Source File: HighlighterTest.java    From lucene-solr with Apache License 2.0
public void testMaxSizeEndHighlight() throws Exception {
  TestHighlightRunner helper = new TestHighlightRunner() {
    @Override
    public void run() throws Exception {
      CharacterRunAutomaton stopWords = new CharacterRunAutomaton(new RegExp("i[nt]").toAutomaton());
      TermQuery query = new TermQuery(new Term("text", "searchterm"));

      String text = "this is a text with searchterm in it";
      SimpleHTMLFormatter fm = new SimpleHTMLFormatter();
      Highlighter hg = getHighlighter(query, "text", fm);
      hg.setTextFragmenter(new NullFragmenter());
      hg.setMaxDocCharsToAnalyze(36);
      String match = hg.getBestFragment(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords), "text", text);
      assertTrue(
          "Matched text should contain remainder of text after highlighted query ",
          match.endsWith("in it"));
    }
  };
  helper.start();
}
 
Example #16
Source File: TestQPHelper.java    From lucene-solr with Apache License 2.0
public void testPositionIncrement() throws Exception {
  StandardQueryParser qp = new StandardQueryParser();
  qp.setAnalyzer(
      new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET));

  qp.setEnablePositionIncrements(true);

  String qtxt = "\"the words in poisitions pos02578 are stopped in this phrasequery\"";
  // 0 2 5 7 8
  int expectedPositions[] = { 1, 3, 4, 6, 9 };
  PhraseQuery pq = (PhraseQuery) qp.parse(qtxt, "a");
  // System.out.println("Query text: "+qtxt);
  // System.out.println("Result: "+pq);
  Term t[] = pq.getTerms();
  int pos[] = pq.getPositions();
  for (int i = 0; i < t.length; i++) {
    // System.out.println(i+". "+t[i]+"  pos: "+pos[i]);
    assertEquals("term " + i + " = " + t[i] + " has wrong term-position!",
        expectedPositions[i], pos[i]);
  }
}
 
Example #17
Source File: TestCompoundWordTokenFilter.java    From lucene-solr with Apache License 2.0
public void testTokenEndingWithWordComponentOfMinimumLength() throws Exception {
  CharArraySet dict = makeDictionary("ab", "cd", "ef");
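  // "abcdef" is fully covered by dictionary subwords, so the filter emits the whole token plus "ab", "cd", "ef"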

  Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
  tokenizer.setReader(new StringReader("abcdef"));
  DictionaryCompoundWordTokenFilter tf = new DictionaryCompoundWordTokenFilter(
    tokenizer,
    dict,
    CompoundWordTokenFilterBase.DEFAULT_MIN_WORD_SIZE,
    CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
    CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);

  assertTokenStreamContents(tf,
    new String[] { "abcdef", "ab", "cd", "ef" },
    new int[] { 0, 0, 0, 0},
    new int[] { 6, 6, 6, 6},
    new int[] { 1, 0, 0, 0}
    );
}
 
Example #18
Source File: TestPortugueseLightStemFilter.java    From lucene-solr with Apache License 2.0
public void testKeyword() throws IOException {
  final CharArraySet exclusionSet = new CharArraySet( asSet("quilométricas"), false);
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
      TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
      return new TokenStreamComponents(source, new PortugueseLightStemFilter(sink));
    }
  };
  checkOneTerm(a, "quilométricas", "quilométricas");
  a.close();
}
 
Example #19
Source File: CommonGramsFilterTest.java    From lucene-solr with Apache License 2.0
/**
 * Test CommonGramsQueryFilter in the case of a single word query
 */
public void testOneWordQuery() throws Exception {
  final String input = "monster";
  MockTokenizer wt = new MockTokenizer(MockTokenizer.WHITESPACE, false);
  wt.setReader(new StringReader(input));
  CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords);
  TokenFilter nsf = new CommonGramsQueryFilter(cgf);
  assertTokenStreamContents(nsf, new String[] { "monster" });
}
 
Example #20
Source File: TestFingerprintFilter.java    From lucene-solr with Apache License 2.0
public void testMaxFingerprintSize() throws Exception {
  for (final boolean consumeAll : new boolean[] { true, false }) {
    MockTokenizer tokenizer = whitespaceMockTokenizer("B2 A1 C3 D4 E5 F6 G7 H1");
    tokenizer.setEnableChecks(consumeAll);
    TokenStream stream = new FingerprintFilter(tokenizer, 4, ' ');
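    // the space-joined fingerprint of all tokens exceeds maxOutputTokenSize=4, so no token is emitted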
    assertTokenStreamContents(stream, new String[] {});
  }
}
 
Example #21
Source File: TestConcatenateGraphFilter.java    From lucene-solr with Apache License 2.0
@Test
public void testWithSynonyms() throws Exception {
  SynonymMap.Builder builder = new SynonymMap.Builder(true);
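  // "mykeyword" maps to "mysynonym"; includeOrig=true keeps the original token as a parallel path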
  builder.add(new CharsRef("mykeyword"), new CharsRef("mysynonym"), true);
  Tokenizer tokenStream = new MockTokenizer(MockTokenizer.WHITESPACE, true);
  String input = "mykeyword another keyword";
  tokenStream.setReader(new StringReader(input));
  @SuppressWarnings("deprecation")
  SynonymFilter filter = new SynonymFilter(tokenStream, builder.build(), true);
  ConcatenateGraphFilter stream = new ConcatenateGraphFilter(filter, SEP_LABEL, false, 100);
  String[] expectedOutputs = new String[2];
  CharsRefBuilder expectedOutput = new CharsRefBuilder();
  expectedOutput.append("mykeyword");
  expectedOutput.append(SEP_LABEL);
  expectedOutput.append("another");
  expectedOutput.append(SEP_LABEL);
  expectedOutput.append("keyword");
  expectedOutputs[0] = expectedOutput.toCharsRef().toString();
  expectedOutput.clear();
  expectedOutput.append("mysynonym");
  expectedOutput.append(SEP_LABEL);
  expectedOutput.append("another");
  expectedOutput.append(SEP_LABEL);
  expectedOutput.append("keyword");
  expectedOutputs[1] = expectedOutput.toCharsRef().toString();
  assertTokenStreamContents(stream, expectedOutputs, null, null, new int[]{1, 0});
}
 
Example #22
Source File: TestFingerprintFilter.java    From lucene-solr with Apache License 2.0
public void testSingleToken() throws Exception {
  for (final boolean consumeAll : new boolean[] { true, false }) {
    MockTokenizer tokenizer = whitespaceMockTokenizer("A1");
    tokenizer.setEnableChecks(consumeAll);
    TokenStream stream = new FingerprintFilter(tokenizer);
    assertTokenStreamContents(stream, new String[] { "A1" });
  }
}
 
Example #23
Source File: AnalyzingInfixSuggesterTest.java    From lucene-solr with Apache License 2.0
public void testHighlightCaseChange() throws Exception {
  Input keys[] = new Input[] {
    new Input("a Penny saved is a penny earned", 10, new BytesRef("foobaz")),
  };

  Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true);
  AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(newDirectory(), a, a, 3, false);
  suggester.build(new InputArrayIterator(keys));
  List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("penn", random()), 10, true, true);
  assertEquals(1, results.size());
  assertEquals("a Penny saved is a penny earned", results.get(0).key);
  assertEquals("a <b>Penn</b>y saved is a <b>penn</b>y earned", results.get(0).highlightKey);
  suggester.close();

  // Try again, but overriding addPrefixMatch to highlight
  // the entire hit:
  suggester = new AnalyzingInfixSuggester(newDirectory(), a, a, 3, false) {
      @Override
      protected void addPrefixMatch(StringBuilder sb, String surface, String analyzed, String prefixToken) {
        sb.append("<b>");
        sb.append(surface);
        sb.append("</b>");
      }
    };
  suggester.build(new InputArrayIterator(keys));
  results = suggester.lookup(TestUtil.stringToCharSequence("penn", random()), 10, true, true);
  assertEquals(1, results.size());
  assertEquals("a Penny saved is a penny earned", results.get(0).key);
  assertEquals("a <b>Penny</b> saved is a <b>penny</b> earned", results.get(0).highlightKey);
  suggester.close();
  a.close();
}
 
Example #24
Source File: TestIndexWriter.java    From lucene-solr with Apache License 2.0
public void testStopwordsPosIncHole() throws Exception {
  Directory dir = newDirectory();
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new MockTokenizer();
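      // MockTokenFilter removes ENGLISH_STOPSET stopwords, leaving position-increment holes in the stream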
      TokenStream stream = new MockTokenFilter(tokenizer, MockTokenFilter.ENGLISH_STOPSET);
      return new TokenStreamComponents(tokenizer, stream);
    }
  };
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, a);
  Document doc = new Document();
  doc.add(new TextField("body", "just a", Field.Store.NO));
  doc.add(new TextField("body", "test of gaps", Field.Store.NO));
  iw.addDocument(doc);
  IndexReader ir = iw.getReader();
  iw.close();
  IndexSearcher is = newSearcher(ir);
  PhraseQuery.Builder builder = new PhraseQuery.Builder();
  builder.add(new Term("body", "just"), 0);
  builder.add(new Term("body", "test"), 2);
  PhraseQuery pq = builder.build();
  // body:"just ? test"
  assertEquals(1, is.search(pq, 5).totalHits.value);
  ir.close();
  dir.close();
}
 
Example #25
Source File: TestPortugueseMinimalStemFilter.java    From lucene-solr with Apache License 2.0
@Override
public void setUp() throws Exception {
  super.setUp();
  analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer source = new MockTokenizer(MockTokenizer.SIMPLE, true);
      return new TokenStreamComponents(source, new PortugueseMinimalStemFilter(source));
    }
  };
}
 
Example #26
Source File: TestFuzzyQuery.java    From lucene-solr with Apache License 2.0
public void test2() throws Exception {
  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory, new MockAnalyzer(random(), MockTokenizer.KEYWORD, false));
  addDoc("LANGE", writer);
  addDoc("LUETH", writer);
  addDoc("PIRSING", writer);
  addDoc("RIEGEL", writer);
  addDoc("TRZECZIAK", writer);
  addDoc("WALKER", writer);
  addDoc("WBR", writer);
  addDoc("WE", writer);
  addDoc("WEB", writer);
  addDoc("WEBE", writer);
  addDoc("WEBER", writer);
  addDoc("WEBERE", writer);
  addDoc("WEBREE", writer);
  addDoc("WEBEREI", writer);
  addDoc("WBRE", writer);
  addDoc("WITTKOPF", writer);
  addDoc("WOJNAROWSKI", writer);
  addDoc("WRICKE", writer);

  IndexReader reader = writer.getReader();
  IndexSearcher searcher = newSearcher(reader);
  writer.close();

  FuzzyQuery query = new FuzzyQuery(new Term("field", "WEBER"), 2, 1);
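  // (maxEdits=2 with prefixLength=1: matches must share the leading "W" and lie within two edits of "WEBER")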
  //query.setRewriteMethod(FuzzyQuery.SCORING_BOOLEAN_QUERY_REWRITE);
  ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;
  assertEquals(8, hits.length);

  reader.close();
  directory.close();
}
 
Example #27
Source File: AnalyzingInfixSuggesterTest.java    From lucene-solr with Apache License 2.0
public void testNRTWithParallelAdds() throws IOException, InterruptedException {
  String[] keys = new String[] {"python", "java", "c", "scala", "ruby", "clojure", "erlang", "go", "swift", "lisp"};
  Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
  Path tempDir = createTempDir("AIS_NRT_PERSIST_TEST");
  AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(newFSDirectory(tempDir), a, a, 3, false);
  Thread[] multiAddThreads = new Thread[10];
  // Cannot call refresh on a suggester before any documents have been added to the index
  expectThrows(IllegalStateException.class, () -> {
    suggester.refresh();
  });

  for(int i=0; i<10; i++) {
    multiAddThreads[i] = new Thread(new IndexDocument(suggester, keys[i]));
  }
  for(int i=0; i<10; i++) {
    multiAddThreads[i].start();
  }
  //Make sure all threads have completed indexing
  for(int i=0; i<10; i++) {
    multiAddThreads[i].join();
  }

  suggester.refresh();
  List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("python", random()), 10, true, false);
  assertEquals(1, results.size());
  assertEquals("python", results.get(0).key);

  //Test if the index is getting persisted correctly and can be reopened.
  suggester.commit();
  suggester.close();

  AnalyzingInfixSuggester suggester2 = new AnalyzingInfixSuggester(newFSDirectory(tempDir), a, a, 3, false);
  results = suggester2.lookup(TestUtil.stringToCharSequence("python", random()), 10, true, false);
  assertEquals(1, results.size());
  assertEquals("python", results.get(0).key);

  suggester2.close();
  a.close();
}
 
Example #28
Source File: TestNorwegianLightStemFilter.java    From lucene-solr with Apache License 2.0
/** Test against a Nynorsk vocabulary file */
public void testNynorskVocabulary() throws IOException {  
  Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
      return new TokenStreamComponents(source, new NorwegianLightStemFilter(source, NYNORSK));
    }
  };
  assertVocabulary(analyzer, Files.newInputStream(getDataPath("nn_light.txt")));
  analyzer.close();
}
 
Example #29
Source File: QueryParserTestBase.java    From lucene-solr with Apache License 2.0
/**
 * This test differs from TestPrecedenceQueryParser
 */
public void testPrecedence() throws Exception {
  CommonQueryParserConfiguration qp = getParserConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false));
  Query query1 = getQuery("A AND B OR C AND D", qp);
  Query query2 = getQuery("+A +B +C +D", qp);
  assertEquals(query1, query2);
}
 
Example #30
Source File: TestGermanMinimalStemFilterFactory.java    From lucene-solr with Apache License 2.0
public void testStemming() throws Exception {
  Reader reader = new StringReader("bilder");
  Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
  tokenizer.setReader(reader);
  TokenStream stream = tokenizer;
  stream = tokenFilterFactory("GermanMinimalStem").create(stream);
  assertTokenStreamContents(stream, new String[] { "bild" });
}