org.apache.lucene.util.automaton.ByteRunAutomaton Java Examples

The following examples show how to use org.apache.lucene.util.automaton.ByteRunAutomaton. You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You can also check out related API usage in the sidebar.
Example #1
Source File: TestFuzzyQuery.java, from lucene-solr (Apache License 2.0) — 6 votes
/** Checks that visiting a FuzzyQuery exposes an automaton with the expected accept/reject behaviour. */
public void testVisitor() {
  final FuzzyQuery fuzzy = new FuzzyQuery(new Term("field", "blob"), 2);
  // Flipped inside the callback so the test fails if the visitor is never consulted.
  final AtomicBoolean callbackInvoked = new AtomicBoolean(false);

  final QueryVisitor probe = new QueryVisitor() {
    @Override
    public void consumeTermsMatching(Query query, String field, Supplier<ByteRunAutomaton> automaton) {
      callbackInvoked.set(true);
      final ByteRunAutomaton compiled = automaton.get();
      // Terms the supplied automaton is expected to accept ...
      assertMatches(compiled, "blob");
      assertMatches(compiled, "bolb");
      assertMatches(compiled, "blobby");
      // ... and one it is expected to reject.
      assertNoMatches(compiled, "bolbby");
    }
  };

  fuzzy.visit(probe);
  assertTrue(callbackInvoked.get());
}
 
Example #2
Source File: LabelledCharArrayMatcher.java, from lucene-solr (Apache License 2.0) — 5 votes
/**
 * Adapts a byte-oriented run automaton into a matcher that consumes char[] input,
 * converting the UTF-16 chars to UTF-8 bytes on the fly while stepping the automaton.
 *
 * @param label        the label passed through to the wrapped matcher
 * @param runAutomaton the automaton that is stepped over the UTF-8 bytes
 */
static LabelledCharArrayMatcher wrap(String label, ByteRunAutomaton runAutomaton) {
    return wrap(label, (chars, offset, length) -> {
      final int end = offset + length;
      int state = 0;
      int pos = offset;
      while (pos < end) {
        final int unit = chars[pos];
        if (unit >= 0x800) {
          // Neither the one-byte nor the two-byte fast path applies: encode the whole
          // remaining tail with UnicodeUtil in one go, feed it through, and stop looping.
          final int remaining = end - pos;
          final byte[] encoded = new byte[4 * remaining];
          final int encodedLength = UnicodeUtil.UTF16toUTF8(chars, pos, remaining, encoded);
          for (int k = 0; k < encodedLength; k++) {
            // mask so the byte is passed to step() as an unsigned value
            state = runAutomaton.step(state, encoded[k] & 0xFF);
            if (state == -1) {
              return false;
            }
          }
          break;
        }
        if (unit < 0x80) {
          // single-byte form: the char value is the byte
          state = runAutomaton.step(state, unit);
          if (state == -1) {
            return false;
          }
        } else {
          // two-byte form: lead byte followed by one continuation byte
          final int lead = 0xC0 | (unit >> 6);
          state = runAutomaton.step(state, lead);
          if (state == -1) {
            return false;
          }
          final int trail = 0x80 | (unit & 0x3F);
          state = runAutomaton.step(state, trail);
          if (state == -1) {
            return false;
          }
        }
        pos++;
      }
      return runAutomaton.isAccept(state);
    });
}
 
Example #3
Source File: TermInSetQuery.java, from lucene-solr (Apache License 2.0) — 5 votes
/**
 * Builds one binary automaton per term in {@code termData}, unions them, and returns
 * the run automaton of the compiled result.
 */
private ByteRunAutomaton asByteRunAutomaton() {
  final List<Automaton> perTerm = new ArrayList<>();
  final TermIterator terms = termData.iterator();
  BytesRef current;
  // next() returns null once the iterator is exhausted
  while ((current = terms.next()) != null) {
    perTerm.add(Automata.makeBinary(current));
  }
  final Automaton anyTerm = Operations.union(perTerm);
  return new CompiledAutomaton(anyTerm).runAutomaton;
}
 
Example #4
Source File: IncludeExclude.java, from Elasticsearch (Apache License 2.0) — 4 votes
private AutomatonBackedStringFilter(Automaton automaton) {
    this.runAutomaton = new ByteRunAutomaton(automaton);
}
 
Example #5
Source File: MultiTermHighlighting.java, from lucene-solr (Apache License 2.0) — 4 votes
/**
 * Wraps the supplied automaton as a char[] matcher, labelled with the query's string
 * form, and appends it to {@code runAutomata}.
 */
@Override
public void consumeTermsMatching(Query query, String field, Supplier<ByteRunAutomaton> automaton) {
  final String label = query.toString();
  final ByteRunAutomaton byteAutomaton = automaton.get();
  runAutomata.add(LabelledCharArrayMatcher.wrap(label, byteAutomaton));
}
 
Example #6
Source File: TestFuzzyQuery.java, from lucene-solr (Apache License 2.0) — 4 votes
/** Asserts that the automaton accepts the bytes of {@code text} as held in a {@link BytesRef}. */
private static void assertMatches(ByteRunAutomaton automaton, String text) {
  final BytesRef encoded = new BytesRef(text);
  final boolean accepted = automaton.run(encoded.bytes, encoded.offset, encoded.length);
  assertTrue(accepted);
}
 
Example #7
Source File: TestFuzzyQuery.java, from lucene-solr (Apache License 2.0) — 4 votes
/** Asserts that the automaton rejects the bytes of {@code text} as held in a {@link BytesRef}. */
private static void assertNoMatches(ByteRunAutomaton automaton, String text) {
  final BytesRef encoded = new BytesRef(text);
  final boolean accepted = automaton.run(encoded.bytes, encoded.offset, encoded.length);
  assertFalse(accepted);
}
 
Example #8
Source File: QueryVisitor.java, from lucene-solr (Apache License 2.0) — 2 votes
/**
 * Callback for leaf queries that match a whole class of terms rather than a fixed list.
 * <p>
 * This default implementation ignores {@code field} and {@code automaton} and simply
 * forwards the query to {@code visitLeaf}, which keeps it backward compatible.
 *
 * @param query     the leaf query
 * @param field     the field the query runs against
 * @param automaton supplies an automaton describing which terms match
 *
 * @lucene.experimental
 */
public void consumeTermsMatching(Query query, String field, Supplier<ByteRunAutomaton> automaton) {
  // Backward-compatible default: treat the query as a plain leaf.
  this.visitLeaf(query);
}