Java Code Examples for org.apache.lucene.util.automaton.Automaton#createState()

The following examples show how to use org.apache.lucene.util.automaton.Automaton#createState(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestDuelingAnalyzers.java (from lucene-solr, Apache License 2.0; 6 votes)
/**
 * Initializes the static {@code jvmLetter} field before the tests in this class run.
 *
 * <p>A two-state automaton is built with one transition per Unicode code point
 * that {@link Character#isLetter(int)} reports as a letter on the running JVM;
 * the result of {@code Operations.repeat} on it is then wrapped in a
 * {@code CharacterRunAutomaton}.
 */
@BeforeClass
public static void beforeClass() throws Exception {
  Automaton letterAutomaton = new Automaton();
  final int startState = letterAutomaton.createState();
  final int letterState = letterAutomaton.createState();
  letterAutomaton.setAccept(letterState, true);

  // Walk the whole code point space and mirror this JVM's own letter
  // classification, one single-code-point transition at a time.
  int codePoint = 0;
  while (codePoint <= Character.MAX_CODE_POINT) {
    if (Character.isLetter(codePoint)) {
      letterAutomaton.addTransition(startState, letterState, codePoint);
    }
    codePoint++;
  }

  jvmLetter = new CharacterRunAutomaton(Operations.repeat(letterAutomaton));
}
 
Example 2
Source File: TestSimplePatternTokenizer.java (from lucene-solr, Apache License 2.0; 5 votes)
/**
 * Checks that constructing a {@code SimplePatternTokenizer} from an automaton
 * that has not been determinized throws {@link IllegalArgumentException}.
 */
public void testNotDeterminized() throws Exception {
  Automaton nfa = new Automaton();
  final int initial = nfa.createState();
  final int branchA = nfa.createState();
  final int branchB = nfa.createState();
  final int accepting = nfa.createState();
  nfa.setAccept(accepting, true);

  // Two different targets for the same 'a'..'z' range out of the initial
  // state: this is what makes the automaton non-deterministic.
  nfa.addTransition(initial, branchA, 'a', 'z');
  nfa.addTransition(initial, branchB, 'a', 'z');

  // Both branches reach the accept state on 'b'.
  nfa.addTransition(branchA, accepting, 'b');
  nfa.addTransition(branchB, accepting, 'b');

  expectThrows(IllegalArgumentException.class, () -> {
    new SimplePatternTokenizer(nfa);
  });
}
 
Example 3
Source File: TestSimplePatternSplitTokenizer.java (from lucene-solr, Apache License 2.0; 5 votes)
/**
 * Checks that constructing a {@code SimplePatternSplitTokenizer} from an
 * automaton that has not been determinized throws
 * {@link IllegalArgumentException}.
 */
public void testNotDeterminized() throws Exception {
  Automaton ambiguous = new Automaton();
  final int origin = ambiguous.createState();
  final int firstPath = ambiguous.createState();
  final int secondPath = ambiguous.createState();
  final int terminal = ambiguous.createState();
  ambiguous.setAccept(terminal, true);

  // The same 'a'..'z' label leads from the origin to two distinct states,
  // so the automaton is non-deterministic.
  ambiguous.addTransition(origin, firstPath, 'a', 'z');
  ambiguous.addTransition(origin, secondPath, 'a', 'z');

  // Either path then reaches the accept state on 'b'.
  ambiguous.addTransition(firstPath, terminal, 'b');
  ambiguous.addTransition(secondPath, terminal, 'b');

  expectThrows(IllegalArgumentException.class, () -> {
    new SimplePatternSplitTokenizer(ambiguous);
  });
}
 
Example 4
Source File: TermAutomatonQuery.java (from lucene-solr, Apache License 2.0; 4 votes)
/**
 * Finalizes the automaton; call this once every state and transition has
 * been added.
 *
 * <p>When an ANY term is in use ({@code anyTermID != -1}), the built automaton
 * is first validated (no ANY transition out of state 0, no ANY transition
 * into an accept state) and then copied into a new automaton in which every
 * transition whose range contains {@code anyTermID} is widened to
 * {@code 0 .. termToID.size() - 1}. The automaton is then determinized, dead
 * states are removed, and the result is stored in {@code det}.
 *
 * @param maxDeterminizedStates Maximum number of states created when
 *   determinizing the automaton.  Higher numbers allow this operation to
 *   consume more memory but allow more complex automatons.
 * @throws IllegalStateException if the automaton leads or ends with an ANY
 *   transition, or if state 0 of the determinized automaton is an accept state
 */
public void finish(int maxDeterminizedStates) {
  Automaton current = builder.finish();

  // System.out.println("before det:\n" + current.toDot());

  // Single scratch object reused for every transition iteration below.
  Transition scratch = new Transition();

  // TODO: should we add "eps back to initial node" for all states,
  // and det that?  then we don't need to revisit initial node at
  // every position?  but automaton could blow up?  And, this makes it
  // harder to skip useless positions at search time?

  if (anyTermID != -1) {

    // Reject a leading ANY: inspect every transition leaving state 0.
    for (int left = current.initTransition(0, scratch); left > 0; left--) {
      current.getNextTransition(scratch);
      boolean coversAny = scratch.min <= anyTermID && anyTermID <= scratch.max;
      if (coversAny) {
        throw new IllegalStateException("automaton cannot lead with an ANY transition");
      }
    }

    // Reject a trailing ANY: no transition into an accept state may cover it.
    final int stateCount = current.getNumStates();
    for (int state = 0; state < stateCount; state++) {
      for (int left = current.initTransition(state, scratch); left > 0; left--) {
        current.getNextTransition(scratch);
        boolean coversAny = scratch.min <= anyTermID && anyTermID <= scratch.max;
        if (current.isAccept(scratch.dest) && coversAny) {
          throw new IllegalStateException("automaton cannot end with an ANY transition");
        }
      }
    }

    final int lastTermID = termToID.size() - 1;

    // Rebuild the automaton so that ANY transitions explicitly span all
    // other term ids as well.  First mirror the states and their accept flags...
    Automaton rewritten = new Automaton();
    for (int state = 0; state < stateCount; state++) {
      rewritten.createState();
      rewritten.setAccept(state, current.isAccept(state));
    }

    // ...then copy the transitions, widening the ones that contain the ANY id.
    for (int state = 0; state < stateCount; state++) {
      for (int left = current.initTransition(state, scratch); left > 0; left--) {
        current.getNextTransition(scratch);
        boolean matchesAny = scratch.min <= anyTermID && anyTermID <= scratch.max;
        int lo = matchesAny ? 0 : scratch.min;
        int hi = matchesAny ? lastTermID : scratch.max;
        rewritten.addTransition(scratch.source, scratch.dest, lo, hi);
      }
    }
    rewritten.finishState();
    current = rewritten;
  }

  Automaton determinized = Operations.determinize(current, maxDeterminizedStates);
  det = Operations.removeDeadStates(determinized);

  if (det.isAccept(0)) {
    throw new IllegalStateException("cannot accept the empty string");
  }
}