org.apache.lucene.util.automaton.Transition Java Examples

The following examples show how to use org.apache.lucene.util.automaton.Transition. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: XAnalyzingSuggester.java (from Elasticsearch, Apache License 2.0) · 6 votes
/**
 * Collects the states of {@code a} in breadth-first discovery order,
 * starting from state 0.
 *
 * <p>The returned array always has length {@code a.getNumStates()}. States
 * that are never reached from state 0 are not written, so in that case the
 * tail of the array keeps its default value of 0.
 *
 * @param a the automaton to traverse
 * @return the discovered states in visiting order; an empty array when the
 *         automaton has no states
 */
private int[] topoSortStates(Automaton a) {
  final int numStates = a.getNumStates();
  if (numStates == 0) {
    // Nothing to traverse: there is no state 0 to seed the walk with, and
    // writing states[0] below would run off the end of an empty array.
    return new int[0];
  }

  final int[] states = new int[numStates];
  // Indexed by state number; assumes every t.dest is < numStates.
  final boolean[] visited = new boolean[numStates];

  // states[head..upto) doubles as the FIFO worklist: a state is appended to
  // the output at the same moment it would have been queued, so the pending
  // states are exactly the not-yet-expanded suffix of the output.
  states[0] = 0;
  visited[0] = true;
  int upto = 1;
  int head = 0;

  final Transition t = new Transition();
  while (head < upto) {
    final int s = states[head++];
    final int count = a.initTransition(s, t);
    for (int i = 0; i < count; i++) {
      a.getNextTransition(t);
      if (!visited[t.dest]) {
        visited[t.dest] = true;
        states[upto++] = t.dest;
      }
    }
  }
  return states;
}
 
Example #2
Source File: TestSynonymGraphFilter.java (from lucene-solr, Apache License 2.0) · 5 votes
/**
 * Builds a copy of {@code in} whose states are renumbered by their position
 * in the order returned by {@link Operations#topoSortStates}.
 */
private Automaton topoSort(Automaton in) {
  final int[] sortedStates = Operations.topoSortStates(in);
  final int stateCount = sortedStates.length;
  final Automaton.Builder builder = new Automaton.Builder();

  // Pass 1: create one new state per sorted position, record the
  // old -> new numbering, and carry over the accept flag.
  final int[] renumbered = new int[stateCount];
  for (int newState = 0; newState < stateCount; newState++) {
    final int oldState = sortedStates[newState];
    builder.createState();
    renumbered[oldState] = newState;
    if (in.isAccept(oldState)) {
      builder.setAccept(newState, true);
    }
  }

  // Pass 2: copy every transition, translating its destination. This has to
  // wait until the whole old -> new table is filled in.
  final Transition scratch = new Transition();
  for (int newState = 0; newState < stateCount; newState++) {
    int remaining = in.initTransition(sortedStates[newState], scratch);
    while (remaining-- > 0) {
      in.getNextTransition(scratch);
      builder.addTransition(newState, renumbered[scratch.dest], scratch.min, scratch.max);
    }
  }

  return builder.finish();
}
 
Example #3
Source File: GraphTokenStreamFiniteStrings.java (from lucene-solr, Apache License 2.0) · 5 votes
/**
 * Depth-first step of the articulation-point search over {@code a}.
 *
 * <p>Marks {@code state} as visited, records its depth, recurses into every
 * unvisited destination, and appends {@code state} to {@code points} when it
 * qualifies: a state whose {@code parent} entry is -1 qualifies when it has
 * more than one recursed-into child; any other state qualifies when some
 * child's {@code low} value is not smaller than this state's depth.
 *
 * @param a       automaton being walked
 * @param state   state to process
 * @param d       depth assigned to {@code state}
 * @param depth   per-state depth, written here
 * @param low     per-state lowest depth seen from the state's subtree, written here
 * @param parent  per-state parent in the traversal; -1 marks a state with no parent
 * @param visited states already processed
 * @param points  receives the qualifying states
 */
private static void articulationPointsRecurse(Automaton a, int state, int d, int[] depth, int[] low, int[] parent,
                                              BitSet visited, List<Integer> points) {
  visited.set(state);
  depth[state] = d;
  low[state] = d;

  int treeChildren = 0;
  boolean separatesChild = false;

  // Each frame needs its own Transition: it carries the iteration cursor.
  final Transition edge = new Transition();
  int remaining = a.initTransition(state, edge);
  while (remaining-- > 0) {
    a.getNextTransition(edge);
    final int next = edge.dest;

    if (visited.get(next)) {
      // Already-seen destination: it can only lower low[state], and the edge
      // back to our own parent does not count.
      if (next != parent[state] && depth[next] < low[state]) {
        low[state] = depth[next];
      }
      continue;
    }

    parent[next] = state;
    articulationPointsRecurse(a, next, d + 1, depth, low, parent, visited, points);
    treeChildren++;
    if (low[next] >= depth[state]) {
      separatesChild = true;
    }
    if (low[next] < low[state]) {
      low[state] = low[next];
    }
  }

  final boolean isRoot = parent[state] == -1;
  if (isRoot ? treeChildren > 1 : separatesChild) {
    points.add(state);
  }
}
 
Example #4
Source File: TermAutomatonQuery.java (from lucene-solr, Apache License 2.0) · 4 votes
/**
 * Call this once you are done adding states/transitions. Finishes the
 * builder, widens every ANY transition so that it matches all term IDs, and
 * stores the determinized automaton (dead states removed) in {@code det}.
 *
 * @param maxDeterminizedStates Maximum number of states created when
 *   determinizing the automaton.  Higher numbers allow this operation to
 *   consume more memory but allow more complex automatons.
 * @throws IllegalStateException if an ANY transition leaves the initial
 *   state or enters an accept state, or if the determinized automaton
 *   accepts the empty string
 */
public void finish(int maxDeterminizedStates) {
  Automaton automaton = builder.finish();

  final Transition scratch = new Transition();

  // TODO: should we add "eps back to initial node" for all states,
  // and det that?  then we don't need to revisit initial node at
  // every position?  but automaton could blow up?  And, this makes it
  // harder to skip useless positions at search time?

  if (anyTermID != -1) {

    // No ANY transition may leave the initial state ...
    for (int left = automaton.initTransition(0, scratch); left > 0; left--) {
      automaton.getNextTransition(scratch);
      if (scratch.min <= anyTermID && anyTermID <= scratch.max) {
        throw new IllegalStateException("automaton cannot lead with an ANY transition");
      }
    }

    // ... and none may arrive at an accept state.
    final int stateCount = automaton.getNumStates();
    for (int state = 0; state < stateCount; state++) {
      for (int left = automaton.initTransition(state, scratch); left > 0; left--) {
        automaton.getNextTransition(scratch);
        final boolean coversAny = scratch.min <= anyTermID && anyTermID <= scratch.max;
        if (coversAny && automaton.isAccept(scratch.dest)) {
          throw new IllegalStateException("automaton cannot end with an ANY transition");
        }
      }
    }

    final int lastTermID = termToID.size() - 1;

    // Rebuild the automaton with every ANY transition widened to the full
    // term ID range, so that it also matches all other terms.
    final Automaton expanded = new Automaton();
    for (int state = 0; state < stateCount; state++) {
      expanded.createState();
      expanded.setAccept(state, automaton.isAccept(state));
    }

    for (int state = 0; state < stateCount; state++) {
      for (int left = automaton.initTransition(state, scratch); left > 0; left--) {
        automaton.getNextTransition(scratch);
        final boolean coversAny = scratch.min <= anyTermID && anyTermID <= scratch.max;
        final int min = coversAny ? 0 : scratch.min;
        final int max = coversAny ? lastTermID : scratch.max;
        expanded.addTransition(scratch.source, scratch.dest, min, max);
      }
    }
    expanded.finishState();
    automaton = expanded;
  }

  final Automaton determinized = Operations.determinize(automaton, maxDeterminizedStates);
  det = Operations.removeDeadStates(determinized);

  if (det.isAccept(0)) {
    throw new IllegalStateException("cannot accept the empty string");
  }
}
 
Example #5
Source File: TermAutomatonQuery.java (from lucene-solr, Apache License 2.0) · 4 votes
/**
 * Rewrites this query to a simpler one when the determinized automaton
 * allows it: a {@code MatchNoDocsQuery} when the automaton is empty, a
 * {@code TermQuery} when it accepts exactly one single-term string, and a
 * {@code PhraseQuery} or {@code MultiPhraseQuery} when every state on the
 * walk from state 0 has all its transitions going to one destination (a
 * "sausage"). Otherwise returns {@code this}.
 *
 * @param reader not used by this method
 * @return the rewritten query, or {@code this} if no simpler form applies
 * @throws IOException declared by the signature; nothing in this body throws it directly
 */
public Query rewrite(IndexReader reader) throws IOException {
  if (Operations.isEmpty(det)) {
    return new MatchNoDocsQuery();
  }

  final IntsRef single = Operations.getSingleton(det);
  if (single != null && single.length == 1) {
    final int onlyTermID = single.ints[single.offset];
    return new TermQuery(new Term(field, idToTerm.get(onlyTermID)));
  }

  // TODO: can PhraseQuery really handle multiple terms at the same position?  If so, why do we even have MultiPhraseQuery?
  // TODO: we could maybe also rewrite to union of PhraseQuery (pull all finite strings) if it's "worth it"?

  // Try for either PhraseQuery or MultiPhraseQuery, which only works when the automaton is a sausage.
  // Any time the sausage shape is violated we give up and return this query unchanged.
  final MultiPhraseQuery.Builder mpq = new MultiPhraseQuery.Builder();
  PhraseQuery.Builder pq = new PhraseQuery.Builder();

  final Transition t = new Transition();
  int state = 0;
  for (int pos = 0; ; pos++) {
    final int count = det.initTransition(state, t);
    if (count == 0) {
      if (!det.isAccept(state)) {
        return this;
      }
      // Reached the accepting end of the chain.
      break;
    }
    if (det.isAccept(state)) {
      // An accept state that still has outgoing transitions is not a sausage.
      return this;
    }

    int dest = -1;
    final List<Term> terms = new ArrayList<>();
    boolean matchesAny = false;
    for (int i = 0; i < count; i++) {
      det.getNextTransition(t);
      if (i == 0) {
        dest = t.dest;
      } else if (dest != t.dest) {
        // Transitions fan out to different states.
        return this;
      }

      if (t.min <= anyTermID && anyTermID <= t.max) {
        matchesAny = true;
      }

      if (!matchesAny) {
        for (int termID = t.min; termID <= t.max; termID++) {
          terms.add(new Term(field, idToTerm.get(termID)));
        }
      }
    }

    // A position matched by ANY contributes no terms; pos still advances.
    if (!matchesAny) {
      mpq.add(terms.toArray(new Term[terms.size()]), pos);
      if (pq != null) {
        if (terms.size() == 1) {
          pq.add(terms.get(0), pos);
        } else {
          // More than one term at a position: only MultiPhraseQuery fits.
          pq = null;
        }
      }
    }
    state = dest;
  }

  if (pq != null) {
    return pq.build();
  }
  return mpq.build();
}