Java Code Examples for org.apache.lucene.util.automaton.Automaton#Builder

The following examples show how to use org.apache.lucene.util.automaton.Automaton#Builder. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You can also check out the related API usage in the sidebar.
Example 1
Source File: TestSynonymGraphFilter.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Adds a chain of transitions, one per entry of {@code tokens}, leading from {@code startState}
 * to {@code endState}.
 *
 * <p>The final token always lands on {@code endState}. For every earlier token the intermediate
 * state is taken from {@code flatStates} when that list already holds an entry at the token's
 * index; otherwise a new state is created and, if {@code flatStates} is non-null, appended to it.
 *
 * @param a builder receiving the new states and transitions
 * @param startState state the first transition leaves from
 * @param endState state the last transition arrives at
 * @param tokens labels of the transitions, in path order
 * @param flatStates optional (may be {@code null}) list of intermediate states to reuse and extend
 */
private static void addSidePath(Automaton.Builder a, int startState, int endState, char[] tokens, List<Integer> flatStates) {
  final int lastIndex = tokens.length - 1;
  int from = startState;
  int idx = 0;
  for (char token : tokens) {
    final int to;
    if (idx == lastIndex) {
      // the path always terminates on the requested end state
      to = endState;
    } else if (flatStates != null && idx < flatStates.size()) {
      // an intermediate state for this index was recorded earlier: reuse it
      to = flatStates.get(idx);
    } else {
      to = a.createState();
      if (flatStates != null) {
        assert idx == flatStates.size();
        flatStates.add(to);
      }
    }
    a.addTransition(from, to, token);
    from = to;
    idx++;
  }
}
 
Example 2
Source File: GraphTokenStreamFiniteStrings.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Computes the articulation points (or cut vertices) of the graph:
 * https://en.wikipedia.org/wiki/Biconnected_component
 *
 * @return the states gathered by {@code articulationPointsRecurse}, in the reverse of the order
 *         in which they were collected; an empty array when {@code det} has no states
 */
public int[] articulationPoints() {
  final int numStates = det.getNumStates();
  if (numStates == 0) {
    return new int[0];
  }

  // Start from a copy of det and add, for every transition, the opposite-direction edge.
  final Automaton.Builder undirect = new Automaton.Builder();
  undirect.copy(det);
  for (int state = 0; state < numStates; state++) {
    int remaining = det.initTransition(state, transition);
    while (remaining-- > 0) {
      det.getNextTransition(transition);
      undirect.addTransition(transition.dest, state, transition.min);
    }
  }

  // Scratch structures handed to the recursive search; parent entries start out as -1.
  final BitSet visited = new BitSet(numStates);
  final int[] depth = new int[numStates];
  final int[] low = new int[numStates];
  final int[] parent = new int[numStates];
  Arrays.fill(parent, -1);
  final List<Integer> points = new ArrayList<>();

  articulationPointsRecurse(undirect.finish(), 0, 0, depth, low, parent, visited, points);

  Collections.reverse(points);
  return points.stream().mapToInt(Integer::intValue).toArray();
}
 
Example 3
Source File: TestSynonymGraphFilter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Returns a copy of {@code in} whose states are renumbered to follow the order produced by
 * {@code Operations.topoSortStates}.
 *
 * @param in automaton to renumber
 * @return a new automaton with the same accept flags and transitions, expressed in the new numbering
 */
private Automaton topoSort(Automaton in) {
  final int[] newToOld = Operations.topoSortStates(in);
  final int stateCount = newToOld.length;
  final int[] oldToNew = new int[stateCount];
  final Automaton.Builder builder = new Automaton.Builder();

  // First pass: create one state per sorted position, record the inverse mapping, copy accept flags.
  for (int newState = 0; newState < stateCount; newState++) {
    final int oldState = newToOld[newState];
    builder.createState();
    oldToNew[oldState] = newState;
    if (in.isAccept(oldState)) {
      builder.setAccept(newState, true);
    }
  }

  // Second pass: copy every transition, translating both endpoints into the new numbering.
  final Transition scratch = new Transition();
  for (int newState = 0; newState < stateCount; newState++) {
    int remaining = in.initTransition(newToOld[newState], scratch);
    while (remaining-- > 0) {
      in.getNextTransition(scratch);
      builder.addTransition(newState, oldToNew[scratch.dest], scratch.min, scratch.max);
    }
  }

  return builder.finish();
}
 
Example 4
Source File: TokenStreamToAutomaton.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Walks backwards from position {@code pos}, creating any missing {@code arriving} /
 * {@code leaving} states and connecting each adjacent pair of positions with a {@code HOLE}
 * transition. The walk stops once both the current position's {@code arriving} state and the
 * previous position's {@code leaving} state already exist, or once position 0 has been reached.
 *
 * @param builder builder receiving the new states and transitions
 * @param positions per-position state bookkeeping
 * @param pos position to start from
 */
private static void addHoles(Automaton.Builder builder, RollingBuffer<Position> positions, int pos) {
  int cursor = pos;
  Position current = positions.get(cursor);
  Position previous = positions.get(cursor - 1);

  while (current.arriving == -1 || previous.leaving == -1) {
    if (current.arriving == -1) {
      current.arriving = builder.createState();
      builder.addTransition(current.arriving, current.leaving, POS_SEP);
    }
    if (previous.leaving == -1) {
      // the position right before position 1 leaves from state 0 instead of a new state
      previous.leaving = (cursor == 1) ? 0 : builder.createState();
      if (previous.arriving != -1) {
        builder.addTransition(previous.arriving, previous.leaving, POS_SEP);
      }
    }
    builder.addTransition(previous.leaving, current.arriving, HOLE);

    cursor--;
    if (cursor <= 0) {
      break;
    }
    current = previous;
    previous = positions.get(cursor - 1);
  }
}
 
Example 5
Source File: TermAutomatonQuery.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a query for the given field, backed by a newly created {@link Automaton.Builder}.
 *
 * @param field name of the field this query is built for
 */
public TermAutomatonQuery(String field) {
  this.builder = new Automaton.Builder();
  this.field = field;
}
 
Example 6
Source File: GraphTokenStreamFiniteStrings.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Build an automaton from the provided {@link TokenStream}.
 *
 * <p>Each token read from the stream becomes one transition whose label is the token's index in
 * the {@code tokens} array; a clone of the token's attributes is stored at that index with its
 * position length forced to 1. The stream is reset, consumed until {@code incrementToken()}
 * returns false, and then ended; it is not closed here.
 *
 * @param in the token stream to consume
 * @return the finished automaton; if any state was created, the last one created is accepting
 * @throws IOException declared for the token stream calls made here
 * @throws IllegalStateException if the first token has a position increment smaller than 1
 */
private Automaton build(final TokenStream in) throws IOException {
  Automaton.Builder builder = new Automaton.Builder();

  final PositionIncrementAttribute posIncAtt = in.addAttribute(PositionIncrementAttribute.class);
  final PositionLengthAttribute posLengthAtt = in.addAttribute(PositionLengthAttribute.class);

  in.reset();

  // source state of the current token's transition; stays -1 until the first token is seen
  int pos = -1;
  // most recent non-zero position increment
  int prevIncr = 1;
  // value returned by the latest createState() call; -1 while no state exists
  int state = -1;
  // index of the latest token in tokens, also used as its transition label
  int id = -1;
  // increment - 1 of the latest token with a non-zero increment
  int gap = 0;
  while (in.incrementToken()) {
    int currentIncr = posIncAtt.getPositionIncrement();
    if (pos == -1 && currentIncr < 1) {
      throw new IllegalStateException("Malformed TokenStream, start token can't have increment less than 1");
    }

    if (currentIncr == 0) {
      // stacked token: pos was pushed forward by gap after the previous token
      // (see "pos += gap" below), so step back to leave from the same state again
      if (gap > 0) {
        pos -= gap;
      }
    }
    else {
      pos++;
      gap = currentIncr - 1;
    }

    int endPos = pos + posLengthAtt.getPositionLength() + gap;
    // keep creating states until the latest id reaches endPos
    // NOTE(review): relies on createState() handing out increasing ids - confirm
    while (state < endPos) {
      state = builder.createState();
    }

    id++;
    // make sure tokens has a slot for index id
    if (tokens.length < id + 1) {
      tokens = ArrayUtil.grow(tokens, id + 1);
    }

    tokens[id] = in.cloneAttributes();
    builder.addTransition(pos, endPos, id);
    pos += gap;

    // we always produce linear token graphs from getFiniteStrings(), so we need to adjust
    // posLength and posIncrement accordingly
    tokens[id].addAttribute(PositionLengthAttribute.class).setPositionLength(1);
    if (currentIncr == 0) {
      // stacked token should have the same increment as original token at this position
      tokens[id].addAttribute(PositionIncrementAttribute.class).setPositionIncrement(prevIncr);
    }

    // only save last increment on non-zero increment in case we have multiple stacked tokens
    if (currentIncr > 0) {
      prevIncr = currentIncr;
    }
  }

  in.end();
  // an empty stream creates no state, so there is nothing to mark as accepting
  if (state != -1) {
    builder.setAccept(state, true);
  }
  return builder.finish();
}