Java Code Examples for org.apache.lucene.util.automaton.Operations#concatenate()

The following examples show how to use org.apache.lucene.util.automaton.Operations#concatenate() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example 1

Source File: ContextMapping.java From Elasticsearch with Apache License 2.0

6 votes

/**
 * Create a automaton for a given context query this automaton will be used
 * to find the matching paths with the fst
 *
 * @param preserveSep set an additional char (<code>XAnalyzingSuggester.SEP_LABEL</code>) between each context query
 * @param queries list of {@link ContextQuery} defining the lookup context
 *
 * @return Automaton matching the given Query
 */
public static Automaton toAutomaton(boolean preserveSep, Iterable<ContextQuery> queries) {
    Automaton a = Automata.makeEmptyString();

    Automaton gap = Automata.makeChar(ContextMapping.SEPARATOR);
    if (preserveSep) {
        // if separators are preserved the fst contains a SEP_LABEL
        // behind each gap. To have a matching automaton, we need to
        // include the SEP_LABEL in the query as well
        gap = Operations.concatenate(gap, Automata.makeChar(XAnalyzingSuggester.SEP_LABEL));
    }

    for (ContextQuery query : queries) {
        a = Operations.concatenate(Arrays.asList(query.toAutomaton(), gap, a));
    }

    // TODO: should we limit this?  Do any of our ContextQuery impls really create exponential regexps?  GeoQuery looks safe (union
    // of strings).
    return Operations.determinize(a, Integer.MAX_VALUE);
}

Example 2

Source File: ContextQuery.java From lucene-solr with Apache License 2.0

6 votes

private static Automaton toContextAutomaton(final Map<IntsRef, ContextMetaData> contexts, final boolean matchAllContexts) {
  final Automaton matchAllAutomaton = Operations.repeat(Automata.makeAnyString());
  final Automaton sep = Automata.makeChar(ContextSuggestField.CONTEXT_SEPARATOR);
  if (matchAllContexts || contexts.size() == 0) {
    return Operations.concatenate(matchAllAutomaton, sep);
  } else {
    Automaton contextsAutomaton = null;
    for (Map.Entry<IntsRef, ContextMetaData> entry : contexts.entrySet()) {
      final ContextMetaData contextMetaData = entry.getValue();
      final IntsRef ref = entry.getKey();
      Automaton contextAutomaton = Automata.makeString(ref.ints, ref.offset, ref.length);
      if (contextMetaData.exact == false) {
        contextAutomaton = Operations.concatenate(contextAutomaton, matchAllAutomaton);
      }
      contextAutomaton = Operations.concatenate(contextAutomaton, sep);
      if (contextsAutomaton == null) {
        contextsAutomaton = contextAutomaton;
      } else {
        contextsAutomaton = Operations.union(contextsAutomaton, contextAutomaton);
      }
    }
    return contextsAutomaton;
  }
}

Example 3

Source File: XAnalyzingSuggester.java From Elasticsearch with Apache License 2.0

5 votes

protected Automaton convertAutomaton(Automaton a) {
  if (queryPrefix != null) {
    a = Operations.concatenate(Arrays.asList(queryPrefix, a));
    // This automaton should not blow up during determinize:
    a = Operations.determinize(a, Integer.MAX_VALUE);
  }
  return a;
}

Example 4

Source File: ContextQuery.java From lucene-solr with Apache License 2.0

5 votes

@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
  final CompletionWeight innerWeight = ((CompletionWeight) innerQuery.createWeight(searcher, scoreMode, boost));
  final Automaton innerAutomaton = innerWeight.getAutomaton();

  // If the inner automaton matches nothing, then we return an empty weight to avoid
  // traversing all contexts during scoring.
  if (innerAutomaton.getNumStates() == 0) {
    return new CompletionWeight(this, innerAutomaton);
  }

  // if separators are preserved the fst contains a SEP_LABEL
  // behind each gap. To have a matching automaton, we need to
  // include the SEP_LABEL in the query as well
  Automaton optionalSepLabel = Operations.optional(Automata.makeChar(ConcatenateGraphFilter.SEP_LABEL));
  Automaton prefixAutomaton = Operations.concatenate(optionalSepLabel, innerAutomaton);
  Automaton contextsAutomaton = Operations.concatenate(toContextAutomaton(contexts, matchAllContexts), prefixAutomaton);
  contextsAutomaton = Operations.determinize(contextsAutomaton, Operations.DEFAULT_MAX_DETERMINIZED_STATES);

  final Map<IntsRef, Float> contextMap = new HashMap<>(contexts.size());
  final TreeSet<Integer> contextLengths = new TreeSet<>();
  for (Map.Entry<IntsRef, ContextMetaData> entry : contexts.entrySet()) {
    ContextMetaData contextMetaData = entry.getValue();
    contextMap.put(entry.getKey(), contextMetaData.boost);
    contextLengths.add(entry.getKey().length);
  }
  int[] contextLengthArray = new int[contextLengths.size()];
  final Iterator<Integer> iterator = contextLengths.descendingIterator();
  for (int i = 0; iterator.hasNext(); i++) {
    contextLengthArray[i] = iterator.next();
  }
  return new ContextCompletionWeight(this, contextsAutomaton, innerWeight, contextMap, contextLengthArray);
}

Example 5

Source File: TestAutomatonQuery.java From lucene-solr with Apache License 2.0

5 votes

public void testEquals() {
  AutomatonQuery a1 = new AutomatonQuery(newTerm("foobar"), Automata
      .makeString("foobar"));
  // reference to a1
  AutomatonQuery a2 = a1;
  // same as a1 (accepts the same language, same term)
  AutomatonQuery a3 = new AutomatonQuery(newTerm("foobar"),
                          Operations.concatenate(
                               Automata.makeString("foo"),
                               Automata.makeString("bar")));
  // different than a1 (same term, but different language)
  AutomatonQuery a4 = new AutomatonQuery(newTerm("foobar"),
                                         Automata.makeString("different"));
  // different than a1 (different term, same language)
  AutomatonQuery a5 = new AutomatonQuery(newTerm("blah"),
                                         Automata.makeString("foobar"));
  
  assertEquals(a1.hashCode(), a2.hashCode());
  assertEquals(a1, a2);
  
  assertEquals(a1.hashCode(), a3.hashCode());
  assertEquals(a1, a3);

  // different class
  AutomatonQuery w1 = new WildcardQuery(newTerm("foobar"));
  // different class
  AutomatonQuery w2 = new RegexpQuery(newTerm("foobar"));
  
  assertFalse(a1.equals(w1));
  assertFalse(a1.equals(w2));
  assertFalse(w1.equals(w2));
  assertFalse(a1.equals(a4));
  assertFalse(a1.equals(a5));
  assertFalse(a1.equals(null));
}

Example 6

Source File: TestAutomatonQuery.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Test that rewriting to a prefix query works as expected, preserves
 * MultiTermQuery semantics.
 */
public void testRewritePrefix() throws IOException {
  Automaton pfx = Automata.makeString("do");
  Automaton prefixAutomaton = Operations.concatenate(pfx, Automata.makeAnyString());
  AutomatonQuery aq = new AutomatonQuery(newTerm("bogus"), prefixAutomaton);
  assertEquals(3, automatonQueryNrHits(aq));
}

Example 7

Source File: TestGraphTokenizers.java From lucene-solr with Apache License 2.0

5 votes

private Automaton join(String ... strings) {
  List<Automaton> as = new ArrayList<>();
  for(String s : strings) {
    as.add(s2a(s));
    as.add(SEP_A);
  }
  as.remove(as.size()-1);
  return Operations.concatenate(as);
}

Example 8

Source File: TestGraphTokenizers.java From lucene-solr with Apache License 2.0

5 votes

public void testSynOverHole() throws Exception {

    final TokenStream ts = new CannedTokenStream(
      new Token[] {
        token("a", 1, 1),
        token("X", 0, 2),
        token("b", 2, 1),
      });
    final Automaton a1 = Operations.union(join(s2a("a"), SEP_A, HOLE_A), s2a("X"));
    final Automaton expected = Operations.concatenate(a1, join(SEP_A, s2a("b")));
    assertSameLanguage(expected, ts);
  }

Example 9

Source File: Regex.java From crate with Apache License 2.0

5 votes

/** Return an {@link Automaton} that matches the given pattern. */
public static Automaton simpleMatchToAutomaton(String pattern) {
    List<Automaton> automata = new ArrayList<>();
    int previous = 0;
    for (int i = pattern.indexOf('*'); i != -1; i = pattern.indexOf('*', i + 1)) {
        automata.add(Automata.makeString(pattern.substring(previous, i)));
        automata.add(Automata.makeAnyString());
        previous = i + 1;
    }
    automata.add(Automata.makeString(pattern.substring(previous)));
    return Operations.concatenate(automata);
}

Example 10

Source File: TestGraphTokenizers.java From lucene-solr with Apache License 2.0

4 votes

private Automaton join(Automaton ... as) {
  return Operations.concatenate(Arrays.asList(as));
}