org.apache.lucene.util.automaton.Automata Java Examples

The following examples show how to use org.apache.lucene.util.automaton.Automata. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestQPHelper.java    From lucene-solr with Apache License 2.0 7 votes vote down vote up
public void testBoost() throws Exception {
  CharacterRunAutomaton stopSet = new CharacterRunAutomaton(Automata.makeString("on"));
  Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet);
  StandardQueryParser qp = new StandardQueryParser();
  qp.setAnalyzer(oneStopAnalyzer);

  Query q = qp.parse("on^1.0", "field");
  assertNotNull(q);
  q = qp.parse("\"hello\"^2.0", "field");
  assertNotNull(q);
  assertEquals(((BoostQuery) q).getBoost(), (float) 2.0, (float) 0.5);
  q = qp.parse("hello^2.0", "field");
  assertNotNull(q);
  assertEquals(((BoostQuery) q).getBoost(), (float) 2.0, (float) 0.5);
  q = qp.parse("\"on\"^1.0", "field");
  assertNotNull(q);

  StandardQueryParser qp2 = new StandardQueryParser();
  qp2.setAnalyzer(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET));

  q = qp2.parse("the^3", "field");
  // "the" is a stop word so the result is an empty query:
  assertNotNull(q);
  assertMatchNoDocsQuery(q);
  assertFalse(q instanceof BoostQuery);
}
 
Example #2
Source File: ContextMapping.java    From Elasticsearch with Apache License 2.0 6 votes vote down vote up
/**
 * Create a automaton for a given context query this automaton will be used
 * to find the matching paths with the fst
 *
 * @param preserveSep set an additional char (<code>XAnalyzingSuggester.SEP_LABEL</code>) between each context query
 * @param queries list of {@link ContextQuery} defining the lookup context
 *
 * @return Automaton matching the given Query
 */
public static Automaton toAutomaton(boolean preserveSep, Iterable<ContextQuery> queries) {
    Automaton a = Automata.makeEmptyString();

    Automaton gap = Automata.makeChar(ContextMapping.SEPARATOR);
    if (preserveSep) {
        // if separators are preserved the fst contains a SEP_LABEL
        // behind each gap. To have a matching automaton, we need to
        // include the SEP_LABEL in the query as well
        gap = Operations.concatenate(gap, Automata.makeChar(XAnalyzingSuggester.SEP_LABEL));
    }

    for (ContextQuery query : queries) {
        a = Operations.concatenate(Arrays.asList(query.toAutomaton(), gap, a));
    }

    // TODO: should we limit this?  Do any of our ContextQuery impls really create exponential regexps?  GeoQuery looks safe (union
    // of strings).
    return Operations.determinize(a, Integer.MAX_VALUE);
}
 
Example #3
Source File: TestAutomatonQuery.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Test some very simple automata.
 */
public void testAutomata() throws IOException {
  assertAutomatonHits(0, Automata.makeEmpty());
  assertAutomatonHits(0, Automata.makeEmptyString());
  assertAutomatonHits(2, Automata.makeAnyChar());
  assertAutomatonHits(3, Automata.makeAnyString());
  assertAutomatonHits(2, Automata.makeString("doc"));
  assertAutomatonHits(1, Automata.makeChar('a'));
  assertAutomatonHits(2, Automata.makeCharRange('a', 'b'));
  assertAutomatonHits(2, Automata.makeDecimalInterval(1233, 2346, 0));
  assertAutomatonHits(1, Automata.makeDecimalInterval(0, 2000, 0));
  assertAutomatonHits(2, Operations.union(Automata.makeChar('a'),
      Automata.makeChar('b')));
  assertAutomatonHits(0, Operations.intersection(Automata
      .makeChar('a'), Automata.makeChar('b')));
  assertAutomatonHits(1, Operations.minus(Automata.makeCharRange('a', 'b'), 
      Automata.makeChar('a'), DEFAULT_MAX_DETERMINIZED_STATES));
}
 
Example #4
Source File: TestRegexpQuery.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
public void testCustomProvider() throws IOException {
  AutomatonProvider myProvider = new AutomatonProvider() {
    // automaton that matches quick or brown
    private Automaton quickBrownAutomaton = Operations.union(Arrays
        .asList(Automata.makeString("quick"),
        Automata.makeString("brown"),
        Automata.makeString("bob")));
    
    @Override
    public Automaton getAutomaton(String name) {
      if (name.equals("quickBrown")) return quickBrownAutomaton;
      else return null;
    }
  };
  RegexpQuery query = new RegexpQuery(newTerm("<quickBrown>"), RegExp.ALL,
    myProvider, DEFAULT_MAX_DETERMINIZED_STATES);
  assertEquals(1, searcher.search(query, 5).totalHits.value);
}
 
Example #5
Source File: QueryParserTestBase.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
public void testBoost()
  throws Exception {
  CharacterRunAutomaton stopWords = new CharacterRunAutomaton(Automata.makeString("on"));
  Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords);
  CommonQueryParserConfiguration qp = getParserConfig(oneStopAnalyzer);
  Query q = getQuery("on^1.0",qp);
  assertNotNull(q);
  q = getQuery("\"hello\"^2.0",qp);
  assertNotNull(q);
  assertEquals(((BoostQuery) q).getBoost(), (float) 2.0, (float) 0.5);
  q = getQuery("hello^2.0",qp);
  assertNotNull(q);
  assertEquals(((BoostQuery) q).getBoost(), (float) 2.0, (float) 0.5);
  q = getQuery("\"on\"^1.0",qp);
  assertNotNull(q);

  Analyzer a2 = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET); 
  CommonQueryParserConfiguration qp2 = getParserConfig(a2);
  q = getQuery("the^3", qp2);
  // "the" is a stop word so the result is an empty query:
  assertNotNull(q);
  assertMatchNoDocsQuery(q);
  assertFalse(q instanceof BoostQuery);
}
 
Example #6
Source File: TestPrecedenceQueryParser.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
public void testBoost() throws Exception {
  CharacterRunAutomaton stopSet = new CharacterRunAutomaton(Automata.makeString("on"));
  Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet);

  PrecedenceQueryParser qp = new PrecedenceQueryParser();
  qp.setAnalyzer(oneStopAnalyzer);
  Query q = qp.parse("on^1.0", "field");
  assertNotNull(q);
  q = qp.parse("\"hello\"^2.0", "field");
  assertNotNull(q);
  assertEquals(((BoostQuery) q).getBoost(), (float) 2.0, (float) 0.5);
  q = qp.parse("hello^2.0", "field");
  assertNotNull(q);
  assertEquals(((BoostQuery) q).getBoost(), (float) 2.0, (float) 0.5);
  q = qp.parse("\"on\"^1.0", "field");
  assertNotNull(q);

  q = getParser(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)).parse("the^3",
      "field");
  assertNotNull(q);
}
 
Example #7
Source File: ContextQuery.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
private static Automaton toContextAutomaton(final Map<IntsRef, ContextMetaData> contexts, final boolean matchAllContexts) {
  final Automaton matchAllAutomaton = Operations.repeat(Automata.makeAnyString());
  final Automaton sep = Automata.makeChar(ContextSuggestField.CONTEXT_SEPARATOR);
  if (matchAllContexts || contexts.size() == 0) {
    return Operations.concatenate(matchAllAutomaton, sep);
  } else {
    Automaton contextsAutomaton = null;
    for (Map.Entry<IntsRef, ContextMetaData> entry : contexts.entrySet()) {
      final ContextMetaData contextMetaData = entry.getValue();
      final IntsRef ref = entry.getKey();
      Automaton contextAutomaton = Automata.makeString(ref.ints, ref.offset, ref.length);
      if (contextMetaData.exact == false) {
        contextAutomaton = Operations.concatenate(contextAutomaton, matchAllAutomaton);
      }
      contextAutomaton = Operations.concatenate(contextAutomaton, sep);
      if (contextsAutomaton == null) {
        contextsAutomaton = contextAutomaton;
      } else {
        contextsAutomaton = Operations.union(contextsAutomaton, contextAutomaton);
      }
    }
    return contextsAutomaton;
  }
}
 
Example #8
Source File: IncludeExclude.java    From Elasticsearch with Apache License 2.0 6 votes vote down vote up
private Automaton toAutomaton() {
    Automaton a = null;
    if (include != null) {
        a = include.toAutomaton();
    } else if (includeValues != null) {
        a = Automata.makeStringUnion(includeValues);
    } else {
        a = Automata.makeAnyString();
    }
    if (exclude != null) {
        a = Operations.minus(a, exclude.toAutomaton(), Operations.DEFAULT_MAX_DETERMINIZED_STATES);
    } else if (excludeValues != null) {
        a = Operations.minus(a, Automata.makeStringUnion(excludeValues), Operations.DEFAULT_MAX_DETERMINIZED_STATES);
    }
    return a;
}
 
Example #9
Source File: ContextQuery.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
  final CompletionWeight innerWeight = ((CompletionWeight) innerQuery.createWeight(searcher, scoreMode, boost));
  final Automaton innerAutomaton = innerWeight.getAutomaton();

  // If the inner automaton matches nothing, then we return an empty weight to avoid
  // traversing all contexts during scoring.
  if (innerAutomaton.getNumStates() == 0) {
    return new CompletionWeight(this, innerAutomaton);
  }

  // if separators are preserved the fst contains a SEP_LABEL
  // behind each gap. To have a matching automaton, we need to
  // include the SEP_LABEL in the query as well
  Automaton optionalSepLabel = Operations.optional(Automata.makeChar(ConcatenateGraphFilter.SEP_LABEL));
  Automaton prefixAutomaton = Operations.concatenate(optionalSepLabel, innerAutomaton);
  Automaton contextsAutomaton = Operations.concatenate(toContextAutomaton(contexts, matchAllContexts), prefixAutomaton);
  contextsAutomaton = Operations.determinize(contextsAutomaton, Operations.DEFAULT_MAX_DETERMINIZED_STATES);

  final Map<IntsRef, Float> contextMap = new HashMap<>(contexts.size());
  final TreeSet<Integer> contextLengths = new TreeSet<>();
  for (Map.Entry<IntsRef, ContextMetaData> entry : contexts.entrySet()) {
    ContextMetaData contextMetaData = entry.getValue();
    contextMap.put(entry.getKey(), contextMetaData.boost);
    contextLengths.add(entry.getKey().length);
  }
  int[] contextLengthArray = new int[contextLengths.size()];
  final Iterator<Integer> iterator = contextLengths.descendingIterator();
  for (int i = 0; iterator.hasNext(); i++) {
    contextLengthArray[i] = iterator.next();
  }
  return new ContextCompletionWeight(this, contextsAutomaton, innerWeight, contextMap, contextLengthArray);
}
 
Example #10
Source File: XContentMapValues.java    From crate with Apache License 2.0 5 votes vote down vote up
/**
 * Returns a function that filters a document map based on the given include and exclude rules.
 * @see #filter(Map, String[], String[]) for details
 */
public static Function<Map<String, ?>, Map<String, Object>> filter(String[] includes, String[] excludes) {
    CharacterRunAutomaton matchAllAutomaton = new CharacterRunAutomaton(Automata.makeAnyString());

    CharacterRunAutomaton include;
    if (includes == null || includes.length == 0) {
        include = matchAllAutomaton;
    } else {
        Automaton includeA = Regex.simpleMatchToAutomaton(includes);
        includeA = makeMatchDotsInFieldNames(includeA);
        include = new CharacterRunAutomaton(includeA);
    }

    Automaton excludeA;
    if (excludes == null || excludes.length == 0) {
        excludeA = Automata.makeEmpty();
    } else {
        excludeA = Regex.simpleMatchToAutomaton(excludes);
        excludeA = makeMatchDotsInFieldNames(excludeA);
    }
    CharacterRunAutomaton exclude = new CharacterRunAutomaton(excludeA);

    // NOTE: We cannot use Operations.minus because of the special case that
    // we want all sub properties to match as soon as an object matches

    return (map) -> filter(map,
        include, 0,
        exclude, 0,
        matchAllAutomaton);
}
 
Example #11
Source File: Regex.java    From crate with Apache License 2.0 5 votes vote down vote up
/** Return an {@link Automaton} that matches the given pattern. */
public static Automaton simpleMatchToAutomaton(String pattern) {
    List<Automaton> automata = new ArrayList<>();
    int previous = 0;
    for (int i = pattern.indexOf('*'); i != -1; i = pattern.indexOf('*', i + 1)) {
        automata.add(Automata.makeString(pattern.substring(previous, i)));
        automata.add(Automata.makeAnyString());
        previous = i + 1;
    }
    automata.add(Automata.makeString(pattern.substring(previous)));
    return Operations.concatenate(automata);
}
 
Example #12
Source File: XJoinQParserPlugin.java    From BioSolr with Apache License 2.0 5 votes vote down vote up
@Override
@SuppressWarnings("unchecked")
Filter makeFilter(String fname, Iterator<BytesRef> it) {
  Automaton union = Automata.makeStringUnion(IteratorUtils.toList(it));
  return new MultiTermQueryWrapperFilter<AutomatonQuery>(new AutomatonQuery(new Term(fname), union)) {
  };
}
 
Example #13
Source File: TestIndexWriter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
public void testStopwordsPosIncHole2() throws Exception {
  // use two stopfilters for testing here
  Directory dir = newDirectory();
  final Automaton secondSet = Automata.makeString("foobar");
  Analyzer a = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      Tokenizer tokenizer = new MockTokenizer();
      TokenStream stream = new MockTokenFilter(tokenizer, MockTokenFilter.ENGLISH_STOPSET);
      stream = new MockTokenFilter(stream, new CharacterRunAutomaton(secondSet));
      return new TokenStreamComponents(tokenizer, stream);
    }
  };
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, a);
  Document doc = new Document();
  doc.add(new TextField("body", "just a foobar", Field.Store.NO));
  doc.add(new TextField("body", "test of gaps", Field.Store.NO));
  iw.addDocument(doc);
  IndexReader ir = iw.getReader();
  iw.close();
  IndexSearcher is = newSearcher(ir);
  PhraseQuery.Builder builder = new PhraseQuery.Builder();
  builder.add(new Term("body", "just"), 0);
  builder.add(new Term("body", "test"), 3);
  PhraseQuery pq = builder.build();
  // body:"just ? ? test"
  assertEquals(1, is.search(pq, 5).totalHits.value);
  ir.close();
  dir.close();
}
 
Example #14
Source File: TestAutomatonQuery.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
public void testBiggishAutomaton() {
  int numTerms = TEST_NIGHTLY ? 3000 : 500;
  List<BytesRef> terms = new ArrayList<>();
  while (terms.size() < numTerms) {
    terms.add(new BytesRef(TestUtil.randomUnicodeString(random())));
  }
  Collections.sort(terms);
  new AutomatonQuery(new Term("foo", "bar"), Automata.makeStringUnion(terms), Integer.MAX_VALUE);
}
 
Example #15
Source File: TestAutomatonQuery.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Test handling of the empty language
 */
public void testEmptyOptimization() throws IOException {
  AutomatonQuery aq = new AutomatonQuery(newTerm("bogus"), Automata.makeEmpty());
  // not yet available: assertTrue(aq.getEnum(searcher.getIndexReader())
  // instanceof EmptyTermEnum);
  Terms terms = MultiTerms.getTerms(searcher.getIndexReader(), FN);
  assertSame(TermsEnum.EMPTY, aq.getTermsEnum(terms));
  assertEquals(0, automatonQueryNrHits(aq));
}
 
Example #16
Source File: TestAutomatonQuery.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Test that rewriting to a prefix query works as expected, preserves
 * MultiTermQuery semantics.
 */
public void testRewritePrefix() throws IOException {
  Automaton pfx = Automata.makeString("do");
  Automaton prefixAutomaton = Operations.concatenate(pfx, Automata.makeAnyString());
  AutomatonQuery aq = new AutomatonQuery(newTerm("bogus"), prefixAutomaton);
  assertEquals(3, automatonQueryNrHits(aq));
}
 
Example #17
Source File: TestAutomatonQuery.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Test that rewriting to a single term works as expected, preserves
 * MultiTermQuery semantics.
 */
public void testRewriteSingleTerm() throws IOException {
  AutomatonQuery aq = new AutomatonQuery(newTerm("bogus"), Automata.makeString("piece"));
  Terms terms = MultiTerms.getTerms(searcher.getIndexReader(), FN);
  assertTrue(aq.getTermsEnum(terms) instanceof SingleTermsEnum);
  assertEquals(1, automatonQueryNrHits(aq));
}
 
Example #18
Source File: TestAutomatonQuery.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
public void testEquals() {
  AutomatonQuery a1 = new AutomatonQuery(newTerm("foobar"), Automata
      .makeString("foobar"));
  // reference to a1
  AutomatonQuery a2 = a1;
  // same as a1 (accepts the same language, same term)
  AutomatonQuery a3 = new AutomatonQuery(newTerm("foobar"),
                          Operations.concatenate(
                               Automata.makeString("foo"),
                               Automata.makeString("bar")));
  // different than a1 (same term, but different language)
  AutomatonQuery a4 = new AutomatonQuery(newTerm("foobar"),
                                         Automata.makeString("different"));
  // different than a1 (different term, same language)
  AutomatonQuery a5 = new AutomatonQuery(newTerm("blah"),
                                         Automata.makeString("foobar"));
  
  assertEquals(a1.hashCode(), a2.hashCode());
  assertEquals(a1, a2);
  
  assertEquals(a1.hashCode(), a3.hashCode());
  assertEquals(a1, a3);

  // different class
  AutomatonQuery w1 = new WildcardQuery(newTerm("foobar"));
  // different class
  AutomatonQuery w2 = new RegexpQuery(newTerm("foobar"));
  
  assertFalse(a1.equals(w1));
  assertFalse(a1.equals(w2));
  assertFalse(w1.equals(w2));
  assertFalse(a1.equals(a4));
  assertFalse(a1.equals(a5));
  assertFalse(a1.equals(null));
}
 
Example #19
Source File: TestAutomatonQuery.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Test that a nondeterministic automaton works correctly. (It should will be
 * determinized)
 */
public void testNFA() throws IOException {
  // accept this or three, the union is an NFA (two transitions for 't' from
  // initial state)
  Automaton nfa = Operations.union(Automata.makeString("this"),
      Automata.makeString("three"));
  assertAutomatonHits(2, nfa);
}
 
Example #20
Source File: CategoryContextMapping.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
@Override
public Automaton toAutomaton() {
    List<Automaton> automatons = new ArrayList<>();
    for (CharSequence value : values) {
        automatons.add(Automata.makeString(value.toString()));
    }
    return Operations.union(automatons);
}
 
Example #21
Source File: TermInSetQuery.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
private ByteRunAutomaton asByteRunAutomaton() {
  TermIterator iterator = termData.iterator();
  List<Automaton> automata = new ArrayList<>();
  for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
    automata.add(Automata.makeBinary(term));
  }
  return new CompiledAutomaton(Operations.union(automata)).runAutomaton;

}
 
Example #22
Source File: TermRangeQuery.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
public static Automaton toAutomaton(BytesRef lowerTerm, BytesRef upperTerm, boolean includeLower, boolean includeUpper) {

    if (lowerTerm == null) {
      // makeBinaryInterval is more picky than we are:
      includeLower = true;
    }

    if (upperTerm == null) {
      // makeBinaryInterval is more picky than we are:
      includeUpper = true;
    }

    return Automata.makeBinaryInterval(lowerTerm, includeLower, upperTerm, includeUpper);
  }
 
Example #23
Source File: GeolocationContextMapping.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
@Override
public Automaton toAutomaton() {
    Automaton automaton;
    if(precisions == null || precisions.length == 0) {
         automaton = Automata.makeString(location);
    } else {
        automaton = Automata.makeString(location.substring(0, Math.max(1, Math.min(location.length(), precisions[0]))));
        for (int i = 1; i < precisions.length; i++) {
            final String cell = location.substring(0, Math.max(1, Math.min(location.length(), precisions[i])));
            automaton = Operations.union(automaton, Automata.makeString(cell));
        }
    }
    return automaton;
}
 
Example #24
Source File: TokenStreamOffsetStrategy.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
private static CharArrayMatcher[] convertTermsToMatchers(BytesRef[] terms, CharArrayMatcher[] matchers) {
  CharArrayMatcher[] newAutomata = new CharArrayMatcher[terms.length + matchers.length];
  for (int i = 0; i < terms.length; i++) {
    String termString = terms[i].utf8ToString();
    CharacterRunAutomaton a = new CharacterRunAutomaton(Automata.makeString(termString));
    newAutomata[i] = LabelledCharArrayMatcher.wrap(termString, a::run);
  }
  // Append existing automata (that which is used for MTQs)
  System.arraycopy(matchers, 0, newAutomata, terms.length, matchers.length);
  return newAutomata;
}
 
Example #25
Source File: TestMockAnalyzer.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/** Test a configuration that behaves a lot like KeepWordFilter */
public void testKeep() throws Exception {
  CharacterRunAutomaton keepWords = 
    new CharacterRunAutomaton(
        Operations.complement(
            Operations.union(
                Arrays.asList(Automata.makeString("foo"), Automata.makeString("bar"))),
            DEFAULT_MAX_DETERMINIZED_STATES));
  Analyzer a = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, keepWords);
  assertAnalyzesTo(a, "quick foo brown bar bar fox foo",
      new String[] { "foo", "bar", "bar", "foo" },
      new int[] { 2, 2, 1, 2 });
}
 
Example #26
Source File: SearchEquivalenceTestBase.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
@BeforeClass
public static void beforeClass() throws Exception {
  Random random = random();
  directory = newDirectory();
  stopword = "" + randomChar();
  CharacterRunAutomaton stopset = new CharacterRunAutomaton(Automata.makeString(stopword));
  analyzer = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false, stopset);
  RandomIndexWriter iw = new RandomIndexWriter(random, directory, analyzer);
  Document doc = new Document();
  Field id = new StringField("id", "", Field.Store.NO);
  Field field = new TextField("field", "", Field.Store.NO);
  doc.add(id);
  doc.add(field);
  
  // index some docs
  int numDocs = TEST_NIGHTLY ? atLeast(1000) : atLeast(100);
  for (int i = 0; i < numDocs; i++) {
    id.setStringValue(Integer.toString(i));
    field.setStringValue(randomFieldContents());
    iw.addDocument(doc);
  }
  
  // delete some docs
  int numDeletes = numDocs/20;
  for (int i = 0; i < numDeletes; i++) {
    Term toDelete = new Term("id", Integer.toString(random.nextInt(numDocs)));
    if (random.nextBoolean()) {
      iw.deleteDocuments(toDelete);
    } else {
      iw.deleteDocuments(new TermQuery(toDelete));
    }
  }
  
  reader = iw.getReader();
  s1 = newSearcher(reader);
  s2 = newSearcher(reader);
  iw.close();
}
 
Example #27
Source File: RegexCompletionQuery.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
  // If an empty regex is provided, we return an automaton that matches nothing. This ensures
  // consistency with PrefixCompletionQuery, which returns no results for an empty term.
  Automaton automaton = getTerm().text().isEmpty()
      ? Automata.makeEmpty()
      : new RegExp(getTerm().text(), flags).toAutomaton(maxDeterminizedStates);
  return new CompletionWeight(this, automaton);
}
 
Example #28
Source File: CharArrayMatcher.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
static CharArrayMatcher fromTerms(List<BytesRef> terms) {
  CharacterRunAutomaton a = new CharacterRunAutomaton(Automata.makeStringUnion(terms));
  return a::run;
}
 
Example #29
Source File: TestTermsEnum.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
public void testInvalidAutomatonTermsEnum() throws Exception {
  expectThrows(IllegalArgumentException.class,
               () -> {
                 new AutomatonTermsEnum(TermsEnum.EMPTY, new CompiledAutomaton(Automata.makeString("foo")));
               });
}
 
Example #30
Source File: TestGraphTokenizers.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
private Automaton s2a(String s) {
  return Automata.makeString(s);
}