Java Code Examples for org.apache.lucene.util.automaton.RegExp

The following examples show how to use org.apache.lucene.util.automaton.RegExp. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage in the sidebar.
Example 1
Source Project: lucene-solr   Source File: HighlighterTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that when analysis is capped via setMaxDocCharsToAnalyze, the
 * best fragment still ends with the remainder of the input text.
 */
public void testMaxSizeEndHighlight() throws Exception {
  TestHighlightRunner helper = new TestHighlightRunner() {
    @Override
    public void run() throws Exception {
      // Character-level stop-word automaton matching "in" or "it".
      CharacterRunAutomaton stopWords = new CharacterRunAutomaton(new RegExp("i[nt]").toAutomaton());
      TermQuery query = new TermQuery(new Term("text", "searchterm"));

      String text = "this is a text with searchterm in it";
      SimpleHTMLFormatter fm = new SimpleHTMLFormatter();
      Highlighter hg = getHighlighter(query, "text", fm);
      // NullFragmenter keeps the whole text as a single fragment.
      hg.setTextFragmenter(new NullFragmenter());
      // 36 chars is exactly the length of the sample text above.
      hg.setMaxDocCharsToAnalyze(36);
      String match = hg.getBestFragment(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords), "text", text);
      assertTrue(
          "Matched text should contain remainder of text after highlighted query ",
          match.endsWith("in it"));
    }
  };
  helper.start();
}
 
Example 2
Source Project: lucene-solr   Source File: TestIntervals.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that a regexp-backed multi-term intervals source produces the
 * expected intervals, and that exceeding the term-expansion limit throws an
 * IllegalStateException with a descriptive message.
 */
public void testMultiTerm() throws IOException {
  RegExp re = new RegExp("p.*e");
  IntervalsSource source = Intervals.multiterm(new CompiledAutomaton(re.toAutomaton()), re.toString());

  // Expected intervals per document for field1.
  checkIntervals(source, "field1", 5, new int[][]{
      {},
      { 0, 0, 1, 1, 3, 3, 4, 4, 6, 6, 7, 7 },
      { 0, 0, 1, 1, 3, 3, 4, 4, 6, 6, 7, 7 },
      { 7, 7 },
      { 0, 0, 1, 1, 3, 3, 4, 4, 6, 6, 7, 7 },
      { 0, 0 }
  });

  // With a limit of 1 the automaton expands to more than one term, so
  // iterating the intervals must fail with the message asserted below.
  IllegalStateException e = expectThrows(IllegalStateException.class, () -> {
    IntervalsSource s = Intervals.multiterm(new CompiledAutomaton(re.toAutomaton()), 1, re.toString());
    for (LeafReaderContext ctx : searcher.getIndexReader().leaves()) {
      s.intervals("field1", ctx);
    }
  });
  assertEquals("Automaton [\\p(.)*\\e] expanded to too many terms (limit 1)", e.getMessage());

  checkVisits(source, 1);
}
 
Example 3
Source Project: lucene-solr   Source File: TestMockAnalyzer.java    License: Apache License 2.0 6 votes vote down vote up
/** Test a configuration where two characters makes a term */
public void testTwoChars() throws Exception {
  // Tokenizer automaton: every two consecutive characters form one token.
  CharacterRunAutomaton single =
      new CharacterRunAutomaton(new RegExp("..").toAutomaton());
  Analyzer a = new MockAnalyzer(random(), single, false);
  // "foobar" -> three two-char tokens with matching start/end offsets.
  assertAnalyzesTo(a, "foobar",
      new String[] { "fo", "ob", "ar"},
      new int[] { 0, 2, 4 },
      new int[] { 2, 4, 6 }
  );
  // make sure when last term is a "partial" match that end() is correct
  assertTokenStreamContents(a.tokenStream("bogus", "fooba"),
      new String[] { "fo", "ob" },
      new int[] { 0, 2 },
      new int[] { 2, 4 },
      new int[] { 1, 1 },
      5
  );
  checkRandomData(random(), a, 100);
}
 
Example 4
Source Project: lucene-solr   Source File: TestMockAnalyzer.java    License: Apache License 2.0 6 votes vote down vote up
/** Test a configuration where three characters makes a term */
public void testThreeChars() throws Exception {
  CharacterRunAutomaton threeChars =
      new CharacterRunAutomaton(new RegExp("...").toAutomaton());
  Analyzer analyzer = new MockAnalyzer(random(), threeChars, false);

  // "foobar" splits into two complete three-character terms.
  assertAnalyzesTo(analyzer, "foobar",
      new String[] { "foo", "bar"},
      new int[] { 0, 3 },
      new int[] { 3, 6 }
  );

  // make sure when last term is a "partial" match that end() is correct
  assertTokenStreamContents(analyzer.tokenStream("bogus", "fooba"),
      new String[] { "foo" },
      new int[] { 0 },
      new int[] { 3 },
      new int[] { 1 },
      5
  );

  checkRandomData(random(), analyzer, 100);
}
 
Example 5
Source Project: lucene-solr   Source File: TestMockAnalyzer.java    License: Apache License 2.0 6 votes vote down vote up
/** Test a configuration where word starts with one uppercase */
public void testUppercase() throws Exception {
  CharacterRunAutomaton capitalized =
      new CharacterRunAutomaton(new RegExp("[A-Z][a-z]*").toAutomaton());
  Analyzer analyzer = new MockAnalyzer(random(), capitalized, false);

  // Consecutive capitals each become their own (possibly single-letter) term.
  assertAnalyzesTo(analyzer, "FooBarBAZ",
      new String[] { "Foo", "Bar", "B", "A", "Z"},
      new int[] { 0, 3, 6, 7, 8 },
      new int[] { 3, 6, 7, 8, 9 }
  );

  // A leading lowercase character does not start a term and is skipped.
  assertAnalyzesTo(analyzer, "aFooBar",
      new String[] { "Foo", "Bar" },
      new int[] { 1, 4 },
      new int[] { 4, 7 }
  );

  checkRandomData(random(), analyzer, 100);
}
 
Example 6
Source Project: lucene-solr   Source File: QueryParserTestBase.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks query parsing when the analyzer removes the stop words
 * "the" and "foo".
 */
public void testStopwords() throws Exception {
  CharacterRunAutomaton stopSet = new CharacterRunAutomaton(new RegExp("the|foo").toAutomaton());
  CommonQueryParserConfiguration qp = getParserConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet));
  // Both disjuncts are stop words, so the query must match nothing.
  Query result = getQuery("field:the OR field:foo",qp);
  assertNotNull("result is null and it shouldn't be", result);
  assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery || result instanceof MatchNoDocsQuery);
  if (result instanceof BooleanQuery) {
    assertEquals(0, ((BooleanQuery) result).clauses().size());
  }
  // Only "woo" survives, so the parse collapses to a single TermQuery.
  result = getQuery("field:woo OR field:the",qp);
  assertNotNull("result is null and it shouldn't be", result);
  assertTrue("result is not a TermQuery", result instanceof TermQuery);
  // Boosted clause: the BoostQuery wrapper must survive, wrapping a BooleanQuery.
  result = getQuery("(fieldX:xxxxx OR fieldy:xxxxxxxx)^2 AND (fieldx:the OR fieldy:foo)",qp);
  assertNotNull("result is null and it shouldn't be", result);
  assertTrue("result is not a BoostQuery", result instanceof BoostQuery);
  result = ((BoostQuery) result).getQuery();
  assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery);
  if (VERBOSE) System.out.println("Result: " + result);
  assertTrue(((BooleanQuery) result).clauses().size() + " does not equal: " + 2, ((BooleanQuery) result).clauses().size() == 2);
}
 
Example 7
Source Project: lucene-solr   Source File: TestRegexpQuery.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that a RegexpQuery resolves a named automaton
 * ({@code <quickBrown>}) through a custom AutomatonProvider.
 */
public void testCustomProvider() throws IOException {
  AutomatonProvider myProvider = new AutomatonProvider() {
    // automaton that matches quick, brown or bob (union of the three literals)
    private Automaton quickBrownAutomaton = Operations.union(Arrays
        .asList(Automata.makeString("quick"),
        Automata.makeString("brown"),
        Automata.makeString("bob")));
    
    @Override
    public Automaton getAutomaton(String name) {
      // Only the name "quickBrown" is recognized; anything else yields null.
      if (name.equals("quickBrown")) return quickBrownAutomaton;
      else return null;
    }
  };
  RegexpQuery query = new RegexpQuery(newTerm("<quickBrown>"), RegExp.ALL,
    myProvider, DEFAULT_MAX_DETERMINIZED_STATES);
  assertEquals(1, searcher.search(query, 5).totalHits.value);
}
 
Example 8
Source Project: lucene-solr   Source File: MockTokenFilterFactory.java    License: Apache License 2.0 6 votes vote down vote up
/** Creates a new MockTokenFilterFactory */
public MockTokenFilterFactory(Map<String, String> args) {
  super(args);
  // "stopset" must be one of the predefined sets; "stopregex" is free-form.
  String stopset = get(args, "stopset", Arrays.asList("english", "empty"), null, false);
  String stopregex = get(args, "stopregex");
  if (null != stopset) {
    // The two configuration styles are mutually exclusive.
    if (null != stopregex) {
      throw new IllegalArgumentException("Parameters stopset and stopregex cannot both be specified.");
    }
    if ("english".equalsIgnoreCase(stopset)) {
      filter = MockTokenFilter.ENGLISH_STOPSET;
    } else { // must be "empty"
      filter = MockTokenFilter.EMPTY_STOPSET;
    }
  } else if (null != stopregex) {
    // Compile the user-supplied regex into a character-run automaton.
    RegExp regex = new RegExp(stopregex);
    filter = new CharacterRunAutomaton(regex.toAutomaton());
  } else {
    throw new IllegalArgumentException
        ("Configuration Error: either the 'stopset' or the 'stopregex' parameter must be specified.");
  }
  // Any args entries not consumed above are unknown and rejected.
  if (!args.isEmpty()) {
    throw new IllegalArgumentException("Unknown parameters: " + args);
  }
}
 
Example 9
Source Project: mtas   Source File: MtasToken.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates the automaton map: each value is mapped to an automaton accepting
 * {@code prefix + DELIMITER + value} followed by any number of NUL
 * characters.
 *
 * @param prefix the prefix
 * @param valueList the value list; may be {@code null}, yielding an empty map
 * @param filter if {@code Boolean.TRUE}, regex metacharacters in each value
 *          are escaped so the value is matched literally; {@code null} is
 *          treated as false (the previous auto-unboxing in {@code if (filter)}
 *          threw a NullPointerException for null)
 * @return the map
 */
public static Map<String, Automaton> createAutomatonMap(String prefix,
    List<String> valueList, Boolean filter) {
  HashMap<String, Automaton> automatonMap = new HashMap<>();
  if (valueList != null) {
    // Boolean.TRUE.equals(...) avoids an NPE when filter is null.
    boolean escape = Boolean.TRUE.equals(filter);
    for (String item : valueList) {
      // Work on a copy instead of mutating the loop variable.
      String value = item;
      if (escape) {
        // Escape regex metacharacters so the value is matched literally.
        value = value.replaceAll("([\\\"\\)\\(\\<\\>\\.\\@\\#\\]\\[\\{\\}])",
            "\\\\$1");
      }
      automatonMap.put(value,
          new RegExp(prefix + MtasToken.DELIMITER + value + "\u0000*")
              .toAutomaton());
    }
  }
  return automatonMap;
}
 
Example 10
Source Project: Elasticsearch   Source File: MapperQueryParser.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds a regexp query for a single field, preferring the field type's own
 * regexp support when available and falling back to the default parser
 * implementation otherwise.
 *
 * The field's search analyzer is swapped in for the duration of the call
 * (unless an analyzer was forced) and always restored in the finally block.
 * With lenient settings, runtime failures yield null instead of propagating.
 */
private Query getRegexpQuerySingle(String field, String termStr) throws ParseException {
    currentFieldType = null;
    Analyzer oldAnalyzer = getAnalyzer();
    try {
        currentFieldType = parseContext.fieldMapper(field);
        if (currentFieldType != null) {
            if (!forcedAnalyzer) {
                // Use the field's configured search analyzer unless one was forced.
                setAnalyzer(parseContext.getSearchAnalyzer(currentFieldType));
            }
            Query query = null;
            if (currentFieldType.useTermQueryWithQueryString()) {
                query = currentFieldType.regexpQuery(termStr, RegExp.ALL, maxDeterminizedStates, multiTermRewriteMethod, parseContext);
            }
            if (query == null) {
                // Field type declined (or returned null): fall back to the default.
                query = super.getRegexpQuery(field, termStr);
            }
            return query;
        }
        return super.getRegexpQuery(field, termStr);
    } catch (RuntimeException e) {
        if (settings.lenient()) {
            return null;
        }
        throw e;
    } finally {
        // Always restore the analyzer that was active before this call.
        setAnalyzer(oldAnalyzer);
    }
}
 
Example 11
Source Project: Elasticsearch   Source File: IncludeExclude.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * @param include   The regular expression pattern for the terms to be included
 * @param exclude   The regular expression pattern for the terms to be excluded
 * @throws IllegalArgumentException if both patterns are {@code null}
 */
public IncludeExclude(RegExp include, RegExp exclude) {
    if (include == null && exclude == null) {
        // Descriptive message instead of a bare IllegalArgumentException.
        throw new IllegalArgumentException("The 'include' and 'exclude' patterns cannot both be null");
    }
    this.include = include;
    this.exclude = exclude;
    // Regex-based filtering and explicit value sets are mutually exclusive.
    this.includeValues = null;
    this.excludeValues = null;
}
 
Example 12
Source Project: Elasticsearch   Source File: IncludeExclude.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds an IncludeExclude from the configured state.
 *
 * @return a regex-based instance when include/exclude pattern strings are
 *         set, a value-based instance when explicit value sets are set, or
 *         {@code null} when neither is configured
 * @throws IllegalArgumentException if both regexes and value sets are given
 */
public IncludeExclude includeExclude() {
    RegExp includePattern =  include != null ? new RegExp(include) : null;
    RegExp excludePattern = exclude != null ? new RegExp(exclude) : null;
    if (includePattern != null || excludePattern != null) {
        if (includeValues != null || excludeValues != null) {
            throw new IllegalArgumentException("Can only use regular expression include/exclude or a set of values, not both");
        }
        return new IncludeExclude(includePattern, excludePattern);
    } else if (includeValues != null || excludeValues != null) {
        return new IncludeExclude(includeValues, excludeValues);
    } else {
        return null;
    }
}
 
Example 13
Source Project: lucene-solr   Source File: RegexCompletionQuery.java    License: Apache License 2.0 5 votes vote down vote up
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
  // An empty regex matches nothing, which keeps behavior consistent with
  // PrefixCompletionQuery: an empty term returns no results.
  final String pattern = getTerm().text();
  final Automaton automaton;
  if (pattern.isEmpty()) {
    automaton = Automata.makeEmpty();
  } else {
    automaton = new RegExp(pattern, flags).toAutomaton(maxDeterminizedStates);
  }
  return new CompletionWeight(this, automaton);
}
 
Example 14
/** Creates a new SimplePatternSplitTokenizerFactory */
public SimplePatternSplitTokenizerFactory(Map<String,String> args) {
  super(args);
  // Cap on determinized states guards against pathological patterns.
  maxDeterminizedStates = getInt(args, "maxDeterminizedStates", Operations.DEFAULT_MAX_DETERMINIZED_STATES);
  // Determinize the pattern automaton once, at factory-construction time.
  dfa = Operations.determinize(new RegExp(require(args, PATTERN)).toAutomaton(), maxDeterminizedStates);
  // Any args entries not consumed above are unknown and rejected.
  if (args.isEmpty() == false) {
    throw new IllegalArgumentException("Unknown parameters: " + args);
  }
}
 
Example 15
Source Project: lucene-solr   Source File: SimplePatternTokenizerFactory.java    License: Apache License 2.0 5 votes vote down vote up
/** Creates a new SimplePatternTokenizerFactory */
public SimplePatternTokenizerFactory(Map<String,String> args) {
  super(args);
  // Cap on determinized states guards against pathological patterns.
  maxDeterminizedStates = getInt(args, "maxDeterminizedStates", Operations.DEFAULT_MAX_DETERMINIZED_STATES);
  // Determinize the pattern automaton once, at factory-construction time.
  dfa = Operations.determinize(new RegExp(require(args, PATTERN)).toAutomaton(), maxDeterminizedStates);
  // Any args entries not consumed above are unknown and rejected.
  if (args.isEmpty() == false) {
    throw new IllegalArgumentException("Unknown parameters: " + args);
  }
}
 
Example 16
Source Project: lucene-solr   Source File: MinHashFilterTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a MockTokenizer whose tokens are shingles of {@code shingleSize}
 * whitespace-separated words, optionally pre-loaded with a reader over
 * {@code shingles}.
 */
private static Tokenizer createMockShingleTokenizer(int shingleSize, String shingles) {
  // One token = a word followed by (shingleSize - 1) further whitespace-delimited words.
  MockTokenizer tokenizer = new MockTokenizer(
      new CharacterRunAutomaton(new RegExp("[^ \t\r\n]+([ \t\r\n]+[^ \t\r\n]+){" + (shingleSize - 1) + "}").toAutomaton()),
      true);
  tokenizer.setEnableChecks(true);
  if (shingles != null) {
    tokenizer.setReader(new StringReader(shingles));
  }
  return tokenizer;
}
 
Example 17
Source Project: lucene-solr   Source File: LuceneTestCase.java    License: Apache License 2.0 5 votes vote down vote up
/** 
 * Terms api equivalency 
 *
 * Asserts that two Terms instances expose equivalent statistics, index
 * options flags, and term enumerations; with {@code deep} enabled, also
 * intersects both with the same random regexp automata and compares the
 * resulting enumerations.
 */
public void assertTermsEquals(String info, IndexReader leftReader, Terms leftTerms, Terms rightTerms, boolean deep) throws IOException {
  // Either both sides are null, or neither is.
  if (leftTerms == null || rightTerms == null) {
    assertNull(info, leftTerms);
    assertNull(info, rightTerms);
    return;
  }
  assertTermsStatisticsEquals(info, leftTerms, rightTerms);
  assertEquals("hasOffsets", leftTerms.hasOffsets(), rightTerms.hasOffsets());
  assertEquals("hasPositions", leftTerms.hasPositions(), rightTerms.hasPositions());
  assertEquals("hasPayloads", leftTerms.hasPayloads(), rightTerms.hasPayloads());

  TermsEnum leftTermsEnum = leftTerms.iterator();
  TermsEnum rightTermsEnum = rightTerms.iterator();
  assertTermsEnumEquals(info, leftReader, leftTermsEnum, rightTermsEnum, true);
  
  assertTermsSeekingEquals(info, leftTerms, rightTerms);
  
  if (deep) {
    // Intersect both sides with identical random automata and compare.
    int numIntersections = atLeast(3);
    for (int i = 0; i < numIntersections; i++) {
      String re = AutomatonTestUtil.randomRegexp(random());
      CompiledAutomaton automaton = new CompiledAutomaton(new RegExp(re, RegExp.NONE).toAutomaton());
      if (automaton.type == CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
        // TODO: test start term too
        TermsEnum leftIntersection = leftTerms.intersect(automaton, null);
        TermsEnum rightIntersection = rightTerms.intersect(automaton, null);
        assertTermsEnumEquals(info, leftReader, leftIntersection, rightIntersection, rarely());
      }
    }
  }
}
 
Example 18
Source Project: lucene-solr   Source File: TestMockAnalyzer.java    License: Apache License 2.0 5 votes vote down vote up
/** Test a configuration where each character is a term */
public void testSingleChar() throws Exception {
  CharacterRunAutomaton oneChar =
      new CharacterRunAutomaton(new RegExp(".").toAutomaton());
  Analyzer analyzer = new MockAnalyzer(random(), oneChar, false);

  // Every character of "foobar" becomes its own token.
  assertAnalyzesTo(analyzer, "foobar",
      new String[] { "f", "o", "o", "b", "a", "r" },
      new int[] { 0, 1, 2, 3, 4, 5 },
      new int[] { 1, 2, 3, 4, 5, 6 }
  );

  checkRandomData(random(), analyzer, 100);
}
 
Example 19
Source Project: lucene-solr   Source File: TestMockAnalyzer.java    License: Apache License 2.0 5 votes vote down vote up
/** Test a configuration that behaves a lot like LengthFilter */
public void testLength() throws Exception {
  // Stop set matching any token of five or more characters.
  CharacterRunAutomaton fiveOrMore = new CharacterRunAutomaton(new RegExp(".{5,}").toAutomaton());
  Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, true, fiveOrMore);

  // "toolong" and "notfine" are removed; position increments reflect the gaps.
  assertAnalyzesTo(analyzer, "ok toolong fine notfine",
      new String[] { "ok", "fine" },
      new int[] { 1, 2 });
}
 
Example 20
Source Project: lucene-solr   Source File: QueryParserBase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds a new RegexpQuery instance
 * @param regexp Regexp term
 * @return new RegexpQuery instance, using RegExp.ALL syntax flags, the
 *         configured maxDeterminizedStates cap, and the parser's
 *         multi-term rewrite method
 */
protected Query newRegexpQuery(Term regexp) {
  RegexpQuery query = new RegexpQuery(regexp, RegExp.ALL,
    maxDeterminizedStates);
  query.setRewriteMethod(multiTermRewriteMethod);
  return query;
}
 
Example 21
Source Project: lucene-solr   Source File: QueryParserTestBase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a removed stop word leaves a position gap: with position
 * increments enabled, "1 stop 2" parses to a phrase with "2" at position 2.
 */
public void testPhraseQueryPositionIncrements() throws Exception {
  // Case-insensitive automaton for the single stop word "stop".
  CharacterRunAutomaton stopStopList =
  new CharacterRunAutomaton(new RegExp("[sS][tT][oO][pP]").toAutomaton());

  CommonQueryParserConfiguration qp
      = getParserConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false, stopStopList));
  qp.setEnablePositionIncrements(true);

  // Expected phrase: "1" at position 0, "2" at position 2 (hole at 1).
  PhraseQuery.Builder phraseQuery = new PhraseQuery.Builder();
  phraseQuery.add(new Term("field", "1"));
  phraseQuery.add(new Term("field", "2"), 2);
  assertEquals(phraseQuery.build(), getQuery("\"1 stop 2\"",qp));
}
 
Example 22
Source Project: lucene-solr   Source File: TestDocValuesRewriteMethod.java    License: Apache License 2.0 5 votes vote down vote up
/** check that the # of hits is the same as if the query
 * is run against the inverted index
 */
protected void assertSame(String regexp) throws IOException {
  // Same regexp, two execution strategies: doc-values rewrite vs. the
  // default inverted-index execution.
  RegexpQuery docValues = new RegexpQuery(new Term(fieldName, regexp), RegExp.NONE);
  docValues.setRewriteMethod(new DocValuesRewriteMethod());
  RegexpQuery inverted = new RegexpQuery(new Term(fieldName, regexp), RegExp.NONE);
 
  TopDocs invertedDocs = searcher1.search(inverted, 25);
  TopDocs docValuesDocs = searcher2.search(docValues, 25);

  // Both strategies must produce identical hits.
  CheckHits.checkEqual(inverted, invertedDocs.scoreDocs, docValuesDocs.scoreDocs);
}
 
Example 23
Source Project: lucene-solr   Source File: TestDocValuesRewriteMethod.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * equals contract: identical patterns are equal, different patterns are
 * not, and equality still holds after setting the same rewrite method.
 */
public void testEquals() throws Exception {
  RegexpQuery a1 = new RegexpQuery(new Term(fieldName, "[aA]"), RegExp.NONE);
  RegexpQuery a2 = new RegexpQuery(new Term(fieldName, "[aA]"), RegExp.NONE);
  RegexpQuery b = new RegexpQuery(new Term(fieldName, "[bB]"), RegExp.NONE);
  assertEquals(a1, a2);
  assertFalse(a1.equals(b));
  
  // Re-check equality after changing the rewrite method on all three
  // (presumably the rewrite method participates in equals — confirmed by
  // re-asserting below).
  a1.setRewriteMethod(new DocValuesRewriteMethod());
  a2.setRewriteMethod(new DocValuesRewriteMethod());
  b.setRewriteMethod(new DocValuesRewriteMethod());
  assertEquals(a1, a2);
  assertFalse(a1.equals(b));
  QueryUtils.check(a1);
}
 
Example 24
Source Project: lucene-solr   Source File: TestSpanFirstQuery.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * SpanFirstQuery with a stop filter: the removed stop word "the" leaves a
 * position hole, so "quick" starts at position 1 in the first document and
 * position 0 in the second.
 */
public void testStartPositions() throws Exception {
  Directory dir = newDirectory();
  
  // mimic StopAnalyzer
  CharacterRunAutomaton stopSet = new CharacterRunAutomaton(new RegExp("the|a|of").toAutomaton());
  Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet);
  
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, analyzer);
  Document doc = new Document();
  doc.add(newTextField("field", "the quick brown fox", Field.Store.NO));
  writer.addDocument(doc);
  Document doc2 = new Document();
  doc2.add(newTextField("field", "quick brown fox", Field.Store.NO));
  writer.addDocument(doc2);
  
  IndexReader reader = writer.getReader();
  IndexSearcher searcher = newSearcher(reader);
  
  // user queries on "starts-with quick"
  SpanQuery sfq = spanFirstQuery(spanTermQuery("field", "quick"), 1);
  assertEquals(1, searcher.search(sfq, 10).totalHits.value);
  
  // user queries on "starts-with the quick"
  SpanQuery include = spanFirstQuery(spanTermQuery("field", "quick"), 2);
  sfq = spanNotQuery(include, sfq);
  assertEquals(1, searcher.search(sfq, 10).totalHits.value);
  
  // NOTE(review): these resources are not closed if an assertion above
  // fails; acceptable for a test, but try/finally would be safer.
  writer.close();
  reader.close();
  dir.close();
}
 
Example 25
Source Project: lucene-solr   Source File: TestRegexpRandom2.java    License: Apache License 2.0 5 votes vote down vote up
/** check that the # of hits is the same as from a very
 * simple regexpquery implementation.
 */
protected void assertSame(String regexp) throws IOException {   
  // "smart" is the real RegexpQuery; "dumb" is the brute-force reference.
  RegexpQuery smart = new RegexpQuery(new Term(fieldName, regexp), RegExp.NONE);
  DumbRegexpQuery dumb = new DumbRegexpQuery(new Term(fieldName, regexp), RegExp.NONE);
 
  TopDocs smartDocs = searcher1.search(smart, 25);
  TopDocs dumbDocs = searcher2.search(dumb, 25);

  // Both implementations must produce identical hits.
  CheckHits.checkEqual(smart, smartDocs.scoreDocs, dumbDocs.scoreDocs);
}
 
Example 26
Source Project: lucene-solr   Source File: TestFieldCacheRewriteMethod.java    License: Apache License 2.0 5 votes vote down vote up
/** Test fieldcache rewrite against filter rewrite */
@Override
protected void assertSame(String regexp) throws IOException {   
  // NOTE(review): despite the "fieldCache" name, this side uses
  // DocValuesRewriteMethod; the comparison is doc-values rewrite vs.
  // constant-score rewrite.
  RegexpQuery fieldCache = new RegexpQuery(new Term(fieldName, regexp), RegExp.NONE);
  fieldCache.setRewriteMethod(new DocValuesRewriteMethod());
  
  RegexpQuery filter = new RegexpQuery(new Term(fieldName, regexp), RegExp.NONE);
  filter.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_REWRITE);
  
  TopDocs fieldCacheDocs = searcher1.search(fieldCache, 25);
  TopDocs filterDocs = searcher2.search(filter, 25);

  // Both rewrite methods must produce identical hits.
  CheckHits.checkEqual(fieldCache, fieldCacheDocs.scoreDocs, filterDocs.scoreDocs);
}
 
Example 27
Source Project: lucene-solr   Source File: TestFieldCacheRewriteMethod.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * equals contract: identical patterns are equal, different patterns are
 * not, and equality still holds after setting the same rewrite method.
 */
public void testEquals() throws Exception {
  RegexpQuery a1 = new RegexpQuery(new Term(fieldName, "[aA]"), RegExp.NONE);
  RegexpQuery a2 = new RegexpQuery(new Term(fieldName, "[aA]"), RegExp.NONE);
  RegexpQuery b = new RegexpQuery(new Term(fieldName, "[bB]"), RegExp.NONE);
  assertEquals(a1, a2);
  assertFalse(a1.equals(b));
  
  // Re-check equality after changing the rewrite method on all three.
  a1.setRewriteMethod(new DocValuesRewriteMethod());
  a2.setRewriteMethod(new DocValuesRewriteMethod());
  b.setRewriteMethod(new DocValuesRewriteMethod());
  assertEquals(a1, a2);
  assertFalse(a1.equals(b));
  QueryUtils.check(a1);
}
 
Example 28
Source Project: lucene-solr   Source File: TestTermsEnum.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Terms.intersect must reject this CompiledAutomaton (a plain-string
 * pattern, presumably compiled to a non-NORMAL automaton type) and point
 * callers at CompiledAutomaton.getTermsEnum instead.
 */
public void testIntersectRegexp() throws Exception {
  Directory d = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), d);
  Document doc = new Document();
  doc.add(newStringField("field", "foobar", Field.Store.NO));
  w.addDocument(doc);
  IndexReader r = w.getReader();
  Terms terms = MultiTerms.getTerms(r, "field");
  CompiledAutomaton automaton = new CompiledAutomaton(new RegExp("do_not_match_anything").toAutomaton());
  // intersect() must throw with exactly this message.
  String message = expectThrows(IllegalArgumentException.class, () -> {terms.intersect(automaton, null);}).getMessage();
  assertEquals("please use CompiledAutomaton.getTermsEnum instead", message);
  r.close();
  w.close();
  d.close();
}
 
Example 29
Source Project: lucene-solr   Source File: TestQueryBuilder.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a removed stop word leaves a position gap: "1 stop 2"
 * becomes a phrase with "1" at position 0 and "2" at position 2.
 */
public void testPhraseQueryPositionIncrements() throws Exception {
  // Expected phrase with an explicit hole at position 1.
  PhraseQuery.Builder pqBuilder = new PhraseQuery.Builder();
  pqBuilder.add(new Term("field", "1"), 0);
  pqBuilder.add(new Term("field", "2"), 2);
  PhraseQuery expected = pqBuilder.build();
  // Case-insensitive automaton for the single stop word "stop".
  CharacterRunAutomaton stopList = new CharacterRunAutomaton(new RegExp("[sS][tT][oO][pP]").toAutomaton());

  Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false, stopList);

  QueryBuilder builder = new QueryBuilder(analyzer);
  assertEquals(expected, builder.createPhraseQuery("field", "1 stop 2"));
}
 
Example 30
Source Project: lucene-solr   Source File: TestBlockPostingsFormat3.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Asserts that two Terms instances are equivalent: statistics, term
 * enumeration, seeking behavior and — when {@code deep} — intersections
 * with the same random regexp automata.
 */
public void assertTerms(Terms leftTerms, Terms rightTerms, boolean deep) throws Exception {
  // Either both sides are null, or neither is.
  if (leftTerms == null || rightTerms == null) {
    assertNull(leftTerms);
    assertNull(rightTerms);
    return;
  }
  assertTermsStatistics(leftTerms, rightTerms);
  
  // NOTE: we don't assert hasOffsets/hasPositions/hasPayloads because they are allowed to be different

  // Only compare freqs/positions when both sides support them.
  boolean bothHaveFreqs = leftTerms.hasFreqs() && rightTerms.hasFreqs();
  boolean bothHavePositions = leftTerms.hasPositions() && rightTerms.hasPositions();
  TermsEnum leftTermsEnum = leftTerms.iterator();
  TermsEnum rightTermsEnum = rightTerms.iterator();
  assertTermsEnum(leftTermsEnum, rightTermsEnum, true, bothHaveFreqs, bothHavePositions);
  
  assertTermsSeeking(leftTerms, rightTerms);
  
  if (deep) {
    // Intersect both sides with identical random automata and compare.
    int numIntersections = atLeast(3);
    for (int i = 0; i < numIntersections; i++) {
      String re = AutomatonTestUtil.randomRegexp(random());
      CompiledAutomaton automaton = new CompiledAutomaton(new RegExp(re, RegExp.NONE).toAutomaton());
      if (automaton.type == CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
        // TODO: test start term too
        TermsEnum leftIntersection = leftTerms.intersect(automaton, null);
        TermsEnum rightIntersection = rightTerms.intersect(automaton, null);
        assertTermsEnum(leftIntersection, rightIntersection, rarely(), bothHaveFreqs, bothHavePositions);
      }
    }
  }
}