org.apache.lucene.search.spans.SpanMultiTermQueryWrapper Java Examples

The following examples show how to use org.apache.lucene.search.spans.SpanMultiTermQueryWrapper. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestUnifiedHighlighterStrictPhrases.java    From lucene-solr with Apache License 2.0 7 votes vote down vote up
/**
 * Highlights an unordered span-near (slop 3) of the wildcard {@code free*} and the term
 * {@code speech}, and checks which terms end up marked.
 */
public void testFilteredOutSpan() throws IOException {
  indexWriter.addDocument(newDoc("freezing cold stuff like stuff freedom of speech"));
  initReaderSearcherHighlighter();

  SpanQuery freeWildcard = new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("body", "free*")));
  SpanQuery speech = new SpanTermQuery(new Term("body", "speech"));
  SpanQuery[] nearClauses = {freeWildcard, speech};

  BooleanQuery.Builder builder = new BooleanQuery.Builder();
  builder.add(new SpanNearQuery(nearClauses, 3, false), BooleanClause.Occur.MUST);
  BooleanQuery query = builder.build();

  TopDocs hits = searcher.search(query, 10, Sort.INDEXORDER);
  String[] snippets = highlighter.highlight("body", query, hits);

  // "freezing" also starts with "free" but is expected to stay unmarked; each underlying term is
  // highlighted on its own, so the intermediate "of" is not enclosed either.
  String[] expected = {"freezing cold stuff like stuff <b>freedom</b> of <b>speech</b>"};
  assertArrayEquals(expected, snippets);
}
 
Example #2
Source File: TestUnifiedHighlighterStrictPhrases.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that an occurrence of a term which is not adjacent to the span-near match is *not*
 * highlighted (the second "charlie" in this case).
 * The query mixes a clause marked "REWRITES" with a freestanding "MTQ" clause.
 */
public void testRewriteAndMtq() throws IOException {
  indexWriter.addDocument(newDoc("alpha bravo charlie - charlie bravo alpha"));
  initReaderSearcherHighlighter();

  SpanQuery bravo = new SpanTermQuery(new Term("body", "bravo"));
  SpanQuery chPrefix = new SpanMultiTermQueryWrapper<>(new PrefixQuery(new Term("body", "ch"))); // REWRITES
  SpanNearQuery snq = new SpanNearQuery(new SpanQuery[]{bravo, chPrefix}, 0, true);

  BooleanQuery.Builder builder = new BooleanQuery.Builder();
  builder.add(snq, BooleanClause.Occur.MUST);
  builder.add(new PrefixQuery(new Term("body", "al")), BooleanClause.Occur.MUST); // MTQ
  builder.add(newPhraseQuery("body", "alpha bravo"), BooleanClause.Occur.MUST);
  // clause on a different field; it must not produce highlights in "body"
  builder.add(newPhraseQuery("title", "bravo alpha"), BooleanClause.Occur.SHOULD);
  BooleanQuery query = builder.build();

  TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
  String[] snippets = highlighter.highlight("body", query, topDocs);

  // With WEIGHT_MATCHES the phrase is marked as one passage, otherwise word by word.
  String[] expected = highlighter.getFlags("body").contains(HighlightFlag.WEIGHT_MATCHES)
      ? new String[]{"<b>alpha bravo</b> <b>charlie</b> - charlie bravo <b>alpha</b>"}
      : new String[]{"<b>alpha</b> <b>bravo</b> <b>charlie</b> - charlie bravo <b>alpha</b>"};
  assertArrayEquals(expected, snippets);

  // Second pass with multi-term handling switched off (phrase processing stays on):
  // only "alpha bravo" should be found.
  highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
  highlighter.setHandleMultiTermQuery(false);

  topDocs = searcher.search(query, 10, Sort.INDEXORDER);
  snippets = highlighter.highlight("body", query, topDocs);

  String[] expectedWithoutMtq = {"<b>alpha</b> <b>bravo</b> charlie - charlie bravo alpha"};
  assertArrayEquals(expectedWithoutMtq, snippets);
}
 
Example #3
Source File: AbstractEntitySearcher.java    From webdsl with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a boolean query holding, for every field in {@code qd.fields}, one SHOULD clause:
 * an ordered, zero-slop span-near over the space-separated regular expressions of
 * {@code qd.query}.
 *
 * @param qd query definition supplying the fields and the query string
 * @return the combined boolean query
 */
private Query createRegexQuery ( QueryDef qd ) {
    BooleanQuery combined = new BooleanQuery();
    for ( String field : qd.fields ) {
        String[] patterns = qd.query.split(" ");
        List<SpanQuery> clauses = new ArrayList<SpanQuery>();
        for ( String pattern : patterns ) {
            RegexQuery regex = new RegexQuery( new Term( field, pattern ) );
            regex.setRegexImplementation( new JavaUtilRegexCapabilities() );
            // A pattern that can match the empty string, like '(optional)?' or 'bla|a*',
            // gets its span wrapped in a SpanOrQuery to make it optional.
            boolean emptyable = Pattern.matches( pattern, "" );
            SpanQuery span = new SpanMultiTermQueryWrapper<RegexQuery>( regex );
            clauses.add( emptyable ? new SpanOrQuery( span ) : span );
        }

        SpanQuery[] clauseArray = clauses.toArray( new SpanQuery[clauses.size()] );
        combined.add( new SpanNearQuery( clauseArray, 0, true ), Occur.SHOULD );
    }
    return combined;
}
 
Example #4
Source File: MtasSpanRegexpQuery.java    From mtas with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a span regexp query for the given term.
 * The term text is split at the first {@code MtasToken.DELIMITER}: the part before it becomes
 * the prefix, the part after it the value ({@code null} when empty or when no delimiter occurs).
 *
 * @param term the term
 * @param singlePosition the single position
 */
public MtasSpanRegexpQuery(Term term, boolean singlePosition) {
  super(singlePosition ? 1 : null, singlePosition ? 1 : null);
  query = new SpanMultiTermQueryWrapper<>(new RegexpQuery(term));
  this.term = term;
  this.singlePosition = singlePosition;

  String text = term.text();
  int delimiterAt = text.indexOf(MtasToken.DELIMITER);
  if (delimiterAt < 0) {
    // no delimiter: the whole text is the prefix
    prefix = text;
    value = null;
  } else {
    prefix = text.substring(0, delimiterAt);
    String remainder = text.substring(delimiterAt + MtasToken.DELIMITER.length());
    value = remainder.isEmpty() ? null : remainder;
  }
}
 
Example #5
Source File: MtasSpanWildcardQuery.java    From mtas with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a span wildcard query for the given term.
 * The term text is split at the first {@code MtasToken.DELIMITER}: the part before it becomes
 * the prefix, the part after it the value ({@code null} when empty or when no delimiter occurs).
 *
 * @param term the term
 * @param singlePosition the single position
 */
public MtasSpanWildcardQuery(Term term, boolean singlePosition) {
  super(singlePosition ? 1 : null, singlePosition ? 1 : null);
  query = new SpanMultiTermQueryWrapper<>(new WildcardQuery(term));
  this.term = term;
  this.singlePosition = singlePosition;

  String text = term.text();
  int delimiterAt = text.indexOf(MtasToken.DELIMITER);
  if (delimiterAt < 0) {
    // no delimiter: the whole text is the prefix
    prefix = text;
    value = null;
  } else {
    prefix = text.substring(0, delimiterAt);
    String remainder = text.substring(delimiterAt + MtasToken.DELIMITER.length());
    value = remainder.isEmpty() ? null : remainder;
  }
}
 
Example #6
Source File: MtasSpanPrefixQuery.java    From mtas with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a span prefix query for the given term.
 * The term text is split at the first {@code MtasToken.DELIMITER}: the part before it becomes
 * the prefix, the part after it the value ({@code null} when empty or when no delimiter occurs).
 *
 * @param term the term
 * @param singlePosition the single position
 */
public MtasSpanPrefixQuery(Term term, boolean singlePosition) {
  super(singlePosition ? 1 : null, singlePosition ? 1 : null);
  query = new SpanMultiTermQueryWrapper<>(new PrefixQuery(term));
  this.term = term;
  this.singlePosition = singlePosition;

  String text = term.text();
  int delimiterAt = text.indexOf(MtasToken.DELIMITER);
  if (delimiterAt < 0) {
    // no delimiter: the whole text is the prefix
    prefix = text;
    value = null;
  } else {
    prefix = text.substring(0, delimiterAt);
    String remainder = text.substring(delimiterAt + MtasToken.DELIMITER.length());
    value = remainder.isEmpty() ? null : remainder;
  }
}
 
Example #7
Source File: TestHighlightingMatcher.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Registers an ordered span-near of the wildcard {@code term*} followed by {@code foo} with a
 * monitor and expects two highlight hits for the document "term1 foo".
 */
public void testWildcardProximityRewrites() throws Exception {
  SpanNearQuery.Builder nearBuilder = SpanNearQuery.newOrderedNearQuery(FIELD);
  nearBuilder.addClause(new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term(FIELD, "term*"))));
  nearBuilder.addClause(new SpanTermQuery(new Term(FIELD, "foo")));
  final SpanNearQuery snq = nearBuilder.build();

  try (Monitor monitor = newMonitor()) {
    monitor.register(new MonitorQuery("1", snq));

    HighlightsMatch match = monitor.match(buildDoc("term1 foo"), HighlightsMatch.MATCHER).matches("1");
    assertNotNull(match);
    assertEquals(2, match.getHitCount());
  }
}
 
Example #8
Source File: TestPayloadCheckQuery.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Runs a payload check over an ordered span-near made of four wildcard clauses and expects
 * document 529 as the only hit.
 */
public void testRewrite() throws IOException {
  String[] wildcards = {"fiv*", "hund*", "twent*", "nin*"};
  SpanQuery[] clauses = new SpanQuery[wildcards.length];
  for (int i = 0; i < wildcards.length; i++) {
    clauses[i] = new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("field", wildcards[i])));
  }
  SpanNearQuery sq = new SpanNearQuery(clauses, 0, true);

  // one payload per clause
  String[] payloadTexts = {"pos: 0", "pos: 1", "pos: 2", "pos: 3"};
  List<BytesRef> payloads = new ArrayList<>();
  for (String payloadText : payloadTexts) {
    payloads.add(new BytesRef(payloadText));
  }

  SpanPayloadCheckQuery query = new SpanPayloadCheckQuery(sq, payloads);

  // if query wasn't rewritten properly, the query would have failed with "Rewrite first!"
  checkHits(query, new int[]{529});
}
 
Example #9
Source File: SpanMultiTermQueryParser.java    From Elasticsearch with Apache License 2.0 6 votes vote down vote up
/**
 * Parses a span multi-term clause: a field named {@code MATCH_NAME} whose value is an object
 * holding a multi term query, which is returned wrapped in a {@link SpanMultiTermQueryWrapper}.
 *
 * @param parseContext context providing the parser and the inner-query parsing
 * @return the wrapped multi term query
 * @throws QueryParsingException if the expected field or object is missing, or the inner query
 *         is not a {@link MultiTermQuery}
 */
@Override
public Query parse(QueryParseContext parseContext) throws IOException, QueryParsingException {
    final XContentParser parser = parseContext.parser();

    // first token must be the MATCH_NAME field ...
    Token fieldToken = parser.nextToken();
    boolean isMatchField = MATCH_NAME.equals(parser.currentName())
            && fieldToken == XContentParser.Token.FIELD_NAME;
    if (!isMatchField) {
        throw new QueryParsingException(parseContext, "spanMultiTerm must have [" + MATCH_NAME + "] multi term query clause");
    }

    // ... followed by the start of its object value
    Token objectToken = parser.nextToken();
    if (objectToken != XContentParser.Token.START_OBJECT) {
        throw new QueryParsingException(parseContext, "spanMultiTerm must have [" + MATCH_NAME + "] multi term query clause");
    }

    Query inner = parseContext.parseInnerQuery();
    if (inner instanceof MultiTermQuery) {
        // advance past the token that follows the inner query
        parser.nextToken();
        return new SpanMultiTermQueryWrapper<>((MultiTermQuery) inner);
    }
    throw new QueryParsingException(parseContext, "spanMultiTerm [" + MATCH_NAME + "] must be of type multi term query");
}
 
Example #10
Source File: TestMemoryIndexAgainstDirectory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Searches a memory index with a regexp span query wrapped in a {@link SpanOrQuery} and expects
 * a result of 0.
 */
public void testPassesIfWrapped() throws IOException {
  SpanMultiTermQueryWrapper<RegexpQuery> regexSpan =
      new SpanMultiTermQueryWrapper<>(new RegexpQuery(new Term("field", "worl.")));
  SpanQuery wrappedquery = new SpanOrQuery(regexSpan);

  MemoryIndex mindex = randomMemoryIndex();
  mindex.addField("field", new MockAnalyzer(random()).tokenStream("field", "hello there"));

  // the SpanOrQuery-wrapped variant passes
  float searchResult = mindex.search(wrappedquery);
  assertEquals(0, searchResult, 0.00001f);
  TestUtil.checkReader(mindex.createSearcher().getIndexReader());
}
 
Example #11
Source File: TestSpanExtractors.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * For an ordered near of a regexp clause and the term "is", only the plain term is expected
 * among the collected terms.
 */
public void testOrderedNearWithWildcardExtractor() {
  SpanQuery regexpClause = new SpanMultiTermQueryWrapper<>(new RegexpQuery(new Term("field", "super.*cali.*")));
  SpanQuery isClause = new SpanTermQuery(new Term("field", "is"));
  SpanNearQuery q = new SpanNearQuery(new SpanQuery[]{regexpClause, isClause}, 0, true);

  Set<Term> expected = Collections.singleton(new Term("field", "is"));
  Set<Term> collected = collectTerms(q);
  assertEquals(expected, collected);
}
 
Example #12
Source File: TestPhraseWildcardQuery.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds an ordered {@link SpanNearQuery} over the given terms.
 * A term containing {@code *} or {@code ?} becomes a wrapped {@link WildcardQuery}; any other
 * term becomes a {@link SpanTermQuery}.
 *
 * @param field field all clauses apply to
 * @param slop slop passed to the near query
 * @param terms term texts, in clause order
 * @return the ordered near query
 */
protected SpanNearQuery spanNearQuery(String field, int slop, String... terms) {
  SpanQuery[] clauses = new SpanQuery[terms.length];
  int index = 0;
  for (String text : terms) {
    boolean hasWildcard = text.contains("*") || text.contains("?");
    if (hasWildcard) {
      clauses[index] = new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term(field, text)));
    } else {
      clauses[index] = new SpanTermQuery(new Term(field, text));
    }
    index++;
  }
  return new SpanNearQuery(clauses, slop, true);
}
 
Example #13
Source File: TestPayloadScoreQuery.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Searches with a {@link PayloadScoreQuery} built on a wrapped wildcard query; the search itself
 * is the check.
 */
public void testRewrite() throws IOException {
  WildcardQuery wildcard = new WildcardQuery(new Term("field", "xyz*"));
  SpanMultiTermQueryWrapper<WildcardQuery> wrapped = new SpanMultiTermQueryWrapper<>(wildcard);
  PayloadScoreQuery psq =
      new PayloadScoreQuery(wrapped, new AveragePayloadFunction(), PayloadDecoder.FLOAT_DECODER, false);

  // if query wasn't rewritten properly, the query would have failed with "Rewrite first!"
  searcher.search(psq, 1);
}
 
Example #14
Source File: TestMemoryIndexAgainstDirectory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Searches a memory index with a bare (unwrapped) regexp span query and expects a result of 0.
 */
public void testNullPointerException() throws IOException {
  SpanQuery wrappedquery = new SpanMultiTermQueryWrapper<>(new RegexpQuery(new Term("field", "worl.")));

  MemoryIndex mindex = randomMemoryIndex();
  mindex.addField("field", new MockAnalyzer(random()).tokenStream("field", "hello there"));

  // Original note: "This throws an NPE" (the scenario this test is named after).
  float searchResult = mindex.search(wrappedquery);
  assertEquals(0, searchResult, 0.00001f);
  TestUtil.checkReader(mindex.createSearcher().getIndexReader());
}
 
Example #15
Source File: TestUnifiedHighlighterMTQ.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Highlights a wildcard span query that is nested inside a custom {@code MyWrapperSpanQuery}
 * and expects "foxtrot" to be marked.
 */
public void testCustomSpanQueryHighlighting() throws Exception {
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
  Document doc = new Document();
  doc.add(new Field("body", "alpha bravo charlie delta echo foxtrot golf hotel india juliet", fieldType));
  doc.add(newTextField("id", "id", Field.Store.YES));
  iw.addDocument(doc);

  IndexReader ir = iw.getReader();
  iw.close();

  IndexSearcher searcher = newSearcher(ir);
  UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);

  // look the document up by its "id" field
  TermQuery byId = new TermQuery(new Term("id", "id"));
  int docId = searcher.search(byId, 1).scoreDocs[0].doc;

  SpanMultiTermQueryWrapper<WildcardQuery> wildcardQueryWrapper =
      new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("body", "foxtr*")));
  SpanQuery wrappedQuery = new MyWrapperSpanQuery(wildcardQueryWrapper);

  BooleanQuery.Builder builder = new BooleanQuery.Builder();
  builder.add(wrappedQuery, BooleanClause.Occur.SHOULD);
  BooleanQuery query = builder.build();

  String[] fields = {"body"};
  int[] docIds = {docId};
  int[] maxPassages = {2};

  String[] snippets = highlighter.highlightFields(fields, query, docIds, maxPassages).get("body");
  assertEquals(1, snippets.length);
  assertEquals("alpha bravo charlie delta echo <b>foxtrot</b> golf hotel india juliet", snippets[0]);
  ir.close();
}
 
Example #16
Source File: TestUnifiedHighlighterMTQ.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Indexes two documents and highlights a {@link SpanFirstQuery} around the wildcard
 * {@code te*}; both documents are expected to match with the wildcard hit marked.
 */
public void testSpanPositionCheck() throws Exception {
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);

  // a single Field/Document pair is reused for both documents
  Field body = new Field("body", "", fieldType);
  Document doc = new Document();
  doc.add(body);
  for (String text : new String[]{"This is a test.", "Test a one sentence document."}) {
    body.setStringValue(text);
    iw.addDocument(doc);
  }

  IndexReader ir = iw.getReader();
  iw.close();

  IndexSearcher searcher = newSearcher(ir);
  UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);

  SpanQuery wildcardSpan = new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("body", "te*")));
  Query query = new SpanFirstQuery(wildcardSpan, 1000000);

  TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
  assertEquals(2, topDocs.totalHits.value);

  String[] snippets = highlighter.highlight("body", query, topDocs);
  assertEquals(2, snippets.length);
  assertEquals("This is a <b>test</b>.", snippets[0]);
  assertEquals("<b>Test</b> a one sentence document.", snippets[1]);

  ir.close();
}
 
Example #17
Source File: TestUnifiedHighlighterMTQ.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Indexes two documents and highlights a {@link SpanNotQuery} that includes the wildcard
 * {@code te*} and excludes the term "bogus"; both documents are expected to match with the
 * wildcard hit marked.
 */
public void testSpanNot() throws Exception {
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);

  // a single Field/Document pair is reused for both documents
  Field body = new Field("body", "", fieldType);
  Document doc = new Document();
  doc.add(body);
  for (String text : new String[]{"This is a test.", "Test a one sentence document."}) {
    body.setStringValue(text);
    iw.addDocument(doc);
  }

  IndexReader ir = iw.getReader();
  iw.close();

  IndexSearcher searcher = newSearcher(ir);
  UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);

  SpanQuery included = new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("body", "te*")));
  SpanQuery excluded = new SpanTermQuery(new Term("body", "bogus"));
  Query query = new SpanNotQuery(included, excluded);

  TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
  assertEquals(2, topDocs.totalHits.value);

  String[] snippets = highlighter.highlight("body", query, topDocs);
  assertEquals(2, snippets.length);
  assertEquals("This is a <b>test</b>.", snippets[0]);
  assertEquals("<b>Test</b> a one sentence document.", snippets[1]);

  ir.close();
}
 
Example #18
Source File: TestUnifiedHighlighterMTQ.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Indexes two documents and highlights an unordered, zero-slop {@link SpanNearQuery} whose two
 * clauses are the same wildcard {@code te*} query; both documents are expected to match with
 * the wildcard hit marked.
 */
public void testSpanNear() throws Exception {
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);

  // a single Field/Document pair is reused for both documents
  Field body = new Field("body", "", fieldType);
  Document doc = new Document();
  doc.add(body);
  for (String text : new String[]{"This is a test.", "Test a one sentence document."}) {
    body.setStringValue(text);
    iw.addDocument(doc);
  }

  IndexReader ir = iw.getReader();
  iw.close();

  IndexSearcher searcher = newSearcher(ir);
  UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);

  SpanQuery wildcardSpan = new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("body", "te*")));
  SpanQuery[] sameClauseTwice = {wildcardSpan, wildcardSpan};
  Query query = new SpanNearQuery(sameClauseTwice, 0, false);

  TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
  assertEquals(2, topDocs.totalHits.value);

  String[] snippets = highlighter.highlight("body", query, topDocs);
  assertEquals(2, snippets.length);
  assertEquals("This is a <b>test</b>.", snippets[0]);
  assertEquals("<b>Test</b> a one sentence document.", snippets[1]);

  ir.close();
}
 
Example #19
Source File: TestUnifiedHighlighterMTQ.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Indexes two documents and highlights a {@link SpanOrQuery} with the wildcard {@code te*} as
 * its only clause; both documents are expected to match with the wildcard hit marked.
 */
public void testSpanOr() throws Exception {
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);

  // a single Field/Document pair is reused for both documents
  Field body = new Field("body", "", fieldType);
  Document doc = new Document();
  doc.add(body);
  for (String text : new String[]{"This is a test.", "Test a one sentence document."}) {
    body.setStringValue(text);
    iw.addDocument(doc);
  }

  IndexReader ir = iw.getReader();
  iw.close();

  IndexSearcher searcher = newSearcher(ir);
  UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);

  SpanQuery wildcardSpan = new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term("body", "te*")));
  SpanQuery[] singleClause = {wildcardSpan};
  Query query = new SpanOrQuery(singleClause);

  TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
  assertEquals(2, topDocs.totalHits.value);

  String[] snippets = highlighter.highlight("body", query, topDocs);
  assertEquals(2, snippets.length);
  assertEquals("This is a <b>test</b>.", snippets[0]);
  assertEquals("<b>Test</b> a one sentence document.", snippets[1]);

  ir.close();
}
 
Example #20
Source File: TestUnifiedHighlighterMTQ.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Indexes two documents and highlights the wildcard {@code te*} wrapped in a
 * {@link SpanBoostQuery}; both documents are expected to match with the wildcard hit marked.
 */
public void testSpanWildcard() throws Exception {
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);

  // a single Field/Document pair is reused for both documents
  Field body = new Field("body", "", fieldType);
  Document doc = new Document();
  doc.add(body);
  for (String text : new String[]{"This is a test.", "Test a one sentence document."}) {
    body.setStringValue(text);
    iw.addDocument(doc);
  }

  IndexReader ir = iw.getReader();
  iw.close();

  IndexSearcher searcher = newSearcher(ir);
  UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);

  // the boost wrapper is there to show the highlighter also looks inside it
  WildcardQuery wildcard = new WildcardQuery(new Term("body", "te*"));
  Query query = new SpanBoostQuery(new SpanMultiTermQueryWrapper<>(wildcard), 2.0f);

  TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
  assertEquals(2, topDocs.totalHits.value);

  String[] snippets = highlighter.highlight("body", query, topDocs);
  assertEquals(2, snippets.length);
  assertEquals("This is a <b>test</b>.", snippets[0]);
  assertEquals("<b>Test</b> a one sentence document.", snippets[1]);

  ir.close();
}
 
Example #21
Source File: TestUnifiedHighlighterStrictPhrases.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
   * Same scenario as {@link #testRewriteAndMtq}, but without a freestanding MTQ clause.
   */
  public void testRewrite() throws IOException {
    indexWriter.addDocument(newDoc("alpha bravo charlie - charlie bravo alpha"));
    initReaderSearcherHighlighter();

    SpanQuery bravo = new SpanTermQuery(new Term("body", "bravo"));
    SpanQuery chPrefix = new SpanMultiTermQueryWrapper<>(new PrefixQuery(new Term("body", "ch"))); // REWRITES
    SpanNearQuery snq = new SpanNearQuery(new SpanQuery[]{bravo, chPrefix}, 0, true);

    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    builder.add(snq, BooleanClause.Occur.MUST);
    // deliberately no PrefixQuery on "al" here: this variant has no freestanding MTQ
    builder.add(newPhraseQuery("body", "alpha bravo"), BooleanClause.Occur.MUST);
    // clause on a different field; it must not produce highlights in "body"
    builder.add(newPhraseQuery("title", "bravo alpha"), BooleanClause.Occur.SHOULD);
    BooleanQuery query = builder.build();

    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    String[] snippets = highlighter.highlight("body", query, topDocs);

    // With WEIGHT_MATCHES the phrase is marked as one passage, otherwise word by word.
    String[] expected = highlighter.getFlags("body").contains(HighlightFlag.WEIGHT_MATCHES)
        ? new String[]{"<b>alpha bravo</b> <b>charlie</b> - charlie bravo alpha"}
        : new String[]{"<b>alpha</b> <b>bravo</b> <b>charlie</b> - charlie bravo alpha"};
    assertArrayEquals(expected, snippets);

    // Second pass with multi-term handling switched off (phrase processing stays on):
    // only "alpha bravo" should be found.
    highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
    highlighter.setHandleMultiTermQuery(false);

    topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    snippets = highlighter.highlight("body", query, topDocs);

    String[] expectedWithoutMtq = {"<b>alpha</b> <b>bravo</b> charlie - charlie bravo alpha"};
    assertArrayEquals(expectedWithoutMtq, snippets);
  }
 
Example #22
Source File: TestSpanExtractors.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * A wrapped regexp query is expected to yield exactly one collected term, in the
 * {@code TermFilteredPresearcher.ANYTOKEN_FIELD} field.
 */
public void testSpanMultiTerms() {
  RegexpQuery regexp = new RegexpQuery(new Term("field", "term.*"));
  SpanQuery q = new SpanMultiTermQueryWrapper<>(regexp);

  Set<Term> collected = collectTerms(q);
  assertEquals(1, collected.size());

  Term onlyTerm = collected.iterator().next();
  assertEquals(TermFilteredPresearcher.ANYTOKEN_FIELD, onlyTerm.field());
}
 
Example #23
Source File: QueryParser.java    From FXDesktopSearch with Apache License 2.0 4 votes vote down vote up
/**
     * Turns a user query string into a Lucene query on the given field.
     * <p>
     * Required terms become span clauses (wildcard, fuzzy or plain term). With more than one
     * clause, an ordered exact-sequence near query and unordered near queries for slops 0 to 9
     * are added as boosted SHOULD clauses. The required terms are then added with MUST, and the
     * not-required terms with MUST_NOT.
     *
     * @param aQuery the raw query string
     * @param aSearchField the field to search in
     * @return the assembled boolean query
     */
public Query parse(final String aQuery, final String aSearchField) throws IOException {

        final var theTokenizer = new QueryTokenizer(aQuery);
        final var theBuilder = new BooleanQuery.Builder();

        if (!theTokenizer.getRequiredTerms().isEmpty()) {

            // One span clause per required term that survives tokenization
            final List<SpanQuery> theClauses = new ArrayList<>();
            for (final var theTerm : theTokenizer.getRequiredTerms()) {
                if (QueryUtils.isWildCard(theTerm)) {
                    theClauses.add(new SpanMultiTermQueryWrapper<>(new WildcardQuery(new Term(aSearchField, theTerm))));
                    continue;
                }
                if (QueryUtils.isFuzzy(theTerm)) {
                    theClauses.add(new SpanMultiTermQueryWrapper<>(new FuzzyQuery(new Term(aSearchField, theTerm))));
                    continue;
                }
                // Plain term: it may be removed by tokenization (stopwords and so on)
                final var theToken = toToken(theTerm, aSearchField);
                if (StringUtils.isEmpty(theToken)) {
                    continue;
                }
                theClauses.add(new SpanTermQuery(new Term(aSearchField, theToken)));
            }

            if (theClauses.size() > 1) {
                // The terms in their original order with no gaps get the highest boost
                final SpanQuery theExactMatchQuery = new SpanNearQuery(theClauses.toArray(new SpanQuery[0]), 0, true);
                theBuilder.add(new BoostQuery(theExactMatchQuery, 61), BooleanClause.Occur.SHOULD);

                // Unordered matches for slops 0..9; the boost shrinks as the slop grows
                final var theMaxEditDistance = 10;
                for (var theSlop = 0; theSlop < theMaxEditDistance; theSlop++) {
                    final SpanQuery theNearQuery = new SpanNearQuery(theClauses.toArray(new SpanQuery[0]), theSlop, false);
                    final var theBoost = 50 + theMaxEditDistance - theSlop;
                    theBuilder.add(new BoostQuery(theNearQuery, theBoost), BooleanClause.Occur.SHOULD);
                }
            }

            // Unboosted MUST clauses make sure the searched terms
            // are found in the document at all
            addToBooleanQuery(theTokenizer.getRequiredTerms(), aSearchField, theBuilder, BooleanClause.Occur.MUST);
        }

        // Terms that must not occur in the search result
        addToBooleanQuery(theTokenizer.getNotRequiredTerms(), aSearchField, theBuilder, BooleanClause.Occur.MUST_NOT);

        return theBuilder.build();
    }