Java Code Examples for org.apache.lucene.search.PhraseQuery

The following examples show how to use org.apache.lucene.search.PhraseQuery. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: uyuni   Source File: NGramQueryParser.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Parses {@code queryText} with the superclass parser and, when the result is
 * a {@link PhraseQuery}, replaces it with an {@code NGramQuery}.
 *
 * @param defaultField field handed through to the superclass parser
 * @param queryText    text handed through to the superclass parser
 * @return the superclass result unchanged when it is not a {@code PhraseQuery};
 *         otherwise a new {@code NGramQuery} built from it and {@code useMust}
 * @throws ParseException declared for the superclass call
 */
protected Query getFieldQuery(String defaultField,
        String queryText) throws ParseException {
    Query parsed = super.getFieldQuery(defaultField, queryText);
    if (parsed instanceof PhraseQuery) {
        // An ngram, once parsed, becomes a series of smaller search terms that
        // are grouped together into a PhraseQuery. That PhraseQuery is handed
        // to NGramQuery, which breaks out each ngram term and combines all
        // ngrams together to form a BooleanQuery.
        return new NGramQuery((PhraseQuery) parsed, useMust);
    }
    log.debug("Returning default query.  No phrase query translation.");
    return parsed;
}
 
Example 2
Source Project: spacewalk   Source File: NGramQueryParser.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Delegates parsing to the superclass and converts a resulting
 * {@link PhraseQuery} into an {@code NGramQuery}.
 *
 * @param defaultField field forwarded to the superclass parser
 * @param queryText    text forwarded to the superclass parser
 * @return an {@code NGramQuery} wrapping the parsed phrase, or the superclass
 *         result as-is when it is not a {@code PhraseQuery}
 * @throws ParseException declared for the superclass call
 */
protected Query getFieldQuery(String defaultField,
        String queryText) throws ParseException {
    Query superResult = super.getFieldQuery(defaultField, queryText);
    boolean isPhrase = superResult instanceof PhraseQuery;
    if (!isPhrase) {
        log.debug("Returning default query.  No phrase query translation.");
        return superResult;
    }
    // A parsed ngram turns into a series of smaller search terms grouped into
    // a PhraseQuery. NGramQuery takes that PhraseQuery, breaks out each ngram
    // term and combines all ngrams together to form a BooleanQuery.
    PhraseQuery phrase = (PhraseQuery) superResult;
    return new NGramQuery(phrase, useMust);
}
 
Example 3
Source Project: lucene-solr   Source File: FieldQuery.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Flattens and expands {@code query}, then registers every resulting query in
 * its root map.
 *
 * @param query           the query to flatten
 * @param reader          passed to {@code flatten}, {@code saveTerms} and the root map
 * @param phraseHighlight when {@code false}, the individual terms of multi-term
 *                        phrase queries are additionally added to the root map
 * @param fieldMatch      stored in {@code this.fieldMatch}
 * @throws IOException declared for the helper calls
 */
public FieldQuery(Query query, IndexReader reader, boolean phraseHighlight, boolean fieldMatch) throws IOException {
  this.fieldMatch = fieldMatch;
  Set<Query> flattened = new LinkedHashSet<>();
  flatten( query, reader, flattened, 1f );
  saveTerms( flattened, reader );
  Collection<Query> expanded = expand( flattened );

  for( Query candidate : expanded ){
    // The root map is looked up and populated with the query as-is (boost wrappers included).
    QueryPhraseMap rootMap = getRootMap( candidate );
    rootMap.add( candidate, reader );

    // Peel off nested BoostQuery wrappers, multiplying their boosts together.
    float boost = 1f;
    Query inner = candidate;
    while( inner instanceof BoostQuery ){
      BoostQuery wrapper = (BoostQuery) inner;
      inner = wrapper.getQuery();
      boost *= wrapper.getBoost();
    }

    if( phraseHighlight || !( inner instanceof PhraseQuery ) ){
      continue;
    }
    // Without phrase highlighting, each term of a multi-term phrase is added on its own.
    Term[] terms = ((PhraseQuery) inner).getTerms();
    if( terms.length > 1 ){
      for( Term term : terms ){
        rootMap.addTerm( term, boost );
      }
    }
  }
}
 
Example 4
Source Project: lucene-solr   Source File: FieldQuery.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Returns the field name of {@code query}, or {@code null} when
 * {@code fieldMatch} is off.
 *
 * BoostQuery wrappers are removed first. TermQuery, PhraseQuery (field of the
 * first term) and MultiTermQuery are supported; anything else is rejected.
 *
 * @throws RuntimeException when the unwrapped query is of an unsupported type
 */
private String getKey( Query query ){
  if( !fieldMatch ){
    return null;
  }

  Query unwrapped = query;
  while( unwrapped instanceof BoostQuery ){
    unwrapped = ((BoostQuery) unwrapped).getQuery();
  }

  if( unwrapped instanceof TermQuery ){
    return ((TermQuery) unwrapped).getTerm().field();
  }
  if( unwrapped instanceof PhraseQuery ){
    // The first term's field stands for the whole phrase.
    return ((PhraseQuery) unwrapped).getTerms()[0].field();
  }
  if( unwrapped instanceof MultiTermQuery ){
    return ((MultiTermQuery) unwrapped).getField();
  }
  throw new RuntimeException( "query \"" + unwrapped.toString() + "\" must be flatten first." );
}
 
Example 5
Source Project: lucene-solr   Source File: FieldQuery.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Records the term texts of every flattened query in that query's term set.
 *
 * BoostQuery wrappers are removed before the term set is looked up. A
 * MultiTermQuery is only handled when {@code reader} is non-null; it is
 * rewritten against the reader and the rewritten clauses are read as term queries.
 *
 * @param flatQueries the queries to collect terms from
 * @param reader      used to rewrite multi-term queries; may be {@code null}
 * @throws IOException      declared for the rewrite call
 * @throws RuntimeException when a query is neither a TermQuery, a PhraseQuery,
 *                          nor a MultiTermQuery with a non-null reader
 */
void saveTerms( Collection<Query> flatQueries, IndexReader reader ) throws IOException{
  for( Query flatQuery : flatQueries ){
    Query inner = flatQuery;
    while( inner instanceof BoostQuery ){
      inner = ((BoostQuery) inner).getQuery();
    }

    // Looked up before the type dispatch, exactly once per query.
    Set<String> termSet = getTermSet( inner );

    if( inner instanceof TermQuery ){
      termSet.add( ((TermQuery) inner).getTerm().text() );
      continue;
    }
    if( inner instanceof PhraseQuery ){
      for( Term term : ((PhraseQuery) inner).getTerms() ){
        termSet.add( term.text() );
      }
      continue;
    }
    if( !( inner instanceof MultiTermQuery ) || reader == null ){
      throw new RuntimeException( "query \"" + inner.toString() + "\" must be flatten first." );
    }

    // The rewritten multi-term query is read as a BooleanQuery of TermQuery clauses.
    BooleanQuery rewritten = (BooleanQuery) inner.rewrite( reader );
    for( BooleanClause clause : rewritten ){
      termSet.add( ((TermQuery) clause.getQuery()).getTerm().text() );
    }
  }
}
 
Example 6
Source Project: lucene-solr   Source File: HighlighterTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Highlights a required phrase query combined with a term-range filter and
 * expects exactly two highlights.
 */
public void testGetBestFragmentsFilteredPhraseQuery() throws Exception {
  TestHighlightRunner runner = new TestHighlightRunner() {

    @Override
    public void run() throws Exception {
      numHighlights = 0;

      // Required phrase "john kennedy", filtered by the single-value range [john, john].
      PhraseQuery phrase = new PhraseQuery("contents", "john", "kennedy");
      BooleanQuery.Builder filtered = new BooleanQuery.Builder();
      filtered.add(phrase, Occur.MUST);
      TermRangeQuery johnOnly = TermRangeQuery.newStringRange("contents", "john", "john", true, true);
      filtered.add(johnOnly, Occur.FILTER);

      doSearching(filtered.build());
      doStandardHighlights(analyzer, searcher, hits, query, HighlighterTest.this);

      // Currently highlights "John" and "Kennedy" separately
      String failure = "Failed to find correct number of highlights " + numHighlights + " found";
      assertTrue(failure, numHighlights == 2);
    }
  };

  runner.start();
}
 
Example 7
Source Project: crate   Source File: StringFieldType.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@link PhraseQuery} on {@code field} from the tokens of {@code stream}.
 *
 * @param field               field every term is created on
 * @param stream              token stream to consume; it is reset here before iteration
 * @param slop                slop set on the phrase builder
 * @param enablePosIncrements when {@code true} each token advances the position by
 *                            its position increment, otherwise by exactly one
 * @return the built phrase query
 * @throws IOException declared for the token stream calls
 */
@Override
public Query phraseQuery(String field, TokenStream stream, int slop, boolean enablePosIncrements) throws IOException {
    PhraseQuery.Builder phrase = new PhraseQuery.Builder();
    phrase.setSlop(slop);

    TermToBytesRefAttribute termAttribute = stream.getAttribute(TermToBytesRefAttribute.class);
    PositionIncrementAttribute incrementAttribute = stream.getAttribute(PositionIncrementAttribute.class);

    stream.reset();
    // Starts at -1 so the first token lands on position 0 when advancing by one.
    for (int position = -1; stream.incrementToken(); ) {
        position += enablePosIncrements ? incrementAttribute.getPositionIncrement() : 1;
        phrase.add(new Term(field, termAttribute.getBytesRef()), position);
    }

    return phrase.build();
}
 
Example 8
Source Project: lucene-solr   Source File: TestUnifiedHighlighter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Highlights a sloppy phrase query on "title" and checks the single snippet.
 * The expected markup depends on whether the highlighter's flags for "title"
 * contain {@code WEIGHT_MATCHES}.
 */
public void testMatchesSlopBug() throws IOException {
  IndexReader reader = indexSomeFields();
  IndexSearcher searcher = newSearcher(reader);
  UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);

  Query sloppyPhrase = new PhraseQuery(2, "title", "this", "is", "the", "field");
  TopDocs hits = searcher.search(sloppyPhrase, 10, Sort.INDEXORDER);
  assertEquals(1, hits.totalHits.value);

  String[] snippets = highlighter.highlight("title", sloppyPhrase, hits, 10);
  assertEquals(1, snippets.length);

  boolean weightMatches = highlighter.getFlags("title").contains(HighlightFlag.WEIGHT_MATCHES);
  String expected = weightMatches
      ? "<b>This is the title field</b>."
      : "<b>This</b> <b>is</b> <b>the</b> title <b>field</b>.";
  assertEquals(expected, snippets[0]);

  reader.close();
}
 
Example 9
Source Project: lucene-solr   Source File: TestMemoryIndex.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@code MemoryIndex} from a document with two "field1" text values
 * and one "field2" string value, then checks term and phrase lookups against it.
 */
@Test
public void testBuildFromDocument() {

  Document document = new Document();
  document.add(new TextField("field1", "some text", Field.Store.NO));
  document.add(new TextField("field1", "some more text", Field.Store.NO));
  document.add(new StringField("field2", "untokenized text", Field.Store.NO));

  analyzer.setPositionIncrementGap(100);

  MemoryIndex index = MemoryIndex.fromDocument(document, analyzer);

  // Term lookups: "text" is expected to score on field1 only; field2 is
  // expected to score only for the whole string "untokenized text".
  TermQuery textInField1 = new TermQuery(new Term("field1", "text"));
  assertThat(index.search(textInField1), not(0.0f));
  TermQuery textInField2 = new TermQuery(new Term("field2", "text"));
  assertThat(index.search(textInField2), is(0.0f));
  TermQuery wholeField2 = new TermQuery(new Term("field2", "untokenized text"));
  assertThat(index.search(wholeField2), not(0.0f));

  // Phrase lookups: phrases inside one value are expected to score, while
  // "text some" (end of one value, start of the next) is expected not to.
  PhraseQuery someMoreText = new PhraseQuery("field1", "some", "more", "text");
  assertThat(index.search(someMoreText), not(0.0f));
  PhraseQuery someText = new PhraseQuery("field1", "some", "text");
  assertThat(index.search(someText), not(0.0f));
  PhraseQuery textSome = new PhraseQuery("field1", "text", "some");
  assertThat(index.search(textSome), is(0.0f));

}
 
Example 10
Source Project: lumongo   Source File: LumongoMultiFieldQueryParser.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Returns {@code q} with the given slop applied when it is a phrase-type query.
 *
 * A PhraseQuery is always rebuilt with the same terms and positions and the new
 * slop; a MultiPhraseQuery is rebuilt only when its slop differs. Any other
 * query is returned untouched.
 */
private Query applySlop(Query q, int slop) {
	if (q instanceof PhraseQuery) {
		PhraseQuery.Builder copy = new PhraseQuery.Builder();
		copy.setSlop(slop);
		PhraseQuery source = (PhraseQuery) q;
		org.apache.lucene.index.Term[] terms = source.getTerms();
		int[] positions = source.getPositions();
		// Carry every term over at its original position.
		int index = 0;
		for (org.apache.lucene.index.Term term : terms) {
			copy.add(term, positions[index]);
			index++;
		}
		return copy.build();
	}
	if (q instanceof MultiPhraseQuery) {
		MultiPhraseQuery source = (MultiPhraseQuery) q;
		if (slop == source.getSlop()) {
			return q;
		}
		return new MultiPhraseQuery.Builder(source).setSlop(slop).build();
	}
	return q;
}
 
Example 11
Source Project: lucene-solr   Source File: MultiFieldQueryParser.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Applies {@code slop} to phrase-type queries.
 *
 * PhraseQuery: rebuilt with identical terms and positions plus the new slop.
 * MultiPhraseQuery: rebuilt only if the slop actually changes.
 * Everything else: returned as given.
 */
private Query applySlop(Query q, int slop) {
  if (q instanceof MultiPhraseQuery) {
    MultiPhraseQuery multiPhrase = (MultiPhraseQuery) q;
    boolean slopChanges = slop != multiPhrase.getSlop();
    return slopChanges
        ? new MultiPhraseQuery.Builder(multiPhrase).setSlop(slop).build()
        : q;
  }
  if (!(q instanceof PhraseQuery)) {
    return q;
  }

  PhraseQuery.Builder rebuilt = new PhraseQuery.Builder();
  rebuilt.setSlop(slop);
  PhraseQuery phrase = (PhraseQuery) q;
  org.apache.lucene.index.Term[] terms = phrase.getTerms();
  int[] positions = phrase.getPositions();
  // Terms keep their original positions in the rebuilt query.
  for (int termIndex = 0; termIndex < terms.length; termIndex++) {
    rebuilt.add(terms[termIndex], positions[termIndex]);
  }
  return rebuilt.build();
}
 
Example 12
Source Project: imhotep   Source File: LuceneQueryTranslator.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Dispatches a Lucene query to the {@code rewrite} overload for its concrete type.
 *
 * @param q         the Lucene query to translate
 * @param intFields passed through unchanged to the type-specific overload
 * @return whatever the matching overload returns
 * @throws IllegalArgumentException when {@code q} is none of the supported types
 */
public static Query rewrite(org.apache.lucene.search.Query q, Set<String> intFields) {
    // Checked in a fixed order; the first matching type wins.
    if (q instanceof TermQuery) {
        return rewrite((TermQuery) q, intFields);
    }
    if (q instanceof BooleanQuery) {
        return rewrite((BooleanQuery) q, intFields);
    }
    if (q instanceof RangeQuery) {
        return rewrite((RangeQuery) q, intFields);
    }
    if (q instanceof ConstantScoreRangeQuery) {
        return rewrite((ConstantScoreRangeQuery) q, intFields);
    }
    if (q instanceof PrefixQuery) {
        return rewrite((PrefixQuery) q, intFields);
    }
    if (q instanceof PhraseQuery) {
        return rewrite((PhraseQuery) q, intFields);
    }
    String typeName = q.getClass().getSimpleName();
    throw new IllegalArgumentException("unsupported lucene query type: " + typeName);
}
 
Example 13
Source Project: lucene-solr   Source File: TestQPHelper.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Parses a quoted phrase with position increments enabled and checks the
 * positions of the terms in the resulting {@link PhraseQuery}.
 */
public void testPositionIncrement() throws Exception {
  StandardQueryParser parser = new StandardQueryParser();
  parser.setAnalyzer(
      new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET));
  parser.setEnablePositionIncrements(true);

  // The words at positions 0, 2, 5, 7 and 8 are the ones expected to be
  // stopped, so the surviving terms should sit at 1, 3, 4, 6 and 9.
  String phraseText = "\"the words in poisitions pos02578 are stopped in this phrasequery\"";
  int[] expectedPositions = { 1, 3, 4, 6, 9 };

  PhraseQuery parsed = (PhraseQuery) parser.parse(phraseText, "a");
  Term[] terms = parsed.getTerms();
  int[] positions = parsed.getPositions();
  for (int i = 0; i < terms.length; i++) {
    String message = "term " + i + " = " + terms[i] + " has wrong term-position!";
    assertEquals(message, expectedPositions[i], positions[i]);
  }
}
 
Example 14
/**
 * Converts a {@link PhraseQuery} into an ordered {@code SpanNearQuery} on the
 * field of the phrase's first term.
 *
 * Each term becomes a {@code CustomSpanPayloadCheckQuery} clause; where two
 * consecutive terms are more than one position apart, a gap of the missing
 * positions is inserted before the later term. The phrase's slop is carried over.
 *
 * @param query            must be a {@code PhraseQuery}; it is cast without a check
 * @param queryTransformer not used by this method
 * @return the built span-near query
 */
@Override
public Query handle(Query query, QueryTransformer queryTransformer) {
    PhraseQuery phrase = (PhraseQuery) query;
    Term[] terms = phrase.getTerms();
    int[] positions = phrase.getPositions();

    SpanNearQuery.Builder near = new SpanNearQuery.Builder(terms[0].field(), true);
    for (int index = 0; index < terms.length; index++) {
        if (index > 0) {
            // Number of empty positions between the previous term and this one.
            int gap = (positions[index] - positions[index - 1]) - 1;
            if (gap > 0) {
                near.addGap(gap);
            }
        }
        SpanTermQuery termSpan = new SpanTermQuery(terms[index]);
        near.addClause(new CustomSpanPayloadCheckQuery(termSpan, Collections.singletonList(null)));
    }
    return near.setSlop(phrase.getSlop()).build();
}
 
Example 15
Source Project: lucene-solr   Source File: QueryParserTestBase.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Verifies the term positions of a phrase query parsed with position
 * increments enabled.
 */
public void testPositionIncrement() throws Exception {
  MockAnalyzer stoppingAnalyzer =
      new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET);
  CommonQueryParserConfiguration config = getParserConfig( stoppingAnalyzer );
  config.setEnablePositionIncrements(true);

  // Positions 0, 2, 5, 7 and 8 of the phrase are the words expected to be
  // stopped; the remaining terms are expected at 1, 3, 4, 6 and 9.
  String phraseText = "\"the words in poisitions pos02578 are stopped in this phrasequery\"";
  int[] expectedPositions = {1,3,4,6,9};

  PhraseQuery parsed = (PhraseQuery) getQuery(phraseText,config);
  Term[] terms = parsed.getTerms();
  int[] positions = parsed.getPositions();
  for (int index = 0; index < terms.length; index++) {
    String message = "term "+index+" = "+terms[index]+" has wrong term-position!";
    assertEquals(message,expectedPositions[index],positions[index]);
  }
}
 
Example 16
/**
 * Create a Lucene query using the bigrams in the given text.
 *
 * The text is split on runs of non-word characters. Every word is added as an
 * optional term on the "text" field, and every pair of consecutive words is
 * added as an optional phrase with a slop of 1.
 *
 * @param text the text to derive words and bigrams from
 * @return a boolean query whose clauses are all {@code SHOULD}; it has no
 *         clauses when the text contains no words
 */
public BooleanQuery queryFromSkipBigrams(String text) {
	BooleanQuery q = new BooleanQuery();
	String previousWord = null;
	for (String word : text.split("\\W+")) {
		// split() produces an empty first token when the text starts with a
		// non-word character (and for empty text); such a token is not a word
		// and must not become an empty-string term or bigram member.
		if (word.isEmpty()) {
			continue;
		}
		if (previousWord != null) {
			PhraseQuery pq = new PhraseQuery();
			pq.setSlop(1);
			pq.add(new Term("text", previousWord));
			pq.add(new Term("text", word));
			q.add(pq, BooleanClause.Occur.SHOULD);
		}
		q.add(new TermQuery(new Term("text", word)), BooleanClause.Occur.SHOULD);
		previousWord = word;
	}
	return q;
}
 
Example 17
Source Project: lucene-solr   Source File: QueryBuilder.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates simple phrase query from the cached tokenstream contents.
 *
 * Token boosts are multiplied together; when the product differs from
 * {@code DEFAULT_BOOST} the phrase is wrapped in a {@link BoostQuery}.
 *
 * @param field  field every term is created on
 * @param stream token stream to consume; it is reset here before iteration
 * @param slop   slop set on the phrase builder
 * @return the phrase query, or a boost query around it
 * @throws IOException declared for the token stream calls
 */
protected Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException {
  PhraseQuery.Builder phrase = new PhraseQuery.Builder();
  phrase.setSlop(slop);

  TermToBytesRefAttribute termAttribute = stream.getAttribute(TermToBytesRefAttribute.class);
  BoostAttribute boostAttribute = stream.addAttribute(BoostAttribute.class);
  PositionIncrementAttribute incrementAttribute = stream.getAttribute(PositionIncrementAttribute.class);

  float accumulatedBoost = DEFAULT_BOOST;
  stream.reset();
  // Starts at -1 so the first token lands on position 0 when advancing by one.
  for (int position = -1; stream.incrementToken(); ) {
    position += enablePositionIncrements ? incrementAttribute.getPositionIncrement() : 1;
    phrase.add(new Term(field, termAttribute.getBytesRef()), position);
    accumulatedBoost *= boostAttribute.getBoost();
  }

  PhraseQuery built = phrase.build();
  return accumulatedBoost == DEFAULT_BOOST ? built : new BoostQuery(built, accumulatedBoost);
}
 
Example 18
Source Project: lucene-solr   Source File: TestSpanSearchEquivalence.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * SpanNearQuery([A, B], 0, true) = "A B".
 *
 * When both random terms are equal only the matching sets are compared;
 * otherwise the scores are compared as well.
 */
public void testSpanNearVersusPhrase() throws Exception {
  Term first = randomTerm();
  Term second = randomTerm();

  SpanQuery[] clauses = {
      spanQuery(new SpanTermQuery(first)),
      spanQuery(new SpanTermQuery(second))
  };
  SpanQuery orderedNear = spanQuery(new SpanNearQuery(clauses, 0, true));
  PhraseQuery exactPhrase = new PhraseQuery(first.field(), first.bytes(), second.bytes());

  if (!first.equals(second)) {
    assertSameScores(orderedNear, exactPhrase);
  } else {
    assertSameSet(orderedNear, exactPhrase);
  }
}
 
Example 19
Source Project: lucene-solr   Source File: TestBooleanSimilarity.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Indexes one document under {@code BooleanSimilarity} and checks that a
 * matching sloppy phrase scores 1, and 7 when wrapped in a boost of 7.
 */
public void testPhraseScoreIsEqualToBoost() throws IOException {
  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
      newIndexWriterConfig().setSimilarity(new BooleanSimilarity()));
  Document document = new Document();
  document.add(new TextField("foo", "bar baz quux", Store.NO));
  writer.addDocument(document);

  DirectoryReader reader = writer.getReader();
  writer.close();
  IndexSearcher searcher = newSearcher(reader);
  searcher.setSimilarity(new BooleanSimilarity());

  // "bar ... quux" with slop 2 matches "bar baz quux".
  PhraseQuery sloppyPhrase = new PhraseQuery(2, "foo", "bar", "quux");

  TopDocs unboosted = searcher.search(sloppyPhrase, 2);
  assertEquals(1, unboosted.totalHits.value);
  assertEquals(1f, unboosted.scoreDocs[0].score, 0f);

  TopDocs boosted = searcher.search(new BoostQuery(sloppyPhrase, 7), 2);
  assertEquals(1, boosted.totalHits.value);
  assertEquals(7f, boosted.scoreDocs[0].score, 0f);

  reader.close();
  directory.close();
}
 
Example 20
Source Project: lucene-solr   Source File: TestReadOnlyIndex.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Opens the index at {@code indexPath} and runs a term count, a term search
 * with a stored-field check, and a phrase count against it.
 *
 * @return always {@code null}
 */
private Void doTestReadOnlyIndex() throws Exception {
  Directory dir = FSDirectory.open(indexPath);
  IndexReader reader = DirectoryReader.open(dir);
  IndexSearcher searcher = newSearcher(reader);

  // borrows from TestDemo, but not important to keep in sync with demo

  TermQuery longTermQuery = new TermQuery(new Term("fieldname", longTerm));
  assertEquals(1, searcher.count(longTermQuery));

  Query textQuery = new TermQuery(new Term("fieldname", "text"));
  TopDocs hits = searcher.search(textQuery, 1);
  assertEquals(1, hits.totalHits.value);
  // Every returned hit must carry the expected stored text.
  for (int hit = 0; hit < hits.scoreDocs.length; hit++) {
    Document hitDoc = searcher.doc(hits.scoreDocs[hit].doc);
    assertEquals(text, hitDoc.get("fieldname"));
  }

  // Test simple phrase query
  PhraseQuery toBe = new PhraseQuery("fieldname", "to", "be");
  assertEquals(1, searcher.count(toBe));

  reader.close();
  return null; // void
}
 
Example 21
Source Project: lucene-solr   Source File: TestDocument.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Indexes the document from {@code makeDocumentWithFields()} and expects the
 * phrase "test1 test2" on "indexed_not_tokenized" to match exactly one hit.
 */
public void testPositionIncrementMultiFields() throws Exception {
  Directory directory = newDirectory();
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);
  indexWriter.addDocument(makeDocumentWithFields());
  IndexReader indexReader = indexWriter.getReader();

  IndexSearcher indexSearcher = newSearcher(indexReader);
  PhraseQuery phrase = new PhraseQuery("indexed_not_tokenized", "test1", "test2");

  ScoreDoc[] matches = indexSearcher.search(phrase, 1000).scoreDocs;
  assertEquals(1, matches.length);

  doAssert(indexSearcher.doc(matches[0].doc), true);

  indexWriter.close();
  indexReader.close();
  directory.close();
}
 
Example 22
Source Project: onedev   Source File: NGramLuceneQuery.java    License: MIT License 5 votes vote down vote up
/**
 * Builds a phrase query on {@code fieldName} from the lower-cased
 * {@code fieldValue}, tokenized into ngrams of exactly {@code gramSize} characters.
 *
 * @throws RuntimeException wrapping any {@link IOException} from the tokenizer
 */
private static PhraseQuery build(String fieldName, String fieldValue, int gramSize) {
	Preconditions.checkArgument(fieldValue.length()>=gramSize);
	PhraseQuery.Builder phrase = new PhraseQuery.Builder();
	try (NGramTokenizer grams = new NGramTokenizer(gramSize, gramSize)) {
		grams.setReader(new StringReader(fieldValue.toLowerCase()));
		grams.reset();
		// The attribute instance is stable for the tokenizer, so fetch it once.
		CharTermAttribute gramText = grams.getAttribute(CharTermAttribute.class);
		while (grams.incrementToken()) {
			phrase.add(new Term(fieldName, gramText.toString()));
		}
	} catch (IOException e) {
		throw new RuntimeException(e);
	}
	return phrase.build();
}
 
Example 23
Source Project: jstarcraft-core   Source File: LuceneQueryTestCase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Runs a sloppy phrase query on "title" and expects two hits.
 */
@Test
public void testPhraseQuery() throws Exception {
    // Phrase query.
    // The slop between the phrase terms is set to 2, i.e. documents match when
    // the distance between "Story" and "The" is less than or equal to that.
    PhraseQuery.Builder phrase = new PhraseQuery.Builder();
    phrase.setSlop(2);
    phrase.add(new Term("title", "Story"));
    phrase.add(new Term("title", "The"));

    TopDocs hits = searcher.search(phrase.build(), 1000);
    Assert.assertEquals(2, hits.totalHits.value);
}
 
Example 24
/**
 * Checks a model built from three features (a term query, an exact phrase and
 * a phrase with slop 1) while passing {@code new int[] {1}} as the second
 * argument of {@code checkModelWithFeatures}.
 * NOTE(review): presumably that array restricts the model to a feature subset —
 * confirm against {@code checkModelWithFeatures}.
 */
public void testSubsetFeaturesTermQ() throws IOException {
    String userQuery = "brown cow";
    // Split once; all three features are derived from the same words.
    String[] words = userQuery.split(" ");

    List<Query> features = Arrays.asList(
            new TermQuery(new Term("field", words[0])),
            new PhraseQuery("field", words),
            new PhraseQuery(1, "field", words));
    checkModelWithFeatures(toPrebuildFeatureWithNoName(features), new int[] {1}, null);
}
 
Example 25
Source Project: act   Source File: Searcher.java    License: GNU General Public License v3.0 5 votes vote down vote up
/**
 * Builds a query that requires {@code synonym} as a phrase on {@code field}
 * and adds every entry of {@code KEYWORDS} as an optional term on the same field.
 *
 * @param synonym the synonym to search for; trimmed, lower-cased and split on whitespace
 * @param field   the field all terms are created on
 * @return the combined boolean query
 */
private BooleanQuery makeQuery(String synonym, String field) {
  BooleanQuery bq = new BooleanQuery();

  // Set the synonym as a required phrase query.  Phrase queries handle multi-word synonyms, but require construction.
  // Lower-casing is pinned to Locale.ROOT so the terms do not change with the JVM's
  // default locale (e.g. "I" lower-cases to a dotless i under a Turkish locale).
  String queryString = synonym.trim().toLowerCase(java.util.Locale.ROOT);
  PhraseQuery query = new PhraseQuery();
  for (String part : queryString.split("\\s+")) {
    query.add(new Term(field, part));
  }
  bq.add(query, BooleanClause.Occur.MUST);

  // Append all keywords as optional clauses.  The more of these we find, the higher the score will be.
  KEYWORDS.forEach(term -> bq.add(new TermQuery(new Term(field, term)), BooleanClause.Occur.SHOULD));

  return bq;
}
 
Example 26
Source Project: lucene-solr   Source File: FieldQuery.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks two phrase queries for overlap in both directions by delegating to
 * the term-array overload.
 *
 * Nothing happens when the slops differ, or when {@code fieldMatch} is on and
 * the first terms of the two phrases are on different fields.
 */
private void checkOverlap( Collection<Query> expandQueries, PhraseQuery a, float aBoost, PhraseQuery b, float bBoost ){
  final int slop = a.getSlop();
  if( slop != b.getSlop() ){
    return;
  }

  Term[] aTerms = a.getTerms();
  Term[] bTerms = b.getTerms();
  if( fieldMatch && !aTerms[0].field().equals( bTerms[0].field() ) ){
    return;
  }

  // Both slops are equal at this point, so one value serves both directions.
  checkOverlap( expandQueries, aTerms, bTerms, slop, aBoost );
  checkOverlap( expandQueries, bTerms, aTerms, slop, bBoost );
}
 
Example 27
/**
 * Checks a model built from two named prebuilt features: a term query on the
 * first word of the user query and a phrase query on all of its words.
 */
public void testExplainWithNames() throws IOException {
    String userQuery = "brown cow";

    TermQuery firstWord = new TermQuery(new Term("field",  userQuery.split(" ")[0]));
    PhraseQuery wholePhrase = new PhraseQuery("field", userQuery.split(" "));

    List<PrebuiltFeature> features = Arrays.asList(
            new PrebuiltFeature("funky_term_q", firstWord),
            new PrebuiltFeature("funky_phrase_q", wholePhrase));
    checkModelWithFeatures(features, null, null);
}
 
Example 28
/**
 * Checks a model built from two unnamed features: a term query on the first
 * word of the user query and a phrase query on all of its words.
 */
public void testTrainModel() throws IOException {
    String userQuery = "brown cow";

    TermQuery firstWord = new TermQuery(new Term("field",  userQuery.split(" ")[0]));
    PhraseQuery wholePhrase = new PhraseQuery("field", userQuery.split(" "));

    List<Query> features = Arrays.asList(firstWord, wholePhrase);
    checkModelWithFeatures(toPrebuildFeatureWithNoName(features), null, null);
}
 
Example 29
Source Project: lucene-solr   Source File: HighlighterTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Highlights content against two required boolean groups, each offering the
 * same phrase on fields f1 and f2, and expects seven highlights.
 */
public void testRepeatingTermsInMultBooleans() throws Exception {
  String content = "x y z a b c d e f g b c g";
  String f1 = "f1";
  String f2 = "f2";

  // The same two phrases, once per field.
  PhraseQuery abcdOnF1 = new PhraseQuery(f1, "a", "b", "c", "d");
  PhraseQuery abcdOnF2 = new PhraseQuery(f2, "a", "b", "c", "d");
  PhraseQuery bcgOnF1 = new PhraseQuery(f1, "b", "c", "g");
  PhraseQuery bcgOnF2 = new PhraseQuery(f2, "b", "c", "g");

  BooleanQuery.Builder root = new BooleanQuery.Builder();

  // First required group: "a b c d" on either field.
  BooleanQuery.Builder abcdGroup = new BooleanQuery.Builder();
  abcdGroup.add(abcdOnF1, Occur.SHOULD);
  abcdGroup.add(abcdOnF2, Occur.SHOULD);
  root.add(abcdGroup.build(), Occur.MUST);

  // Second required group: "b c g" on either field.
  BooleanQuery.Builder bcgGroup = new BooleanQuery.Builder();
  bcgGroup.add(bcgOnF1, Occur.SHOULD);
  bcgGroup.add(bcgOnF2, Occur.SHOULD);
  root.add(bcgGroup.build(), Occur.MUST);

  QueryScorer scorer = new QueryScorer(root.build(), f1);
  scorer.setExpandMultiTermQuery(false);

  Highlighter highlighter = new Highlighter(this, scorer);

  Analyzer whitespaceAnalyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);

  highlighter.getBestFragment(whitespaceAnalyzer, f1, content);

  String failure = "Failed to find correct number of highlights " + numHighlights + " found";
  assertTrue(failure, numHighlights == 7);
}
 
Example 30
Source Project: lucene-solr   Source File: HighlighterPhraseTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Indexes a single document from {@code TokenStreamSparse} with term vectors,
 * expects the phrase "did jump" to find no hits, and expects highlighting from
 * the term-vector token stream to equal highlighting from a fresh
 * {@code TokenStreamSparse}.
 */
public void testSparsePhrase() throws IOException, InvalidTokenOffsetsException {
  final String TEXT = "the fox did not jump";
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir,
      newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)));
  try {
    // Term vectors with offsets and positions are needed for the token stream rebuilt below.
    FieldType vectorType = new FieldType(TextField.TYPE_NOT_STORED);
    vectorType.setStoreTermVectorOffsets(true);
    vectorType.setStoreTermVectorPositions(true);
    vectorType.setStoreTermVectors(true);

    Document doc = new Document();
    doc.add(new Field(FIELD, new TokenStreamSparse(), vectorType));
    writer.addDocument(doc);
  } finally {
    writer.close();
  }

  IndexReader reader = DirectoryReader.open(dir);
  try {
    assertEquals(1, reader.numDocs());
    IndexSearcher searcher = newSearcher(reader);

    PhraseQuery didJump = new PhraseQuery(FIELD, "did", "jump");
    TopDocs hits = searcher.search(didJump, 1);
    assertEquals(0, hits.totalHits.value);

    Highlighter highlighter = new Highlighter(
        new SimpleHTMLFormatter(), new SimpleHTMLEncoder(),
        new QueryScorer(didJump));
    TokenStream fromTermVectors =
        TokenSources.getTermVectorTokenStreamOrNull(FIELD, reader.getTermVectors(0), -1);
    assertEquals(
        highlighter.getBestFragment(new TokenStreamSparse(), TEXT),
        highlighter.getBestFragment(fromTermVectors, TEXT));
  } finally {
    reader.close();
    dir.close();
  }
}