org.apache.lucene.search.highlight.TokenSources Java Examples

The following examples show how to use org.apache.lucene.search.highlight.TokenSources. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example #1

Source File: SearchTravRetHighlightTask.java From lucene-solr with Apache License 2.0

6 votes

@Override
public void withTopDocs(IndexSearcher searcher, Query q, TopDocs hits) throws Exception {
  IndexReader reader = searcher.getIndexReader();
  highlighter.setFragmentScorer(new QueryScorer(q));
  // highlighter.setTextFragmenter();  unfortunately no sentence mechanism, not even regex. Default here is trivial
  for (ScoreDoc scoreDoc : docIdOrder(hits.scoreDocs)) {
    Document document = reader.document(scoreDoc.doc, hlFields);
    Fields tvFields = termVecs ? reader.getTermVectors(scoreDoc.doc) : null;
    for (IndexableField indexableField : document) {
      TokenStream tokenStream;
      if (termVecs) {
        tokenStream = TokenSources.getTokenStream(indexableField.name(), tvFields,
            indexableField.stringValue(), analyzer, maxDocCharsToAnalyze);
      } else {
        tokenStream = analyzer.tokenStream(indexableField.name(), indexableField.stringValue());
      }
      // will close TokenStream:
      String[] fragments = highlighter.getBestFragments(tokenStream, indexableField.stringValue(), maxFrags);
      preventOptimizeAway = fragments.length;
    }
  }
}

Example #2

Source File: HighlightHelper.java From incubator-retired-blur with Apache License 2.0

4 votes

/**
 * NOTE: This method will not preserve the correct field types.
 * 
 * @param preTag
 * @param postTag
 */
public static Document highlight(int docId, Document document, Query query, FieldManager fieldManager,
    IndexReader reader, String preTag, String postTag) throws IOException, InvalidTokenOffsetsException {

  String fieldLessFieldName = fieldManager.getFieldLessFieldName();

  Query fixedQuery = fixSuperQuery(query, null, fieldLessFieldName);

  Analyzer analyzer = fieldManager.getAnalyzerForQuery();

  SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(preTag, postTag);
  Document result = new Document();
  for (IndexableField f : document) {
    String name = f.name();
    if (fieldLessFieldName.equals(name) || FIELDS_NOT_TO_HIGHLIGHT.contains(name)) {
      result.add(f);
      continue;
    }
    String text = f.stringValue();
    Number numericValue = f.numericValue();

    Query fieldFixedQuery;
    if (fieldManager.isFieldLessIndexed(name)) {
      fieldFixedQuery = fixSuperQuery(query, name, fieldLessFieldName);
    } else {
      fieldFixedQuery = fixedQuery;
    }

    if (numericValue != null) {
      if (shouldNumberBeHighlighted(name, numericValue, fieldFixedQuery)) {
        String numberHighlight = preTag + text + postTag;
        result.add(new StringField(name, numberHighlight, Store.YES));
      }
    } else {
      Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(fieldFixedQuery, name));
      TokenStream tokenStream = TokenSources.getAnyTokenStream(reader, docId, name, analyzer);
      TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, false, 10);
      for (int j = 0; j < frag.length; j++) {
        if ((frag[j] != null) && (frag[j].getScore() > 0)) {
          result.add(new StringField(name, frag[j].toString(), Store.YES));
        }
      }
    }
  }
  return result;
}