Java Code Examples for org.apache.lucene.search.highlight.InvalidTokenOffsetsException

The following examples show how to use org.apache.lucene.search.highlight.InvalidTokenOffsetsException. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: lucene-solr   Source File: HighlightCustomQueryTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Helper intended for use with <code>testHighlightingWithDefaultField()</code>.
 * <p>
 * Analyzes {@code text} with a {@code MockAnalyzer} and asks a {@code Highlighter}
 * for the single best fragment. The fragmenter size is {@code Integer.MAX_VALUE}.
 *
 * @param query     the query handed to the scorer
 * @param fieldName the field name used for the token stream and the scorer
 * @param text      the text to highlight
 * @return the highlighted fragment, or {@code text} itself when the highlighter
 *         returned an empty string
 */
private String highlightField(Query query, String fieldName,
    String text) throws IOException, InvalidTokenOffsetsException {
  try (MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true,
           MockTokenFilter.ENGLISH_STOPSET);
       TokenStream tokens = analyzer.tokenStream(fieldName, text)) {
    // The default SimpleHTMLFormatter is assumed to wrap matches in "<B>", "</B>".
    Highlighter highlighter = new Highlighter(
        new SimpleHTMLFormatter(),
        new MyQueryScorer(query, fieldName, FIELD_NAME));
    highlighter.setTextFragmenter(new SimpleFragmenter(Integer.MAX_VALUE));

    String highlighted = highlighter.getBestFragments(tokens, text, 1,
        "(FIELD TEXT TRUNCATED)");
    if (highlighted.length() == 0) {
      // Nothing was highlighted: hand back the input unchanged.
      return text;
    }
    return highlighted;
  }
}
 
Example 2
Source Project: lucene-solr   Source File: HighlightCustomQueryTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Exercises {@code highlightField} with a {@code CustomQuery} in two situations:
 * a term on {@code FIELD_NAME} highlighted against an unrelated field name, and a
 * term on the field "text" highlighted against {@code FIELD_NAME}.
 *
 * @throws IOException                  propagated from {@code highlightField}
 * @throws InvalidTokenOffsetsException propagated from {@code highlightField}
 */
public void testHighlightCustomQuery() throws IOException,
    InvalidTokenOffsetsException {
  String s1 = "I call our world Flatland, not because we call it so,";

  // Verify that a query against the default field results in text being
  // highlighted regardless of the field name.
  CustomQuery q = new CustomQuery(new Term(FIELD_NAME, "world"));

  String expected = "I call our <B>world</B> Flatland, not because we call it so,";
  String observed = highlightField(q, "SOME_FIELD_NAME", s1);
  if (VERBOSE) {
    System.out.println("Expected: \"" + expected + "\n" + "Observed: \""
        + observed);
  }
  assertEquals(
      "Query in the default field results in text for *ANY* field being highlighted",
      expected, observed);

  // Verify that a query against a named field does not result in any
  // highlighting when the query field name differs from the name of the
  // field being highlighted, which in this example happens to be the
  // default field name.
  q = new CustomQuery(new Term("text", "world"));

  expected = s1;
  observed = highlightField(q, FIELD_NAME, s1);
  if (VERBOSE) {
    System.out.println("Expected: \"" + expected + "\n" + "Observed: \""
        + observed);
  }
  // Assert on the same values that were just (optionally) printed, instead of
  // running the highlighter a second time.
  assertEquals(
      "Query in a named field does not result in highlighting when that field isn't in the query",
      expected, observed);
}
 
Example 3
Source Project: olat   Source File: SearchResultsImpl.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Highlight (bold, color) query words in the result document. Sets the highlight result
 * from the content field, falling back to the description field when the content yields
 * no highlighted fragment, and sets the highlighted title.
 * <p>
 * A field that is absent from {@code doc} is treated as producing no highlight instead of
 * failing with a {@code NullPointerException}.
 *
 * @param query          query whose terms are highlighted
 * @param analyzer       analyzer used to tokenize the stored field values
 * @param doc            document supplying the content, description and title fields
 * @param resultDocument receives the highlight result, the description flag and the title
 * @throws IOException propagated from the analyzer or the highlighter
 */
private void doHighlight(final Query query, final Analyzer analyzer, final Document doc, final ResultDocument resultDocument) throws IOException {
    final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(HIGHLIGHT_PRE_TAG, HIGHLIGHT_POST_TAG), new QueryScorer(query));
    // Get the 3 best fragments of each field, joined with the configured separator
    try {
        // highlight content
        final String content = doc.get(AbstractOlatDocument.CONTENT_FIELD_NAME);
        String highlightResult = "";
        if (content != null) {
            final TokenStream contentStream = analyzer.tokenStream(AbstractOlatDocument.CONTENT_FIELD_NAME, new StringReader(content));
            highlightResult = highlighter.getBestFragments(contentStream, content, 3, HIGHLIGHT_SEPARATOR);
        }

        // if no highlightResult is in content => look in description
        if (highlightResult.length() == 0) {
            final String description = doc.get(AbstractOlatDocument.DESCRIPTION_FIELD_NAME);
            if (description != null) {
                final TokenStream descriptionStream = analyzer.tokenStream(AbstractOlatDocument.DESCRIPTION_FIELD_NAME, new StringReader(description));
                highlightResult = highlighter.getBestFragments(descriptionStream, description, 3, HIGHLIGHT_SEPARATOR);
                // only flag the result as description-based when a description was actually examined
                resultDocument.setHighlightingDescription(true);
            }
        }
        resultDocument.setHighlightResult(highlightResult);

        // highlight title
        final String title = doc.get(AbstractOlatDocument.TITLE_FIELD_NAME);
        String highlightTitle = "";
        if (title != null) {
            final TokenStream titleStream = analyzer.tokenStream(AbstractOlatDocument.TITLE_FIELD_NAME, new StringReader(title));
            highlightTitle = highlighter.getBestFragments(titleStream, title, 3, " ");
        }
        resultDocument.setHighlightTitle(highlightTitle);
    } catch (final InvalidTokenOffsetsException e) {
        // best effort: a highlighting failure must not break the search result itself
        log.warn("Could not highlight search result: invalid token offsets", e);
    }
}
 
Example 4
Source Project: olat   Source File: SearchResultsImpl.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Highlight (bold, color) query words in the result document. Sets the highlight result
 * from the content field, falling back to the description field when the content yields
 * no highlighted fragment, and sets the highlighted title.
 * <p>
 * A field that is absent from {@code doc} is treated as producing no highlight instead of
 * failing with a {@code NullPointerException}.
 *
 * @param query          query whose terms are highlighted
 * @param analyzer       analyzer used to tokenize the stored field values
 * @param doc            document supplying the content, description and title fields
 * @param resultDocument receives the highlight result, the description flag and the title
 * @throws IOException propagated from the analyzer or the highlighter
 */
private void doHighlight(final Query query, final Analyzer analyzer, final Document doc, final ResultDocument resultDocument) throws IOException {
    final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(HIGHLIGHT_PRE_TAG, HIGHLIGHT_POST_TAG), new QueryScorer(query));
    // Get the 3 best fragments of each field, joined with the configured separator
    try {
        // highlight content
        final String content = doc.get(AbstractOlatDocument.CONTENT_FIELD_NAME);
        String highlightResult = "";
        if (content != null) {
            final TokenStream contentStream = analyzer.tokenStream(AbstractOlatDocument.CONTENT_FIELD_NAME, new StringReader(content));
            highlightResult = highlighter.getBestFragments(contentStream, content, 3, HIGHLIGHT_SEPARATOR);
        }

        // if no highlightResult is in content => look in description
        if (highlightResult.length() == 0) {
            final String description = doc.get(AbstractOlatDocument.DESCRIPTION_FIELD_NAME);
            if (description != null) {
                final TokenStream descriptionStream = analyzer.tokenStream(AbstractOlatDocument.DESCRIPTION_FIELD_NAME, new StringReader(description));
                highlightResult = highlighter.getBestFragments(descriptionStream, description, 3, HIGHLIGHT_SEPARATOR);
                // only flag the result as description-based when a description was actually examined
                resultDocument.setHighlightingDescription(true);
            }
        }
        resultDocument.setHighlightResult(highlightResult);

        // highlight title
        final String title = doc.get(AbstractOlatDocument.TITLE_FIELD_NAME);
        String highlightTitle = "";
        if (title != null) {
            final TokenStream titleStream = analyzer.tokenStream(AbstractOlatDocument.TITLE_FIELD_NAME, new StringReader(title));
            highlightTitle = highlighter.getBestFragments(titleStream, title, 3, " ");
        }
        resultDocument.setHighlightTitle(highlightTitle);
    } catch (final InvalidTokenOffsetsException e) {
        // best effort: a highlighting failure must not break the search result itself
        log.warn("Could not highlight search result: invalid token offsets", e);
    }
}
 
Example 5
Source Project: incubator-retired-blur   Source File: HighlightHelper.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Builds a new {@link Document} holding highlighted versions of the fields of
 * {@code document}.
 * <p>
 * NOTE: This method will not preserve the correct field types. Highlighted values are
 * always added as {@code StringField}s. Fields named like the field-less field, or
 * listed in {@code FIELDS_NOT_TO_HIGHLIGHT}, are copied over unchanged. Numeric fields
 * are only added when {@code shouldNumberBeHighlighted} accepts them, and text fields
 * only contribute fragments whose score is greater than zero.
 *
 * @param docId        id of the document, used to obtain the token stream from {@code reader}
 * @param document     the document whose fields are highlighted
 * @param query        the query to highlight against
 * @param fieldManager supplies the field-less field name, the query analyzer and the
 *                     per-field "field-less indexed" decision
 * @param reader       index reader used to obtain token streams
 * @param preTag       text placed before a highlighted value
 * @param postTag      text placed after a highlighted value
 * @return the newly built document
 */
public static Document highlight(int docId, Document document, Query query, FieldManager fieldManager,
    IndexReader reader, String preTag, String postTag) throws IOException, InvalidTokenOffsetsException {

  final String fieldLessFieldName = fieldManager.getFieldLessFieldName();
  final Query defaultQuery = fixSuperQuery(query, null, fieldLessFieldName);
  final Analyzer analyzer = fieldManager.getAnalyzerForQuery();
  final SimpleHTMLFormatter formatter = new SimpleHTMLFormatter(preTag, postTag);

  final Document highlighted = new Document();
  for (IndexableField field : document) {
    final String fieldName = field.name();

    // Pass-through fields are copied as-is.
    if (fieldLessFieldName.equals(fieldName) || FIELDS_NOT_TO_HIGHLIGHT.contains(fieldName)) {
      highlighted.add(field);
      continue;
    }

    final String value = field.stringValue();
    final Number number = field.numericValue();

    // Fields that are field-less indexed get a query rewritten for their own name.
    final Query fieldQuery = fieldManager.isFieldLessIndexed(fieldName)
        ? fixSuperQuery(query, fieldName, fieldLessFieldName)
        : defaultQuery;

    if (number != null) {
      // Numeric values are wrapped whole in the tags rather than tokenized.
      if (shouldNumberBeHighlighted(fieldName, number, fieldQuery)) {
        highlighted.add(new StringField(fieldName, preTag + value + postTag, Store.YES));
      }
      continue;
    }

    final Highlighter highlighter = new Highlighter(formatter, new QueryScorer(fieldQuery, fieldName));
    final TokenStream tokens = TokenSources.getAnyTokenStream(reader, docId, fieldName, analyzer);
    final TextFragment[] fragments = highlighter.getBestTextFragments(tokens, value, false, 10);
    for (TextFragment fragment : fragments) {
      if (fragment != null && fragment.getScore() > 0) {
        highlighted.add(new StringField(fieldName, fragment.toString(), Store.YES));
      }
    }
  }
  return highlighted;
}