org.apache.lucene.search.highlight.SimpleHTMLFormatter Java Examples

The following examples show how to use org.apache.lucene.search.highlight.SimpleHTMLFormatter. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: HighlightCustomQueryTest.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * This method intended for use with
 * <code>testHighlightingWithDefaultField()</code>
 */
private String highlightField(Query query, String fieldName,
    String text) throws IOException, InvalidTokenOffsetsException {
  try (MockAnalyzer mockAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE,true,
      MockTokenFilter.ENGLISH_STOPSET); TokenStream tokenStream = mockAnalyzer.tokenStream(fieldName, text)) {
    // Assuming "<B>", "</B>" used to highlight
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter();
    MyQueryScorer scorer = new MyQueryScorer(query, fieldName, FIELD_NAME);
    Highlighter highlighter = new Highlighter(formatter, scorer);
    highlighter.setTextFragmenter(new SimpleFragmenter(Integer.MAX_VALUE));

    String rv = highlighter.getBestFragments(tokenStream, text, 1,
        "(FIELD TEXT TRUNCATED)");
    return rv.length() == 0 ? text : rv;
  }
}
 
Example #2
Source File: LumongoSegment.java    From lumongo with Apache License 2.0 6 votes vote down vote up
private List<LumongoHighlighter> getHighlighterList(List<HighlightRequest> highlightRequests, Query q) {

		if (highlightRequests.isEmpty()) {
			return Collections.emptyList();
		}

		List<LumongoHighlighter> highlighterList = new ArrayList<>();

		for (HighlightRequest highlight : highlightRequests) {
			QueryScorer queryScorer = new QueryScorer(q, highlight.getField());
			queryScorer.setExpandMultiTermQuery(true);
			Fragmenter fragmenter = new SimpleSpanFragmenter(queryScorer, highlight.getFragmentLength());
			SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter(highlight.getPreTag(), highlight.getPostTag());
			LumongoHighlighter highlighter = new LumongoHighlighter(simpleHTMLFormatter, queryScorer, highlight);
			highlighter.setTextFragmenter(fragmenter);
			highlighterList.add(highlighter);
		}
		return highlighterList;
	}
 
Example #3
Source File: SearcherTest.java    From cjs_ssms with GNU General Public License v2.0 5 votes vote down vote up
public static void search(String indexDir, String q) throws Exception {
  Directory dir = FSDirectory.open(Paths.get(indexDir));
  IndexReader reader = DirectoryReader.open(dir);
  IndexSearcher is = new IndexSearcher(reader);
  // Analyzer analyzer=new StandardAnalyzer(); // 标准分词器
  SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
  QueryParser parser = new QueryParser("desc", analyzer);
  Query query = parser.parse(q);

  long start = System.currentTimeMillis();
  TopDocs hits = is.search(query, 10);
  long end = System.currentTimeMillis();
  System.out.println("匹配 " + q + " ,总共花费" + (end - start) + "毫秒" + "查询到" + hits.totalHits + "个记录");

  QueryScorer scorer = new QueryScorer(query);
  Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);
  SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<b><font color='red'>", "</font></b>");
  Highlighter highlighter = new Highlighter(simpleHTMLFormatter, scorer);
  highlighter.setTextFragmenter(fragmenter);
  for (ScoreDoc scoreDoc : hits.scoreDocs) {
    Document doc = is.doc(scoreDoc.doc);
    System.out.println(doc.get("city"));
    System.out.println(doc.get("desc"));
    String desc = doc.get("desc");
    if (desc != null) {
      TokenStream tokenStream = analyzer.tokenStream("desc", new StringReader(desc));
      System.out.println(highlighter.getBestFragment(tokenStream, desc));
    }
  }
  reader.close();
}
 
Example #4
Source File: LuceneQuery.java    From rdf4j with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
@Override
@Deprecated
public void highlight(IRI property) {
	Formatter formatter = new SimpleHTMLFormatter(SearchFields.HIGHLIGHTER_PRE_TAG,
			SearchFields.HIGHLIGHTER_POST_TAG);
	highlighter = new Highlighter(formatter, new QueryScorer(query));
}
 
Example #5
Source File: HtmlFormatter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
@Override
public Formatter getFormatter(String fieldName, SolrParams params ) 
{
  numRequests.inc();
  params = SolrParams.wrapDefaults(params, defaults);

  return new SimpleHTMLFormatter(
      params.getFieldParam(fieldName, HighlightParams.SIMPLE_PRE,  "<em>" ), 
      params.getFieldParam(fieldName, HighlightParams.SIMPLE_POST, "</em>"));
}
 
Example #6
Source File: SearchResultsImpl.java    From olat with Apache License 2.0 5 votes vote down vote up
/**
 * Highlight (bold,color) query words in result-document. Set HighlightResult for content or description.
 * 
 * @param query
 * @param analyzer
 * @param doc
 * @param resultDocument
 * @throws IOException
 */
private void doHighlight(final Query query, final Analyzer analyzer, final Document doc, final ResultDocument resultDocument) throws IOException {
    final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(HIGHLIGHT_PRE_TAG, HIGHLIGHT_POST_TAG), new QueryScorer(query));
    // Get 3 best fragments of content and seperate with a "..."
    try {
        // highlight content
        final String content = doc.get(AbstractOlatDocument.CONTENT_FIELD_NAME);
        TokenStream tokenStream = analyzer.tokenStream(AbstractOlatDocument.CONTENT_FIELD_NAME, new StringReader(content));
        String highlightResult = highlighter.getBestFragments(tokenStream, content, 3, HIGHLIGHT_SEPARATOR);

        // if no highlightResult is in content => look in description
        if (highlightResult.length() == 0) {
            final String description = doc.get(AbstractOlatDocument.DESCRIPTION_FIELD_NAME);
            tokenStream = analyzer.tokenStream(AbstractOlatDocument.DESCRIPTION_FIELD_NAME, new StringReader(description));
            highlightResult = highlighter.getBestFragments(tokenStream, description, 3, HIGHLIGHT_SEPARATOR);
            resultDocument.setHighlightingDescription(true);
        }
        resultDocument.setHighlightResult(highlightResult);

        // highlight title
        final String title = doc.get(AbstractOlatDocument.TITLE_FIELD_NAME);
        tokenStream = analyzer.tokenStream(AbstractOlatDocument.TITLE_FIELD_NAME, new StringReader(title));
        final String highlightTitle = highlighter.getBestFragments(tokenStream, title, 3, " ");
        resultDocument.setHighlightTitle(highlightTitle);
    } catch (final InvalidTokenOffsetsException e) {
        log.warn("", e);
    }
}
 
Example #7
Source File: SearchResultsImpl.java    From olat with Apache License 2.0 5 votes vote down vote up
/**
 * Highlight (bold,color) query words in result-document. Set HighlightResult for content or description.
 * 
 * @param query
 * @param analyzer
 * @param doc
 * @param resultDocument
 * @throws IOException
 */
private void doHighlight(final Query query, final Analyzer analyzer, final Document doc, final ResultDocument resultDocument) throws IOException {
    final Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(HIGHLIGHT_PRE_TAG, HIGHLIGHT_POST_TAG), new QueryScorer(query));
    // Get 3 best fragments of content and seperate with a "..."
    try {
        // highlight content
        final String content = doc.get(AbstractOlatDocument.CONTENT_FIELD_NAME);
        TokenStream tokenStream = analyzer.tokenStream(AbstractOlatDocument.CONTENT_FIELD_NAME, new StringReader(content));
        String highlightResult = highlighter.getBestFragments(tokenStream, content, 3, HIGHLIGHT_SEPARATOR);

        // if no highlightResult is in content => look in description
        if (highlightResult.length() == 0) {
            final String description = doc.get(AbstractOlatDocument.DESCRIPTION_FIELD_NAME);
            tokenStream = analyzer.tokenStream(AbstractOlatDocument.DESCRIPTION_FIELD_NAME, new StringReader(description));
            highlightResult = highlighter.getBestFragments(tokenStream, description, 3, HIGHLIGHT_SEPARATOR);
            resultDocument.setHighlightingDescription(true);
        }
        resultDocument.setHighlightResult(highlightResult);

        // highlight title
        final String title = doc.get(AbstractOlatDocument.TITLE_FIELD_NAME);
        tokenStream = analyzer.tokenStream(AbstractOlatDocument.TITLE_FIELD_NAME, new StringReader(title));
        final String highlightTitle = highlighter.getBestFragments(tokenStream, title, 3, " ");
        resultDocument.setHighlightTitle(highlightTitle);
    } catch (final InvalidTokenOffsetsException e) {
        log.warn("", e);
    }
}
 
Example #8
Source File: HighlightHelper.java    From incubator-retired-blur with Apache License 2.0 4 votes vote down vote up
/**
 * NOTE: This method will not preserve the correct field types.
 * 
 * @param preTag
 * @param postTag
 */
public static Document highlight(int docId, Document document, Query query, FieldManager fieldManager,
    IndexReader reader, String preTag, String postTag) throws IOException, InvalidTokenOffsetsException {

  String fieldLessFieldName = fieldManager.getFieldLessFieldName();

  Query fixedQuery = fixSuperQuery(query, null, fieldLessFieldName);

  Analyzer analyzer = fieldManager.getAnalyzerForQuery();

  SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(preTag, postTag);
  Document result = new Document();
  for (IndexableField f : document) {
    String name = f.name();
    if (fieldLessFieldName.equals(name) || FIELDS_NOT_TO_HIGHLIGHT.contains(name)) {
      result.add(f);
      continue;
    }
    String text = f.stringValue();
    Number numericValue = f.numericValue();

    Query fieldFixedQuery;
    if (fieldManager.isFieldLessIndexed(name)) {
      fieldFixedQuery = fixSuperQuery(query, name, fieldLessFieldName);
    } else {
      fieldFixedQuery = fixedQuery;
    }

    if (numericValue != null) {
      if (shouldNumberBeHighlighted(name, numericValue, fieldFixedQuery)) {
        String numberHighlight = preTag + text + postTag;
        result.add(new StringField(name, numberHighlight, Store.YES));
      }
    } else {
      Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(fieldFixedQuery, name));
      TokenStream tokenStream = TokenSources.getAnyTokenStream(reader, docId, name, analyzer);
      TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, false, 10);
      for (int j = 0; j < frag.length; j++) {
        if ((frag[j] != null) && (frag[j].getScore() > 0)) {
          result.add(new StringField(name, frag[j].toString(), Store.YES));
        }
      }
    }
  }
  return result;
}
 
Example #9
Source File: PostSearchServiceImpl.java    From mblog with GNU General Public License v3.0 4 votes vote down vote up
@Override
@PostStatusFilter
public Page<PostVO> search(Pageable pageable, String term) throws Exception {
    FullTextEntityManager fullTextEntityManager = Search.getFullTextEntityManager(entityManager);
    QueryBuilder builder = fullTextEntityManager.getSearchFactory().buildQueryBuilder().forEntity(Post.class).get();

    Query luceneQuery = builder
            .keyword()
            .fuzzy()
            .withEditDistanceUpTo(1)
            .withPrefixLength(1)
            .onFields("title", "summary", "tags")
            .matching(term).createQuery();

    FullTextQuery query = fullTextEntityManager.createFullTextQuery(luceneQuery, Post.class);
    query.setFirstResult((int) pageable.getOffset());
    query.setMaxResults(pageable.getPageSize());

    SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer();
    SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("<span style='color:red;'>", "</span>");
    QueryScorer scorer = new QueryScorer(luceneQuery);
    Highlighter highlighter = new Highlighter(formatter, scorer);

    List<Post> list = query.getResultList();
    List<PostVO> rets = list.stream().map(po -> {
        PostVO post = BeanMapUtils.copy(po);

        try {
            // 处理高亮
            String title = highlighter.getBestFragment(analyzer, "title", post.getTitle());
            String summary = highlighter.getBestFragment(analyzer, "summary", post.getSummary());

            if (StringUtils.isNotEmpty(title)) {
                post.setTitle(title);
            }
            if (StringUtils.isNotEmpty(summary)) {
                post.setSummary(summary);
            }
        } catch (Exception e) {
            log.error(e.getMessage(), e);
        }
        return post;
    }).collect(Collectors.toList());
    buildUsers(rets);
    return new PageImpl<>(rets, pageable, query.getResultSize());
}
 
Example #10
Source File: SearchManager.java    From maven-framework-project with MIT License 4 votes vote down vote up
public static void main(String[] args) throws Exception{
		ApplicationContext applicationContext=new ClassPathXmlApplicationContext("applicationContext.xml");
		SessionFactory sessionFactory = applicationContext.getBean("hibernate4sessionFactory",SessionFactory.class);
		FullTextSession fullTextSession = Search.getFullTextSession(sessionFactory.openSession());
		
		//使用Hibernate Search api查询 从多个字段匹配 name、description、authors.name
//		QueryBuilder qb = fullTextEntityManager.getSearchFactory().buildQueryBuilder().forEntity(Book.class ).get();
//		Query luceneQuery = qb.keyword().onFields("name","description","authors.name").matching("移动互联网").createQuery();
		
		//使用lucene api查询 从多个字段匹配 name、description、authors.name
		//使用庖丁分词器
		MultiFieldQueryParser queryParser=new MultiFieldQueryParser(Version.LUCENE_36, new String[]{"name","description","authors.name"}, new PaodingAnalyzer());
		Query luceneQuery=queryParser.parse("实战");
		
		FullTextQuery fullTextQuery =fullTextSession.createFullTextQuery(luceneQuery, Book.class);
		//设置每页显示多少条
		fullTextQuery.setMaxResults(5);
		//设置当前页
		fullTextQuery.setFirstResult(0);
		
		//高亮设置
		SimpleHTMLFormatter formatter=new SimpleHTMLFormatter("<b><font color='red'>", "<font/></b>");
		QueryScorer queryScorer=new QueryScorer(luceneQuery);
		Highlighter highlighter=new Highlighter(formatter, queryScorer);

		@SuppressWarnings("unchecked")
		List<Book> resultList = fullTextQuery.list();
		System.out.println("共查找到["+resultList.size()+"]条记录");
		for (Book book : resultList) {
			String highlighterString=null;
			Analyzer analyzer=new PaodingAnalyzer();
			try {
				//高亮name
				highlighterString=highlighter.getBestFragment(analyzer, "name", book.getName());
				if(highlighterString!=null){
					book.setName(highlighterString);
				}
				//高亮authors.name
				Set<Author> authors = book.getAuthors();
				for (Author author : authors) {
					highlighterString=highlighter.getBestFragment(analyzer, "authors.name", author.getName());
					if(highlighterString!=null){
						author.setName(highlighterString);
					}
				}
				//高亮description
				highlighterString=highlighter.getBestFragment(analyzer, "description", book.getDescription());
				if(highlighterString!=null){
					book.setDescription(highlighterString);
				}
			} catch (Exception e) {
			}
			
			System.out.println("书名:"+book.getName()+"\n描述:"+book.getDescription()+"\n出版日期:"+book.getPublicationDate());
			System.out.println("----------------------------------------------------------");
		}
		
		fullTextSession.close();
		sessionFactory.close();
		
	}
 
Example #11
Source File: BookDaoImpl.java    From maven-framework-project with MIT License 4 votes vote down vote up
@Override
public QueryResult<Book> query(String keyword, int start, int pagesize,Analyzer analyzer,String...field) throws Exception{
	
	QueryResult<Book> queryResult=new QueryResult<Book>();
	
	List<Book> books=new ArrayList<Book>();
	
	FullTextSession fullTextSession = Search.getFullTextSession(getSession());
	
	//使用Hibernate Search api查询 从多个字段匹配 name、description、authors.name
	//QueryBuilder qb = fullTextSession.getSearchFactory().buildQueryBuilder().forEntity(Book.class ).get();
	//Query luceneQuery = qb.keyword().onFields(field).matching(keyword).createQuery();

	//使用lucene api查询 从多个字段匹配 name、description、authors.name
	
	MultiFieldQueryParser queryParser=new MultiFieldQueryParser(Version.LUCENE_36,new String[]{"name","description","authors.name"}, analyzer);
	Query luceneQuery=queryParser.parse(keyword);
	
	FullTextQuery fullTextQuery = fullTextSession.createFullTextQuery(luceneQuery);
	int searchresultsize = fullTextQuery.getResultSize();
	queryResult.setSearchresultsize(searchresultsize);
	System.out.println("共查找到["+searchresultsize+"]条记录");
	
	fullTextQuery.setFirstResult(start);
	fullTextQuery.setMaxResults(pagesize);
	
	//设置按id排序
	fullTextQuery.setSort(new Sort(new SortField("id", SortField.INT ,true)));
	
	//高亮设置
	SimpleHTMLFormatter formatter=new SimpleHTMLFormatter("<b><font color='red'>", "</font></b>");
	QueryScorer queryScorer=new QueryScorer(luceneQuery);
	Highlighter highlighter=new Highlighter(formatter, queryScorer);

	@SuppressWarnings("unchecked")
	List<Book> tempresult = fullTextQuery.list();
	for (Book book : tempresult) {
		String highlighterString=null;
		try {
			//高亮name
			highlighterString=highlighter.getBestFragment(analyzer, "name", book.getName());
			if(highlighterString!=null){
				book.setName(highlighterString);
			}
			//高亮authors.name
			Set<Author> authors = book.getAuthors();
			for (Author author : authors) {
				highlighterString=highlighter.getBestFragment(analyzer, "authors.name", author.getName());
				if(highlighterString!=null){
					author.setName(highlighterString);
				}
			}
			//高亮description
			highlighterString=highlighter.getBestFragment(analyzer, "description", book.getDescription());
			if(highlighterString!=null){
				book.setDescription(highlighterString);
			}
		} catch (Exception e) {
		}
		
		books.add(book);
		
		
		System.out.println("书名:"+book.getName()+"\n描述:"+book.getDescription()+"\n出版日期:"+book.getPublicationDate());
		System.out.println("----------------------------------------------------------");
	}
	
	queryResult.setSearchresult(books);
	
	return queryResult;
}
 
Example #12
Source File: SearchManager.java    From maven-framework-project with MIT License 4 votes vote down vote up
public static void main(String[] args) throws Exception{
		ApplicationContext applicationContext=new ClassPathXmlApplicationContext("applicationContext.xml");
		EntityManagerFactory entityManagerFactory = applicationContext.getBean("entityManagerFactory",EntityManagerFactory.class);
		FullTextEntityManager fullTextEntityManager = Search.getFullTextEntityManager(entityManagerFactory.createEntityManager());
		
		//使用Hibernate Search api查询 从多个字段匹配 name、description、authors.name
//		QueryBuilder qb = fullTextEntityManager.getSearchFactory().buildQueryBuilder().forEntity(Book.class ).get();
//		Query luceneQuery = qb.keyword().onFields("name","description","authors.name").matching("移动互联网").createQuery();
		
		//使用lucene api查询 从多个字段匹配 name、description、authors.name
		//使用庖丁分词器
		MultiFieldQueryParser queryParser=new MultiFieldQueryParser(Version.LUCENE_36, new String[]{"name","description","authors.name"}, new PaodingAnalyzer());
		Query luceneQuery=queryParser.parse("实战");
		
		FullTextQuery fullTextQuery =fullTextEntityManager.createFullTextQuery(luceneQuery, Book.class);
		//设置每页显示多少条
		fullTextQuery.setMaxResults(5);
		//设置当前页
		fullTextQuery.setFirstResult(0);
		
		//高亮设置
		SimpleHTMLFormatter formatter=new SimpleHTMLFormatter("<b><font color='red'>", "<font/></b>");
		QueryScorer queryScorer=new QueryScorer(luceneQuery);
		Highlighter highlighter=new Highlighter(formatter, queryScorer);

		@SuppressWarnings("unchecked")
		List<Book> resultList = fullTextQuery.getResultList();
		
		for (Book book : resultList) {
			String highlighterString=null;
			Analyzer analyzer=new PaodingAnalyzer();
			try {
				//高亮name
				highlighterString=highlighter.getBestFragment(analyzer, "name", book.getName());
				if(highlighterString!=null){
					book.setName(highlighterString);
				}
				//高亮authors.name
				Set<Author> authors = book.getAuthors();
				for (Author author : authors) {
					highlighterString=highlighter.getBestFragment(analyzer, "authors.name", author.getName());
					if(highlighterString!=null){
						author.setName(highlighterString);
					}
				}
				//高亮description
				highlighterString=highlighter.getBestFragment(analyzer, "description", book.getDescription());
				if(highlighterString!=null){
					book.setDescription(highlighterString);
				}
			} catch (Exception e) {
			}
			
		}
		
		fullTextEntityManager.close();
		entityManagerFactory.close();
		
	}
 
Example #13
Source File: BookDaoImpl.java    From maven-framework-project with MIT License 4 votes vote down vote up
@Override
public QueryResult<Book> query(String keyword, int start, int pagesize,Analyzer analyzer,String...field) throws Exception{
	
	QueryResult<Book> queryResult=new QueryResult<Book>();
	
	List<Book> books=new ArrayList<Book>();
	
	FullTextEntityManager fullTextEntityManager = Search.getFullTextEntityManager(em);
	
	//使用Hibernate Search api查询 从多个字段匹配 name、description、authors.name
	//QueryBuilder qb = fullTextSession.getSearchFactory().buildQueryBuilder().forEntity(Book.class ).get();
	//Query luceneQuery = qb.keyword().onFields(field).matching(keyword).createQuery();

	//使用lucene api查询 从多个字段匹配 name、description、authors.name
	
	MultiFieldQueryParser queryParser=new MultiFieldQueryParser(Version.LUCENE_36,new String[]{"name","description","authors.name"}, analyzer);
	Query luceneQuery=queryParser.parse(keyword);
	
	FullTextQuery fullTextQuery = fullTextEntityManager.createFullTextQuery(luceneQuery);
	int searchresultsize = fullTextQuery.getResultSize();
	queryResult.setSearchresultsize(searchresultsize);
			
	fullTextQuery.setFirstResult(start);
	fullTextQuery.setMaxResults(pagesize);
	
	//设置按id排序
	fullTextQuery.setSort(new Sort(new SortField("id", SortField.INT ,true)));
	
	//高亮设置
	SimpleHTMLFormatter formatter=new SimpleHTMLFormatter("<b><font color='red'>", "</font></b>");
	QueryScorer queryScorer=new QueryScorer(luceneQuery);
	Highlighter highlighter=new Highlighter(formatter, queryScorer);

	@SuppressWarnings("unchecked")
	List<Book> tempresult = fullTextQuery.getResultList();
	for (Book book : tempresult) {
		String highlighterString=null;
		try {
			//高亮name
			highlighterString=highlighter.getBestFragment(analyzer, "name", book.getName());
			if(highlighterString!=null){
				book.setName(highlighterString);
			}
			//高亮authors.name
			Set<Author> authors = book.getAuthors();
			for (Author author : authors) {
				highlighterString=highlighter.getBestFragment(analyzer, "authors.name", author.getName());
				if(highlighterString!=null){
					author.setName(highlighterString);
				}
			}
			//高亮description
			highlighterString=highlighter.getBestFragment(analyzer, "description", book.getDescription());
			if(highlighterString!=null){
				book.setDescription(highlighterString);
			}
		} catch (Exception e) {
		}
		
		books.add(book);	
		
	}
	
	queryResult.setSearchresult(books);
	
	return queryResult;
}