org.apache.lucene.search.TopDocs Java Examples

The following examples show how to use org.apache.lucene.search.TopDocs. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestPersistentProvenanceRepository.java    From localization_nifi with Apache License 2.0 7 votes vote down vote up
private List<Document> runQuery(final File indexDirectory, final List<File> storageDirs, final String query) throws IOException, ParseException {
    try (final DirectoryReader directoryReader = DirectoryReader.open(FSDirectory.open(indexDirectory))) {
        final IndexSearcher searcher = new IndexSearcher(directoryReader);

        final Analyzer analyzer = new SimpleAnalyzer();
        final org.apache.lucene.search.Query luceneQuery = new QueryParser("uuid", analyzer).parse(query);

        final Query q = new Query("");
        q.setMaxResults(1000);
        final TopDocs topDocs = searcher.search(luceneQuery, 1000);

        final List<Document> docs = new ArrayList<>();
        for (final ScoreDoc scoreDoc : topDocs.scoreDocs) {
            final int docId = scoreDoc.doc;
            final Document d = directoryReader.document(docId);
            docs.add(d);
        }

        return docs;
    }
}
 
Example #2
Source File: DocsReader.java    From localization_nifi with Apache License 2.0 6 votes vote down vote up
public Set<ProvenanceEventRecord> read(final TopDocs topDocs, final EventAuthorizer authorizer, final IndexReader indexReader, final Collection<Path> allProvenanceLogFiles,
        final AtomicInteger retrievalCount, final int maxResults, final int maxAttributeChars) throws IOException {
    if (retrievalCount.get() >= maxResults) {
        return Collections.emptySet();
    }

    final long start = System.nanoTime();
    final ScoreDoc[] scoreDocs = topDocs.scoreDocs;
    final int numDocs = Math.min(scoreDocs.length, maxResults);
    final List<Document> docs = new ArrayList<>(numDocs);

    for (int i = numDocs - 1; i >= 0; i--) {
        final int docId = scoreDocs[i].doc;
        final Document d = indexReader.document(docId);
        docs.add(d);
    }

    final long readDocuments = System.nanoTime() - start;
    logger.debug("Reading {} Lucene Documents took {} millis", docs.size(), TimeUnit.NANOSECONDS.toMillis(readDocuments));
    return read(docs, authorizer, allProvenanceLogFiles, retrievalCount, maxResults, maxAttributeChars);
}
 
Example #3
Source File: TestUnifiedHighlighterStrictPhrases.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
public void testWithSameTermQuery() throws IOException {
  indexWriter.addDocument(newDoc("Yin yang, yin gap yang"));
  initReaderSearcherHighlighter();

  BooleanQuery query = new BooleanQuery.Builder()
      .add(new TermQuery(new Term("body", "yin")), BooleanClause.Occur.MUST)
      .add(newPhraseQuery("body", "yin yang"), BooleanClause.Occur.MUST)
      // add queries for other fields; we shouldn't highlight these because of that.
      .add(new TermQuery(new Term("title", "yang")), BooleanClause.Occur.SHOULD)
      .build();

  TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
  dupMatchAllowed.set(false); // We don't want duplicates from "Yin" being in TermQuery & PhraseQuery.
  String[] snippets = highlighter.highlight("body", query, topDocs);
  if (highlighter.getFlags("body").contains(HighlightFlag.WEIGHT_MATCHES)) {
    assertArrayEquals(new String[]{"<b>Yin yang</b>, <b>yin</b> gap yang"}, snippets);
  } else {
    assertArrayEquals(new String[]{"<b>Yin</b> <b>yang</b>, <b>yin</b> gap yang"}, snippets);
  }
}
 
Example #4
Source File: TestPayloadScoreQuery.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
private static void checkQuery(SpanQuery query, PayloadFunction function, boolean includeSpanScore, int[] expectedDocs, float[] expectedScores) throws IOException {

    assertTrue("Expected docs and scores arrays must be the same length!", expectedDocs.length == expectedScores.length);

    PayloadScoreQuery psq = new PayloadScoreQuery(query, function, PayloadDecoder.FLOAT_DECODER, includeSpanScore);
    TopDocs hits = searcher.search(psq, expectedDocs.length);

    for (int i = 0; i < hits.scoreDocs.length; i++) {
      if (i > expectedDocs.length - 1)
        fail("Unexpected hit in document " + hits.scoreDocs[i].doc);
      if (hits.scoreDocs[i].doc != expectedDocs[i])
        fail("Unexpected hit in document " + hits.scoreDocs[i].doc);
      assertEquals("Bad score in document " + expectedDocs[i], expectedScores[i], hits.scoreDocs[i].score, 0.000001);
    }

    if (hits.scoreDocs.length > expectedDocs.length)
      fail("Unexpected hit in document " + hits.scoreDocs[expectedDocs.length]);

    QueryUtils.check(random(), psq, searcher);
  }
 
Example #5
Source File: TestBooleanSimilarity.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
public void testPhraseScoreIsEqualToBoost() throws IOException {
  Directory dir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), dir,
      newIndexWriterConfig().setSimilarity(new BooleanSimilarity()));
  Document doc = new Document();
  doc.add(new TextField("foo", "bar baz quux", Store.NO));
  w.addDocument(doc);

  DirectoryReader reader = w.getReader();
  w.close();
  IndexSearcher searcher = newSearcher(reader);
  searcher.setSimilarity(new BooleanSimilarity());

  PhraseQuery query = new PhraseQuery(2, "foo", "bar", "quux");

  TopDocs topDocs = searcher.search(query, 2);
  assertEquals(1, topDocs.totalHits.value);
  assertEquals(1f, topDocs.scoreDocs[0].score, 0f);

  topDocs = searcher.search(new BoostQuery(query, 7), 2);
  assertEquals(1, topDocs.totalHits.value);
  assertEquals(7f, topDocs.scoreDocs[0].score, 0f);

  reader.close();
  dir.close();
}
 
Example #6
Source File: LuceneQueryTestCase.java    From jstarcraft-core with Apache License 2.0 6 votes vote down vote up
@Test
public void testPointExactQuery() throws Exception {
    // 精确查询
    Query exactQuery = IntPoint.newExactQuery("id", 1);
    TopDocs search = searcher.search(exactQuery, 1000);
    Assert.assertEquals(1, search.totalHits.value);
}
 
Example #7
Source File: TestHierarchicalDocBuilder.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
private void assertSearch(Query query, String field, String... values) throws IOException {
  /* The limit of search queue is doubled to catch the error in case when for some reason there are more docs than expected  */
  SolrIndexSearcher searcher = req.getSearcher();
  TopDocs result = searcher.search(query, values.length * 2);
  assertEquals(values.length, result.totalHits.value);
  List<String> actualValues = new ArrayList<String>();
  for (int index = 0; index < values.length; ++index) {
    Document doc = searcher.doc(result.scoreDocs[index].doc);
    actualValues.add(doc.get(field));
  }
  
  for (String expectedValue: values) {
    boolean removed = actualValues.remove(expectedValue);
    if (!removed) {
      fail("Search result does not contain expected values");
    }
  }
}
 
Example #8
Source File: ExplorerQueryTests.java    From elasticsearch-learning-to-rank with Apache License 2.0 6 votes vote down vote up
public void testBooleanQuery() throws Exception {
    TermQuery tq1 = new TermQuery(new Term("text", "cow"));
    TermQuery tq2 = new TermQuery(new Term("text", "brown"));
    TermQuery tq3 = new TermQuery(new Term("text", "how"));

    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    builder.add(tq1, BooleanClause.Occur.SHOULD);
    builder.add(tq2, BooleanClause.Occur.SHOULD);
    builder.add(tq3, BooleanClause.Occur.SHOULD);

    Query q = builder.build();
    String statsType = "sum_raw_tf";

    ExplorerQuery eq = new ExplorerQuery(q, statsType);

    // Verify tf score
    TopDocs docs = searcher.search(eq, 4);
    assertThat(docs.scoreDocs[0].score, equalTo(3.0f));
}
 
Example #9
Source File: HighlighterTest.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
private void searchIndex() throws IOException, InvalidTokenOffsetsException {
  Query query = new TermQuery(new Term("t_text1", "random"));
  IndexReader reader = DirectoryReader.open(dir1);
  IndexSearcher searcher = newSearcher(reader);
  // This scorer can return negative idf -> null fragment
  Scorer scorer = new QueryTermScorer( query, searcher.getIndexReader(), "t_text1" );
  // This scorer doesn't use idf (patch version)
  //Scorer scorer = new QueryTermScorer( query, "t_text1" );
  Highlighter h = new Highlighter( scorer );

  TopDocs hits = searcher.search(query, 10);
  for( int i = 0; i < hits.totalHits.value; i++ ){
    Document doc = searcher.doc( hits.scoreDocs[i].doc );
    String result = h.getBestFragment( a, "t_text1", doc.get( "t_text1" ));
    if (VERBOSE) System.out.println("result:" +  result);
    assertEquals("more <B>random</B> words for second field", result);
  }
  reader.close();
}
 
Example #10
Source File: TestNumericTerms64.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
private void testSorting(int precisionStep) throws Exception {
  String field="field"+precisionStep;
  // 10 random tests, the index order is ascending,
  // so using a reverse sort field should retun descending documents
  int num = TestUtil.nextInt(random(), 10, 20);
  for (int i = 0; i < num; i++) {
    long lower=(long)(random().nextDouble()*noDocs*distance)+startOffset;
    long upper=(long)(random().nextDouble()*noDocs*distance)+startOffset;
    if (lower>upper) {
      long a=lower; lower=upper; upper=a;
    }
    Query tq= LegacyNumericRangeQuery.newLongRange(field, precisionStep, lower, upper, true, true);
    TopDocs topDocs = searcher.search(tq, noDocs, new Sort(new SortField(field, SortField.Type.LONG, true)));
    if (topDocs.totalHits.value==0) continue;
    ScoreDoc[] sd = topDocs.scoreDocs;
    assertNotNull(sd);
    long last=searcher.doc(sd[0].doc).getField(field).numericValue().longValue();
    for (int j=1; j<sd.length; j++) {
      long act=searcher.doc(sd[j].doc).getField(field).numericValue().longValue();
      assertTrue("Docs should be sorted backwards", last>act );
      last=act;
    }
  }
}
 
Example #11
Source File: LuceneTranslator.java    From Indra with MIT License 6 votes vote down vote up
private Map<String, List<String>> doTranslate(Set<String> terms) {

        Map<String, List<String>> res = new HashMap<>();

        try {
            TopDocs topDocs = LuceneUtils.getTopDocs(searcher, terms, TERM_FIELD);


            if (topDocs != null) {
                for (ScoreDoc sd : topDocs.scoreDocs) {
                    Document doc = searcher.doc(sd.doc);
                    Map<String, Double> content = convert(doc.getBinaryValue(TRANSLATION_FIELD).bytes);
                    res.put(doc.get(TERM_FIELD), getRelevantTranslations(content));
                }
            }

        } catch (IOException e) {
            logger.error(e.getMessage());
            //TODO throw new expection here.
            e.printStackTrace();
        }

        return res;
    }
 
Example #12
Source File: TestUnifiedHighlighterStrictPhrases.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
public void testBasics() throws IOException {
  indexWriter.addDocument(newDoc("Yin yang, filter")); // filter out. test getTermToSpanLists reader 1-doc filter
  indexWriter.addDocument(newDoc("yin alone, Yin yang, yin gap yang"));
  initReaderSearcherHighlighter();

  //query:  -filter +"yin yang"
  BooleanQuery query = new BooleanQuery.Builder()
      .add(new TermQuery(new Term("body", "filter")), BooleanClause.Occur.MUST_NOT)
      .add(newPhraseQuery("body", "yin yang"), BooleanClause.Occur.MUST)
      .build();


  TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
  String[] snippets = highlighter.highlight("body", query, topDocs);
  if (highlighter.getFlags("body").contains(HighlightFlag.WEIGHT_MATCHES)) {
    assertArrayEquals(new String[]{"yin alone, <b>Yin yang</b>, yin gap yang"}, snippets);
  } else {
    assertArrayEquals(new String[]{"yin alone, <b>Yin</b> <b>yang</b>, yin gap yang"}, snippets);
  }
}
 
Example #13
Source File: DistanceFacetsExample.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/** User drills down on the specified range. */
public TopDocs drillDown(DoubleRange range) throws IOException {

  // Passing no baseQuery means we drill down on all
  // documents ("browse only"):
  DrillDownQuery q = new DrillDownQuery(null);
  final DoubleValuesSource vs = getDistanceValueSource();
  q.add("field", range.getQuery(getBoundingBoxQuery(ORIGIN_LATITUDE, ORIGIN_LONGITUDE, range.max), vs));
  DrillSideways ds = new DrillSideways(searcher, config, (TaxonomyReader) null) {
      @Override
      protected Facets buildFacetsResult(FacetsCollector drillDowns, FacetsCollector[] drillSideways, String[] drillSidewaysDims) throws IOException {        
        assert drillSideways.length == 1;
        return new DoubleRangeFacetCounts("field", vs, drillSideways[0], ONE_KM, TWO_KM, FIVE_KM, TEN_KM);
      }
    };
  return ds.search(q, 10).hits;
}
 
Example #14
Source File: SORecommender.java    From scava with Eclipse Public License 2.0 6 votes vote down vote up
public TopDocs executeQuery(org.apache.lucene.search.Query query) throws IOException, ParseException {
	Directory indexDir = FSDirectory.open(Paths.get(INDEX_DIRECTORY));
	try {
		IndexReader reader = DirectoryReader.open(indexDir);
		IndexSearcher searcher = new IndexSearcher(reader);
		if (isBm25 == false) {
			ClassicSimilarity CS = new ClassicSimilarity();
			searcher.setSimilarity(CS);
		}
		TopDocs docs = searcher.search(query, hitsPerPage);
		return docs;
	} catch (Exception e) {
		logger.error(e.getMessage());
		return null;
	}
}
 
Example #15
Source File: KNearestNeighborClassifier.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
private TopDocs knnSearch(String text) throws IOException {
  BooleanQuery.Builder mltQuery = new BooleanQuery.Builder();
  for (String fieldName : textFieldNames) {
    String boost = null;
    mlt.setBoost(true); //terms boost actually helps in MLT queries
    if (fieldName.contains("^")) {
      String[] field2boost = fieldName.split("\\^");
      fieldName = field2boost[0];
      boost = field2boost[1];
    }
    if (boost != null) {
      mlt.setBoostFactor(Float.parseFloat(boost));//if we have a field boost, we add it
    }
    mltQuery.add(new BooleanClause(mlt.like(fieldName, new StringReader(text)), BooleanClause.Occur.SHOULD));
    mlt.setBoostFactor(1);// restore neutral boost for next field
  }
  Query classFieldQuery = new WildcardQuery(new Term(classFieldName, "*"));
  mltQuery.add(new BooleanClause(classFieldQuery, BooleanClause.Occur.MUST));
  if (query != null) {
    mltQuery.add(query, BooleanClause.Occur.MUST);
  }
  return indexSearcher.search(mltQuery.build(), k);
}
 
Example #16
Source File: TestFieldScoreQuery.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
private void doTestExactScore (ValueSource valueSource) throws Exception {
  Query functionQuery = getFunctionQuery(valueSource);
  IndexReader r = DirectoryReader.open(dir);
  IndexSearcher s = newSearcher(r);
  TopDocs td = s.search(functionQuery,1000);
  assertEquals("All docs should be matched!",N_DOCS,td.totalHits.value);
  ScoreDoc sd[] = td.scoreDocs;
  for (ScoreDoc aSd : sd) {
    float score = aSd.score;
    log(s.explain(functionQuery, aSd.doc));
    String id = s.getIndexReader().document(aSd.doc).get(ID_FIELD);
    float expectedScore = expectedFieldScore(id); // "ID7" --> 7.0
    assertEquals("score of " + id + " shuould be " + expectedScore + " != " + score, expectedScore, score, TEST_SCORE_TOLERANCE_DELTA);
  }
  r.close();
}
 
Example #17
Source File: TestNumericRangeQuery32.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
private void testLeftOpenRange(int precisionStep) throws Exception {
  String field="field"+precisionStep;
  int count=3000;
  int upper=(count-1)*distance + (distance/3) + startOffset;
  LegacyNumericRangeQuery<Integer> q= LegacyNumericRangeQuery.newIntRange(field, precisionStep, null, upper, true, true);
  TopDocs topDocs = searcher.search(q, noDocs, Sort.INDEXORDER);
  ScoreDoc[] sd = topDocs.scoreDocs;
  assertNotNull(sd);
  assertEquals("Score doc count", count, sd.length );
  Document doc=searcher.doc(sd[0].doc);
  assertEquals("First doc", startOffset, doc.getField(field).numericValue().intValue());
  doc=searcher.doc(sd[sd.length-1].doc);
  assertEquals("Last doc", (count-1)*distance+startOffset, doc.getField(field).numericValue().intValue());
  
  q= LegacyNumericRangeQuery.newIntRange(field, precisionStep, null, upper, false, true);
  topDocs = searcher.search(q, noDocs, Sort.INDEXORDER);
  sd = topDocs.scoreDocs;
  assertNotNull(sd);
  assertEquals("Score doc count", count, sd.length );
  doc=searcher.doc(sd[0].doc);
  assertEquals("First doc", startOffset, doc.getField(field).numericValue().intValue());
  doc=searcher.doc(sd[sd.length-1].doc);
  assertEquals("Last doc", (count-1)*distance+startOffset, doc.getField(field).numericValue().intValue());
}
 
Example #18
Source File: TestLucene.java    From RedisDirectory with Apache License 2.0 6 votes vote down vote up
public void testRamDirectory() throws IOException {
    long start = System.currentTimeMillis();
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(IndexWriterConfig
            .OpenMode.CREATE);
    RAMDirectory ramDirectory = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(ramDirectory, indexWriterConfig);
    for (int i = 0; i < 10000000; i++) {
        indexWriter.addDocument(addDocument(i));
    }
    indexWriter.commit();
    indexWriter.close();
    long end = System.currentTimeMillis();
    log.error("RamDirectory consumes {}s!", (end - start) / 1000);
    start = System.currentTimeMillis();
    IndexSearcher indexSearcher = new IndexSearcher(DirectoryReader.open(ramDirectory));
    int total = 0;
    for (int i = 0; i < 10000000; i++) {
        TermQuery key1 = new TermQuery(new Term("key1", "key" + i));
        TopDocs search = indexSearcher.search(key1, 10);
        total += search.totalHits;
    }
    System.out.println(total);
    end = System.currentTimeMillis();
    log.error("RamDirectory search consumes {}ms!", (end - start));
}
 
Example #19
Source File: TestUnifiedHighlighterTermIntervals.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
public void testMatchesSlopBug() throws IOException {
  IndexReader ir = indexSomeFields();
  IndexSearcher searcher = newSearcher(ir);
  UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
  Query query = new IntervalQuery("title", Intervals.maxgaps(random().nextBoolean() ? 1 : 2,
      Intervals.ordered(
          Intervals.term("this"), Intervals.term("is"), Intervals.term("the"), Intervals.term("field"))));
  TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
  assertEquals(1, topDocs.totalHits.value);
  String[] snippets = highlighter.highlight("title", query, topDocs, 10);
  assertEquals(1, snippets.length);
  if (highlighter.getFlags("title").contains(HighlightFlag.WEIGHT_MATCHES)) {
    assertEquals("" + highlighter.getFlags("title"),
        "<b>This is the title field</b>.", snippets[0]);
  } else {
    assertEquals("" + highlighter.getFlags("title"),
        "<b>This</b> <b>is</b> <b>the</b> title <b>field</b>.", snippets[0]);
  }
  ir.close();
}
 
Example #20
Source File: TestSelectiveWeightCreation.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
private LTRScoringQuery.ModelWeight performQuery(TopDocs hits,
    IndexSearcher searcher, int docid, LTRScoringQuery model) throws IOException,
    ModelException {
  final List<LeafReaderContext> leafContexts = searcher.getTopReaderContext()
      .leaves();
  final int n = ReaderUtil.subIndex(hits.scoreDocs[0].doc, leafContexts);
  final LeafReaderContext context = leafContexts.get(n);
  final int deBasedDoc = hits.scoreDocs[0].doc - context.docBase;

  final Weight weight = searcher.createWeight(searcher.rewrite(model), ScoreMode.COMPLETE, 1);
  final Scorer scorer = weight.scorer(context);

  // rerank using the field final-score
  scorer.iterator().advance(deBasedDoc);
  scorer.score();
  assertTrue(weight instanceof LTRScoringQuery.ModelWeight);
  final LTRScoringQuery.ModelWeight modelWeight = (LTRScoringQuery.ModelWeight) weight;
  return modelWeight;

}
 
Example #21
Source File: TestUnifiedHighlighterStrictPhrases.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
public void testMaxLen() throws IOException {
  indexWriter.addDocument(newDoc("alpha bravo charlie - gap alpha bravo")); // hyphen is at char 21
  initReaderSearcherHighlighter();
  highlighter.setMaxLength(21);

  BooleanQuery query = new BooleanQuery.Builder()
      .add(newPhraseQuery("body", "alpha bravo"), BooleanClause.Occur.SHOULD)
      .add(newPhraseQuery("body", "gap alpha"), BooleanClause.Occur.SHOULD)
      .add(newPhraseQuery("body", "charlie gap"), BooleanClause.Occur.SHOULD)
      .build();

  TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
  String[] snippets = highlighter.highlight("body", query, topDocs);

  final boolean weightMatches = highlighter.getFlags("body").contains(HighlightFlag.WEIGHT_MATCHES);
  if (fieldType == UHTestHelper.reanalysisType || weightMatches) {
    if (weightMatches) {
      assertArrayEquals(new String[]{"<b>alpha bravo</b> charlie -"}, snippets);
    } else {
      assertArrayEquals(new String[]{"<b>alpha</b> <b>bravo</b> charlie -"}, snippets);
    }
  } else {
    assertArrayEquals(new String[]{"<b>alpha</b> <b>bravo</b> <b>charlie</b> -"}, snippets);
  }
}
 
Example #22
Source File: SearchBuilder.java    From taoshop with Apache License 2.0 5 votes vote down vote up
public static void doSearch(String indexDir , String queryStr) throws IOException, ParseException, InvalidTokenOffsetsException {
    Directory directory = FSDirectory.open(Paths.get(indexDir));
    DirectoryReader reader = DirectoryReader.open(directory);
    IndexSearcher searcher = new IndexSearcher(reader);
    Analyzer analyzer = new SmartChineseAnalyzer();
    QueryParser parser = new QueryParser("tcontent",analyzer);
    Query query = parser.parse(queryStr);

    long startTime = System.currentTimeMillis();
    TopDocs docs = searcher.search(query,10);

    System.out.println("查找"+queryStr+"所用时间:"+(System.currentTimeMillis()-startTime));
    System.out.println("查询到"+docs.totalHits+"条记录");

    //加入高亮显示的
    SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<b><font color=red>","</font></b>");
    QueryScorer scorer = new QueryScorer(query);//计算查询结果最高的得分
    Fragmenter fragmenter = new SimpleSpanFragmenter(scorer);//根据得分算出一个片段
    Highlighter highlighter = new Highlighter(simpleHTMLFormatter,scorer);
    highlighter.setTextFragmenter(fragmenter);//设置显示高亮的片段

    //遍历查询结果
    for(ScoreDoc scoreDoc : docs.scoreDocs){
        Document doc = searcher.doc(scoreDoc.doc);
        System.out.println(doc.get("title"));
        System.out.println(doc.get("tcontent"));
        String tcontent = doc.get("tcontent");
        if(tcontent != null){
            TokenStream tokenStream =  analyzer.tokenStream("tcontent", new StringReader(tcontent));
            String summary = highlighter.getBestFragment(tokenStream, tcontent);
            System.out.println(summary);
        }
    }
    reader.close();
}
 
Example #23
Source File: TestUnifiedHighlighterTermIntervals.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
public void testMultipleTerms() throws Exception {
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);

  Field body = new Field("body", "", fieldType);
  Document doc = new Document();
  doc.add(body);

  body.setStringValue("This is a test. Just a test highlighting from postings. Feel free to ignore.");
  iw.addDocument(doc);
  body.setStringValue("Highlighting the first term. Hope it works.");
  iw.addDocument(doc);

  IndexReader ir = iw.getReader();
  iw.close();

  IndexSearcher searcher = newSearcher(ir);
  UnifiedHighlighter highlighter = randomUnifiedHighlighter(searcher, indexAnalyzer);
  Query query = new IntervalQuery("body", Intervals.or(
      Intervals.term("highlighting"),
      Intervals.term("just"),
      Intervals.term("first")));
  TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
  assertEquals(2, topDocs.totalHits.value);
  String snippets[] = highlighter.highlight("body", query, topDocs);
  assertEquals(2, snippets.length);
  assertEquals("<b>Just</b> a test <b>highlighting</b> from postings. ", snippets[0]);
  assertEquals("<b>Highlighting</b> the <b>first</b> term. ", snippets[1]);
  ir.close();
}
 
Example #24
Source File: PersistentProvenanceRepository.java    From localization_nifi with Apache License 2.0 5 votes vote down vote up
@Override
public void run() {
    if (submission.isCanceled()) {
        return;
    }

    try {
        final DocumentToEventConverter converter = new DocumentToEventConverter() {
            @Override
            public Set<ProvenanceEventRecord> convert(TopDocs topDocs, IndexReader indexReader) throws IOException {
                // Always authorized. We do this because we need to pull back the event, regardless of whether or not
                // the user is truly authorized, because instead of ignoring unauthorized events, we want to replace them.
                final EventAuthorizer authorizer = EventAuthorizer.GRANT_ALL;
                final DocsReader docsReader = new DocsReader();
                return docsReader.read(topDocs, authorizer, indexReader, getAllLogFiles(), new AtomicInteger(0), Integer.MAX_VALUE, maxAttributeChars);
            }
        };

        final Set<ProvenanceEventRecord> matchingRecords = LineageQuery.computeLineageForFlowFiles(getIndexManager(), indexDir, null, flowFileUuids, converter);

        final StandardLineageResult result = submission.getResult();
        result.update(replaceUnauthorizedWithPlaceholders(matchingRecords, user), matchingRecords.size());

        logger.info("Successfully created Lineage for FlowFiles with UUIDs {} in {} milliseconds; Lineage contains {} nodes and {} edges",
                flowFileUuids, result.getComputationTime(TimeUnit.MILLISECONDS), result.getNodes().size(), result.getEdges().size());
    } catch (final Throwable t) {
        logger.error("Failed to query provenance repository due to {}", t.toString());
        if (logger.isDebugEnabled()) {
            logger.error("", t);
        }

        if (t.getMessage() == null) {
            submission.getResult().setError(t.toString());
        } else {
            submission.getResult().setError(t.getMessage());
        }
    }
}
 
Example #25
Source File: TestSoftDeletesRetentionMergePolicy.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
static void doUpdate(Term doc, IndexWriter writer, Field... fields) throws IOException {
  long seqId = -1;
  do { // retry if we just committing a merge
    try (DirectoryReader reader = writer.getReader()) {
      TopDocs topDocs = new IndexSearcher(new IncludeSoftDeletesWrapper(reader)).search(new TermQuery(doc), 10);
      assertEquals(1, topDocs.totalHits.value);
      int theDoc = topDocs.scoreDocs[0].doc;
      seqId = writer.tryUpdateDocValue(reader, theDoc, fields);
    }
  } while (seqId == -1);
}
 
Example #26
Source File: LuceneQueryTestCase.java    From jstarcraft-core with Apache License 2.0 5 votes vote down vote up
@Test
public void testMatchAllDocsQuery() throws Exception {
    // 全部匹配查询
    Query query = new MatchAllDocsQuery();
    TopDocs search = searcher.search(query, 1000000);
    Assert.assertEquals(1681, search.totalHits.value);
}
 
Example #27
Source File: TestFeatureSort.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
public void testFeatureMissing() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig config = newIndexWriterConfig().setMergePolicy(newLogMergePolicy(random().nextBoolean()));
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);
  Document doc = new Document();
  writer.addDocument(doc);
  doc = new Document();
  doc.add(new FeatureField("field", "name", 1.3F));
  doc.add(newStringField("value", "1.3", Field.Store.YES));
  writer.addDocument(doc);
  doc = new Document();
  doc.add(new FeatureField("field", "name", 4.2F));
  doc.add(newStringField("value", "4.2", Field.Store.YES));
  writer.addDocument(doc);
  IndexReader ir = writer.getReader();
  writer.close();

  IndexSearcher searcher = newSearcher(ir);
  Sort sort = new Sort(FeatureField.newFeatureSort("field", "name"));

  TopDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort);
  assertEquals(3, td.totalHits.value);
  // null is treated as 0
  assertEquals("4.2", searcher.doc(td.scoreDocs[0].doc).get("value"));
  assertEquals("1.3", searcher.doc(td.scoreDocs[1].doc).get("value"));
  assertNull(searcher.doc(td.scoreDocs[2].doc).get("value"));

  ir.close();
  dir.close();
}
 
Example #28
Source File: TestUnifiedHighlighterTermIntervals.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
public void testEncode() throws Exception {
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);

  Field body = new Field("body", "", fieldType);
  Document doc = new Document();
  doc.add(body);

  body.setStringValue("This is a test. Just a test highlighting from <i>postings</i>. Feel free to ignore.");
  iw.addDocument(doc);

  IndexReader ir = iw.getReader();
  iw.close();

  IndexSearcher searcher = newSearcher(ir);
  UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer) {
    @Override
    protected PassageFormatter getFormatter(String field) {
      return new DefaultPassageFormatter("<b>", "</b>", "... ", true);
    }
  };
  
  Query query = new IntervalQuery("body", Intervals.term("highlighting"));
  TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
  assertEquals(1, topDocs.totalHits.value);
  String snippets[] = highlighter.highlight("body", query, topDocs);
  assertEquals(1, snippets.length);
  assertEquals("Just a test <b>highlighting</b> from &lt;i&gt;postings&lt;&#x2F;i&gt;. ", snippets[0]);
  ir.close();
}
 
Example #29
Source File: KNearestNeighborDocumentClassifier.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the top k results from a More Like This query based on the input document
 *
 * @param document the document to use for More Like This search
 * @return the top results for the MLT query
 * @throws IOException If there is a low-level I/O error
 */
private TopDocs knnSearch(Document document) throws IOException {
  BooleanQuery.Builder mltQuery = new BooleanQuery.Builder();

  for (String fieldName : textFieldNames) {
    String boost = null;
    if (fieldName.contains("^")) {
      String[] field2boost = fieldName.split("\\^");
      fieldName = field2boost[0];
      boost = field2boost[1];
    }
    String[] fieldValues = document.getValues(fieldName);
    mlt.setBoost(true); // we want always to use the boost coming from TF * IDF of the term
    if (boost != null) {
      mlt.setBoostFactor(Float.parseFloat(boost)); // this is an additional multiplicative boost coming from the field boost
    }
    mlt.setAnalyzer(field2analyzer.get(fieldName));
    for (String fieldContent : fieldValues) {
      mltQuery.add(new BooleanClause(mlt.like(fieldName, new StringReader(fieldContent)), BooleanClause.Occur.SHOULD));
    }
    mlt.setBoostFactor(1);// restore neutral boost for next field
  }
  Query classFieldQuery = new WildcardQuery(new Term(classFieldName, "*"));
  mltQuery.add(new BooleanClause(classFieldQuery, BooleanClause.Occur.MUST));
  if (query != null) {
    mltQuery.add(query, BooleanClause.Occur.MUST);
  }
  return indexSearcher.search(mltQuery.build(), k);
}
 
Example #30
Source File: LuceneQueryTestCase.java    From jstarcraft-core with Apache License 2.0 5 votes vote down vote up
@Test
public void testMultiPhraseQuery() throws Exception {
    // 多短语查询
    Term[] terms = new Term[] { new Term("title", "NeverEnding"), new Term("title", "Xinghua,") };
    Term term = new Term("title", "The");
    // add之间认为是OR操作,即"NeverEnding", "Xinghua,"和"The"之间的slop不大于3
    MultiPhraseQuery multiPhraseQuery = new MultiPhraseQuery.Builder().add(terms).add(term).setSlop(3).build();
    TopDocs search = searcher.search(multiPhraseQuery, 1000);
    Assert.assertEquals(2, search.totalHits.value);
}