org.apache.lucene.search.TopScoreDocCollector Java Examples

The following examples show how to use org.apache.lucene.search.TopScoreDocCollector. They are drawn from open-source projects; the project, source file, and license are listed above each example.
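Before the project examples, here is a minimal, self-contained sketch of the pattern they all share: create the collector with the number of hits to keep, run the search, then read the ranked hits from topDocs(). It assumes Lucene 8.x or later (where create takes a total-hits threshold; older versions use the boolean overload seen in some examples below), an existing index in a Directory, and a hypothetical "title" field.

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.Directory;

public class TopScoreDocCollectorSketch {
  // Prints the ten best-scoring documents for a simple term query.
  static void printTopHits(Directory directory) throws Exception {
    try (DirectoryReader reader = DirectoryReader.open(directory)) {
      IndexSearcher searcher = new IndexSearcher(reader);
      Query query = new TermQuery(new Term("title", "lucene")); // hypothetical field and term
      // Second argument is the total-hits threshold; Integer.MAX_VALUE requests an exact count.
      TopScoreDocCollector collector = TopScoreDocCollector.create(10, Integer.MAX_VALUE);
      searcher.search(query, collector);
      for (ScoreDoc hit : collector.topDocs().scoreDocs) {
        Document doc = searcher.doc(hit.doc);
        System.out.println(hit.score + " -> " + doc.get("title"));
      }
    }
  }
}
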
Example #1
Source File: Grouping.java    From lucene-solr with Apache License 2.0
@Override
protected Collector createFirstPassCollector() throws IOException {
  DocSet groupFilt = searcher.getDocSet(query);
  int groupDocsToCollect = getMax(groupOffset, docsPerGroup, maxDoc);
  Collector subCollector;
  if (withinGroupSort == null || withinGroupSort.equals(Sort.RELEVANCE)) {
    subCollector = topCollector = TopScoreDocCollector.create(groupDocsToCollect, Integer.MAX_VALUE);
  } else {
    topCollector = TopFieldCollector.create(searcher.weightSort(withinGroupSort), groupDocsToCollect, Integer.MAX_VALUE);
    if (needScores) {
      maxScoreCollector = new MaxScoreCollector();
      subCollector = MultiCollector.wrap(topCollector, maxScoreCollector);
    } else {
      subCollector = topCollector;
    }
  }
  collector = new FilterCollector(groupFilt, subCollector);
  return collector;
}
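The branch above is a pattern that recurs in Examples #7 and #18 as well: a plain TopScoreDocCollector is enough when results are ordered by relevance, but when sorting by a field while scores are still needed, the TopFieldCollector is wrapped together with a MaxScoreCollector in a MultiCollector, since TopFieldCollector by itself does not track the maximum score. A condensed sketch of just that decision, assuming Lucene 8.x and hypothetical sort, needScores and limit variables:

Collector collector;
if (sort == null || sort.equals(Sort.RELEVANCE)) {
  // Relevance ordering: scores are collected anyway.
  collector = TopScoreDocCollector.create(limit, Integer.MAX_VALUE);
} else if (needScores) {
  // Field sort plus scores: combine the two collectors.
  TopFieldCollector fieldCollector = TopFieldCollector.create(sort, limit, Integer.MAX_VALUE);
  collector = MultiCollector.wrap(fieldCollector, new MaxScoreCollector());
} else {
  collector = TopFieldCollector.create(sort, limit, Integer.MAX_VALUE);
}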
 
Example #2
Source File: TripleIndex.java    From AGDISTIS with GNU Affero General Public License v3.0
private List<Triple> getFromIndex(int maxNumberOfResults, BooleanQuery bq) throws IOException {
	log.debug("\t start asking index...");
	TopScoreDocCollector collector = TopScoreDocCollector.create(maxNumberOfResults, true);
	isearcher.search(bq, collector);
	ScoreDoc[] hits = collector.topDocs().scoreDocs;

	List<Triple> triples = new ArrayList<Triple>();
	String s, p, o;
	for (int i = 0; i < hits.length; i++) {
		Document hitDoc = isearcher.doc(hits[i].doc);
		s = hitDoc.get(FIELD_NAME_SUBJECT);
		p = hitDoc.get(FIELD_NAME_PREDICATE);
		o = hitDoc.get(FIELD_NAME_OBJECT_URI);
		if (o == null) {
			o = hitDoc.get(FIELD_NAME_OBJECT_LITERAL);
		}
		Triple triple = new Triple(s, p, o);
		triples.add(triple);
	}
	log.debug("\t finished asking index...");
	return triples;
}
 
Example #3
Source File: AlfrescoReRankQParserPlugin.java    From SearchServices with GNU Lesser General Public License v3.0
public ReRankCollector(int reRankDocs,
                       int length,
                       Query reRankQuery,
                       double reRankWeight,
                       QueryCommand cmd,
                       IndexSearcher searcher,
                       Map<BytesRef, Integer> boostedPriority,
                       boolean scale) throws IOException {
    super(null);
    this.reRankQuery = reRankQuery;
    this.reRankDocs = reRankDocs;
    this.length = length;
    this.boostedPriority = boostedPriority;
    this.scale = scale;
    Sort sort = cmd.getSort();
    if(sort == null) {
        this.mainCollector = TopScoreDocCollector.create(Math.max(this.reRankDocs, length), null);
    } else {
        sort = sort.rewrite(searcher);
        this.mainCollector = TopFieldCollector.create(sort, Math.max(this.reRankDocs, length), null, false, true, true);
    }
    this.searcher = searcher;
    this.reRankWeight = reRankWeight;
}
 
Example #4
Source File: ResumeSearch.java    From Easy-Cassandra-samples with Apache License 2.0
private List<String> returnResume(Query query) throws IOException {
	int hitsPerPage = 10;
	IndexReader reader = DirectoryReader.open(LuceneUtil.INSTANCE.getDirectory());
	IndexSearcher searcher = new IndexSearcher(reader);
	TopScoreDocCollector collector = TopScoreDocCollector.create(
			hitsPerPage, true);
	searcher.search(query, collector);
	ScoreDoc[] hits = collector.topDocs().scoreDocs;

	List<String> resumeIDs = new LinkedList<>();
	for (int i = 0; i < hits.length; ++i) {
		int docId = hits[i].doc;
		Document d = searcher.doc(docId);
		resumeIDs.add(d.get(COLUMN_NICk_NAME));
	}
	return resumeIDs;
}
 
Example #5
Source File: MusicSearch.java    From Easy-Cassandra-samples with Apache License 2.0
private List<String> returnMusics(Query query) throws IOException {
	int hitsPerPage = 10;
	IndexReader reader = DirectoryReader.open(LuceneUtil.INSTANCE.getDirectory());
	IndexSearcher searcher = new IndexSearcher(reader);
	TopScoreDocCollector collector = TopScoreDocCollector.create(
			hitsPerPage, true);
	searcher.search(query, collector);
	ScoreDoc[] hits = collector.topDocs().scoreDocs;

	List<String> musics = new LinkedList<>();
	for (int i = 0; i < hits.length; ++i) {
		int docId = hits[i].doc;
		Document d = searcher.doc(docId);
		musics.add(d.get(COLUMN_NAME));
	}
	return musics;
}
 
Example #6
Source File: IndexDBO_classes.java    From NLIWOD with GNU Affero General Public License v3.0
@Override
public List<String> search(final String object) {
	if (stopwords.contains(object.toLowerCase())) {
		log.debug("\t Stopword detected: |" + object + "|");
		return ImmutableList.of();
	}
	ArrayList<String> uris = Lists.newArrayList();
	try {
		log.debug("\t start asking index...");

		Query q = new FuzzyQuery(new Term(FIELD_NAME_OBJECT, object), 0);
		TopScoreDocCollector collector = TopScoreDocCollector.create(numberOfDocsRetrievedFromIndex);

		isearcher.search(q, collector);
		ScoreDoc[] hits = collector.topDocs().scoreDocs;

		for (ScoreDoc hit : hits) {
			Document hitDoc = isearcher.doc(hit.doc);
			uris.add(hitDoc.get(FIELD_NAME_SUBJECT));
		}
		log.debug("\t finished asking index...");
	} catch (Exception e) {
		log.error(e.getLocalizedMessage() + " -> " + object, e);
	}
	return uris;
}
 
Example #7
Source File: QueryCommand.java    From lucene-solr with Apache License 2.0
@Override
public List<Collector> create() throws IOException {
  Collector subCollector;
  if (sort == null || sort.equals(Sort.RELEVANCE)) {
    subCollector = topDocsCollector = TopScoreDocCollector.create(docsToCollect, Integer.MAX_VALUE);
  } else {
    topDocsCollector = TopFieldCollector.create(sort, docsToCollect, Integer.MAX_VALUE);
    if (needScores) {
      maxScoreCollector = new MaxScoreCollector();
      subCollector = MultiCollector.wrap(topDocsCollector, maxScoreCollector);
    } else {
      subCollector = topDocsCollector;
    }
  }
  filterCollector = new FilterCollector(docSet, subCollector);
  return Arrays.asList((Collector) filterCollector);
}
 
Example #8
Source File: ReRankCollector.java    From lucene-solr with Apache License 2.0
@SuppressWarnings({"unchecked"})
public ReRankCollector(int reRankDocs,
    int length,
    Rescorer reRankQueryRescorer,
    QueryCommand cmd,
    IndexSearcher searcher,
    Set<BytesRef> boostedPriority) throws IOException {
  super(null);
  this.reRankDocs = reRankDocs;
  this.length = length;
  this.boostedPriority = boostedPriority;
  this.query = cmd.getQuery();
  Sort sort = cmd.getSort();
  if(sort == null) {
    this.sort = null;
    this.mainCollector = TopScoreDocCollector.create(Math.max(this.reRankDocs, length), cmd.getMinExactCount());
  } else {
    this.sort = sort = sort.rewrite(searcher);
    //scores are needed for Rescorer (regardless of whether sort needs it)
    this.mainCollector = TopFieldCollector.create(sort, Math.max(this.reRankDocs, length), cmd.getMinExactCount());
  }
  this.searcher = searcher;
  this.reRankQueryRescorer = reRankQueryRescorer;
}
 
Example #9
Source File: SearchImpl.java    From lucene-solr with Apache License 2.0
private SearchResults search() throws IOException {
  // execute search
  ScoreDoc after = docs.length == 0 ? null : docs[docs.length - 1];

  TopDocs topDocs;
  if (sort != null) {
    topDocs = searcher.searchAfter(after, query, pageSize, sort);
  } else {
    int hitsThreshold = exactHitsCount ? Integer.MAX_VALUE : DEFAULT_TOTAL_HITS_THRESHOLD;
    TopScoreDocCollector collector = TopScoreDocCollector.create(pageSize, after, hitsThreshold);
    searcher.search(query, collector);
    topDocs = collector.topDocs();
  }

  // reset total hits for the current query
  this.totalHits = topDocs.totalHits;

  // cache search results for later use
  ScoreDoc[] newDocs = new ScoreDoc[docs.length + topDocs.scoreDocs.length];
  System.arraycopy(docs, 0, newDocs, 0, docs.length);
  System.arraycopy(topDocs.scoreDocs, 0, newDocs, docs.length, topDocs.scoreDocs.length);
  this.docs = newDocs;

  return SearchResults.of(topDocs.totalHits, topDocs.scoreDocs, currentPage * pageSize, searcher, fieldsToLoad);
}
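Example #9 pages through results by handing the last ScoreDoc of the previous page to TopScoreDocCollector.create(int, ScoreDoc, int) as the after argument. A stripped-down sketch of the same idea, assuming Lucene 8.x and hypothetical searcher, query and pageSize variables:

ScoreDoc after = null;              // null means "start from the first page"
while (true) {
  TopScoreDocCollector collector = TopScoreDocCollector.create(pageSize, after, Integer.MAX_VALUE);
  searcher.search(query, collector);
  ScoreDoc[] page = collector.topDocs().scoreDocs;
  if (page.length == 0) {
    break;                          // no more results
  }
  for (ScoreDoc hit : page) {
    Document doc = searcher.doc(hit.doc);   // process the hit
  }
  after = page[page.length - 1];    // resume below the last hit of this page
}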
 
Example #10
Source File: BlurScoreDocCollector.java    From incubator-retired-blur with Apache License 2.0
@Override
public Collector newCollector() {
  TopScoreDocCollector collector = TopScoreDocCollector.create(_numHitsToCollect, _after, true);
  Collector col = new StopExecutionCollector(collector, _running);
  if (_runSlow) {
    return new SlowCollector(col);
  }
  return col;
}
 
Example #11
Source File: TestNumericRangeQuery32.java    From lucene-solr with Apache License 2.0
private void testRangeSplit(int precisionStep) throws Exception {
  String field="ascfield"+precisionStep;
  // 10 random tests
  int num = TestUtil.nextInt(random(), 10, 20);
  for (int i = 0; i < num; i++) {
    int lower=(int)(random().nextDouble()*noDocs - noDocs/2);
    int upper=(int)(random().nextDouble()*noDocs - noDocs/2);
    if (lower>upper) {
      int a=lower; lower=upper; upper=a;
    }
    // test inclusive range
    Query tq= LegacyNumericRangeQuery.newIntRange(field, precisionStep, lower, upper, true, true);
    TopScoreDocCollector collector = TopScoreDocCollector.create(1, Integer.MAX_VALUE);
    searcher.search(tq, collector);
    TopDocs tTopDocs = collector.topDocs();
    assertEquals("Returned count of range query must be equal to inclusive range length", upper-lower+1, tTopDocs.totalHits.value );
    // test exclusive range
    tq= LegacyNumericRangeQuery.newIntRange(field, precisionStep, lower, upper, false, false);
    collector = TopScoreDocCollector.create(1, Integer.MAX_VALUE);
    searcher.search(tq, collector);
    tTopDocs = collector.topDocs();
    assertEquals("Returned count of range query must be equal to exclusive range length", Math.max(upper-lower-1, 0), tTopDocs.totalHits.value );
    // test left exclusive range
    tq= LegacyNumericRangeQuery.newIntRange(field, precisionStep, lower, upper, false, true);
    collector = TopScoreDocCollector.create(1, Integer.MAX_VALUE);
    searcher.search(tq, collector);
    tTopDocs = collector.topDocs();
    assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits.value );
    // test right exclusive range
    tq= LegacyNumericRangeQuery.newIntRange(field, precisionStep, lower, upper, true, false);
    collector = TopScoreDocCollector.create(1, Integer.MAX_VALUE);
    searcher.search(tq, collector);
    tTopDocs = collector.topDocs();
    assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits.value );
  }
}
 
Example #12
Source File: TestNumericRangeQuery32.java    From lucene-solr with Apache License 2.0
/** we fake a float test using int2float conversion of LegacyNumericUtils */
private void testFloatRange(int precisionStep) throws Exception {
  final String field="ascfield"+precisionStep;
  final int lower=-1000, upper=+2000;
  
  Query tq= LegacyNumericRangeQuery.newFloatRange(field, precisionStep,
      NumericUtils.sortableIntToFloat(lower), NumericUtils.sortableIntToFloat(upper), true, true);
  TopScoreDocCollector collector = TopScoreDocCollector.create(1, Integer.MAX_VALUE);
  searcher.search(tq, collector);
  TopDocs tTopDocs = collector.topDocs();
  assertEquals("Returned count of range query must be equal to inclusive range length", upper-lower+1, tTopDocs.totalHits.value );
}
 
Example #13
Source File: TestNumericRangeQuery64.java    From lucene-solr with Apache License 2.0
private void testRangeSplit(int precisionStep) throws Exception {
  String field="ascfield"+precisionStep;
  // 10 random tests
  int num = TestUtil.nextInt(random(), 10, 20);
  for (int i = 0; i < num; i++) {
    long lower=(long)(random().nextDouble()*noDocs - noDocs/2);
    long upper=(long)(random().nextDouble()*noDocs - noDocs/2);
    if (lower>upper) {
      long a=lower; lower=upper; upper=a;
    }
    // test inclusive range
    Query tq= LegacyNumericRangeQuery.newLongRange(field, precisionStep, lower, upper, true, true);
    TopScoreDocCollector collector = TopScoreDocCollector.create(1, Integer.MAX_VALUE);
    searcher.search(tq, collector);
    TopDocs tTopDocs = collector.topDocs();
    assertEquals("Returned count of range query must be equal to inclusive range length", upper-lower+1, tTopDocs.totalHits.value );
    // test exclusive range
    tq= LegacyNumericRangeQuery.newLongRange(field, precisionStep, lower, upper, false, false);
    collector = TopScoreDocCollector.create(1, Integer.MAX_VALUE);
    searcher.search(tq, collector);
    tTopDocs = collector.topDocs();
    assertEquals("Returned count of range query must be equal to exclusive range length", Math.max(upper-lower-1, 0), tTopDocs.totalHits.value );
    // test left exclusive range
    tq= LegacyNumericRangeQuery.newLongRange(field, precisionStep, lower, upper, false, true);
    collector = TopScoreDocCollector.create(1, Integer.MAX_VALUE);
    searcher.search(tq, collector);
    tTopDocs = collector.topDocs();
    assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits.value );
    // test right exclusive range
    tq= LegacyNumericRangeQuery.newLongRange(field, precisionStep, lower, upper, true, false);
    collector = TopScoreDocCollector.create(1, Integer.MAX_VALUE);
    searcher.search(tq, collector);
    tTopDocs = collector.topDocs();
    assertEquals("Returned count of range query must be equal to half exclusive range length", upper-lower, tTopDocs.totalHits.value );
  }
}
 
Example #14
Source File: TestNumericRangeQuery64.java    From lucene-solr with Apache License 2.0
/** we fake a double test using long2double conversion of LegacyNumericUtils */
private void testDoubleRange(int precisionStep) throws Exception {
  final String field="ascfield"+precisionStep;
  final long lower=-1000L, upper=+2000L;
  
  Query tq= LegacyNumericRangeQuery.newDoubleRange(field, precisionStep,
      NumericUtils.sortableLongToDouble(lower), NumericUtils.sortableLongToDouble(upper), true, true);
  TopScoreDocCollector collector = TopScoreDocCollector.create(1, Integer.MAX_VALUE);
  searcher.search(tq, collector);
  TopDocs tTopDocs = collector.topDocs();
  assertEquals("Returned count of range query must be equal to inclusive range length", upper-lower+1, tTopDocs.totalHits.value );
}
 
Example #15
Source File: ClassDependencyIndexCreator.java    From netbeans with Apache License 2.0
static void search(String className, Indexer indexer, Collection<IndexingContext> contexts, List<? super ClassUsage> results) throws IOException {
    String searchString = crc32base64(className.replace('.', '/'));
    Query refClassQuery = indexer.constructQuery(ClassDependencyIndexCreator.FLD_NB_DEPENDENCY_CLASS.getOntology(), new StringSearchExpression(searchString));
    TopScoreDocCollector collector = TopScoreDocCollector.create(NexusRepositoryIndexerImpl.MAX_RESULT_COUNT, null);
    for (IndexingContext context : contexts) {
        IndexSearcher searcher = context.acquireIndexSearcher();
        try {
            searcher.search(refClassQuery, collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;
            LOG.log(Level.FINER, "for {0} ~ {1} found {2} hits", new Object[] {className, searchString, hits.length});
            for (ScoreDoc hit : hits) {
                int docId = hit.doc;
                Document d = searcher.doc(docId);
                String fldValue = d.get(ClassDependencyIndexCreator.NB_DEPENDENCY_CLASSES);
                LOG.log(Level.FINER, "{0} uses: {1}", new Object[] {className, fldValue});
                Set<String> refClasses = parseField(searchString, fldValue, d.get(ArtifactInfo.NAMES));
                if (!refClasses.isEmpty()) {
                    ArtifactInfo ai = IndexUtils.constructArtifactInfo(d, context);
                    if (ai != null) {
                        ai.setRepository(context.getRepositoryId());
                        List<NBVersionInfo> version = NexusRepositoryIndexerImpl.convertToNBVersionInfo(Collections.singleton(ai));
                        if (!version.isEmpty()) {
                            results.add(new ClassUsage(version.get(0), refClasses));
                        }
                    }
                }
            }
        } finally {
            context.releaseIndexSearcher(searcher);
        }
    }
}
 
Example #16
Source File: BlurScoreDocCollector.java    From incubator-retired-blur with Apache License 2.0
private TopScoreDocCollector getTopScoreDocCollector(Collector collector) {
  if (collector instanceof SlowCollector) {
    SlowCollector slowCollector = (SlowCollector) collector;
    return getTopScoreDocCollector(slowCollector.getCollector());
  } else if (collector instanceof StopExecutionCollector) {
    StopExecutionCollector stopExecutionCollector = (StopExecutionCollector) collector;
    return getTopScoreDocCollector(stopExecutionCollector.getCollector());
  } else if (collector instanceof TopScoreDocCollector) {
    TopScoreDocCollector topScoreDocCollector = (TopScoreDocCollector) collector;
    return topScoreDocCollector;
  } else {
    throw new RuntimeException("Collector type [" + collector + "] not supported.");
  }
}
 
Example #17
Source File: LuceneHelper.java    From dexter with Apache License 2.0
/**
 * @param query
 *            - a query
 * @param field
 *            - the field where to search the query
 * @return number of documents containing the text in query in the given
 *         fields
 */
public int getFreq(String query, String field) {
	Query q = null;
	searcher = getSearcher();
	TopScoreDocCollector collector = TopScoreDocCollector.create(1, true);

	// try {

	Text t = new Text(query).disableStopwords();
	PhraseQuery pq = new PhraseQuery();
	int i = 0;
	for (String term : t.getTerms()) {
		pq.add(new Term(field, term), i++);
	}
	q = pq;
	logger.debug(q.toString());
	// } catch (ParseException e) {
	// logger.error("querying the index: {} ", e.toString());
	// return -1;
	// }
	try {
		searcher.search(q, collector);
	} catch (IOException e) {
		logger.error("querying the index: {} ", e.toString());
		return -1;
	}
	return collector.getTotalHits();
}
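Example #17 only needs the number of matching documents, so it reads collector.getTotalHits() and ignores the collected ScoreDocs. On recent Lucene versions the same number can be obtained more cheaply, without maintaining a top-N heap, via IndexSearcher.count or a TotalHitCountCollector; a small sketch, assuming a searcher and query are already in scope:

// Counts matches directly.
int freq = searcher.count(query);

// Equivalent collector-based form.
TotalHitCountCollector countCollector = new TotalHitCountCollector();
searcher.search(query, countCollector);
int freq2 = countCollector.getTotalHits();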
 
Example #18
Source File: TopGroupsCollector.java    From lucene-solr with Apache License 2.0
TopDocsReducer(Sort withinGroupSort,
               int maxDocsPerGroup, boolean getMaxScores) {
  this.needsScores = getMaxScores || withinGroupSort.needsScores();
  if (withinGroupSort == Sort.RELEVANCE) {
    supplier = () -> new TopDocsAndMaxScoreCollector(true, TopScoreDocCollector.create(maxDocsPerGroup, Integer.MAX_VALUE), null);
  } else {
    supplier = () -> {
      TopFieldCollector topDocsCollector = TopFieldCollector.create(withinGroupSort, maxDocsPerGroup, Integer.MAX_VALUE); // TODO: disable exact counts?
      MaxScoreCollector maxScoreCollector = getMaxScores ? new MaxScoreCollector() : null;
      return new TopDocsAndMaxScoreCollector(false, topDocsCollector, maxScoreCollector);
    };
  }
}
 
Example #19
Source File: BibleSearchIndex.java    From Quelea with GNU General Public License v3.0
/**
 * Search for bible chapters that match the given filter.
 *
 * @param queryString the query string to filter.
 * @param type ignored - may be null.
 * @return a list of all bible chapters that match the given filter.
 */
@Override
public BibleChapter[] filter(String queryString, FilterType type) {
    String sanctifyQueryString = SearchIndexUtils.makeLuceneQuery(queryString);
    if(chapters.isEmpty() || sanctifyQueryString.isEmpty()) {
        return chapters.values().toArray(new BibleChapter[chapters.size()]);
    }
    List<BibleChapter> ret;
    try (DirectoryReader dr = DirectoryReader.open(index)) {
        IndexSearcher searcher = new IndexSearcher(dr);
        BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
        Query q = new ComplexPhraseQueryParser("text", analyzer).parse(sanctifyQueryString);
        TopScoreDocCollector collector = TopScoreDocCollector.create(10000,10000);
        searcher.search(q, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        ret = new ArrayList<>();
        for(int i = 0; i < hits.length; ++i) {
            int docId = hits[i].doc;
            Document d = searcher.doc(docId);
            BibleChapter chapter = chapters.get(Integer.parseInt(d.get("number")));
            ret.add(chapter);
        }
        return ret.toArray(new BibleChapter[ret.size()]);
    }
    catch (ParseException | IOException ex) {
        LOGGER.log(Level.WARNING, "Invalid query string: " + sanctifyQueryString, ex);
        return new BibleChapter[0];
    }
}
 
Example #20
Source File: DBpediaIndex.java    From NLIWOD with GNU Affero General Public License v3.0
public ArrayList<String> search(final String object) {
	ArrayList<String> uris = Lists.newArrayList();
	try {
		log.debug("\t start asking index...");

		// remove hyphens assertTrue
		// if (object.contains("-")) {
		// object = "\"" + object.replace("-", " ") + "\"";
		// }
		// FuzzyQuery q = new FuzzyQuery(new Term(FIELD_NAME_OBJECT,
		// object), 0);
		QueryParser qp = new QueryParser(FIELD_NAME_OBJECT, analyzer);
		TopScoreDocCollector collector = TopScoreDocCollector.create(numberOfDocsRetrievedFromIndex);
		isearcher.search(qp.createPhraseQuery(FIELD_NAME_OBJECT, object), collector);
		// isearcher.search(q, collector);
		ScoreDoc[] hits = collector.topDocs().scoreDocs;

		for (ScoreDoc hit : hits) {
			Document hitDoc = isearcher.doc(hit.doc);
			uris.add(hitDoc.get(FIELD_NAME_SUBJECT));
		}
		log.debug("\t finished asking index...");
	} catch (Exception e) {
		log.error(e.getLocalizedMessage() + " -> " + object, e);
	}
	return uris;
}
 
Example #21
Source File: IndexDBO_properties.java    From NLIWOD with GNU Affero General Public License v3.0
@Override
public List<String> search(final String object) {
	if (stopwords.contains(object.toLowerCase())) {
		log.debug("\t Stopword detected: |" + object + "|");
		System.out.println("returning immutable empty");
		return ImmutableList.of();
	}
	ArrayList<String> uris = Lists.newArrayList();
	try {
		log.debug("\t start asking index for |" + object + "|");

		Query q = new FuzzyQuery(new Term(FIELD_NAME_OBJECT, object), 0);
		TopScoreDocCollector collector = TopScoreDocCollector.create(numberOfDocsRetrievedFromIndex);

		isearcher.search(q, collector);
		ScoreDoc[] hits = collector.topDocs().scoreDocs;

		for (ScoreDoc hit : hits) {
			Document hitDoc = isearcher.doc(hit.doc);
			log.debug(object + "->" + hitDoc.get(FIELD_NAME_SUBJECT) + ", " + hitDoc.get(FIELD_NAME_OBJECT));
			uris.add(hitDoc.get(FIELD_NAME_SUBJECT));
		}
		log.debug("\t finished asking index...");
	} catch (Exception e) {
		log.error(e.getLocalizedMessage() + " -> " + object, e);
	}
	return uris;
}
 
Example #22
Source File: SearchWithCollectorTask.java    From lucene-solr with Apache License 2.0
@Override
protected Collector createCollector() throws Exception {
  Collector collector = null;
  if (clnName.equalsIgnoreCase("topScoreDoc") == true) {
    collector = TopScoreDocCollector.create(numHits(), Integer.MAX_VALUE);
  } else if (clnName.length() > 0){
    collector = Class.forName(clnName).asSubclass(Collector.class).getConstructor().newInstance();

  } else {
    collector = super.createCollector();
  }
  return collector;
}
 
Example #23
Source File: TestIndexWriterMaxDocs.java    From lucene-solr with Apache License 2.0
@Monster("takes over two hours")
public void testExactlyAtTrueLimit() throws Exception {
  Directory dir = newFSDirectory(createTempDir("2BDocs3"));
  IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(null));
  Document doc = new Document();
  doc.add(newStringField("field", "text", Field.Store.NO));
  for (int i = 0; i < IndexWriter.MAX_DOCS; i++) {
    iw.addDocument(doc);
    /*
    if (i%1000000 == 0) {
      System.out.println((i/1000000) + " M docs...");
    }
    */
  }
  iw.commit();

  // First unoptimized, then optimized:
  for(int i=0;i<2;i++) {
    DirectoryReader ir = DirectoryReader.open(dir);
    assertEquals(IndexWriter.MAX_DOCS, ir.maxDoc());
    assertEquals(IndexWriter.MAX_DOCS, ir.numDocs());
    IndexSearcher searcher = new IndexSearcher(ir);
    TopScoreDocCollector collector = TopScoreDocCollector.create(10, Integer.MAX_VALUE);
    searcher.search(new TermQuery(new Term("field", "text")), collector);
    TopDocs hits = collector.topDocs();
    assertEquals(IndexWriter.MAX_DOCS, hits.totalHits.value);

    // Sort by docID reversed:
    hits = searcher.search(new TermQuery(new Term("field", "text")), 10, new Sort(new SortField(null, SortField.Type.DOC, true)));
    assertEquals(IndexWriter.MAX_DOCS, hits.totalHits.value);
    assertEquals(10, hits.scoreDocs.length);
    assertEquals(IndexWriter.MAX_DOCS-1, hits.scoreDocs[0].doc);
    ir.close();

    iw.forceMerge(1);
  }

  iw.close();
  dir.close();
}
 
Example #24
Source File: TestMultiValuedNumericRangeQuery.java    From lucene-solr with Apache License 2.0
/** Tests LegacyNumericRangeQuery on a multi-valued field (multiple numeric values per document).
 * This test ensures, that a classical TermRangeQuery returns exactly the same document numbers as
 * LegacyNumericRangeQuery (see SOLR-1322 for discussion) and the multiple precision terms per numeric value
 * do not interfere with multiple numeric values.
 */
public void testMultiValuedNRQ() throws Exception {
  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
      newIndexWriterConfig(new MockAnalyzer(random()))
      .setMaxBufferedDocs(TestUtil.nextInt(random(), 50, 1000)));
  
  DecimalFormat format = new DecimalFormat("00000000000", new DecimalFormatSymbols(Locale.ROOT));
  
  int num = atLeast(500);
  for (int l = 0; l < num; l++) {
    Document doc = new Document();
    for (int m=0, c=random().nextInt(10); m<=c; m++) {
      int value = random().nextInt(Integer.MAX_VALUE);
      doc.add(newStringField("asc", format.format(value), Field.Store.NO));
      doc.add(new LegacyIntField("trie", value, Field.Store.NO));
    }
    writer.addDocument(doc);
  }
  IndexReader reader = writer.getReader();
  writer.close();
  
  IndexSearcher searcher=newSearcher(reader);
  num = atLeast(50);
  for (int i = 0; i < num; i++) {
    int lower=random().nextInt(Integer.MAX_VALUE);
    int upper=random().nextInt(Integer.MAX_VALUE);
    if (lower>upper) {
      int a=lower; lower=upper; upper=a;
    }
    TermRangeQuery cq=TermRangeQuery.newStringRange("asc", format.format(lower), format.format(upper), true, true);
    LegacyNumericRangeQuery<Integer> tq= LegacyNumericRangeQuery.newIntRange("trie", lower, upper, true, true);
    TopScoreDocCollector trCollector = TopScoreDocCollector.create(1, Integer.MAX_VALUE);
    TopScoreDocCollector nrCollector = TopScoreDocCollector.create(1, Integer.MAX_VALUE);
    searcher.search(cq, trCollector);
    searcher.search(tq, nrCollector);
    TopDocs trTopDocs = trCollector.topDocs();
    TopDocs nrTopDocs = nrCollector.topDocs();
    assertEquals("Returned count for LegacyNumericRangeQuery and TermRangeQuery must be equal", trTopDocs.totalHits.value, nrTopDocs.totalHits.value );
  }
  reader.close();
  directory.close();
}
 
Example #25
Source File: TestLatLonPointDistanceFeatureQuery.java    From lucene-solr with Apache License 2.0
public void testBasics() throws IOException {
  Directory dir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig()
      .setMergePolicy(newLogMergePolicy(random().nextBoolean())));
  Document doc = new Document();
  LatLonPoint point = new LatLonPoint("foo", 0.0, 0.0);
  doc.add(point);
  LatLonDocValuesField docValue = new LatLonDocValuesField("foo",0.0, 0.0);
  doc.add(docValue);

  double pivotDistance = 5000;//5k

  point.setLocationValue(-7, -7);
  docValue.setLocationValue(-7, -7);
  w.addDocument(doc);

  point.setLocationValue(9, 9);
  docValue.setLocationValue(9, 9);
  w.addDocument(doc);


  point.setLocationValue(8, 8);
  docValue.setLocationValue(8, 8);
  w.addDocument(doc);

  point.setLocationValue(4, 4);
  docValue.setLocationValue(4, 4);
  w.addDocument(doc);

  point.setLocationValue(-1, -1);
  docValue.setLocationValue(-1, -1);
  w.addDocument(doc);

  DirectoryReader reader = w.getReader();
  IndexSearcher searcher = newSearcher(reader);
  
  Query q = LatLonPoint.newDistanceFeatureQuery("foo", 3, 10, 10, pivotDistance);
  TopScoreDocCollector collector = TopScoreDocCollector.create(2, null, 1);
  searcher.search(q, collector);
  TopDocs topHits = collector.topDocs();
  assertEquals(2, topHits.scoreDocs.length);

  double distance1 = SloppyMath.haversinMeters(GeoEncodingUtils.decodeLatitude(GeoEncodingUtils.encodeLatitude(9)) , GeoEncodingUtils.decodeLongitude(GeoEncodingUtils.encodeLongitude(9)), 10,10);
  double distance2 = SloppyMath.haversinMeters(GeoEncodingUtils.decodeLatitude(GeoEncodingUtils.encodeLatitude(8)) , GeoEncodingUtils.decodeLongitude(GeoEncodingUtils.encodeLongitude(8)), 10,10);

  CheckHits.checkEqual(q,
      new ScoreDoc[] {
          new ScoreDoc(1, (float) (3f * (pivotDistance / (pivotDistance + distance1)))),
          new ScoreDoc(2, (float) (3f * (pivotDistance / (pivotDistance + distance2))))
      },
      topHits.scoreDocs);

  distance1 = SloppyMath.haversinMeters(GeoEncodingUtils.decodeLatitude(GeoEncodingUtils.encodeLatitude(9)) , GeoEncodingUtils.decodeLongitude(GeoEncodingUtils.encodeLongitude(9)), 9,9);
  distance2 = SloppyMath.haversinMeters(GeoEncodingUtils.decodeLatitude(GeoEncodingUtils.encodeLatitude(8)) , GeoEncodingUtils.decodeLongitude(GeoEncodingUtils.encodeLongitude(8)), 9,9);

  q = LatLonPoint.newDistanceFeatureQuery("foo", 3, 9, 9,  pivotDistance);
  collector = TopScoreDocCollector.create(2, null, 1);
  searcher.search(q, collector);
  topHits = collector.topDocs();
  assertEquals(2, topHits.scoreDocs.length);
  CheckHits.checkExplanations(q, "", searcher);

  CheckHits.checkEqual(q,
      new ScoreDoc[] {
          new ScoreDoc(1, (float) (3f * (pivotDistance / (pivotDistance + distance1)))),
          new ScoreDoc(2, (float) (3f * (pivotDistance / (pivotDistance + distance2))))
      },
      topHits.scoreDocs);
  
  reader.close();
  w.close();
  dir.close();
}
 
Example #26
Source File: TestLongDistanceFeatureQuery.java    From lucene-solr with Apache License 2.0
public void testBasics() throws IOException {
  Directory dir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig()
      .setMergePolicy(newLogMergePolicy(random().nextBoolean())));
  Document doc = new Document();
  LongPoint point = new LongPoint("foo", 0L);
  doc.add(point);
  NumericDocValuesField docValue = new NumericDocValuesField("foo", 0L);
  doc.add(docValue);

  point.setLongValue(3);
  docValue.setLongValue(3);
  w.addDocument(doc);

  point.setLongValue(12);
  docValue.setLongValue(12);
  w.addDocument(doc);

  point.setLongValue(8);
  docValue.setLongValue(8);
  w.addDocument(doc);

  point.setLongValue(-1);
  docValue.setLongValue(-1);
  w.addDocument(doc);

  point.setLongValue(7);
  docValue.setLongValue(7);
  w.addDocument(doc);

  DirectoryReader reader = w.getReader();
  IndexSearcher searcher = newSearcher(reader);
  
  Query q = LongPoint.newDistanceFeatureQuery("foo", 3, 10, 5);
  TopScoreDocCollector collector = TopScoreDocCollector.create(2, null, 1);
  searcher.search(q, collector);
  TopDocs topHits = collector.topDocs();
  assertEquals(2, topHits.scoreDocs.length);

  CheckHits.checkEqual(q,
      new ScoreDoc[] {
          new ScoreDoc(1, (float) (3f * (5. / (5. + 2.)))),
          new ScoreDoc(2, (float) (3f * (5. / (5. + 2.))))
      },
      topHits.scoreDocs);

  q = LongPoint.newDistanceFeatureQuery("foo", 3, 7, 5);
  collector = TopScoreDocCollector.create(2, null, 1);
  searcher.search(q, collector);
  topHits = collector.topDocs();
  assertEquals(2, topHits.scoreDocs.length);
  CheckHits.checkExplanations(q, "", searcher);

  CheckHits.checkEqual(q,
      new ScoreDoc[] {
          new ScoreDoc(4, (float) (3f * (5. / (5. + 0.)))),
          new ScoreDoc(2, (float) (3f * (5. / (5. + 1.))))
      },
      topHits.scoreDocs);
  
  reader.close();
  w.close();
  dir.close();
}
 
Example #27
Source File: BlockGroupingCollector.java    From lucene-solr with Apache License 2.0
/** Returns the grouped results.  Returns null if the
 *  number of groups collected is &lt;= groupOffset.
 *
 *  <p><b>NOTE</b>: This collector is unable to compute
 *  the groupValue per group so it will always be null.
 *  This is normally not a problem, as you can obtain the
 *  value just like you obtain other values for each
 *  matching document (eg, via stored fields, via
 *  DocValues, etc.)
 *
 *  @param withinGroupSort The {@link Sort} used to sort
 *    documents within each group.
 *  @param groupOffset Which group to start from
 *  @param withinGroupOffset Which document to start from
 *    within each group
 *  @param maxDocsPerGroup How many top documents to keep
 *     within each group.
 */
public TopGroups<?> getTopGroups(Sort withinGroupSort, int groupOffset, int withinGroupOffset, int maxDocsPerGroup) throws IOException {

  //if (queueFull) {
  //System.out.println("getTopGroups groupOffset=" + groupOffset + " topNGroups=" + topNGroups);
  //}
  if (subDocUpto != 0) {
    processGroup();
  }
  if (groupOffset >= groupQueue.size()) {
    return null;
  }
  int totalGroupedHitCount = 0;

  final ScoreAndDoc fakeScorer = new ScoreAndDoc();

  float maxScore = Float.MIN_VALUE;

  @SuppressWarnings({"unchecked","rawtypes"})
  final GroupDocs<Object>[] groups = new GroupDocs[groupQueue.size() - groupOffset];
  for(int downTo=groupQueue.size()-groupOffset-1;downTo>=0;downTo--) {
    final OneGroup og = groupQueue.pop();

    // At this point we hold all docs w/ in each group,
    // unsorted; we now sort them:
    final TopDocsCollector<?> collector;
    if (withinGroupSort.equals(Sort.RELEVANCE)) {
      // Sort by score
      if (!needsScores) {
        throw new IllegalArgumentException("cannot sort by relevance within group: needsScores=false");
      }
      collector = TopScoreDocCollector.create(maxDocsPerGroup, Integer.MAX_VALUE);
    } else {
      // Sort by fields
      collector = TopFieldCollector.create(withinGroupSort, maxDocsPerGroup, Integer.MAX_VALUE); // TODO: disable exact counts?
    }

    float groupMaxScore = needsScores ? Float.NEGATIVE_INFINITY : Float.NaN;
    LeafCollector leafCollector = collector.getLeafCollector(og.readerContext);
    leafCollector.setScorer(fakeScorer);
    for(int docIDX=0;docIDX<og.count;docIDX++) {
      final int doc = og.docs[docIDX];
      fakeScorer.doc = doc;
      if (needsScores) {
        fakeScorer.score = og.scores[docIDX];
        groupMaxScore = Math.max(groupMaxScore, fakeScorer.score);
      }
      leafCollector.collect(doc);
    }
    totalGroupedHitCount += og.count;

    final Object[] groupSortValues;

    groupSortValues = new Comparable<?>[comparators.length];
    for(int sortFieldIDX=0;sortFieldIDX<comparators.length;sortFieldIDX++) {
      groupSortValues[sortFieldIDX] = comparators[sortFieldIDX].value(og.comparatorSlot);
    }

    final TopDocs topDocs = collector.topDocs(withinGroupOffset, maxDocsPerGroup);

    // TODO: we could aggregate scores across children
    // by Sum/Avg instead of passing NaN:
    groups[downTo] = new GroupDocs<>(Float.NaN,
                                           groupMaxScore,
                                           new TotalHits(og.count, TotalHits.Relation.EQUAL_TO),
                                           topDocs.scoreDocs,
                                           null,
                                           groupSortValues);
    maxScore = Math.max(maxScore, groupMaxScore);
  }

  /*
  while (groupQueue.size() != 0) {
    final OneGroup og = groupQueue.pop();
    //System.out.println("  leftover: og ord=" + og.groupOrd + " count=" + og.count);
    totalGroupedHitCount += og.count;
  }
  */

  return new TopGroups<>(new TopGroups<>(groupSort.getSort(),
                                     withinGroupSort.getSort(),
                                     totalHitCount, totalGroupedHitCount, groups, maxScore),
                       totalGroupCount);
}
 
Example #28
Source File: TestLatLonPointDistanceFeatureQuery.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
public void testCrossesDateLine() throws IOException {
  Directory dir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig()
      .setMergePolicy(newLogMergePolicy(random().nextBoolean())));
  Document doc = new Document();
  LatLonPoint point = new LatLonPoint("foo", 0.0, 0.0);
  doc.add(point);
  LatLonDocValuesField docValue = new LatLonDocValuesField("foo",0.0, 0.0);
  doc.add(docValue);

  double pivotDistance = 5000;//5k

  point.setLocationValue(0, -179);
  docValue.setLocationValue(0, -179);
  w.addDocument(doc);

  point.setLocationValue(0, 176);
  docValue.setLocationValue(0, 176);
  w.addDocument(doc);

  point.setLocationValue(0, -150);
  docValue.setLocationValue(0, -150);
  w.addDocument(doc);

  point.setLocationValue(0, -140);
  docValue.setLocationValue(0, -140);
  w.addDocument(doc);

  point.setLocationValue(0, 140);
  docValue.setLocationValue(01, 140);
  w.addDocument(doc);

  DirectoryReader reader = w.getReader();
  IndexSearcher searcher = newSearcher(reader);

  Query q = LatLonPoint.newDistanceFeatureQuery("foo", 3, 0, 179, pivotDistance);
  TopScoreDocCollector collector = TopScoreDocCollector.create(2, null, 1);
  searcher.search(q, collector);
  TopDocs topHits = collector.topDocs();
  assertEquals(2, topHits.scoreDocs.length);

  double distance1 = SloppyMath.haversinMeters(GeoEncodingUtils.decodeLatitude(GeoEncodingUtils.encodeLatitude(0)) , GeoEncodingUtils.decodeLongitude(GeoEncodingUtils.encodeLongitude(-179)), 0,179);
  double distance2 = SloppyMath.haversinMeters(GeoEncodingUtils.decodeLatitude(GeoEncodingUtils.encodeLatitude(0)) , GeoEncodingUtils.decodeLongitude(GeoEncodingUtils.encodeLongitude(176)), 0,179);

  CheckHits.checkEqual(q,
      new ScoreDoc[] {
          new ScoreDoc(0, (float) (3f * (pivotDistance / (pivotDistance + distance1)))),
          new ScoreDoc(1, (float) (3f * (pivotDistance / (pivotDistance + distance2))))
      },
      topHits.scoreDocs);

  reader.close();
  w.close();
  dir.close();
}
 
Example #29
Source File: ReadTask.java    From lucene-solr with Apache License 2.0
protected Collector createCollector() throws Exception {
  return TopScoreDocCollector.create(numHits(), withTotalHits() ? Integer.MAX_VALUE : 1);
}
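Example #29 makes the meaning of the second create argument explicit: Integer.MAX_VALUE forces an exact total hit count, while a small threshold (here 1) lets Lucene skip non-competitive documents once enough hits have been collected, in which case totalHits is only a lower bound. A short sketch of how to read such a result, assuming Lucene 8.x and an existing searcher and query:

TopScoreDocCollector collector = TopScoreDocCollector.create(10, 1); // approximate count allowed
searcher.search(query, collector);
TopDocs topDocs = collector.topDocs();
long count = topDocs.totalHits.value;
if (topDocs.totalHits.relation == TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO) {
  // count is only a lower bound on the true number of matches
}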
 
Example #30
Source File: TestLatLonPointDistanceFeatureQuery.java    From lucene-solr with Apache License 2.0
public void testMissingValue() throws IOException {
  Directory dir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), dir, newIndexWriterConfig()
      .setMergePolicy(newLogMergePolicy(random().nextBoolean())));
  Document doc = new Document();
  LatLonPoint point = new LatLonPoint("foo", 0, 0);
  doc.add(point);
  LatLonDocValuesField docValue = new LatLonDocValuesField("foo", 0, 0);
  doc.add(docValue);

  point.setLocationValue(3, 3);
  docValue.setLocationValue(3, 3);
  w.addDocument(doc);

  w.addDocument(new Document());

  point.setLocationValue(7, 7);
  docValue.setLocationValue(7, 7);
  w.addDocument(doc);

  DirectoryReader reader = w.getReader();
  IndexSearcher searcher = newSearcher(reader);
  
  Query q = LatLonPoint.newDistanceFeatureQuery("foo", 3, 10, 10, 5);
  TopScoreDocCollector collector = TopScoreDocCollector.create(3, null, 1);
  searcher.search(q, collector);
  TopDocs topHits = collector.topDocs();
  assertEquals(2, topHits.scoreDocs.length);

  double distance1 = SloppyMath.haversinMeters(GeoEncodingUtils.decodeLatitude(GeoEncodingUtils.encodeLatitude(7)) , GeoEncodingUtils.decodeLongitude(GeoEncodingUtils.encodeLongitude(7)), 10,10);
  double distance2 = SloppyMath.haversinMeters(GeoEncodingUtils.decodeLatitude(GeoEncodingUtils.encodeLatitude(3)) , GeoEncodingUtils.decodeLongitude(GeoEncodingUtils.encodeLongitude(3)), 10,10);

  CheckHits.checkEqual(q,
      new ScoreDoc[] {
          new ScoreDoc(2, (float) (3f * (5. / (5. + distance1)))),
          new ScoreDoc(0, (float) (3f * (5. / (5. + distance2))))
      },
      topHits.scoreDocs);

  CheckHits.checkExplanations(q, "", searcher);

  reader.close();
  w.close();
  dir.close();
}