Java Code Examples for org.apache.lucene.search.DocIdSet#iterator()

The following examples show how to use org.apache.lucene.search.DocIdSet#iterator(). Each example names its source file, the project it was taken from, and that project's license. A pattern to notice throughout: both Filter#getDocIdSet() and DocIdSet#iterator() are allowed to return null when a segment contains no matching documents, so nearly every caller guards against null before iterating.
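Here is a minimal sketch of that defensive shape, assuming hypothetical filter and context variables and a placeholder per-document step (not drawn from any single example below):

DocIdSet docIdSet = filter.getDocIdSet(context, null);
if (docIdSet == null) {
  return null; // the filter matched nothing in this segment
}
DocIdSetIterator iterator = docIdSet.iterator();
if (iterator == null) {
  return null; // iterator() may also return null for an empty set
}
for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) {
  // process doc
}
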
Example 1
Source File: ParentQuery.java    From Elasticsearch with Apache License 2.0
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
    DocIdSet childrenDocSet = childrenFilter.getDocIdSet(context, null);
    // we forcefully apply live docs here so that deleted children don't give matching parents
    childrenDocSet = BitsFilteredDocIdSet.wrap(childrenDocSet, context.reader().getLiveDocs());
    if (Lucene.isEmpty(childrenDocSet)) {
        return null;
    }
    final DocIdSetIterator childIterator = childrenDocSet.iterator();
    if (childIterator == null) {
        return null;
    }
    SortedDocValues bytesValues = globalIfd.load(context).getOrdinalsValues(parentType);
    if (bytesValues == null) {
        return null;
    }

    return new ChildScorer(this, parentIdxs, scores, childIterator, bytesValues);
}
 
Example 2
Source File: ParentConstantScoreQuery.java    From Elasticsearch with Apache License 2.0
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
    DocIdSet childrenDocIdSet = childrenFilter.getDocIdSet(context, null);
    if (Lucene.isEmpty(childrenDocIdSet)) {
        return null;
    }

    SortedDocValues globalValues = globalIfd.load(context).getOrdinalsValues(parentType);
    if (globalValues != null) {
        // we forcefully apply live docs here so that deleted children don't give matching parents
        childrenDocIdSet = BitsFilteredDocIdSet.wrap(childrenDocIdSet, context.reader().getLiveDocs());
        DocIdSetIterator innerIterator = childrenDocIdSet.iterator();
        if (innerIterator != null) {
            ChildrenDocIdIterator childrenDocIdIterator = new ChildrenDocIdIterator(
                    innerIterator, parentOrds, globalValues
            );
            return ConstantScorer.create(childrenDocIdIterator, this, queryWeight);
        }
    }
    return null;
}
 
Example 3
Source File: FilterCache.java    From incubator-retired-blur with Apache License 2.0
private DocIdSet docIdSetToCache(DocIdSet docIdSet, AtomicReader reader, String segmentName, Directory directory)
    throws IOException {
  if (docIdSet == null) {
    // this is better than returning null, as the non-null result can be cached
    return DocIdSet.EMPTY_DOCIDSET;
  } else if (docIdSet.isCacheable()) {
    return docIdSet;
  } else {
    final DocIdSetIterator it = docIdSet.iterator();
    // iterator() is allowed to return null; in that case we
    // return the empty set, which is cacheable.
    if (it == null) {
      return DocIdSet.EMPTY_DOCIDSET;
    } else {
      final IndexFileBitSet bits = new IndexFileBitSet(reader.maxDoc(), _id, segmentName, directory);
      if (!bits.exists()) {
        bits.create(it);
      }
      bits.load();
      return bits;
    }
  }
}
 
Example 4
Source File: CrossCollectionJoinQuery.java    From lucene-solr with Apache License 2.0
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
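  // lazily build the top-level filter once; scorer() is called once per segment, so later segments reuse it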
  if (filter == null) {
    filter = getDocSet().getTopFilter();
  }

  DocIdSet readerSet = filter.getDocIdSet(context, null);
  if (readerSet == null) {
    return null;
  }
  DocIdSetIterator readerSetIterator = readerSet.iterator();
  if (readerSetIterator == null) {
    return null;
  }
  return new ConstantScoreScorer(this, score(), scoreMode, readerSetIterator);
}
 
Example 5
Source File: TestDocIdSetBuilder.java    From lucene-solr with Apache License 2.0
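// Asserts that two DocIdSets enumerate exactly the same documents; a null DocIdSet is treated as empty.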
private void assertEquals(DocIdSet d1, DocIdSet d2) throws IOException {
  if (d1 == null) {
    if (d2 != null) {
      assertEquals(DocIdSetIterator.NO_MORE_DOCS, d2.iterator().nextDoc());
    }
  } else if (d2 == null) {
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, d1.iterator().nextDoc());
  } else {
    DocIdSetIterator i1 = d1.iterator();
    DocIdSetIterator i2 = d2.iterator();
    for (int doc = i1.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = i1.nextDoc()) {
      assertEquals(doc, i2.nextDoc());
    }
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, i2.nextDoc());
  }
}
 
Example 6
Source File: BaseBitSetTestCase.java    From lucene-solr with Apache License 2.0
private void testOr(float load) throws IOException {
  final int numBits = 1 + random().nextInt(100000);
  BitSet set1 = new JavaUtilBitSet(randomSet(numBits, 0), numBits); // empty
  T set2 = copyOf(set1, numBits);
  
  final int iterations = atLeast(10);
  for (int iter = 0; iter < iterations; ++iter) {
    DocIdSet otherSet = randomCopy(new JavaUtilBitSet(randomSet(numBits, load), numBits), numBits);
    DocIdSetIterator otherIterator = otherSet.iterator();
    if (otherIterator != null) {
      set1.or(otherIterator);
      set2.or(otherSet.iterator());
      assertEquals(set1, set2, numBits);
    }
  }
}
 
Example 7
Source File: AbstractPrefixTreeQuery.java    From lucene-solr with Apache License 2.0
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
  return new ConstantScoreWeight(this, boost) {
    @Override
    public Scorer scorer(LeafReaderContext context) throws IOException {
      DocIdSet docSet = getDocIdSet(context);
      if (docSet == null) {
        return null;
      }
      DocIdSetIterator disi = docSet.iterator();
      if (disi == null) {
        return null;
      }
      return new ConstantScoreScorer(this, score(), scoreMode, disi);
    }

    @Override
    public boolean isCacheable(LeafReaderContext ctx) {
      return true;
    }
  };
}
 
Example 8
Source File: AnalyticsDriver.java    From lucene-solr with Apache License 2.0
/**
 * Drive the collection of reduction data. This includes overall data as well as faceted data.
 *
 * @param manager the request manager whose collection is being driven
 * @param searcher the searcher to collect results from
 * @param filter the filter representing the overall query
 * @param queryRequest the originating Solr query request
 * @throws IOException if an error occurs while reading from Solr
 */
public static void drive(AnalyticsRequestManager manager, SolrIndexSearcher searcher, Filter filter, SolrQueryRequest queryRequest) throws IOException {
  StreamingInfo streamingInfo = manager.getStreamingFacetInfo();
  Iterable<StreamingFacet> streamingFacets = streamingInfo.streamingFacets;
  ReductionCollectionManager collectionManager = streamingInfo.streamingCollectionManager;

  Iterable<FacetValueQueryExecuter> facetExecuters = manager.getFacetExecuters(filter, queryRequest);

  // Streaming phase (Overall results & Value/Pivot Facets)
  // Loop through all documents and collect reduction data for streaming facets and overall results
  if (collectionManager.needsCollection()) {
    List<LeafReaderContext> contexts = searcher.getTopReaderContext().leaves();
    for (int leafNum = 0; leafNum < contexts.size(); leafNum++) {
      LeafReaderContext context = contexts.get(leafNum);
      DocIdSet dis = filter.getDocIdSet(context, null); // solr docsets already exclude any deleted docs
      if (dis == null) {
        continue;
      }
      DocIdSetIterator disi = dis.iterator();
      if (disi != null) {
        collectionManager.doSetNextReader(context);
        int doc = disi.nextDoc();
        while (doc != DocIdSetIterator.NO_MORE_DOCS) {
          // Add a document to the statistics being generated
          collectionManager.collect(doc);
          streamingFacets.forEach(facet -> facet.addFacetValueCollectionTargets());
          collectionManager.apply();
          doc = disi.nextDoc();
        }
      }
    }
  }

  // Executing phase (Query/Range Facets)
  // Send additional Solr Queries to compute facet values
  for (FacetValueQueryExecuter executer : facetExecuters) {
    executer.execute(searcher);
  }
}
 
Example 9
Source File: SolrRangeQuery.java    From lucene-solr with Apache License 2.0
private Scorer scorer(DocIdSet set) throws IOException {
  if (set == null) {
    return null;
  }
  final DocIdSetIterator disi = set.iterator();
  if (disi == null) {
    return null;
  }
  return new ConstantScoreScorer(this, score(), scoreMode, disi);
}
 
Example 10
Source File: GraphTermsQParserPlugin.java    From lucene-solr with Apache License 2.0
@Override
public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
  return new ConstantScoreWeight(this, boost) {
    Filter filter;

    @Override
    public Scorer scorer(LeafReaderContext context) throws IOException {
      if (filter == null) {
        DocSet set = getDocSet(searcher);
        filter = set.getTopFilter();
      }

      // Although this set only includes live docs, other filters can be pushed down to queries.
      DocIdSet readerSet = filter.getDocIdSet(context, null);
      if (readerSet == null) {
        return null;
      }
      DocIdSetIterator readerSetIterator = readerSet.iterator();
      if (readerSetIterator == null) {
        return null;
      }
      return new ConstantScoreScorer(this, score(), scoreMode, readerSetIterator);
    }

    @Override
    public boolean isCacheable(LeafReaderContext ctx) {
      return true;
    }
  };
}
 
Example 11
Source File: SolrConstantScoreQuery.java    From lucene-solr with Apache License 2.0
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
  DocIdSet docIdSet = filter instanceof SolrFilter ? ((SolrFilter)filter).getDocIdSet(this.context, context, null) : filter.getDocIdSet(context, null);
  if (docIdSet == null) {
    return null;
  }
  DocIdSetIterator iterator = docIdSet.iterator();
  if (iterator == null) {
    return null;
  }
  return new ConstantScoreScorer(this, score(), scoreMode, iterator);
}
 
Example 12
Source File: GraphQuery.java    From lucene-solr with Apache License 2.0
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
  if (filter == null) {
    resultSet = getDocSet();
    filter = resultSet.getTopFilter();
  }
  DocIdSet readerSet = filter.getDocIdSet(context, context.reader().getLiveDocs());
  // create a scorer over the result set; if the right-hand query matched nothing, fall back to an empty iterator
  DocIdSetIterator readerSetIterator = readerSet == null ? null : readerSet.iterator();
  return new GraphScorer(this, readerSetIterator == null ? DocIdSetIterator.empty() : readerSetIterator, 1);
}
 
Example 13
Source File: TestDocSet.java    From lucene-solr with Apache License 2.0
public void doTestIteratorEqual(DocIdSet a, DocIdSet b) throws IOException {
  DocIdSetIterator ia = a.iterator();
  DocIdSetIterator ib = b.iterator();

  // test for next() equivalence
  for (;;) {
    int da = ia.nextDoc();
    int db = ib.nextDoc();
    assertEquals(da, db);
    assertEquals(ia.docID(), ib.docID());
    if (da == DocIdSetIterator.NO_MORE_DOCS) break;
  }

  for (int i = 0; i < 10; i++) {
    // test random skipTo() and next()
    ia = a.iterator();
    ib = b.iterator();
    int doc = -1;
    for (;;) {
      int da, db;
      if (rand.nextBoolean()) {
        da = ia.nextDoc();
        db = ib.nextDoc();
      } else {
        int target = doc + rand.nextInt(10) + 1;  // keep in mind future edge cases like probing (increase if necessary)
        da = ia.advance(target);
        db = ib.advance(target);
      }

      assertEquals(da, db);
      assertEquals(ia.docID(), ib.docID());
      if (da == DocIdSetIterator.NO_MORE_DOCS) break;
      doc = da;
    }
  }
}
 
Example 14
Source File: IndexManager.java    From incubator-retired-blur with Apache License 2.0
@SuppressWarnings("unchecked")
private static boolean isFiltered(int notAdjustedDocId, IndexReader reader, Filter filter) throws IOException {
  if (filter == null) {
    return false;
  }
  if (reader instanceof BaseCompositeReader) {
    BaseCompositeReader<IndexReader> indexReader = (BaseCompositeReader<IndexReader>) reader;
    List<? extends IndexReader> sequentialSubReaders = BaseCompositeReaderUtil.getSequentialSubReaders(indexReader);
    int readerIndex = BaseCompositeReaderUtil.readerIndex(indexReader, notAdjustedDocId);
    int readerBase = BaseCompositeReaderUtil.readerBase(indexReader, readerIndex);
    int docId = notAdjustedDocId - readerBase;
    IndexReader orgReader = sequentialSubReaders.get(readerIndex);
    SegmentReader sReader = AtomicReaderUtil.getSegmentReader(orgReader);
    if (sReader != null) {
      DocIdSet docIdSet = filter.getDocIdSet(sReader.getContext(), sReader.getLiveDocs());
      if (docIdSet == null) {
        // getDocIdSet() may return null when nothing in the segment matches; treat that as filtered out
        return true;
      }
      DocIdSetIterator iterator = docIdSet.iterator();
      if (iterator == null) {
        return true;
      }
      if (iterator.advance(docId) == docId) {
        return false;
      }
      return true;
    }
    throw new RuntimeException("Reader has to be a SegmentReader [" + orgReader + "]");
  } else {
    throw new RuntimeException("Reader has to be a BaseCompositeReader [" + reader + "]");
  }
}
 
Example 15
Source File: BlurUtil.java    From incubator-retired-blur with Apache License 2.0
private static OpenBitSet getMask(DocIdSet docIdSet, int primeDocRowId, int numberOfDocsInRow) throws IOException {
  OpenBitSet mask = new OpenBitSet(numberOfDocsInRow);
  DocIdSetIterator iterator = docIdSet.iterator();
  if (iterator == null) {
    return mask;
  }
  int docId = iterator.advance(primeDocRowId);
  int end = numberOfDocsInRow + primeDocRowId;
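  // advance()/nextDoc() return NO_MORE_DOCS (Integer.MAX_VALUE) once the set is exhausted, which also fails the docId < end test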
  while (docId < end) {
    mask.set(docId - primeDocRowId);
    docId = iterator.nextDoc();
  }
  return mask;
}
 
Example 16
Source File: ChildrenConstantScoreQuery.java    From Elasticsearch with Apache License 2.0
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
    if (remaining == 0) {
        return null;
    }

    if (shortCircuitFilter != null) {
        DocIdSet docIdSet = shortCircuitFilter.getDocIdSet(context, null);
        if (!Lucene.isEmpty(docIdSet)) {
            DocIdSetIterator iterator = docIdSet.iterator();
            if (iterator != null) {
                return ConstantScorer.create(iterator, this, queryWeight);
            }
        }
        return null;
    }

    DocIdSet parentDocIdSet = this.parentFilter.getDocIdSet(context, null);
    if (!Lucene.isEmpty(parentDocIdSet)) {
        // We can't be sure of the fact that liveDocs have been applied, so we apply it here. The "remaining"
        // count down (short circuit) logic will then work as expected.
        parentDocIdSet = BitsFilteredDocIdSet.wrap(parentDocIdSet, context.reader().getLiveDocs());
        DocIdSetIterator innerIterator = parentDocIdSet.iterator();
        if (innerIterator != null) {
            LongBitSet parentOrds = collector.parentOrds;
            SortedDocValues globalValues = globalIfd.load(context).getOrdinalsValues(parentType);
            if (globalValues != null) {
                DocIdSetIterator parentIdIterator = new ParentOrdIterator(innerIterator, parentOrds, globalValues, this);
                return ConstantScorer.create(parentIdIterator, this, queryWeight);
            }
        }
    }
    return null;
}
 
Example 17
Source File: FacetFieldProcessorByArrayDV.java    From lucene-solr with Apache License 2.0
@Override
protected void collectDocs() throws IOException {
  int domainSize = fcontext.base.size();

  if (nTerms <= 0 || domainSize < effectiveMincount) { // TODO: what about allBuckets? missing bucket?
    return;
  }

  // TODO: refactor some of this logic into a base class
  boolean countOnly = collectAcc==null && allBucketsAcc==null;
  boolean fullRange = startTermIndex == 0 && endTermIndex == si.getValueCount();

  // Are we expecting many hits per bucket?
  // FUTURE: pro-rate for nTerms?
  // FUTURE: better take into account number of values in multi-valued fields.  This info is available for indexed fields.
  // FUTURE: take into account that bigger ord maps are more expensive than smaller ones
  // One test: on a 5M doc index, faceting on a single-valued field with almost 1M unique values, the crossover point where
  // global counting became slower than per-segment counting was a domain of 658k docs.  At that point, the top 10 buckets had 6-7 matches each.
  // This was for heap docvalues produced by UninvertingReader.
  // Since those values were randomly distributed, let's round the domain multiplier up to account for less random real-world data.
  long domainMultiplier = multiValuedField ? 4L : 2L;
  boolean manyHitsPerBucket = domainSize * domainMultiplier > (si.getValueCount() + 3);  // +3 to increase test coverage with small tests

  // If we're only calculating counts, we're not prefixing, and we expect to collect many documents per unique value,
  // then collect per-segment before mapping to global ords at the end.  This will save redundant seg->global ord mappings.
  // FUTURE: there are probably some other non "countOnly" cases where we can use this as well (i.e. those where
  // the docid is not used)
  boolean canDoPerSeg = countOnly && fullRange;
  boolean accumSeg = manyHitsPerBucket && canDoPerSeg;

  if (freq.perSeg != null) accumSeg = canDoPerSeg && freq.perSeg;  // internal - override perSeg heuristic

  final List<LeafReaderContext> leaves = fcontext.searcher.getIndexReader().leaves();
  Filter filter = fcontext.base.getTopFilter();

  for (int subIdx = 0; subIdx < leaves.size(); subIdx++) {
    LeafReaderContext subCtx = leaves.get(subIdx);

    setNextReaderFirstPhase(subCtx);

    DocIdSet dis = filter.getDocIdSet(subCtx, null); // solr docsets already exclude any deleted docs
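    // no null check here, unlike several examples above: the top-level filter of a Solr DocSet is assumed to always supply a DocIdSet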
    DocIdSetIterator disi = dis.iterator();

    SortedDocValues singleDv = null;
    SortedSetDocValues multiDv = null;
    if (multiValuedField) {
      // TODO: get sub from multi?
      multiDv = subCtx.reader().getSortedSetDocValues(sf.getName());
      if (multiDv == null) {
        multiDv = DocValues.emptySortedSet();
      }
      // some codecs may optimize SortedSet storage for single-valued fields;
      // unwrapSingleton returns null if this is not a wrapped single-valued docvalues instance.
      if (unwrap_singleValued_multiDv) {
        singleDv = DocValues.unwrapSingleton(multiDv);
      }
    } else {
      singleDv = subCtx.reader().getSortedDocValues(sf.getName());
      if (singleDv == null) {
        singleDv = DocValues.emptySorted();
      }
    }

    LongValues toGlobal = ordinalMap == null ? null : ordinalMap.getGlobalOrds(subIdx);

    if (singleDv != null) {
      if (accumSeg) {
        collectPerSeg(singleDv, disi, toGlobal);
      } else {
        if (canDoPerSeg && toGlobal != null) {
          collectCounts(singleDv, disi, toGlobal);
        } else {
          collectDocs(singleDv, disi, toGlobal);
        }
      }
    } else {
      if (accumSeg) {
        collectPerSeg(multiDv, disi, toGlobal);
      } else {
        if (canDoPerSeg && toGlobal != null) {
          collectCounts(multiDv, disi, toGlobal);
        } else {
          collectDocs(multiDv, disi, toGlobal);
        }
      }
    }
  }

  reuse = null;  // better GC
}
 
Example 18
Source File: JoinQuery.java    From lucene-solr with Apache License 2.0
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
  if (filter == null) {
    boolean debug = rb != null && rb.isDebug();
    RTimer timer = (debug ? new RTimer() : null);
    resultSet = getDocSet();
    if (timer != null) timer.stop();

    if (debug) {
      SimpleOrderedMap<Object> dbg = new SimpleOrderedMap<>();
      dbg.add("time", (long) timer.getTime());
      dbg.add("fromSetSize", fromSetSize);  // the input
      dbg.add("toSetSize", resultSet.size());    // the output

      dbg.add("fromTermCount", fromTermCount);
      dbg.add("fromTermTotalDf", fromTermTotalDf);
      dbg.add("fromTermDirectCount", fromTermDirectCount);
      dbg.add("fromTermHits", fromTermHits);
      dbg.add("fromTermHitsTotalDf", fromTermHitsTotalDf);
      dbg.add("toTermHits", toTermHits);
      dbg.add("toTermHitsTotalDf", toTermHitsTotalDf);
      dbg.add("toTermDirectCount", toTermDirectCount);
      dbg.add("smallSetsDeferred", smallSetsDeferred);
      dbg.add("toSetDocsAdded", resultListDocs);

      // TODO: perhaps synchronize  addDebug in the future...
      rb.addDebug(dbg, "join", JoinQuery.this.toString());
    }

    filter = resultSet.getTopFilter();
  }

  // Although this set only includes live docs, other filters can be pushed down to queries.
  DocIdSet readerSet = filter.getDocIdSet(context, null);
  if (readerSet == null) {
    return null;
  }
  DocIdSetIterator readerSetIterator = readerSet.iterator();
  if (readerSetIterator == null) {
    return null;
  }
  return new ConstantScoreScorer(this, score(), scoreMode, readerSetIterator);
}