org.apache.lucene.util.BitDocIdSet Java Examples

The following examples show how to use org.apache.lucene.util.BitDocIdSet. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestConjunctionDISI.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Randomized test: builds between 2 and 5 scorers over random bit sets, intersects them with
 * {@code ConjunctionDISI.intersectScorers} and asserts that the documents produced equal the
 * intersection of the bit sets recorded for each scorer.
 */
public void testConjunction() throws IOException {
  final int rounds = atLeast(100);
  for (int round = 0; round < rounds; ++round) {
    final int maxDoc = TestUtil.nextInt(random(), 100, 10000);
    final int count = TestUtil.nextInt(random(), 2, 5);
    final FixedBitSet[] expected = new FixedBitSet[count];
    final Scorer[] scorers = new Scorer[count];
    for (int slot = 0; slot < count; ++slot) {
      final FixedBitSet bits = randomSet(maxDoc);
      final int flavor = random().nextInt(3);
      if (flavor == 0) {
        // simple iterator, passed through anonymizeIterator
        expected[slot] = bits;
        scorers[slot] = new ConstantScoreScorer(new FakeWeight(), 0f, ScoreMode.TOP_SCORES, anonymizeIterator(new BitDocIdSet(bits).iterator()));
      } else if (flavor == 1) {
        // bit set iterator used directly
        expected[slot] = bits;
        scorers[slot] = new ConstantScoreScorer(new FakeWeight(), 0f, ScoreMode.TOP_SCORES, new BitDocIdSet(bits).iterator());
      } else {
        // scorer with approximation: the expected matches are the confirmed bits only
        final FixedBitSet confirmed = clearRandomBits(bits);
        expected[slot] = confirmed;
        final TwoPhaseIterator twoPhase = approximation(new BitDocIdSet(bits).iterator(), confirmed);
        scorers[slot] = scorer(twoPhase);
      }
    }

    final DocIdSetIterator conjunction = ConjunctionDISI.intersectScorers(Arrays.asList(scorers));
    assertEquals(intersect(expected), toBitSet(maxDoc, conjunction));
  }
}
 
Example #2
Source File: TestConjunctionDISI.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Randomized test: intersects a mix of constant-score scorers and scorers built from an
 * approximation, then asserts that the conjunction unwraps to a {@code TwoPhaseIterator}
 * exactly when at least one input had an approximation. In that case the two-phase view must
 * yield the intersection of the recorded bit sets.
 */
public void testConjunctionApproximation() throws IOException {
  final int rounds = atLeast(100);
  for (int round = 0; round < rounds; ++round) {
    final int maxDoc = TestUtil.nextInt(random(), 100, 10000);
    final int count = TestUtil.nextInt(random(), 2, 5);
    final FixedBitSet[] expected = new FixedBitSet[count];
    final Scorer[] scorers = new Scorer[count];
    boolean sawApproximation = false;
    for (int slot = 0; slot < count; ++slot) {
      final FixedBitSet bits = randomSet(maxDoc);
      final boolean plain = random().nextBoolean();
      if (!plain) {
        // scorer with approximation: the expected matches are the confirmed bits only
        final FixedBitSet confirmed = clearRandomBits(bits);
        expected[slot] = confirmed;
        final TwoPhaseIterator twoPhase = approximation(new BitDocIdSet(bits).iterator(), confirmed);
        scorers[slot] = scorer(twoPhase);
        sawApproximation = true;
      } else {
        // simple iterator
        expected[slot] = bits;
        scorers[slot] = new ConstantScoreScorer(new FakeWeight(), 0f, ScoreMode.COMPLETE_NO_SCORES, new BitDocIdSet(bits).iterator());
      }
    }

    final DocIdSetIterator conjunction = ConjunctionDISI.intersectScorers(Arrays.asList(scorers));
    final TwoPhaseIterator unwrapped = TwoPhaseIterator.unwrap(conjunction);
    assertEquals(sawApproximation, unwrapped != null);
    if (sawApproximation) {
      assertEquals(intersect(expected), toBitSet(maxDoc, TwoPhaseIterator.asDocIdSetIterator(unwrapped)));
    }
  }
}
 
Example #3
Source File: TestSort.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a random doc id set over {@code sz} bits.
 *
 * <p>A random number of attempts (drawn from {@code r}, below {@code sz}) each set one randomly
 * chosen bit; attempts may hit the same bit more than once.
 *
 * @param sz number of bits in the backing {@code FixedBitSet}
 * @return a {@code BitDocIdSet} wrapping the populated bit set
 */
public DocIdSet randSet(int sz) {
  final FixedBitSet bits = new FixedBitSet(sz);
  int remaining = r.nextInt(sz);
  while (remaining > 0) {
    bits.set(r.nextInt(sz));
    remaining--;
  }
  return new BitDocIdSet(bits);
}
 
Example #4
Source File: FilterableTermsEnum.java    From Elasticsearch with Apache License 2.0 4 votes vote down vote up
/**
 * Collects one {@code Holder} per segment that has terms for {@code field}, optionally pairing
 * each with the bit set of live documents matching {@code filter}, and accumulates
 * {@code numDocs} along the way.
 *
 * @param reader       index reader whose leaves are visited
 * @param field        field whose terms are enumerated
 * @param docsEnumFlag must be {@code PostingsEnum.FREQS} or {@code PostingsEnum.NONE}
 * @param filter       optional query restricting the documents considered; may be {@code null}
 * @throws IllegalArgumentException if {@code docsEnumFlag} is neither of the accepted values
 * @throws IOException declared by the signature; presumably raised by the reader/searcher calls
 */
public FilterableTermsEnum(IndexReader reader, String field, int docsEnumFlag, @Nullable Query filter) throws IOException {
    final boolean supportedFlag = docsEnumFlag == PostingsEnum.FREQS || docsEnumFlag == PostingsEnum.NONE;
    if (!supportedFlag) {
        throw new IllegalArgumentException("invalid docsEnumFlag of " + docsEnumFlag);
    }
    this.docsEnumFlag = docsEnumFlag;

    final boolean unfiltered = filter == null;
    if (unfiltered) {
        // Important - need to use the doc count that includes deleted docs
        // or we have this issue: https://github.com/elasticsearch/elasticsearch/issues/7951
        numDocs = reader.maxDoc();
    }

    final List<LeafReaderContext> segments = reader.leaves();
    final List<Holder> holders = new ArrayList<>(segments.size());

    final Weight filterWeight;
    if (unfiltered) {
        filterWeight = null;
    } else {
        final IndexSearcher searcher = new IndexSearcher(reader);
        searcher.setQueryCache(null);
        filterWeight = searcher.createNormalizedWeight(filter, false);
    }

    for (LeafReaderContext segment : segments) {
        final Terms terms = segment.reader().terms(field);
        if (terms == null) {
            continue;
        }
        final TermsEnum termsEnum = terms.iterator();
        if (termsEnum == null) {
            continue;
        }

        BitSet matching = null;
        if (filterWeight != null) {
            final Scorer scorer = filterWeight.scorer(segment);
            if (scorer == null) {
                // fully filtered, none matching, no need to iterate on this
                continue;
            }
            final DocIdSetIterator matches = scorer.iterator();

            // we want to force apply deleted docs
            final Bits liveDocs = segment.reader().getLiveDocs();
            final DocIdSetIterator liveMatches;
            if (liveDocs == null) {
                liveMatches = matches;
            } else {
                liveMatches = new FilteredDocIdSetIterator(matches) {
                    @Override
                    protected boolean match(int doc) {
                        return liveDocs.get(doc);
                    }
                };
            }

            final BitDocIdSet.Builder builder = new BitDocIdSet.Builder(segment.reader().maxDoc());
            builder.or(liveMatches);
            matching = builder.build().bits();

            // Count how many docs are in our filtered set
            // TODO make this lazy-loaded only for those that need it?
            numDocs += matching.cardinality();
        }
        holders.add(new Holder(termsEnum, matching));
    }
    this.enums = holders.toArray(new Holder[holders.size()]);
}
 
Example #5
Source File: BitsFilter.java    From SearchServices with GNU Lesser General Public License v3.0 4 votes vote down vote up
/**
 * Returns the doc id set for the given segment: the bit set stored at {@code context.ord} in
 * {@code bitSets}, wrapped by {@code BitsFilteredDocIdSet.wrap} together with {@code bits}.
 */
public DocIdSet getDocIdSet(LeafReaderContext context, Bits bits) {
	final BitDocIdSet segmentDocs = new BitDocIdSet(bitSets.get(context.ord));
	return BitsFilteredDocIdSet.wrap(segmentDocs, bits);
}
 
Example #6
Source File: RandomSamplingFacetsCollector.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a sample of the given hits.
 *
 * <p>Hits are consumed in bins of {@code (int) (1.0 / samplingRate)} documents; within each bin
 * the hit at one randomly drawn position is copied into the sample. A bin that is still open
 * when this segment's hits run out is carried over to the next call through
 * {@code leftoverBin} (hits still missing from the bin) and {@code leftoverIndex} (offset of
 * the hit to sample within those remaining hits, or {@code NOT_CALCULATED} if the bin was
 * already sampled).
 *
 * @param docs the hits of one segment
 * @return a new {@code MatchingDocs} for the same context holding only the sampled documents;
 *     {@code totalHits} is copied from {@code docs} unchanged
 * @throws RuntimeException wrapping any {@code IOException} raised while iterating the hits
 */
private MatchingDocs createSample(MatchingDocs docs) {
  int maxdoc = docs.context.reader().maxDoc();
  
  // TODO: we could try the WAH8DocIdSet here as well, as the results will be sparse
  FixedBitSet sampleDocs = new FixedBitSet(maxdoc);
  
  int binSize = (int) (1.0 / samplingRate);
  
  try {
    int counter = 0;
    int limit, randomIndex;
    if (leftoverBin != NOT_CALCULATED) {
      // finish the bin that the previous segment left open
      limit = leftoverBin;
      // either NOT_CALCULATED, which means we already sampled from that bin,
      // or the next document to sample
      randomIndex = leftoverIndex;
    } else {
      limit = binSize;
      randomIndex = random.nextInt(binSize);
    }
    final DocIdSetIterator it = docs.bits.iterator();
    for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
      if (counter == randomIndex) {
        sampleDocs.set(doc);
      }
      counter++;
      if (counter >= limit) {
        // bin completed: start a fresh full-size bin with a new random position
        counter = 0;
        limit = binSize;
        randomIndex = random.nextInt(binSize);
      }
    }
    
    if (counter == 0) {
      // we either exhausted the bin and the iterator at the same time, or
      // this segment had no results. in the latter case we might want to
      // carry leftover to the next segment as is, but that complicates the
      // code and doesn't seem so important.
      leftoverBin = leftoverIndex = NOT_CALCULATED;
    } else {
      leftoverBin = limit - counter;
      if (randomIndex >= counter) {
        // the document to sample has not been seen yet; it is randomIndex - counter
        // hits into the next segment. This includes the case randomIndex == counter
        // (offset 0, i.e. the very next hit), which previously matched neither branch
        // and left a stale leftoverIndex behind.
        leftoverIndex = randomIndex - counter;
      } else {
        // we sampled a document from the bin, so just skip over remaining
        // documents in the bin in the next segment.
        leftoverIndex = NOT_CALCULATED;
      }
    }
    
    return new MatchingDocs(docs.context, new BitDocIdSet(sampleDocs), docs.totalHits, null);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
 
Example #7
Source File: TestConjunctionDISI.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Randomized test: folds scorers into a conjunction one at a time, re-wrapping each pairwise
 * intersection as a scorer. It asserts that the final scorer exposes a
 * {@code TwoPhaseIterator} exactly when at least one input had an approximation, and that the
 * resulting documents equal the intersection of the recorded bit sets.
 */
public void testRecursiveConjunctionApproximation() throws IOException {
  final int rounds = atLeast(100);
  for (int round = 0; round < rounds; ++round) {
    final int maxDoc = TestUtil.nextInt(random(), 100, 10000);
    final int count = TestUtil.nextInt(random(), 2, 5);
    final FixedBitSet[] expected = new FixedBitSet[count];
    Scorer accumulated = null;
    boolean sawApproximation = false;
    for (int slot = 0; slot < count; ++slot) {
      final FixedBitSet bits = randomSet(maxDoc);
      final Scorer next;
      final int flavor = random().nextInt(3);
      if (flavor == 0) {
        // simple iterator, passed through anonymizeIterator
        expected[slot] = bits;
        next = new ConstantScoreScorer(new FakeWeight(), 0f, ScoreMode.TOP_SCORES, anonymizeIterator(new BitDocIdSet(bits).iterator()));
      } else if (flavor == 1) {
        // bit set iterator used directly
        expected[slot] = bits;
        next = new ConstantScoreScorer(new FakeWeight(), 0f, ScoreMode.TOP_SCORES, new BitDocIdSet(bits).iterator());
      } else {
        // scorer with approximation: the expected matches are the confirmed bits only
        final FixedBitSet confirmed = clearRandomBits(bits);
        expected[slot] = confirmed;
        final TwoPhaseIterator twoPhase = approximation(new BitDocIdSet(bits).iterator(), confirmed);
        next = scorer(twoPhase);
        sawApproximation = true;
      }

      if (accumulated != null) {
        // intersect what we have so far with the new scorer and wrap the result again
        final DocIdSetIterator pair = ConjunctionDISI.intersectScorers(Arrays.asList(accumulated, next));
        accumulated = scorer(pair, TwoPhaseIterator.unwrap(pair));
      } else {
        accumulated = next;
      }
    }

    final TwoPhaseIterator finalTwoPhase = accumulated.twoPhaseIterator();
    assertEquals(sawApproximation, finalTwoPhase != null);
    if (!sawApproximation) {
      assertEquals(intersect(expected), toBitSet(maxDoc, accumulated.iterator()));
    } else {
      assertEquals(intersect(expected), toBitSet(maxDoc, TwoPhaseIterator.asDocIdSetIterator(finalTwoPhase)));
    }
  }
}
 
Example #8
Source File: TestConjunctionDISI.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Randomized test: builds 5 to 10 scorers, repeatedly replaces random contiguous runs of them
 * with a single sub-conjunction, then asserts that intersecting the resulting list still
 * yields the intersection of the recorded bit sets.
 *
 * @param wrapWithScorer when {@code true} each sub-conjunction is a {@code ConjunctionScorer};
 *     otherwise it is a {@code ConstantScoreScorer} over
 *     {@code ConjunctionDISI.intersectScorers} of the run
 */
public void testCollapseSubConjunctions(boolean wrapWithScorer) throws IOException {
  final int rounds = atLeast(100);
  for (int round = 0; round < rounds; ++round) {
    final int maxDoc = TestUtil.nextInt(random(), 100, 10000);
    final int count = TestUtil.nextInt(random(), 5, 10);
    final FixedBitSet[] expected = new FixedBitSet[count];
    final List<Scorer> scorers = new LinkedList<>();
    for (int slot = 0; slot < count; ++slot) {
      final FixedBitSet bits = randomSet(maxDoc);
      final boolean plain = random().nextBoolean();
      if (!plain) {
        // scorer with approximation: the expected matches are the confirmed bits only
        final FixedBitSet confirmed = clearRandomBits(bits);
        expected[slot] = confirmed;
        final TwoPhaseIterator twoPhase = approximation(new BitDocIdSet(bits).iterator(), confirmed);
        scorers.add(scorer(twoPhase));
      } else {
        // simple iterator
        expected[slot] = bits;
        scorers.add(new ConstantScoreScorer(new FakeWeight(), 0f, ScoreMode.TOP_SCORES, new BitDocIdSet(bits).iterator()));
      }
    }

    // make some sub sequences into sub conjunctions
    final int collapseRounds = atLeast(3);
    for (int pass = 0; pass < collapseRounds && scorers.size() > 3; ++pass) {
      final int from = TestUtil.nextInt(random(), 0, scorers.size() - 2);
      final int to = TestUtil.nextInt(random(), from + 2, scorers.size());
      final List<Scorer> run = scorers.subList(from, to);
      final Scorer collapsed = wrapWithScorer
          ? new ConjunctionScorer(new FakeWeight(), run, Collections.emptyList())
          : new ConstantScoreScorer(new FakeWeight(), 0f, ScoreMode.TOP_SCORES, ConjunctionDISI.intersectScorers(run));
      // the first element of the run becomes the sub-conjunction; the rest are dropped
      scorers.set(from, collapsed);
      final int surplus = to - from - 1;
      for (int removed = 0; removed < surplus; ++removed) {
        scorers.remove(from + 1);
      }
    }
    if (scorers.size() == 1) {
      // ConjunctionDISI needs two iterators
      scorers.add(new ConstantScoreScorer(new FakeWeight(), 0f, ScoreMode.TOP_SCORES, DocIdSetIterator.all(maxDoc)));
    }

    final DocIdSetIterator conjunction = ConjunctionDISI.intersectScorers(scorers);
    assertEquals(intersect(expected), toBitSet(maxDoc, conjunction));
  }
}
 
Example #9
Source File: HashQParserPlugin.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the doc id set for the given segment: the bit set stored at index
 * {@code context.ord} of {@code bitSets}, wrapped by {@code BitsFilteredDocIdSet.wrap}
 * together with {@code bits}.
 */
public DocIdSet getDocIdSet(LeafReaderContext context, Bits bits) {
  final BitDocIdSet segmentDocs = new BitDocIdSet(bitSets[context.ord]);
  return BitsFilteredDocIdSet.wrap(segmentDocs, bits);
}