Java Code Examples for org.apache.lucene.util.BitSet#cardinality()

The following examples show how to use org.apache.lucene.util.BitSet#cardinality(). They are drawn from open-source projects; the source file and originating project are noted above each example.
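Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) showing what cardinality() reports on FixedBitSet, the most common concrete implementation of org.apache.lucene.util.BitSet:

import org.apache.lucene.util.FixedBitSet;

public class CardinalityDemo {
  public static void main(String[] args) {
    FixedBitSet set = new FixedBitSet(100); // room for docIDs 0..99, all bits initially clear
    set.set(3);
    set.set(42);
    set.set(99);
    // cardinality() returns the number of set bits, independent of the set's capacity
    System.out.println(set.cardinality()); // prints 3
  }
}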
Example 1
Source File: TestIndexedDISI.java    From lucene-solr with Apache License 2.0
private void assertAdvanceBeyondEnd(BitSet set, Directory dir) throws IOException {
  final int cardinality = set.cardinality();
  final byte denseRankPower = 9; // Not tested here so fixed to isolate factors
  int jumpTableentryCount;
  try (IndexOutput out = dir.createOutput("bar", IOContext.DEFAULT)) {
    jumpTableentryCount = IndexedDISI.writeBitSet(new BitSetIterator(set, cardinality), out, denseRankPower);
  }

  try (IndexInput in = dir.openInput("bar", IOContext.DEFAULT)) {
    BitSetIterator disi2 = new BitSetIterator(set, cardinality);
    int doc = disi2.docID();
    int index = 0;
    while (doc < cardinality) {
      doc = disi2.nextDoc();
      index++;
    }

    IndexedDISI disi = new IndexedDISI(in, 0L, in.length(), jumpTableentryCount, denseRankPower, cardinality);
    // Advance 1 docID beyond end
    assertFalse("There should be no set bit beyond the valid docID range", disi.advanceExact(set.length()));
    disi.advance(doc); // Should be the special docID signifying NO_MORE_DOCS from the BitSetIterator
    assertEquals("The index when advancing beyond the last defined docID should be correct",
        index, disi.index()+1); // disi.index()+1 as the while-loop also counts the NO_MORE_DOCS
  }
}
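Note how the cardinality does double duty in this test: it is passed to BitSetIterator as the iterator's cost estimate and to IndexedDISI as the number of documents encoded in the slice.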
 
Example 2
Source File: TestIndexedDISI.java    From lucene-solr with Apache License 2.0
public void testPositionNotZero() throws IOException {
  final int BLOCKS = 10;
  final byte denseRankPower = rarely() ? -1 : (byte) (random().nextInt(7)+7); // sane powers (7-13), with a rare chance of disabling dense rank (-1)

  BitSet set = createSetWithRandomBlocks(BLOCKS);
  try (Directory dir = newDirectory()) {
    final int cardinality = set.cardinality();
    int jumpTableEntryCount;
    try (IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT)) {
      jumpTableEntryCount = IndexedDISI.writeBitSet(new BitSetIterator(set, cardinality), out, denseRankPower);
    }
    try (IndexInput fullInput = dir.openInput("foo", IOContext.DEFAULT)) {
      IndexInput blockData =
          IndexedDISI.createBlockSlice(fullInput, "blocks", 0, fullInput.length(), jumpTableEntryCount);
      blockData.seek(random().nextInt((int) blockData.length()));

      RandomAccessInput jumpTable = IndexedDISI.createJumpTable(fullInput, 0, fullInput.length(), jumpTableEntryCount);
      IndexedDISI disi = new IndexedDISI(blockData, jumpTable, jumpTableEntryCount, denseRankPower, cardinality);
      // This failed at some point during LUCENE-8585 development as it did not reset the slice position
      disi.advanceExact(BLOCKS*65536-1);
    }
  }
}
 
Example 3
Source File: RecoverySourcePruneMergePolicy.java    From crate with Apache License 2.0
static CodecReader wrapReader(String recoverySourceField, CodecReader reader, Supplier<Query> retainSourceQuerySupplier)
    throws IOException {
    NumericDocValues recoverySource = reader.getNumericDocValues(recoverySourceField);
    if (recoverySource == null || recoverySource.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
        return reader; // early terminate - nothing to do here since none of the docs has a recovery source anymore.
    }
    IndexSearcher s = new IndexSearcher(reader);
    s.setQueryCache(null);
    Weight weight = s.createWeight(s.rewrite(retainSourceQuerySupplier.get()), ScoreMode.COMPLETE_NO_SCORES, 1.0f);
    Scorer scorer = weight.scorer(reader.getContext());
    if (scorer != null) {
        BitSet recoverySourceToKeep = BitSet.of(scorer.iterator(), reader.maxDoc());
        // calculating the cardinality is significantly cheaper than skipping all the bulk-merging we might otherwise do;
        // if retention is high we keep most of the source anyway
        if (recoverySourceToKeep.cardinality() == reader.maxDoc()) {
            return reader; // keep all source
        }
        return new SourcePruningFilterCodecReader(recoverySourceField, reader, recoverySourceToKeep);
    } else {
        return new SourcePruningFilterCodecReader(recoverySourceField, reader, null);
    }
}
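The cardinality() == maxDoc() comparison is the key optimization here: when every document matches the retain-source query there is nothing to prune, so returning the unwrapped reader preserves the bulk-merge fast path instead of paying for a filtering wrapper.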
 
Example 4
Source File: CheckJoinIndex.java    From lucene-solr with Apache License 2.0
/**
 * Check that the given index is good to use for block joins.
 * @throws IllegalStateException if the index does not have an appropriate structure
 */
public static void check(IndexReader reader, BitSetProducer parentsFilter) throws IOException {
  for (LeafReaderContext context : reader.leaves()) {
    if (context.reader().maxDoc() == 0) {
      continue;
    }
    final BitSet parents = parentsFilter.getBitSet(context);
    if (parents == null || parents.cardinality() == 0) {
      throw new IllegalStateException("Every segment should have at least one parent, but " + context.reader() + " does not have any");
    }
    if (parents.get(context.reader().maxDoc() - 1) == false) {
      throw new IllegalStateException("The last document of a segment must always be a parent, but " + context.reader() + " has a child as a last doc");
    }
    final Bits liveDocs = context.reader().getLiveDocs();
    if (liveDocs != null) {
      int prevParentDoc = -1;
      DocIdSetIterator it = new BitSetIterator(parents, 0L);
      for (int parentDoc = it.nextDoc(); parentDoc != DocIdSetIterator.NO_MORE_DOCS; parentDoc = it.nextDoc()) {
        final boolean parentIsLive = liveDocs.get(parentDoc);
        for (int child = prevParentDoc + 1; child != parentDoc; child++) {
          final boolean childIsLive = liveDocs.get(child);
          if (parentIsLive != childIsLive) {
            if (childIsLive) {
              throw new IllegalStateException("Parent doc " + parentDoc + " of segment " + context.reader() + " is live but has a deleted child document " + child);
            } else {
              throw new IllegalStateException("Parent doc " + parentDoc + " of segment " + context.reader() + " is deleted but has a live child document " + child);
            }
          }
        }
        prevParentDoc = parentDoc;
      }
    }
  }
}
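Note that the BitSetIterator here is created with a cost of 0L rather than the set's cardinality. The cost argument is only an estimate exposed through DocIdSetIterator.cost(), and this check simply exhausts the iterator, so an exact value is unnecessary.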
 
Example 5
Source File: TestIndexedDISI.java    From lucene-solr with Apache License 2.0
private void doTestAllSingleJump(BitSet set, Directory dir) throws IOException {
  final int cardinality = set.cardinality();
  final byte denseRankPower = rarely() ? -1 : (byte) (random().nextInt(7)+7); // sane powers (7-13), with a rare chance of disabling dense rank (-1)
  long length;
  int jumpTableentryCount;
  try (IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT)) {
    jumpTableentryCount = IndexedDISI.writeBitSet(new BitSetIterator(set, cardinality), out, denseRankPower);
    length = out.getFilePointer();
  }

  try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) {
    for (int i = 0; i < set.length(); i++) {
      IndexedDISI disi = new IndexedDISI(in, 0L, length, jumpTableentryCount, denseRankPower, cardinality);
      assertEquals("The bit at " + i + " should be correct with advanceExact", set.get(i), disi.advanceExact(i));

      IndexedDISI disi2 = new IndexedDISI(in, 0L, length, jumpTableentryCount, denseRankPower, cardinality);
      disi2.advance(i);
      // Proper sanity check with jump tables as an error could make them seek backwards
      assertTrue("The docID should at least be " + i + " after advance(" + i + ") but was " + disi2.docID(),
          i <= disi2.docID());
      if (set.get(i)) {
        assertEquals("The docID should be present with advance", i, disi2.docID());
      } else {
        assertNotEquals("The docID should not be present with advance", i, disi2.docID());
      }
    }
  }
}
 
Example 6
Source File: FilterableTermsEnum.java    From Elasticsearch with Apache License 2.0
public FilterableTermsEnum(IndexReader reader, String field, int docsEnumFlag, @Nullable Query filter) throws IOException {
    if ((docsEnumFlag != PostingsEnum.FREQS) && (docsEnumFlag != PostingsEnum.NONE)) {
        throw new IllegalArgumentException("invalid docsEnumFlag of " + docsEnumFlag);
    }
    this.docsEnumFlag = docsEnumFlag;
    if (filter == null) {
        // Important - need to use the doc count that includes deleted docs
        // or we have this issue: https://github.com/elasticsearch/elasticsearch/issues/7951
        numDocs = reader.maxDoc();
    }
    List<LeafReaderContext> leaves = reader.leaves();
    List<Holder> enums = new ArrayList<>(leaves.size());
    final Weight weight;
    if (filter == null) {
        weight = null;
    } else {
        final IndexSearcher searcher = new IndexSearcher(reader);
        searcher.setQueryCache(null);
        weight = searcher.createNormalizedWeight(filter, false);
    }
    for (LeafReaderContext context : leaves) {
        Terms terms = context.reader().terms(field);
        if (terms == null) {
            continue;
        }
        TermsEnum termsEnum = terms.iterator();
        if (termsEnum == null) {
            continue;
        }
        BitSet bits = null;
        if (weight != null) {
            Scorer scorer = weight.scorer(context);
            if (scorer == null) {
                // fully filtered, none matching, no need to iterate on this
                continue;
            }
            DocIdSetIterator docs = scorer.iterator();

            // we want to force apply deleted docs
            final Bits liveDocs = context.reader().getLiveDocs();
            if (liveDocs != null) {
                docs = new FilteredDocIdSetIterator(docs) {
                    @Override
                    protected boolean match(int doc) {
                        return liveDocs.get(doc);
                    }
                };
            }

            BitDocIdSet.Builder builder = new BitDocIdSet.Builder(context.reader().maxDoc());
            builder.or(docs);
            bits = builder.build().bits();

            // Count how many docs are in our filtered set
            // TODO make this lazy-loaded only for those that need it?
            numDocs += bits.cardinality();
        }
        enums.add(new Holder(termsEnum, bits));
    }
    this.enums = enums.toArray(new Holder[enums.size()]);
}
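In this last example, cardinality() accumulates the per-segment count of documents that survive the filter into numDocs, so term statistics can later be computed against the filtered document count rather than reader.maxDoc().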