Java Code Examples for org.apache.lucene.search.DocIdSetIterator#NO_MORE_DOCS

The following examples show how to use org.apache.lucene.search.DocIdSetIterator#NO_MORE_DOCS. You can go to the original project or source file by following the links above each example.
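Every example below reduces to the same consumption idiom: keep advancing the iterator with nextDoc() (or advance(target)) until it returns DocIdSetIterator.NO_MORE_DOCS, and do not call the iterator again once it is exhausted. Here is a minimal sketch of that loop, assuming an IndexSearcher named searcher and a Query named query are in scope (this sketch is illustrative and not taken from any of the projects below):

Weight weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1f);
for (LeafReaderContext context : searcher.getIndexReader().leaves()) {
  Scorer scorer = weight.scorer(context);
  if (scorer == null) {
    continue; // no matching docs in this segment
  }
  DocIdSetIterator it = scorer.iterator();
  for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
    // doc is segment-local; add context.docBase to get a top-level doc id
  }
}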
Example 1
Source File: RecoverySourcePruneMergePolicy.java    From crate with Apache License 2.0
static CodecReader wrapReader(String recoverySourceField, CodecReader reader, Supplier<Query> retainSourceQuerySupplier)
    throws IOException {
    NumericDocValues recoverySource = reader.getNumericDocValues(recoverySourceField);
    if (recoverySource == null || recoverySource.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
        return reader; // early terminate - nothing to do here since none of the docs has a recovery source anymore.
    }
    IndexSearcher s = new IndexSearcher(reader);
    s.setQueryCache(null);
    Weight weight = s.createWeight(s.rewrite(retainSourceQuerySupplier.get()), ScoreMode.COMPLETE_NO_SCORES, 1.0f);
    Scorer scorer = weight.scorer(reader.getContext());
    if (scorer != null) {
        BitSet recoverySourceToKeep = BitSet.of(scorer.iterator(), reader.maxDoc());
        // calculating the cardinality is significantly cheaper than skipping all the bulk-merging we might otherwise do:
        // if retention is high we keep most of the recovery source anyway
        if (recoverySourceToKeep.cardinality() == reader.maxDoc()) {
            return reader; // keep all source
        }
        return new SourcePruningFilterCodecReader(recoverySourceField, reader, recoverySourceToKeep);
    } else {
        return new SourcePruningFilterCodecReader(recoverySourceField, reader, null);
    }
}
 
Example 2
Source File: TermsIncludingScoreQuery.java    From lucene-solr with Apache License 2.0
@Override
protected void fillDocsAndScores(FixedBitSet matchingDocs, TermsEnum termsEnum) throws IOException {
  BytesRef spare = new BytesRef();
  PostingsEnum postingsEnum = null;
  for (int i = 0; i < terms.size(); i++) {
    if (termsEnum.seekExact(terms.get(ords[i], spare))) {
      postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
      float score = TermsIncludingScoreQuery.this.scores[ords[i]];
      for (int doc = postingsEnum.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = postingsEnum.nextDoc()) {
        // I prefer this:
        /*if (scores[doc] < score) {
          scores[doc] = score;
          matchingDocs.set(doc);
        }*/
        // But this behaves the same as MVInnerScorer, and only then will the tests pass:
        if (!matchingDocs.get(doc)) {
          scores[doc] = score;
          matchingDocs.set(doc);
        }
      }
    }
  }
}
 
Example 3
Source File: TestDocIdSetBuilder.java    From lucene-solr with Apache License 2.0
private void assertEquals(DocIdSet d1, DocIdSet d2) throws IOException {
  if (d1 == null) {
    if (d2 != null) {
      assertEquals(DocIdSetIterator.NO_MORE_DOCS, d2.iterator().nextDoc());
    }
  } else if (d2 == null) {
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, d1.iterator().nextDoc());
  } else {
    DocIdSetIterator i1 = d1.iterator();
    DocIdSetIterator i2 = d2.iterator();
    for (int doc = i1.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = i1.nextDoc()) {
      assertEquals(doc, i2.nextDoc());
    }
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, i2.nextDoc());
  }
}
 
Example 4
Source File: Lucene.java    From crate with Apache License 2.0
/**
 * Check whether there is one or more documents matching the provided query.
 */
public static boolean exists(IndexSearcher searcher, Query query) throws IOException {
    final Weight weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1f);
    // the scorer API should be more efficient at stopping after the first
    // match than the bulk scorer API
    for (LeafReaderContext context : searcher.getIndexReader().leaves()) {
        final Scorer scorer = weight.scorer(context);
        if (scorer == null) {
            continue;
        }
        final Bits liveDocs = context.reader().getLiveDocs();
        final DocIdSetIterator iterator = scorer.iterator();
        for (int doc = iterator.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iterator.nextDoc()) {
            if (liveDocs == null || liveDocs.get(doc)) {
                return true;
            }
        }
    }
    return false;
}
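A hypothetical call site for the helper above (the field name and value are placeholder assumptions, not from the crate source):

boolean anyMatch = Lucene.exists(searcher, new TermQuery(new Term("user", "alice")));
// anyMatch turns true as soon as one live document matches; nothing is collected or scored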
 
Example 5
Source File: TestRTGBase.java    From lucene-solr with Apache License 2.0
protected int getFirstMatch(IndexReader r, Term t) throws IOException {
  Terms terms = MultiTerms.getTerms(r, t.field());
  if (terms == null) return -1;
  BytesRef termBytes = t.bytes();
  final TermsEnum termsEnum = terms.iterator();
  if (!termsEnum.seekExact(termBytes)) {
    return -1;
  }
  PostingsEnum docs = termsEnum.postings(null, PostingsEnum.NONE);
  docs = BitsFilteredPostingsEnum.wrap(docs, MultiBits.getLiveDocs(r));
  int id = docs.nextDoc();
  if (id != DocIdSetIterator.NO_MORE_DOCS) {
    int next = docs.nextDoc();
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, next);
  }
  return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}
 
Example 6
Source File: TaxonomyIndexArrays.java    From lucene-solr with Apache License 2.0
private void initParents(IndexReader reader, int first) throws IOException {
  if (reader.maxDoc() == first) {
    return;
  }
  
  // it's ok to use MultiTerms because we only iterate over one postings list.
  // breaking it up to loop over the leaves() only complicates the code for no
  // apparent gain.
  PostingsEnum positions = MultiTerms.getTermPostingsEnum(reader,
      Consts.FIELD_PAYLOADS, Consts.PAYLOAD_PARENT_BYTES_REF,
      PostingsEnum.PAYLOADS);

  // shouldn't really happen; if it does, something's wrong
  if (positions == null || positions.advance(first) == DocIdSetIterator.NO_MORE_DOCS) {
    throw new CorruptIndexException("Missing parent data for category " + first, reader.toString());
  }
  
  int num = reader.maxDoc();
  for (int i = first; i < num; i++) {
    if (positions.docID() == i) {
      if (positions.freq() == 0) { // shouldn't happen
        throw new CorruptIndexException("Missing parent data for category " + i, reader.toString());
      }
      
      parents[i] = positions.nextPosition();
      
      if (positions.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
        if (i + 1 < num) {
          throw new CorruptIndexException("Missing parent data for category "+ (i + 1), reader.toString());
        }
        break;
      }
    } else { // this shouldn't happen
      throw new CorruptIndexException("Missing parent data for category " + i, reader.toString());
    }
  }
}
 
Example 7
Source File: RoaringDocIdSet.java    From lucene-solr with Apache License 2.0
/** Add the content of the provided {@link DocIdSetIterator}. */
public Builder add(DocIdSetIterator disi) throws IOException {
  for (int doc = disi.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = disi.nextDoc()) {
    add(doc);
  }
  return this;
}
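A hedged usage sketch for this builder, assuming a segment size maxDoc and an unpositioned DocIdSetIterator disi are in scope (the builder requires doc ids in increasing order):

RoaringDocIdSet.Builder builder = new RoaringDocIdSet.Builder(maxDoc);
RoaringDocIdSet set = builder.add(disi).build();
DocIdSetIterator it = set.iterator();
if (it != null) { // a DocIdSet is allowed to return null instead of an empty iterator
  for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
    // consume the doc ids in increasing order
  }
}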
 
Example 8
Source File: TestCodecs.java    From lucene-solr with Apache License 2.0
public void testDocsOnlyFreq() throws Exception {
  // tests that when fields are indexed with DOCS_ONLY, the Codec
  // returns 1 in docsEnum.freq()
  Directory dir = newDirectory();
  Random random = random();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random)));
  // we don't need many documents to assert this, but don't use just one document either
  int numDocs = atLeast(random, 50);
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    doc.add(new StringField("f", "doc", Store.NO));
    writer.addDocument(doc);
  }
  writer.close();
  
  Term term = new Term("f", new BytesRef("doc"));
  DirectoryReader reader = DirectoryReader.open(dir);
  for (LeafReaderContext ctx : reader.leaves()) {
    PostingsEnum de = ctx.reader().postings(term);
    while (de.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      assertEquals("wrong freq for doc " + de.docID(), 1, de.freq());
    }
  }
  reader.close();
  
  dir.close();
}
 
Example 9
Source File: CodecCollector.java    From mtas with Apache License 2.0
/**
 * Computes the basic termvector number.
 *
 * @param docSet the set of doc ids to count over
 * @param termDocId the current term doc id
 * @param termsEnum the terms enum positioned on the term
 * @param r the leaf reader
 * @param lrc the leaf reader context
 * @param postingsEnum the postings enum to reuse
 * @return the basic termvector number
 * @throws IOException Signals that an I/O exception has occurred.
 */
private static TermvectorNumberBasic computeTermvectorNumberBasic(
    List<Integer> docSet, int termDocId, TermsEnum termsEnum, LeafReader r,
    LeafReaderContext lrc, PostingsEnum postingsEnum) throws IOException {
  TermvectorNumberBasic result = new TermvectorNumberBasic();
  boolean hasDeletedDocuments = (r.getLiveDocs() != null);
  if ((docSet.size() == r.numDocs()) && !hasDeletedDocuments) {
    try {
      return computeTermvectorNumberBasic(termsEnum, r);
    } catch (IOException e) {
      log.debug("problem", e);
      // problem
    }
  }
  result.docNumber = 0;
  result.valueSum[0] = 0;
  int localTermDocId = termDocId;
  Iterator<Integer> docIterator = docSet.iterator();
  postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.FREQS);
  int docId;
  while (docIterator.hasNext()) {
    docId = docIterator.next() - lrc.docBase;
    if (docId >= localTermDocId && ((docId == localTermDocId)
        || ((localTermDocId = postingsEnum.advance(docId)) == docId))) {
      result.docNumber++;
      result.valueSum[0] += postingsEnum.freq();
    }
    if (localTermDocId == DocIdSetIterator.NO_MORE_DOCS) {
      break;
    }
  }
  return result;
}
 
Example 10
Source File: BitSet.java    From lucene-solr with Apache License 2.0
/** Does in-place OR of the bits provided by the iterator. The state of the
 *  iterator after this operation terminates is undefined. */
public void or(DocIdSetIterator iter) throws IOException {
  checkUnpositioned(iter);
  for (int doc = iter.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iter.nextDoc()) {
    set(doc);
  }
}
 
Example 11
Source File: TestIndexWriter.java    From lucene-solr with Apache License 2.0
private void assertHardLiveDocs(IndexWriter writer, Set<Integer> uniqueDocs) throws IOException {
  try (DirectoryReader reader = DirectoryReader.open(writer)) {
    assertEquals(uniqueDocs.size(), reader.numDocs());
    List<LeafReaderContext> leaves = reader.leaves();
    for (LeafReaderContext ctx : leaves) {
      LeafReader leaf = ctx.reader();
      assertTrue(leaf instanceof SegmentReader);
      SegmentReader sr = (SegmentReader) leaf;
      if (sr.getHardLiveDocs() != null) {
        Terms id = sr.terms("id");
        TermsEnum iterator = id.iterator();
        Bits hardLiveDocs = sr.getHardLiveDocs();
        Bits liveDocs = sr.getLiveDocs();
        for (Integer dId : uniqueDocs) {
          boolean mustBeHardDeleted = dId % 2 == 0;
          if (iterator.seekExact(new BytesRef(dId.toString()))) {
            PostingsEnum postings = iterator.postings(null);
            while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
              if (liveDocs.get(postings.docID())) {
                assertTrue(hardLiveDocs.get(postings.docID()));
              } else if (mustBeHardDeleted) {
                assertFalse(hardLiveDocs.get(postings.docID()));
              } else {
                assertTrue(hardLiveDocs.get(postings.docID()));
              }
            }
          }
        }
      }
    }
  }
}
 
Example 12
Source File: BitDocSet.java    From lucene-solr with Apache License 2.0
@Override
public DocIterator iterator() {
  return new DocIterator() {
    private final BitSetIterator iter = new BitSetIterator(bits, 0L); // cost is not useful here
    private int pos = iter.nextDoc();
    @Override
    public boolean hasNext() {
      return pos != DocIdSetIterator.NO_MORE_DOCS;
    }

    @Override
    public Integer next() {
      return nextDoc();
    }

    @Override
    public void remove() {
      bits.clear(pos);
    }

    @Override
    public int nextDoc() {
      int old = pos;
      pos = iter.nextDoc(); // advance the look-ahead before handing out the buffered id
      return old;
    }

    @Override
    public float score() {
      return 0.0f;
    }
  };
}
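Because pos always buffers the next doc id, hasNext() only has to compare it against NO_MORE_DOCS, and callers never observe the sentinel. A short usage sketch, assuming a BitDocSet instance named bitDocSet:

DocIterator it = bitDocSet.iterator();
while (it.hasNext()) {
  int doc = it.nextDoc(); // returns the buffered id, then advances the look-ahead
  // process doc
}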
 
Example 13
Source File: IndexImporter.java    From incubator-retired-blur with Apache License 2.0
private void applyDeletes(Directory directory, IndexWriter indexWriter, IndexSearcherCloseable searcher,
    String shard, boolean emitDeletes, Configuration configuration) throws IOException {
  DirectoryReader newReader = DirectoryReader.open(directory);
  try {
    List<AtomicReaderContext> newLeaves = newReader.getContext().leaves();
    BlurPartitioner blurPartitioner = new BlurPartitioner();
    Text key = new Text();
    int numberOfShards = _shardContext.getTableContext().getDescriptor().getShardCount();
    int shardId = ShardUtil.getShardIndex(shard);

    Action action = new Action() {
      @Override
      public void found(AtomicReader reader, Bits liveDocs, TermsEnum termsEnum) throws IOException {
        DocsEnum docsEnum = termsEnum.docs(liveDocs, null);
        if (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
          indexWriter.deleteDocuments(new Term(BlurConstants.ROW_ID, BytesRef.deepCopyOf(termsEnum.term())));
        }
      }
    };

    LOG.info("Applying deletes for table [{0}] shard [{1}] new reader [{2}]", _table, shard, newReader);
    boolean skipCheckRowIds = isInternal(newReader);
    LOG.info("Skip rowid check [{0}] for table [{1}] shard [{2}] new reader [{3}]", skipCheckRowIds, _table, shard,
        newReader);
    for (AtomicReaderContext context : newLeaves) {
      AtomicReader newAtomicReader = context.reader();
      if (isFastRowIdDeleteSupported(newAtomicReader)) {
        runNewRowIdCheckAndDelete(indexWriter, emitDeletes, blurPartitioner, key, numberOfShards, shardId,
            newAtomicReader, skipCheckRowIds);
      } else {
        runOldMergeSortRowIdCheckAndDelete(emitDeletes, searcher.getIndexReader(), blurPartitioner, key,
            numberOfShards, shardId, action, newAtomicReader);
      }
    }
  } finally {
    newReader.close();
  }
}
 
Example 14
Source File: CollapsingQParserPlugin.java    From lucene-solr with Apache License 2.0
@Override
public void finish() throws IOException {
  if(contexts.length == 0) {
    return;
  }

  if(nullScore > -1) {
    collapsedSet.set(nullDoc);
  }

  //Handle the boosted docs.
  if(this.boostKeys != null) {
    int s = boostKeys.size();
    for(int i=0; i<s; i++) {
      int key = this.boostKeys.get(i);
      if(key != nullValue) {
        cmap.remove(key);
      }
      //Add the boosted docs to the collapsedSet
      this.collapsedSet.set(boostDocs.get(i));
    }
  }

  Iterator<IntLongCursor> it1 = cmap.iterator();

  while(it1.hasNext()) {
    IntLongCursor cursor = it1.next();
    int doc = (int)cursor.value;
    collapsedSet.set(doc);
  }

  int currentContext = 0;
  int currentDocBase = 0;

  collapseValues = DocValues.getNumeric(contexts[currentContext].reader(), this.field);
  int nextDocBase = currentContext+1 < contexts.length ? contexts[currentContext+1].docBase : maxDoc;
  leafDelegate = delegate.getLeafCollector(contexts[currentContext]);
  ScoreAndDoc dummy = new ScoreAndDoc();
  leafDelegate.setScorer(dummy);
  DocIdSetIterator it = new BitSetIterator(collapsedSet, 0L); // cost is not useful here
  int globalDoc = -1;
  int nullScoreIndex = 0;
  while((globalDoc = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {

    while(globalDoc >= nextDocBase) {
      currentContext++;
      currentDocBase = contexts[currentContext].docBase;
      nextDocBase = currentContext+1 < contexts.length ? contexts[currentContext+1].docBase : maxDoc;
      leafDelegate = delegate.getLeafCollector(contexts[currentContext]);
      leafDelegate.setScorer(dummy);
      collapseValues = DocValues.getNumeric(contexts[currentContext].reader(), this.field);
    }

    int contextDoc = globalDoc-currentDocBase;
    int collapseValue;
    if (collapseValues.advanceExact(contextDoc)) {
      collapseValue = (int) collapseValues.longValue();
    } else {
      collapseValue = 0;
    }

    if(collapseValue != nullValue) {
      long scoreDoc = cmap.get(collapseValue);
      dummy.score = Float.intBitsToFloat((int)(scoreDoc>>32));
    } else if(boosts && mergeBoost.boost(globalDoc)) {
      //Ignore so boosted documents don't mess up the null scoring policies.
    } else if (nullPolicy == CollapsingPostFilter.NULL_POLICY_COLLAPSE) {
      dummy.score = nullScore;
    } else if(nullPolicy == CollapsingPostFilter.NULL_POLICY_EXPAND) {
      dummy.score = nullScores.get(nullScoreIndex++);
    }

    dummy.docId = contextDoc;
    leafDelegate.collect(contextDoc);
  }

  if(delegate instanceof DelegatingCollector) {
    ((DelegatingCollector) delegate).finish();
  }
}
 
Example 15
Source File: TestBackwardsCompatibility.java    From lucene-solr with Apache License 2.0
public void testDocValuesUpdatesWithNewField() throws Exception {
  Path oldIndexDir = createTempDir("dvupdates");
  TestUtil.unzip(getDataInputStream(dvUpdatesIndex), oldIndexDir);
  Directory dir = newFSDirectory(oldIndexDir);
  verifyUsesDefaultCodec(dir, dvUpdatesIndex);

  // update fields and verify index
  IndexWriterConfig conf = new IndexWriterConfig(new MockAnalyzer(random()));
  IndexWriter writer = new IndexWriter(dir, conf);
  // introduce a new field that we later update
  writer.addDocument(Arrays.asList(new StringField("id", "" + Integer.MAX_VALUE, Field.Store.NO),
      new NumericDocValuesField("new_numeric", 1),
      new BinaryDocValuesField("new_binary", toBytes(1))));
  writer.updateNumericDocValue(new Term("id", "1"), "new_numeric", 1);
  writer.updateBinaryDocValue(new Term("id", "1"), "new_binary", toBytes(1));

  writer.commit();
  Runnable assertDV = () -> {
    boolean found = false;
    try (DirectoryReader reader = DirectoryReader.open(dir)) {
      for (LeafReaderContext ctx : reader.leaves()) {
        LeafReader leafReader = ctx.reader();
        TermsEnum id = leafReader.terms("id").iterator();
        if (id.seekExact(new BytesRef("1"))) {
          PostingsEnum postings = id.postings(null, PostingsEnum.NONE);
          NumericDocValues numericDocValues = leafReader.getNumericDocValues("new_numeric");
          BinaryDocValues binaryDocValues = leafReader.getBinaryDocValues("new_binary");
          int doc;
          while ((doc = postings.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            found = true;
            assertTrue(binaryDocValues.advanceExact(doc));
            assertTrue(numericDocValues.advanceExact(doc));
            assertEquals(1, numericDocValues.longValue());
            assertEquals(toBytes(1), binaryDocValues.binaryValue());
          }
        }
      }
    } catch (IOException e) {
      throw new AssertionError(e);
    }
    assertTrue(found);
  };
  assertDV.run();
  // merge all segments
  writer.forceMerge(1);
  writer.commit();
  assertDV.run();
  writer.close();
  dir.close();
}
 
Example 16
Source File: LongRangeFacetCounts.java    From lucene-solr with Apache License 2.0
private void count(LongValuesSource valueSource, List<MatchingDocs> matchingDocs) throws IOException {

    LongRange[] ranges = (LongRange[]) this.ranges;

    LongRangeCounter counter = new LongRangeCounter(ranges);

    int missingCount = 0;
    for (MatchingDocs hits : matchingDocs) {
      LongValues fv = valueSource.getValues(hits.context, null);
      
      totCount += hits.totalHits;
      final DocIdSetIterator fastMatchDocs;
      if (fastMatchQuery != null) {
        final IndexReaderContext topLevelContext = ReaderUtil.getTopLevelContext(hits.context);
        final IndexSearcher searcher = new IndexSearcher(topLevelContext);
        searcher.setQueryCache(null);
        final Weight fastMatchWeight = searcher.createWeight(searcher.rewrite(fastMatchQuery), ScoreMode.COMPLETE_NO_SCORES, 1);
        Scorer s = fastMatchWeight.scorer(hits.context);
        if (s == null) {
          continue;
        }
        fastMatchDocs = s.iterator();
      } else {
        fastMatchDocs = null;
      }

      DocIdSetIterator docs = hits.bits.iterator();      
      for (int doc = docs.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; ) {
        if (fastMatchDocs != null) {
          int fastMatchDoc = fastMatchDocs.docID();
          if (fastMatchDoc < doc) {
            fastMatchDoc = fastMatchDocs.advance(doc);
          }

          if (doc != fastMatchDoc) {
            doc = docs.advance(fastMatchDoc);
            continue;
          }
        }
        // Skip missing docs:
        if (fv.advanceExact(doc)) {
          counter.add(fv.longValue());
        } else {
          missingCount++;
        }

        doc = docs.nextDoc();
      }
    }
    
    int x = counter.fillCounts(counts);

    missingCount += x;

    //System.out.println("totCount " + totCount + " x " + x + " missingCount " + missingCount);
    totCount -= missingCount;
  }
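The advance() handshake in the middle of the loop above is the standard leap-frog intersection of two iterators. A generic, hedged sketch of that idiom, assuming two unpositioned iterators a and b over the same segment:

int doc = a.nextDoc();
while (doc != DocIdSetIterator.NO_MORE_DOCS) {
  int other = b.docID() < doc ? b.advance(doc) : b.docID();
  if (other == doc) {
    // both iterators are on the same document: process it
    doc = a.nextDoc();
  } else {
    doc = a.advance(other); // leap a forward to b's position (advance(NO_MORE_DOCS) is legal)
  }
}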
 
Example 17
Source File: BitSetDocumentVisibilityFilterCacheStrategy.java    From incubator-retired-blur with Apache License 2.0
public static DocIdSetIterator getFullySetDocIdSetIterator(int maxDoc) {
  return new DocIdSetIterator() {

    private int _docId = -1;

    @Override
    public int advance(int target) throws IOException {
      if (_docId == DocIdSetIterator.NO_MORE_DOCS) {
        return DocIdSetIterator.NO_MORE_DOCS;
      }
      _docId = target;
      if (_docId >= maxDoc) {
        return _docId = DocIdSetIterator.NO_MORE_DOCS;
      }
      return _docId;
    }

    @Override
    public int nextDoc() throws IOException {
      if (_docId == DocIdSetIterator.NO_MORE_DOCS) {
        return DocIdSetIterator.NO_MORE_DOCS;
      }
      _docId++;
      if (_docId >= maxDoc) {
        return _docId = DocIdSetIterator.NO_MORE_DOCS;
      }
      return _docId;
    }

    @Override
    public int docID() {
      return _docId;
    }

    @Override
    public long cost() {
      return 0L;
    }

  };
}
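A small sketch of the contract this iterator implements, with an arbitrary maxDoc of 5: every id from 0 to maxDoc - 1 comes back exactly once, after which the iterator stays exhausted:

DocIdSetIterator all = getFullySetDocIdSetIterator(5);
for (int doc = all.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = all.nextDoc()) {
  System.out.println(doc); // prints 0 through 4
}
// every further nextDoc() or advance() call keeps returning NO_MORE_DOCS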
 
Example 18
Source File: DocSetBuilder.java    From lucene-solr with Apache License 2.0
public static void add(FixedBitSet bitSet, DocIdSetIterator iter, int base) throws IOException {
  for (int doc = iter.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iter.nextDoc()) {
    bitSet.set(doc + base);
  }
}
 
Example 19
Source File: IndexedDISI.java    From lucene-solr with Apache License 2.0
/**
 * Writes the docIDs from it to out, in logical blocks, one for each 65536 docIDs in monotonically
 * increasing gap-less order.
 * The caller must keep track of the number of jump-table entries (returned by this method) as well as the
 * denseRankPower and provide them when constructing an IndexedDISI for reading.
 * @param it  the document IDs.
 * @param out destination for the blocks.
 * @param denseRankPower for {@link Method#DENSE} blocks, a rank will be written every {@code 2^denseRankPower} docIDs.
 *                       Values &lt; 7 (every 128 docIDs) or &gt; 15 (every 32768 docIDs) disables DENSE rank.
 *                       Recommended values are 8-12: Every 256-4096 docIDs or 4-64 longs.
 *                       {@link #DEFAULT_DENSE_RANK_POWER} is 9: Every 512 docIDs.
 *                       This should be stored in meta and used when creating an instance of IndexedDISI.
 * @throws IOException if there was an error writing to out.
 * @return the number of jump-table entries following the blocks, -1 for no entries.
 *         This should be stored in meta and used when creating an instance of IndexedDISI.
 */
static short writeBitSet(DocIdSetIterator it, IndexOutput out, byte denseRankPower) throws IOException {
  final long origo = out.getFilePointer(); // All jumps are relative to the origo
  if ((denseRankPower < 7 || denseRankPower > 15) && denseRankPower != -1) {
    throw new IllegalArgumentException("Acceptable values for denseRankPower are 7-15 (every 128-32768 docIDs). " +
        "The provided power was " + denseRankPower + " (every " + (int)Math.pow(2, denseRankPower) + " docIDs)");
  }
  int totalCardinality = 0;
  int blockCardinality = 0;
  final FixedBitSet buffer = new FixedBitSet(1<<16);
  int[] jumps = new int[ArrayUtil.oversize(1, Integer.BYTES*2)];
  int prevBlock = -1;
  int jumpBlockIndex = 0;

  for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
    final int block = doc >>> 16;
    if (prevBlock != -1 && block != prevBlock) {
      // Track offset+index from previous block up to current
      jumps = addJumps(jumps, out.getFilePointer()-origo, totalCardinality, jumpBlockIndex, prevBlock+1);
      jumpBlockIndex = prevBlock+1;
      // Flush block
      flush(prevBlock, buffer, blockCardinality, denseRankPower, out);
      // Reset for next block
      buffer.clear(0, buffer.length());
      totalCardinality += blockCardinality;
      blockCardinality = 0;
    }
    buffer.set(doc & 0xFFFF);
    blockCardinality++;
    prevBlock = block;
  }
  if (blockCardinality > 0) {
    jumps = addJumps(jumps, out.getFilePointer()-origo, totalCardinality, jumpBlockIndex, prevBlock+1);
    totalCardinality += blockCardinality;
    flush(prevBlock, buffer, blockCardinality, denseRankPower, out);
    buffer.clear(0, buffer.length());
    prevBlock++;
  }
  final int lastBlock = prevBlock == -1 ? 0 : prevBlock; // There will always be at least 1 block (NO_MORE_DOCS)
  // Last entry is a SPARSE with blockIndex == 32767 and the single entry 65535, which becomes the docID NO_MORE_DOCS
  // To avoid creating 65K jump-table entries, only a single entry is created pointing to the offset of the
  // NO_MORE_DOCS block, with the jumpBlockIndex set to the logical EMPTY block after all real blocks.
  jumps = addJumps(jumps, out.getFilePointer()-origo, totalCardinality, lastBlock, lastBlock+1);
  buffer.set(DocIdSetIterator.NO_MORE_DOCS & 0xFFFF);
  flush(DocIdSetIterator.NO_MORE_DOCS >>> 16, buffer, 1, denseRankPower, out);
  // offset+index jump-table stored at the end
  return flushBlockJumps(jumps, lastBlock+1, out, origo);
}
 
Example 20
Source File: TestDirectoryReader.java    From lucene-solr with Apache License 2.0
public void testMultiTermDocs() throws IOException {
  Directory ramDir1=newDirectory();
  addDoc(random(), ramDir1, "test foo", true);
  Directory ramDir2=newDirectory();
  addDoc(random(), ramDir2, "test blah", true);
  Directory ramDir3=newDirectory();
  addDoc(random(), ramDir3, "test wow", true);

  IndexReader[] readers1 = new IndexReader[]{DirectoryReader.open(ramDir1), DirectoryReader.open(ramDir3)};
  IndexReader[] readers2 = new IndexReader[]{DirectoryReader.open(ramDir1), DirectoryReader.open(ramDir2), DirectoryReader.open(ramDir3)};
  MultiReader mr2 = new MultiReader(readers1);
  MultiReader mr3 = new MultiReader(readers2);

  // test mixing up TermDocs and TermEnums from different readers.
  TermsEnum te2 = MultiTerms.getTerms(mr2, "body").iterator();
  te2.seekCeil(new BytesRef("wow"));
  PostingsEnum td = TestUtil.docs(random(), mr2,
      "body",
      te2.term(),
      null,
      0);

  TermsEnum te3 = MultiTerms.getTerms(mr3, "body").iterator();
  te3.seekCeil(new BytesRef("wow"));
  td = TestUtil.docs(random(), te3,
      td,
      0);
  
  int ret = 0;

  // This should blow up if we forget to check that the TermEnum is from the same
  // reader as the TermDocs.
  while (td.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) ret += td.docID();

  // really a dummy assert to ensure that we got some docs and to ensure that
  // nothing is eliminated by hotspot
  assertTrue(ret > 0);
  readers1[0].close();
  readers1[1].close();
  readers2[0].close();
  readers2[1].close();
  readers2[2].close();
  ramDir1.close();
  ramDir2.close();
  ramDir3.close();
}