Java Code Examples for org.apache.lucene.util.FixedBitSet#cardinality()

The following examples show how to use org.apache.lucene.util.FixedBitSet#cardinality(). They are taken from open source projects; the originating project and source file are noted above each example.
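As a quick orientation before the project examples: cardinality() returns the number of bits that are currently set, whereas length() returns the fixed capacity of the bit set. The minimal sketch below is not taken from any of the projects on this page, and the class name CardinalityDemo is made up for illustration:

import org.apache.lucene.util.FixedBitSet;

public class CardinalityDemo {
  public static void main(String[] args) {
    FixedBitSet bits = new FixedBitSet(100); // room for bits 0..99, all initially clear

    bits.set(0, 10);  // set bits 0 (inclusive) through 10 (exclusive)
    bits.clear(5);    // clear one of them again

    System.out.println(bits.cardinality()); // 9   -> number of currently set bits
    System.out.println(bits.length());      // 100 -> fixed capacity, independent of set bits
  }
}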
Example 1
Source File: TestIndexedDISI.java    From lucene-solr with Apache License 2.0
public void testOneDocMissingFixed() throws IOException {
  int maxDoc = 9699;
  final byte denseRankPower = rarely() ? -1 : (byte) (random().nextInt(7)+7); // sane + chance of disable
  FixedBitSet set = new FixedBitSet(maxDoc);
  set.set(0, maxDoc);
  set.clear(1345);
  try (Directory dir = newDirectory()) {

    final int cardinality = set.cardinality();
    long length;
    int jumpTableentryCount;
    try (IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT)) {
      jumpTableentryCount = IndexedDISI.writeBitSet(new BitSetIterator(set, cardinality), out, denseRankPower);
      length = out.getFilePointer();
    }

    int step = 16000;
    try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) {
      IndexedDISI disi = new IndexedDISI(in, 0L, length, jumpTableentryCount, denseRankPower, cardinality);
      BitSetIterator disi2 = new BitSetIterator(set, cardinality);
      assertAdvanceEquality(disi, disi2, step);
    }
  }
}
 
Example 2
Source File: TaggerRequestHandler.java    From lucene-solr with Apache License 2.0
private DocList getDocList(int rows, FixedBitSet matchDocIdsBS) throws IOException {
  //Now we must supply a Solr DocList and add it to the response.
  //  Typically this is gotten via a SolrIndexSearcher.search(), but in this case we
  //  know exactly what documents to return, the order doesn't matter nor does
  //  scoring.
  //  Ideally an implementation of DocList could be directly implemented off
  //  of a BitSet, but there are way too many methods to implement for a minor
  //  payoff.
  int matchDocs = matchDocIdsBS.cardinality();
  int[] docIds = new int[ Math.min(rows, matchDocs) ];
  DocIdSetIterator docIdIter = new BitSetIterator(matchDocIdsBS, 1);
  for (int i = 0; i < docIds.length; i++) {
    docIds[i] = docIdIter.nextDoc();
  }
  return new DocSlice(0, docIds.length, docIds, null, matchDocs, 1f, TotalHits.Relation.EQUAL_TO);
}
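This method illustrates a pattern that also recurs in Example 3 and in the TestDocSet examples further down: cardinality() sizes an int[] exactly, and a BitSetIterator then fills it with the set bits in ascending order. Below is a generic, stand-alone sketch of that pattern; the class and method names are made up for illustration and it is not code from any of the projects listed here:

import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.FixedBitSet;

public final class SetBitsUtil {
  /** Collects the set bits of a FixedBitSet into an exactly-sized int[]. */
  public static int[] collectSetBits(FixedBitSet bits) {
    int[] docs = new int[bits.cardinality()];                   // cardinality() == number of set bits
    BitSetIterator it = new BitSetIterator(bits, docs.length);  // second argument is only a cost hint
    for (int i = 0; i < docs.length; i++) {
      docs[i] = it.nextDoc();                                   // yields set bits in ascending order
    }
    return docs;
  }
}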
 
Example 3
Source File: TaggerRequestHandler.java    From SolrTextTagger with Apache License 2.0
private DocList getDocList(int rows, FixedBitSet matchDocIdsBS) throws IOException {
  //Now we must supply a Solr DocList and add it to the response.
  //  Typically this is gotten via a SolrIndexSearcher.search(), but in this case we
  //  know exactly what documents to return, the order doesn't matter nor does
  //  scoring.
  //  Ideally an implementation of DocList could be directly implemented off
  //  of a BitSet, but there are way too many methods to implement for a minor
  //  payoff.
  int matchDocs = matchDocIdsBS.cardinality();
  int[] docIds = new int[ Math.min(rows, matchDocs) ];
  DocIdSetIterator docIdIter = new BitSetIterator(matchDocIdsBS, 1);
  for (int i = 0; i < docIds.length; i++) {
    docIds[i] = docIdIter.nextDoc();
  }
  return new DocSlice(0, docIds.length, docIds, null, matchDocs, 1f);
}
 
Example 4
Source File: Lucene50LiveDocsFormat.java    From lucene-solr with Apache License 2.0
@Override
public Bits readLiveDocs(Directory dir, SegmentCommitInfo info, IOContext context) throws IOException {
  long gen = info.getDelGen();
  String name = IndexFileNames.fileNameFromGeneration(info.info.name, EXTENSION, gen);
  final int length = info.info.maxDoc();
  try (ChecksumIndexInput input = dir.openChecksumInput(name, context)) {
    Throwable priorE = null;
    try {
      CodecUtil.checkIndexHeader(input, CODEC_NAME, VERSION_START, VERSION_CURRENT, 
                                   info.info.getId(), Long.toString(gen, Character.MAX_RADIX));
      long[] data = new long[FixedBitSet.bits2words(length)];
      for (int i = 0; i < data.length; i++) {
        data[i] = input.readLong();
      }
      FixedBitSet fbs = new FixedBitSet(data, length);
      if (fbs.length() - fbs.cardinality() != info.getDelCount()) {
        throw new CorruptIndexException("bits.deleted=" + (fbs.length() - fbs.cardinality()) + 
                                        " info.delcount=" + info.getDelCount(), input);
      }
      return fbs.asReadOnlyBits();
    } catch (Throwable exception) {
      priorE = exception;
    } finally {
      CodecUtil.checkFooter(input, priorE);
    }
  }
  throw new AssertionError();
}
 
Example 5
Source File: SloppyPhraseMatcher.java    From lucene-solr with Apache License 2.0
/** pp was just advanced. If that caused a repeater collision, resolve by advancing the lesser
 * of the two colliding pps. Note that there can only be one collision, since by the initialization
 * there were no collisions before pp was advanced. */
private boolean advanceRpts(PhrasePositions pp) throws IOException {
  if (pp.rptGroup < 0) {
    return true; // not a repeater
  }
  PhrasePositions[] rg = rptGroups[pp.rptGroup];
  FixedBitSet bits = new FixedBitSet(rg.length); // for re-queuing after collisions are resolved
  int k0 = pp.rptInd;
  int k;
  while((k=collide(pp)) >= 0) {
    pp = lesser(pp, rg[k]); // always advance the lesser of the (only) two colliding pps
    if (!advancePP(pp)) {
      return false; // exhausted
    }
    if (k != k0) { // careful: mark only those currently in the queue
      bits = FixedBitSet.ensureCapacity(bits, k);
      bits.set(k); // mark that pp2 needs to be re-queued
    }
  }
  // collisions resolved, now re-queue
  // empty (partially) the queue until seeing all pps advanced for resolving collisions
  int n = 0;
  // TODO would be good if we can avoid calling cardinality() in each iteration!
  int numBits = bits.length(); // largest bit we set
  while (bits.cardinality() > 0) {
    PhrasePositions pp2 = pq.pop();
    rptStack[n++] = pp2;
    if (pp2.rptGroup >= 0 
        && pp2.rptInd < numBits  // this bit may not have been set
        && bits.get(pp2.rptInd)) {
      bits.clear(pp2.rptInd);
    }
  }
  // add back to queue
  for (int i=n-1; i>=0; i--) {
    pq.add(rptStack[i]);
  }
  return true;
}
 
Example 6
Source File: SolrIndexSplitter.java    From lucene-solr with Apache License 2.0
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
  return new ConstantScoreWeight(this, boost) {

    @Override
    public Scorer scorer(LeafReaderContext context) throws IOException {
      RTimerTree t = timings.sub("findDocsToDelete");
      t.resume();
      FixedBitSet set = findDocsToDelete(context);
      t.pause();
      if (log.isInfoEnabled()) {
        log.info("### partition={}, leaf={}, maxDoc={}, numDels={}, setLen={}, setCard={}"
        , partition, context, context.reader().maxDoc()
        ,context.reader().numDeletedDocs(), set.length(), set.cardinality());
      }
      Bits liveDocs = context.reader().getLiveDocs();
      if (liveDocs != null) {
        // check that we don't delete already deleted docs
        FixedBitSet dels = FixedBitSet.copyOf(liveDocs);
        dels.flip(0, dels.length());
        dels.and(set);
        if (dels.cardinality() > 0) {
          log.error("### INVALID DELS {}", dels.cardinality());
        }
      }
      return new ConstantScoreScorer(this, score(), scoreMode, new BitSetIterator(set, set.length()));
    }

    @Override
    public boolean isCacheable(LeafReaderContext ctx) {
      return false;
    }

    @Override
    public String toString() {
      return "weight(shardSplittingQuery,part" + partition + ")";
    }
  };
}
 
Example 7
Source File: UniqueSlotAcc.java    From lucene-solr with Apache License 2.0
/**
 * Returns the current slot value as a long.
 * This is used to get the non-sharded value.
 */
public long getNonShardValue(int slot) {
  long res;
  if (counts != null) {  // will only be pre-populated if this was used for sorting.
    res = counts[slot];
  } else {
    FixedBitSet bs = arr[slot];
    res = bs == null ? 0 : bs.cardinality();
  }
  return res;
}
 
Example 8
Source File: UniqueSlotAcc.java    From lucene-solr with Apache License 2.0
@SuppressWarnings({"unchecked", "rawtypes"})
private Object getShardValue(int slot) throws IOException {
  if (factory != null) return getShardHLL(slot);
  FixedBitSet ords = arr[slot];
  int unique;
  if (counts != null) {
    unique = counts[slot];
  } else {
    unique = ords==null ? 0 : ords.cardinality();
  }

  SimpleOrderedMap map = new SimpleOrderedMap();
  map.add("unique", unique);
  map.add("nTerms", nTerms);

  int maxExplicit=100;
  // TODO: make configurable
  // TODO: share values across buckets
  if (unique > 0) {

    List lst = new ArrayList( Math.min(unique, maxExplicit) );

    int maxOrd = ords.length();
    if (maxOrd > 0) {
      for (int ord=0; lst.size() < maxExplicit;) {
        ord = ords.nextSetBit(ord);
        if (ord == DocIdSetIterator.NO_MORE_DOCS) break;
        BytesRef val = lookupOrd(ord);
        Object o = field.getType().toObject(field, val);
        lst.add(o);
        if (++ord >= maxOrd) break;
      }
    }

    map.add("vals", lst);
  }

  return map;
}
 
Example 9
Source File: UniqueSlotAcc.java    From lucene-solr with Apache License 2.0
public void calcCounts() {
  counts = new int[arr.length];
  for (int i=0; i<arr.length; i++) {
    FixedBitSet bs = arr[i];
    counts[i] = bs == null ? 0 : bs.cardinality();
  }
}
 
Example 10
Source File: TestDocSet.java    From lucene-solr with Apache License 2.0
public DocSet getIntDocSet(FixedBitSet bs) {
  int[] docs = new int[bs.cardinality()];
  BitSetIterator iter = new BitSetIterator(bs, 0);
  for (int i=0; i<docs.length; i++) {
    docs[i] = iter.nextDoc();
  }
  return new SortedIntDocSet(docs);
}
 
Example 11
Source File: TestDocSet.java    From lucene-solr with Apache License 2.0
public DocSlice getDocSlice(FixedBitSet bs) {
  int len = bs.cardinality();
  int[] arr = new int[len+5];
  arr[0]=10; arr[1]=20; arr[2]=30; arr[arr.length-1]=1; arr[arr.length-2]=2;
  int offset = 3;
  int end = offset + len;

  BitSetIterator iter = new BitSetIterator(bs, 0);
  // put in opposite order... DocLists are not ordered.
  for (int i=end-1; i>=offset; i--) {
    arr[i] = iter.nextDoc();
  }

  return new DocSlice(offset, len, arr, null, len*2, 100.0f, TotalHits.Relation.EQUAL_TO);
}
 
Example 12
Source File: SimpleTextFieldsReader.java    From lucene-solr with Apache License 2.0
private void loadTerms() throws IOException {
  PositiveIntOutputs posIntOutputs = PositiveIntOutputs.getSingleton();
  final FSTCompiler<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> fstCompiler;
  final PairOutputs<Long,Long> outputsInner = new PairOutputs<>(posIntOutputs, posIntOutputs);
  final PairOutputs<Long,PairOutputs.Pair<Long,Long>> outputs = new PairOutputs<>(posIntOutputs,
      outputsInner);
  fstCompiler = new FSTCompiler<>(FST.INPUT_TYPE.BYTE1, outputs);
  IndexInput in = SimpleTextFieldsReader.this.in.clone();
  in.seek(termsStart);
  final BytesRefBuilder lastTerm = new BytesRefBuilder();
  long lastDocsStart = -1;
  int docFreq = 0;
  long totalTermFreq = 0;
  FixedBitSet visitedDocs = new FixedBitSet(maxDoc);
  final IntsRefBuilder scratchIntsRef = new IntsRefBuilder();
  while(true) {
    SimpleTextUtil.readLine(in, scratch);
    if (scratch.get().equals(END) || StringHelper.startsWith(scratch.get(), FIELD)) {
      if (lastDocsStart != -1) {
        fstCompiler.add(Util.toIntsRef(lastTerm.get(), scratchIntsRef),
            outputs.newPair(lastDocsStart,
                outputsInner.newPair((long) docFreq, totalTermFreq)));
        sumTotalTermFreq += totalTermFreq;
      }
      break;
    } else if (StringHelper.startsWith(scratch.get(), DOC)) {
      docFreq++;
      sumDocFreq++;
      totalTermFreq++;
      scratchUTF16.copyUTF8Bytes(scratch.bytes(), DOC.length, scratch.length()-DOC.length);
      int docID = ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length());
      visitedDocs.set(docID);
    } else if (StringHelper.startsWith(scratch.get(), FREQ)) {
      scratchUTF16.copyUTF8Bytes(scratch.bytes(), FREQ.length, scratch.length()-FREQ.length);
      totalTermFreq += ArrayUtil.parseInt(scratchUTF16.chars(), 0, scratchUTF16.length()) - 1;
    } else if (StringHelper.startsWith(scratch.get(), TERM)) {
      if (lastDocsStart != -1) {
        fstCompiler.add(Util.toIntsRef(lastTerm.get(), scratchIntsRef), outputs.newPair(lastDocsStart,
            outputsInner.newPair((long) docFreq, totalTermFreq)));
      }
      lastDocsStart = in.getFilePointer();
      final int len = scratch.length() - TERM.length;
      lastTerm.grow(len);
      System.arraycopy(scratch.bytes(), TERM.length, lastTerm.bytes(), 0, len);
      lastTerm.setLength(len);
      docFreq = 0;
      sumTotalTermFreq += totalTermFreq;
      totalTermFreq = 0;
      termCount++;
    }
  }
  docCount = visitedDocs.cardinality();
  fst = fstCompiler.compile();
  /*
  PrintStream ps = new PrintStream("out.dot");
  fst.toDot(ps);
  ps.close();
  System.out.println("SAVED out.dot");
  */
  //System.out.println("FST " + fst.sizeInBytes());
}
 
Example 13
Source File: CheckIndex.java    From lucene-solr with Apache License 2.0
private static void checkSortedDocValues(String fieldName, int maxDoc, SortedDocValues dv, SortedDocValues dv2) throws IOException {
  if (dv.docID() != -1) {
    throw new RuntimeException("sorted dv iterator for field: " + fieldName + " should start at docID=-1, but got " + dv.docID());
  }
  final int maxOrd = dv.getValueCount()-1;
  FixedBitSet seenOrds = new FixedBitSet(dv.getValueCount());
  int maxOrd2 = -1;
  for (int doc = dv.nextDoc(); doc != NO_MORE_DOCS; doc = dv.nextDoc()) {
    int ord = dv.ordValue();
    if (ord == -1) {
      throw new RuntimeException("dv for field: " + fieldName + " has -1 ord");
    } else if (ord < -1 || ord > maxOrd) {
      throw new RuntimeException("ord out of bounds: " + ord);
    } else {
      maxOrd2 = Math.max(maxOrd2, ord);
      seenOrds.set(ord);
    }

    if (dv2.advanceExact(doc) == false) {
      throw new RuntimeException("advanceExact did not find matching doc ID: " + doc);
    }
    int ord2 = dv2.ordValue();
    if (ord != ord2) {
      throw new RuntimeException("nextDoc and advanceExact report different ords: " + ord + " != " + ord2);
    }
  }
  if (maxOrd != maxOrd2) {
    throw new RuntimeException("dv for field: " + fieldName + " reports wrong maxOrd=" + maxOrd + " but this is not the case: " + maxOrd2);
  }
  if (seenOrds.cardinality() != dv.getValueCount()) {
    throw new RuntimeException("dv for field: " + fieldName + " has holes in its ords, valueCount=" + dv.getValueCount() + " but only used: " + seenOrds.cardinality());
  }
  BytesRef lastValue = null;
  for (int i = 0; i <= maxOrd; i++) {
    final BytesRef term = dv.lookupOrd(i);
    term.isValid();
    if (lastValue != null) {
      if (term.compareTo(lastValue) <= 0) {
        throw new RuntimeException("dv for field: " + fieldName + " has ords out of order: " + lastValue + " >=" + term);
      }
    }
    lastValue = BytesRef.deepCopyOf(term);
  }
}
 
Example 14
Source File: SolrIndexSplitter.java    From lucene-solr with Apache License 2.0
public LiveDocsReader(CodecReader in, FixedBitSet liveDocs) {
  super(in);
  this.liveDocs = liveDocs;
  this.numDocs = liveDocs.cardinality();
}