Java Code Examples for org.apache.lucene.util.FixedBitSet#nextSetBit()

The following examples show how to use org.apache.lucene.util.FixedBitSet#nextSetBit(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: BlockJoin.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Maps every document of {@code childInput} to the first bit set in {@code parentList} at or after it
 * and returns the collected parents. childInput may also contain parents (i.e. a parent or below will
 * all roll up to that parent).
 */
public static DocSet toParents(DocSet childInput, BitDocSet parentList, QueryContext qcontext) throws IOException {
  final FixedBitSet parents = parentList.getBits();
  final DocSetCollector parentCollector = new DocSetCollector(qcontext.searcher().maxDoc());
  int lastParent = -1;
  for (DocIterator docs = childInput.iterator(); docs.hasNext(); ) {
    final int doc = docs.nextDoc(); // TODO: skipping
    // Strictly greater: the input may contain the parent itself, and everything up to and
    // including the last parent found has already been rolled up.
    if (doc > lastParent) {
      lastParent = parents.nextSetBit(doc);
      // Each parent is collected exactly once, at the moment we first advance to it.
      if (lastParent != DocIdSetIterator.NO_MORE_DOCS) {
        parentCollector.collect(lastParent);
      }
    }
  }
  return parentCollector.getDocSet();
}
 
Example 2
Source File: UniqueSlotAcc.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the per-shard response for one slot: hashes the value of every ordinal set for the slot into
 * an HLL sketch and returns it serialized under the key "hll", or {@code HLLAgg.NO_VALUES} when the
 * slot has no ordinals recorded.
 */
@SuppressWarnings({"unchecked", "rawtypes"})
private Object getShardHLL(int slot) throws IOException {
  final FixedBitSet slotOrds = arr[slot];
  if (slotOrds == null) {
    return HLLAgg.NO_VALUES;
  }

  final HLL sketch = factory.getHLL();
  final int limit = slotOrds.length();
  final Hash.LongPair hash = new Hash.LongPair();
  int ord = 0;
  while (ord < limit) {
    ord = slotOrds.nextSetBit(ord);
    if (ord == DocIdSetIterator.NO_MORE_DOCS) {
      break;
    }
    final BytesRef term = lookupOrd(ord);
    // way to avoid recomputing hash across slots?  Prob not worth space
    Hash.murmurhash3_x64_128(term.bytes, term.offset, term.length, 0, hash);
    // idea: if the set is small enough, just send the hashes?  We can add at the top
    // level or even just do a hash table at the top level.
    sketch.addRaw(hash.val1);
    ord++;
  }

  final SimpleOrderedMap result = new SimpleOrderedMap();
  result.add("hll", sketch.toBytes());
  return result;
}
 
Example 3
Source File: BitSetHitStream.java    From siren-join with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Advances to the next set bit across the collector's per-reader bit sets, moving on to the following
 * reader whenever the current one is exhausted, and increments the hit count.
 */
@Override
public void next() {
  final LimitedBitSetHitCollector hits = (LimitedBitSetHitCollector) this.getCollector();
  int docId = this.currentAtomicDocId;

  while (currentAtomicReaderId < hits.getFixedSets().size()) {
    final FixedBitSet readerBits = hits.getFixedSets().get(currentAtomicReaderId);
    // NO_MORE_DOCS here means a new reader is being started, so scanning restarts at bit 0
    final int from = (docId == DocIdSetIterator.NO_MORE_DOCS) ? 0 : docId + 1;
    if (from < readerBits.length()) {
      docId = readerBits.nextSetBit(from);
    } else {
      docId = DocIdSetIterator.NO_MORE_DOCS;
    }
    if (docId != DocIdSetIterator.NO_MORE_DOCS) {
      break;
    }
    // current reader exhausted: try the next one
    currentAtomicReaderId++;
  }

  this.currentAtomicDocId = docId;
  this.count++;
}
 
Example 4
Source File: BaseCompoundFormatTestCase.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes a single sub-file, packs it with the codec's compound format, and verifies that
 * {@code checkIntegrity()} on the compound reader reads every byte of every file the write created.
 */
public void testCheckIntegrity() throws IOException {
  final Directory dir = newDirectory();
  final String subFile = "_123.xyz";
  final SegmentInfo si = newSegmentInfo(dir, "_123");
  try (IndexOutput out = dir.createOutput(subFile, newIOContext(random()))) {
    CodecUtil.writeIndexHeader(out, "Foo", 0, si.getId(), "suffix");
    int b = 0;
    while (b < 1024) {
      out.writeByte((byte) b);
      b++;
    }
    // footer written by hand: the footer magic, a zero int, then the checksum of the bytes so far
    out.writeInt(CodecUtil.FOOTER_MAGIC);
    out.writeInt(0);
    out.writeLong(out.getChecksum());
  }

  si.setFiles(Collections.singletonList(subFile));

  // track which files the compound-format write creates
  final FileTrackingDirectoryWrapper fileTracker = new FileTrackingDirectoryWrapper(dir);
  si.getCodec().compoundFormat().write(fileTracker, si, IOContext.DEFAULT);
  final Set<String> written = fileTracker.getFiles();

  // track which bytes are read while checking integrity
  final ReadBytesDirectoryWrapper byteTracker = new ReadBytesDirectoryWrapper(dir);
  final CompoundDirectory compound = si.getCodec().compoundFormat().getCompoundReader(byteTracker, si, IOContext.READ);
  compound.checkIntegrity();
  final Map<String,FixedBitSet> bytesRead = byteTracker.getReadBytes();
  assertEquals(written, bytesRead.keySet());
  for (Map.Entry<String, FixedBitSet> e : bytesRead.entrySet()) {
    final String fileName = e.getKey();
    // invert the "read" bits: any bit still set afterwards marks a byte that was never read
    final FixedBitSet unread = e.getValue().clone();
    unread.flip(0, unread.length());
    final int firstUnread = unread.nextSetBit(0);
    assertEquals("Byte at offset " + firstUnread + " of " + fileName + " was not read", DocIdSetIterator.NO_MORE_DOCS, firstUnread);
  }
  compound.close();
  dir.close();
}
 
Example 5
Source File: SloppyPhraseMatcher.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Map each term to the group that contains it: for every bit set in group {@code i} of {@code bb},
 * the term at that ordinal (in the key order of {@code tord}) is associated with {@code i}.
 */
private HashMap<Term,Integer> termGroups(LinkedHashMap<Term,Integer> tord, ArrayList<FixedBitSet> bb) throws IOException {
  final HashMap<Term,Integer> groupOfTerm = new HashMap<>();
  final Term[] terms = tord.keySet().toArray(new Term[0]);
  int group = 0; // group no.
  for (FixedBitSet members : bb) {
    int ord = members.nextSetBit(0);
    while (ord != DocIdSetIterator.NO_MORE_DOCS) {
      groupOfTerm.put(terms[ord], group);
      // only ask for the next bit while the start index is still inside the set
      final int from = ord + 1;
      ord = from < members.length() ? members.nextSetBit(from) : DocIdSetIterator.NO_MORE_DOCS;
    }
    group++;
  }
  return groupOfTerm;
}
 
Example 6
Source File: UniqueSlotAcc.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the per-shard response for one slot. Delegates to {@code getShardHLL} when an HLL factory is
 * configured; otherwise returns a map with the unique count ("unique"), the term count ("nTerms") and,
 * when the count is positive, up to 100 explicit values ("vals").
 */
@SuppressWarnings({"unchecked", "rawtypes"})
private Object getShardValue(int slot) throws IOException {
  if (factory != null) {
    return getShardHLL(slot);
  }

  final FixedBitSet slotOrds = arr[slot];
  final int unique;
  if (counts != null) {
    unique = counts[slot];
  } else if (slotOrds == null) {
    unique = 0;
  } else {
    unique = slotOrds.cardinality();
  }

  final SimpleOrderedMap result = new SimpleOrderedMap();
  result.add("unique", unique);
  result.add("nTerms", nTerms);

  if (unique <= 0) {
    return result;
  }

  // TODO: make configurable
  // TODO: share values across buckets
  final int maxExplicit = 100;
  final List explicitVals = new ArrayList(Math.min(unique, maxExplicit));

  final int limit = slotOrds.length();
  int ord = 0;
  while (ord < limit && explicitVals.size() < maxExplicit) {
    ord = slotOrds.nextSetBit(ord);
    if (ord == DocIdSetIterator.NO_MORE_DOCS) {
      break;
    }
    final BytesRef term = lookupOrd(ord);
    explicitVals.add(field.getType().toObject(field, term));
    ord++;
  }

  result.add("vals", explicitVals);
  return result;
}
 
Example 7
Source File: SlotAcc.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a bit set of {@code getNewSize()} bits and, for every bit set in {@code old}, sets the bit
 * at the index returned by {@code getNewSlot(oldSlot)}.
 *
 * @param old the bit set whose set bits are remapped; it is not modified
 * @return a new bit set holding the remapped bits
 */
public FixedBitSet resize(FixedBitSet old) {
  FixedBitSet values = new FixedBitSet(getNewSize());
  int oldSize = old.length();

  // Scan the OLD set. Scanning the freshly allocated (all-clear) target would never find a bit
  // and the result would always be empty. Bounding the start index by oldSize also keeps
  // nextSetBit from being called with an index outside the set (including an empty old set).
  for (int oldSlot = 0; oldSlot < oldSize; oldSlot++) {
    oldSlot = old.nextSetBit(oldSlot);
    if (oldSlot == DocIdSetIterator.NO_MORE_DOCS) break;
    int newSlot = getNewSlot(oldSlot);
    // A negative index can never be set. NOTE(review): presumably getNewSlot returns a negative
    // value for slots that are dropped by the resize - confirm against the implementations.
    if (newSlot >= 0) {
      values.set(newSlot);
    }
  }

  return values;
}
 
Example 8
Source File: DocSetUtil.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Converts a bit-set backed doc set into a {@code SortedIntDocSet} by reading {@code bitSet.size()}
 * successive set bits, in increasing order, into an int array.
 */
public static DocSet toSmallSet(BitDocSet bitSet) {
  final int[] docs = new int[bitSet.size()];
  final FixedBitSet bits = bitSet.getBits();
  int from = 0;
  int filled = 0;
  while (filled < docs.length) {
    final int doc = bits.nextSetBit(from);
    docs[filled++] = doc;
    from = doc + 1;
  }
  return new SortedIntDocSet(docs);
}
 
Example 9
Source File: BaseIndexFileFormatTestCase.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/** This test is a best effort at verifying that checkIntegrity doesn't miss any files. It tests that the
 *  combination of opening a reader and calling checkIntegrity on it reads all bytes of all files. */
public void testCheckIntegrityReadsAllBytes() throws Exception {
  assumeFalse("SimpleText doesn't store checksums of its files", getCodec() instanceof SimpleTextCodec);
  FileTrackingDirectoryWrapper dir = new FileTrackingDirectoryWrapper(newDirectory());
  applyCreatedVersionMajor(dir);

  // Index some random documents and merge them down to a single segment.
  IndexWriterConfig cfg = new IndexWriterConfig(new MockAnalyzer(random()));
  IndexWriter w = new IndexWriter(dir, cfg);
  final int numDocs = atLeast(100);
  for (int i = 0; i < numDocs; ++i) {
    Document d = new Document();
    addRandomFields(d);
    w.addDocument(d);
  }
  w.forceMerge(1);
  w.commit();
  w.close();

  // Open the index through a wrapper that records which bytes of which files get read.
  ReadBytesDirectoryWrapper readBytesWrapperDir = new ReadBytesDirectoryWrapper(dir);
  IndexReader reader = DirectoryReader.open(readBytesWrapperDir);
  LeafReader leafReader = getOnlyLeafReader(reader);
  leafReader.checkIntegrity();

  Map<String, FixedBitSet> readBytesMap = readBytesWrapperDir.getReadBytes();

  // Every tracked file except the write lock must have been opened.
  // (A leftover debug print of the directory listing was removed from this spot.)
  Set<String> unreadFiles = new HashSet<>(dir.getFiles());
  unreadFiles.removeAll(readBytesMap.keySet());
  unreadFiles.remove(IndexWriter.WRITE_LOCK_NAME);
  assertTrue("Some files have not been open: " + unreadFiles, unreadFiles.isEmpty());

  List<String> messages = new ArrayList<>();
  for (Map.Entry<String, FixedBitSet> entry : readBytesMap.entrySet()) {
    String name = entry.getKey();
    // Invert the "read" bits so that any bit still set marks a byte that was never read.
    FixedBitSet unreadBytes = entry.getValue().clone();
    unreadBytes.flip(0, unreadBytes.length());
    int unread = unreadBytes.nextSetBit(0);
    // Integer.MAX_VALUE is the value nextSetBit returns when no bit is set
    // (DocIdSetIterator.NO_MORE_DOCS).
    if (unread != Integer.MAX_VALUE) {
      messages.add("Offset " + unread + " of file " + name + "(" + unreadBytes.length() + "bytes) was not read.");
    }
  }
  assertTrue(String.join("\n", messages), messages.isEmpty());
  reader.close();
  dir.close();
}