Java Code Examples for org.apache.lucene.util.FixedBitSet#set()

The following examples show how to use org.apache.lucene.util.FixedBitSet#set(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: AbstractAuthoritySetQuery.java    From SearchServices with GNU Lesser General Public License v3.0 6 votes vote down vote up
/**
 * Builds a filter holding one bit set per index segment, where a document's bit is set
 * when its ACL id (read from the {@code QueryConstants.FIELD_ACLID} doc values) is
 * contained in the ACL set returned by {@code getACLSet} for the given authorities.
 *
 * @param auths    the authorities passed through to {@code getACLSet}
 * @param field    the field passed through to {@code getACLSet}
 * @param searcher the searcher whose leaves are scanned
 * @return a filter wrapping the per-segment bit sets, in leaf order
 * @throws IOException if reading the index fails
 */
protected BitsFilter getACLFilter(String[] auths, String field, SolrIndexSearcher searcher) throws IOException
{
    final HybridBitSet permittedAcls = getACLSet(auths, field, searcher);
    final List<LeafReaderContext> segments = searcher.getTopReaderContext().leaves();
    final List<FixedBitSet> perSegmentBits = new ArrayList<FixedBitSet>(segments.size());

    for (LeafReaderContext segment : segments)
    {
        final LeafReader segmentReader = segment.reader();
        final int docCount = segmentReader.maxDoc();

        // Every segment contributes a bit set, even when it ends up empty.
        final FixedBitSet matches = new FixedBitSet(docCount);
        perSegmentBits.add(matches);

        final NumericDocValues aclIds =
                DocValuesCache.getNumericDocValues(QueryConstants.FIELD_ACLID, segmentReader);
        if (aclIds == null)
        {
            // No ACL id values for this segment: leave its bit set empty.
            continue;
        }

        int docId = 0;
        while (docId < docCount)
        {
            final long aclId = aclIds.get(docId);
            if (permittedAcls.get(aclId))
            {
                matches.set(docId);
            }
            docId++;
        }
    }

    return new BitsFilter(perSegmentBits);
}
 
Example 2
Source File: SortingLeafReader.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the numeric doc values of {@code field} re-addressed through {@code docMap}.
 * The remapped values are computed once per field and kept in {@code cachedNumericDVs};
 * the computation and cache access happen while holding that map's monitor.
 *
 * @param field the field to read
 * @return the remapped doc values, or {@code null} when the wrapped reader has none
 * @throws IOException if reading the wrapped doc values fails
 */
@Override
public NumericDocValues getNumericDocValues(String field) throws IOException {
  final NumericDocValues source = in.getNumericDocValues(field);
  if (source == null) {
    return null;
  }

  CachedNumericDVs cached;
  synchronized (cachedNumericDVs) {
    cached = cachedNumericDVs.get(field);
    if (cached == null) {
      FixedBitSet populated = new FixedBitSet(maxDoc());
      long[] remappedValues = new long[maxDoc()];
      for (int oldDoc = source.nextDoc(); oldDoc != NO_MORE_DOCS; oldDoc = source.nextDoc()) {
        int newDoc = docMap.oldToNew(oldDoc);
        populated.set(newDoc);
        remappedValues[newDoc] = source.longValue();
      }
      cached = new CachedNumericDVs(remappedValues, populated);
      cachedNumericDVs.put(field, cached);
    }
  }
  return new SortingNumericDocValues(cached);
}
 
Example 3
Source File: TestDocCount.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * For every indexed field of {@code ir} that has terms, walks all postings, marks each
 * visited document in a bit set, and asserts that the number of marked documents equals
 * the doc count reported by the field's {@code Terms}.
 *
 * @param ir the reader to verify
 * @throws Exception if reading the index fails
 */
private void verifyCount(IndexReader ir) throws Exception {
  for (String field : FieldInfos.getIndexedFields(ir)) {
    final Terms terms = MultiTerms.getTerms(ir, field);
    if (terms != null) {
      final int reportedDocCount = terms.getDocCount();
      final FixedBitSet seenDocs = new FixedBitSet(ir.maxDoc());
      final TermsEnum termsEnum = terms.iterator();
      while (termsEnum.next() != null) {
        final PostingsEnum postings = TestUtil.docs(random(), termsEnum, null, PostingsEnum.NONE);
        while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
          seenDocs.set(postings.docID());
        }
      }
      assertEquals(seenDocs.cardinality(), reportedDocCount);
    }
  }
}
 
Example 4
Source File: BitSetHitStream.java    From siren-join with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Allocates a bit set sized to the segment's {@code maxDoc}, stores it in the
 * {@code current} field and in {@code fixedBitSets} at index {@code context.ord}, and
 * returns a collector that marks each collected document and increments {@code totalHits}.
 *
 * @param context the segment about to be collected
 * @return the collector for that segment
 * @throws IOException declared by the overridden method
 */
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
  final int segmentMaxDoc = context.reader().maxDoc();
  current = new FixedBitSet(segmentMaxDoc);
  fixedBitSets.add(context.ord, current);

  final LeafCollector segmentCollector = new LeafCollector() {

    @Override
    public void setScorer(Scorer scorer) throws IOException {
      // The scorer is not used by this collector.
    }

    @Override
    public void collect(int doc) throws IOException {
      // Reads the `current` field at call time rather than a captured local.
      current.set(doc);
      totalHits++;
    }

  };
  return segmentCollector;
}
 
Example 5
Source File: TestSort.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a doc id set over {@code sz} bits and sets a random number of randomly chosen
 * bits in it, using the random source {@code r}. Positions may repeat, so the number of
 * distinct set bits can be smaller than the number of draws.
 *
 * @param sz the size of the bit set; also the exclusive bound for every random draw
 * @return the populated set wrapped in a {@code BitDocIdSet}
 */
public DocIdSet randSet(int sz) {
  final FixedBitSet bits = new FixedBitSet(sz);
  int remaining = r.nextInt(sz);
  while (remaining > 0) {
    bits.set(r.nextInt(sz));
    remaining--;
  }
  return new BitDocIdSet(bits);
}
 
Example 6
Source File: SlotAcc.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a new bit set of size {@code getNewSize()} in which every bit that is set in
 * {@code old} is set at its remapped position, as given by {@code getNewSlot(oldSlot)}.
 *
 * @param old the bit set to migrate; it is not modified
 * @return a new bit set holding the remapped bits of {@code old}
 */
public FixedBitSet resize(FixedBitSet old) {
  FixedBitSet values = new FixedBitSet(getNewSize());
  int oldSize = old.length();

  // Iterate over the set bits of the SOURCE set. The previous code scanned the freshly
  // allocated (and therefore empty) destination, so no bit was ever carried over.
  int oldSlot = 0;
  while (oldSlot < oldSize) {
    oldSlot = old.nextSetBit(oldSlot);
    if (oldSlot == DocIdSetIterator.NO_MORE_DOCS) {
      break;
    }
    values.set(getNewSlot(oldSlot));
    // Advance past the bit just handled; the loop condition keeps the next
    // nextSetBit call within the bounds of `old`.
    oldSlot++;
  }

  return values;
}
 
Example 7
Source File: TestIndexedDISI.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Runs {@code doTest} repeatedly on bit sets that start fully set and then have a random
 * number of randomly chosen bits cleared.
 *
 * @throws IOException if the directory or {@code doTest} fails
 */
public void testFewMissingDocs() throws IOException {
  try (Directory dir = newDirectory()) {
    final int rounds = atLeast(10);
    int round = 0;
    while (round < rounds) {
      final int maxDoc = TestUtil.nextInt(random(), 1, 100000);
      final FixedBitSet set = new FixedBitSet(maxDoc);
      // Start with every document present.
      set.set(0, maxDoc);
      final int numMissingDocs = TestUtil.nextInt(random(), 2, 1000);
      for (int remaining = numMissingDocs; remaining > 0; remaining--) {
        set.clear(random().nextInt(maxDoc));
      }
      doTest(set, dir);
      ++round;
    }
  }
}
 
Example 8
Source File: SloppyPhraseMatcher.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds one bit set per repeating phrase position: for each of its terms that has an
 * entry in {@code tord}, the bit at that term's ordinal is set.
 *
 * @param rpp  the repeating phrase positions
 * @param tord maps terms to ordinals; terms without an entry are skipped
 * @return the bit sets, in the same order as {@code rpp}
 */
private ArrayList<FixedBitSet> ppTermsBitSets(PhrasePositions[] rpp, HashMap<Term,Integer> tord) {
  final ArrayList<FixedBitSet> result = new ArrayList<>(rpp.length);
  for (int i = 0; i < rpp.length; i++) {
    final FixedBitSet termOrds = new FixedBitSet(tord.size());
    for (Term term : rpp[i].terms) {
      final Integer ordinal = tord.get(term);
      if (ordinal != null) {
        termOrds.set(ordinal);
      }
    }
    result.add(termOrds);
  }
  return result;
}
 
Example 9
Source File: TestFieldCacheSortRandom.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a constant-score weight whose per-segment scorer accepts a random subset of
 * documents. For each accepted document, the value looked up in {@code docValues} by the
 * document's "id" doc value is added to {@code matchValues}.
 */
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
  return new ConstantScoreWeight(this, boost) {
    @Override
    public Scorer scorer(LeafReaderContext context) throws IOException {
      // The random source is seeded from `seed` combined with the segment's doc base.
      final Random segmentRandom = new Random(seed ^ context.docBase);
      final int docCount = context.reader().maxDoc();
      final NumericDocValues ids = DocValues.getNumeric(context.reader(), "id");
      assertNotNull(ids);

      final FixedBitSet accepted = new FixedBitSet(docCount);
      int docID = 0;
      while (docID < docCount) {
        if (segmentRandom.nextFloat() <= density) {
          accepted.set(docID);
          // The "id" doc values are expected to have a value for this document.
          assertEquals(docID, ids.advance(docID));
          final int id = (int) ids.longValue();
          matchValues.add(docValues.get(id));
        }
        docID++;
      }

      return new ConstantScoreScorer(
          this,
          score(),
          scoreMode,
          new BitSetIterator(accepted, accepted.approximateCardinality()));
    }

    @Override
    public boolean isCacheable(LeafReaderContext ctx) {
      return true;
    }
  };
}
 
Example 10
Source File: BinaryDocValuesWriter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Drains {@code oldValues} and re-addresses each value to the doc id given by
 * {@code sortMap.oldToNew}, storing a deep copy of every binary value.
 *
 * @param maxDoc    size of the result array and bit set
 * @param sortMap   maps old doc ids to new doc ids
 * @param oldValues the doc values to consume; iterated until exhausted
 * @return the remapped values with the set of documents that have a value
 * @throws IOException if reading {@code oldValues} fails
 */
private SortingLeafReader.CachedBinaryDVs sortDocValues(int maxDoc, Sorter.DocMap sortMap, BinaryDocValues oldValues) throws IOException {
  final FixedBitSet populated = new FixedBitSet(maxDoc);
  final BytesRef[] remapped = new BytesRef[maxDoc];
  for (int oldDoc = oldValues.nextDoc(); oldDoc != NO_MORE_DOCS; oldDoc = oldValues.nextDoc()) {
    final int newDoc = sortMap.oldToNew(oldDoc);
    populated.set(newDoc);
    // Stored as a deep copy of the value returned by the iterator.
    remapped[newDoc] = BytesRef.deepCopyOf(oldValues.binaryValue());
  }
  return new SortingLeafReader.CachedBinaryDVs(remapped, populated);
}
 
Example 11
Source File: NumericDocValuesWriter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Drains {@code oldDocValues} and re-addresses each value to the doc id given by
 * {@code sortMap.oldToNew}.
 *
 * @param maxDoc       size of the result array and bit set
 * @param sortMap      maps old doc ids to new doc ids
 * @param oldDocValues the doc values to consume; iterated until exhausted
 * @return the remapped values with the set of documents that have a value
 * @throws IOException if reading {@code oldDocValues} fails
 */
static SortingLeafReader.CachedNumericDVs sortDocValues(int maxDoc, Sorter.DocMap sortMap, NumericDocValues oldDocValues) throws IOException {
  final FixedBitSet populated = new FixedBitSet(maxDoc);
  final long[] remapped = new long[maxDoc];
  for (int oldDoc = oldDocValues.nextDoc(); oldDoc != NO_MORE_DOCS; oldDoc = oldDocValues.nextDoc()) {
    final int newDoc = sortMap.oldToNew(oldDoc);
    populated.set(newDoc);
    remapped[newDoc] = oldDocValues.longValue();
  }
  return new SortingLeafReader.CachedNumericDVs(remapped, populated);
}
 
Example 12
Source File: TestConjunctionDISI.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Exhausts {@code iterator} and records every returned doc id in a new bit set.
 *
 * @param maxDoc   the size of the bit set
 * @param iterator the iterator to drain
 * @return a bit set with one bit set per doc id returned by the iterator
 * @throws IOException if advancing the iterator fails
 */
private static FixedBitSet toBitSet(int maxDoc, DocIdSetIterator iterator) throws IOException {
  final FixedBitSet collected = new FixedBitSet(maxDoc);
  int doc;
  while ((doc = iterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
    collected.set(doc);
  }
  return collected;
}
 
Example 13
Source File: TestConjunctionDISI.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a bit set of size {@code maxDoc} with bits set at random, strictly increasing
 * positions. The first position is drawn with {@code random().nextInt(step)}, and each
 * following gap with {@code TestUtil.nextInt(random(), 1, step)}, where {@code step} is
 * itself drawn once with {@code TestUtil.nextInt(random(), 1, 10)}.
 *
 * @param maxDoc the size of the bit set
 * @return the randomly populated bit set
 */
private static FixedBitSet randomSet(int maxDoc) {
  final int step = TestUtil.nextInt(random(), 1, 10);
  final FixedBitSet result = new FixedBitSet(maxDoc);
  int doc = random().nextInt(step);
  while (doc < maxDoc) {
    result.set(doc);
    doc += TestUtil.nextInt(random(), 1, step);
  }
  return result;
}
 
Example 14
Source File: OrdinalsBuilder.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@link BitSet} with one bit per document, set for each document whose entry
 * in {@code ordinals.firstOrdinals} is non-zero.
 *
 * @return the bit set, or <code>null</code> when {@code numDocsWithValue} equals
 *         {@code maxDoc}
 */
public BitSet buildDocsWithValuesSet() {
    if (numDocsWithValue == maxDoc) {
        return null;
    }

    final FixedBitSet docsWithValues = new FixedBitSet(maxDoc);
    int docID = 0;
    while (docID < maxDoc) {
        if (ordinals.firstOrdinals.get(docID) != 0) {
            docsWithValues.set(docID);
        }
        docID++;
    }
    return docsWithValues;
}
 
Example 15
Source File: SolrIndexSearcher.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Returns {@code answer} as a {@code BitDocSet}: the same instance when it already is
 * one, otherwise a new set of size {@code maxDoc()} populated from its iterator.
 *
 * @param answer the doc set to convert
 * @return a bit-backed doc set with the same documents
 */
private BitDocSet makeBitDocSet(DocSet answer) {
  // TODO: this should be implemented in DocSet, most likely with a getBits method that takes a maxDoc argument
  // or make DocSet instances remember maxDoc
  if (answer instanceof BitDocSet) {
    return (BitDocSet) answer;
  }

  final FixedBitSet bits = new FixedBitSet(maxDoc());
  for (DocIterator docs = answer.iterator(); docs.hasNext(); ) {
    bits.set(docs.nextDoc());
  }
  return new BitDocSet(bits, answer.size());
}
 
Example 16
Source File: TestIndexedDISI.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Runs {@code doTest} ten times on bit sets in which a single random range
 * {@code [start, end)} of bits is set.
 *
 * @throws IOException if the directory or {@code doTest} fails
 */
public void testDocRange() throws IOException {
  try (Directory dir = newDirectory()) {
    int round = 0;
    while (round < 10) {
      final int maxDoc = TestUtil.nextInt(random(), 1, 1000000);
      final FixedBitSet set = new FixedBitSet(maxDoc);
      final int start = random().nextInt(maxDoc);
      final int end = TestUtil.nextInt(random(), start + 1, maxDoc);
      set.set(start, end);
      doTest(set, dir);
      ++round;
    }
  }
}
 
Example 17
Source File: Main.java    From aparapi-examples with Apache License 2.0 4 votes vote down vote up
/**
 * Builds random pairs of bit sets, computes the pairwise intersection counts once with
 * {@code FixedBitSet.intersectionCount} and once through
 * {@code CorrMatrixHost.intersectionMatrix}, and prints every cell where the two results differ.
 *
 * NumTerms and NumLongs (documents) need to be adjusted manually right now to force 'striping' to occur (see Host code for details)
 *
 * @param _args The command-line arguments.
 */
public static void main(String[] _args) {
    final List<Pair<FixedBitSet, FixedBitSet>> obsPairs = new ArrayList<Pair<FixedBitSet, FixedBitSet>>();
    final Random rand = new Random();

    /*
     * Populate test data
     */
    System.out.println("----------");
    System.out.println("Populating test matrix data using settings from build.xml...");
    System.out.println("----------");

    final int numTerms = Integer.getInteger("numRows", 300); // # Rows
    // numLongs*64 for number of actual documents since these are 'packed' longs
    final int numLongs = Integer.getInteger("numColumns", 10000); // # Columns

    for (int row = 0; row < numTerms; ++row) {
        final FixedBitSet left = new FixedBitSet(numLongs);
        final FixedBitSet right = new FixedBitSet(numLongs);

        // Two independent random draws per column: the first decides the left set,
        // the second decides the right set.
        for (int col = 0; col < numLongs; ++col) {
            if (rand.nextBoolean()) {
                left.set(col);
            }
            if (rand.nextBoolean()) {
                right.set(col);
            }
        }

        obsPairs.add(row, new ImmutablePair<FixedBitSet, FixedBitSet>(left, right));
    }

    /*
     * FixedBitSet calculations
     */
    System.out.println("Executing FixedBitSet intersectionCount");

    final long startTime = System.currentTimeMillis();

    final int pairCount = obsPairs.size();
    final int[][] obsResultMatrix = new int[pairCount][pairCount];

    // This is an N^2 comparison loop
    // FIXME This entire loop needs to be parallelized to show an apples-to-apples comparison to Aparapi
    for (int row = 0; row < pairCount; row++) {
        final FixedBitSet leftOfRow = obsPairs.get(row).getLeft();

        for (int col = 0; col < pairCount; col++) {
            final FixedBitSet rightOfCol = obsPairs.get(col).getRight();

            // # of matches in both sets of documents
            obsResultMatrix[row][col] = (int) FixedBitSet.intersectionCount(leftOfRow, rightOfCol);
        }
    }

    final long elapsedMillis = System.currentTimeMillis() - startTime;

    System.out.println("FixedBitSet Gross Execution Time: " + elapsedMillis + " ms <------FixedBitSet");
    System.out.println("----------");

    /*
     * GPU calculations
     */
    System.out.println("Executing Aparapi intersectionCount");

    final long[][] matrixA = new long[pairCount][];
    final long[][] matrixB = new long[pairCount][];

    // Convert FixedBitSet pairs to long primitive arrays for use with Aparapi
    // TODO It would be nice if we could find a way to put the obsPairs onto the GPU directly :)
    for (int row = 0; row < pairCount; row++) {
        final Pair<FixedBitSet, FixedBitSet> pair = obsPairs.get(row);
        matrixA[row] = pair.getLeft().getBits();
        matrixB[row] = pair.getRight().getBits();
    }

    // The execution mode is passed explicitly because the CorrMatrix host/kernel code
    // came from a GUI where a user could select "Use Hardware Acceleration" instead
    // of the application forcing the setting globally on the command-line
    final int[][] gpuResultMatrix =
            CorrMatrixHost.intersectionMatrix(matrixA, matrixB, EXECUTION_MODE.GPU);

    // Compare the two result arrays to make sure we are generating the same output
    System.out.println("[i][j] -> FixedBitSet Result : GPU Result Array");
    for (int row = 0; row < obsResultMatrix.length; row++) {
        for (int col = 0; col < obsResultMatrix[row].length; col++) {
            final int expected = obsResultMatrix[row][col];
            final int actual = gpuResultMatrix[row][col];
            if (expected != actual) {
                System.out.println("[" + row + "][" + col + "] -> " + expected + " : " + actual);
            }
        }
    }
    System.out.println("Any elements not listed matched!");
}
 
Example 18
Source File: RandomSamplingFacetsCollector.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a sample of the given hits.
 *
 * <p>The hits of {@code docs} are walked in iterator order and grouped into consecutive
 * bins of {@code binSize} hits (where {@code binSize = (int) (1.0 / samplingRate)}); within
 * a bin, the hit whose position equals a randomly drawn index is added to the sample.
 * The state of a bin left unfinished when the iterator is exhausted is stored in the
 * {@code leftoverBin} / {@code leftoverIndex} fields and picked up by the next call.
 *
 * @param docs the matching documents of one segment
 * @return a {@code MatchingDocs} for the same context holding the sampled bits; it carries
 *         the original {@code docs.totalHits} and {@code null} as its last argument
 * @throws RuntimeException wrapping any {@code IOException} raised while iterating
 */
private MatchingDocs createSample(MatchingDocs docs) {
  int maxdoc = docs.context.reader().maxDoc();
  
  // TODO: we could try the WAH8DocIdSet here as well, as the results will be sparse
  FixedBitSet sampleDocs = new FixedBitSet(maxdoc);
  
  // Number of hits per bin; truncated towards zero by the int cast.
  int binSize = (int) (1.0 / samplingRate);
  
  try {
    // Position of the current hit within the current bin.
    int counter = 0;
    // limit: number of hits in the current bin; randomIndex: position within the bin to sample.
    int limit, randomIndex;
    if (leftoverBin != NOT_CALCULATED) {
      // Resume the bin that a previous call left unfinished.
      limit = leftoverBin;
      // either NOT_CALCULATED, which means we already sampled from that bin,
      // or the next document to sample
      randomIndex = leftoverIndex;
    } else {
      // No carried-over bin: start a fresh one.
      limit = binSize;
      randomIndex = random.nextInt(binSize);
    }
    final DocIdSetIterator it = docs.bits.iterator();
    for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
      if (counter == randomIndex) {
        sampleDocs.set(doc);
      }
      counter++;
      if (counter >= limit) {
        // Bin finished: start a new full-size bin with a new random index.
        counter = 0;
        limit = binSize;
        randomIndex = random.nextInt(binSize);
      }
    }
    
    if (counter == 0) {
      // we either exhausted the bin and the iterator at the same time, or
      // this segment had no results. in the latter case we might want to
      // carry leftover to the next segment as is, but that complicates the
      // code and doesn't seem so important.
      leftoverBin = leftoverIndex = NOT_CALCULATED;
    } else {
      // The bin is only partly consumed: remember how many hits it still needs.
      leftoverBin = limit - counter;
      if (randomIndex > counter) {
        // the document to sample is in the next bin
        leftoverIndex = randomIndex - counter;
      } else if (randomIndex < counter) {
        // we sampled a document from the bin, so just skip over remaining
        // documents in the bin in the next segment.
        leftoverIndex = NOT_CALCULATED;
      }
      // NOTE(review): when randomIndex == counter neither branch runs and leftoverIndex
      // keeps its previous value - confirm this is intended.
    }
    
    return new MatchingDocs(docs.context, new BitDocIdSet(sampleDocs), docs.totalHits, null);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
 
Example 19
Source File: IndexFileBitSetTest.java    From incubator-retired-blur with Apache License 2.0 4 votes vote down vote up
/**
 * Sets a random number of randomly chosen bits in {@code fixedBitSet}. Both the number
 * of draws and every position are drawn with {@code random.nextInt(numBits)}; positions
 * may repeat.
 *
 * @param random      the random source
 * @param numBits     the exclusive upper bound for the draw count and for each position
 * @param fixedBitSet the bit set to populate
 */
private void populate(Random random, int numBits, FixedBitSet fixedBitSet) {
  int remaining = random.nextInt(numBits);
  while (remaining > 0) {
    fixedBitSet.set(random.nextInt(numBits));
    remaining--;
  }
}
 
Example 20
Source File: SortedIntDocSet.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Sets the bit for every document id held in {@code docs} on the given bit set.
 *
 * @param target the bit set that receives the document ids
 */
@Override
public void addAllTo(FixedBitSet target) {
  for (final int docId : docs) {
    target.set(docId);
  }
}