org.apache.lucene.util.FixedBitSet Java Examples
The following examples show how to use
org.apache.lucene.util.FixedBitSet.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: IndexFileBitSetTest.java From incubator-retired-blur with Apache License 2.0 | 6 votes |
/**
 * Populates a {@link FixedBitSet} of random size, writes it through an {@link IndexFileBitSet}
 * backed by a {@link RAMDirectory}, loads it again and compares both iterators. Finally prints
 * the name and length of every file in the directory.
 */
@Test
public void test() throws IOException {
  final Random rnd = new Random(_seed);
  final int bitCount = rnd.nextInt(10000000);
  final FixedBitSet expected = new FixedBitSet(bitCount);
  populate(rnd, bitCount, expected);

  final RAMDirectory dir = new RAMDirectory();
  final IndexFileBitSet fileBitSet = new IndexFileBitSet(bitCount, "id", "seg1", dir);
  // The bit set is expected not to exist before create() has been called.
  assertFalse(fileBitSet.exists());
  fileBitSet.create(expected.iterator());
  fileBitSet.load();
  checkEquals(expected.iterator(), fileBitSet.iterator(), bitCount);
  fileBitSet.close();

  // Report every file in the directory together with its length.
  final String[] files = dir.listAll();
  for (int i = 0; i < files.length; i++) {
    final String file = files[i];
    System.out.println(file + " " + dir.fileLength(file));
  }
}
Example #2
Source File: BlockJoin.java From lucene-solr with Apache License 2.0 | 6 votes |
/** childInput may also contain parents (i.e. a parent or below will all roll up to that parent) */ public static DocSet toParents(DocSet childInput, BitDocSet parentList, QueryContext qcontext) throws IOException { FixedBitSet parentBits = parentList.getBits(); DocSetCollector collector = new DocSetCollector(qcontext.searcher().maxDoc()); DocIterator iter = childInput.iterator(); int currentParent = -1; while (iter.hasNext()) { int childDoc = iter.nextDoc(); // TODO: skipping if (childDoc <= currentParent) { // use <= since we also allow parents in the input // we already visited this parent continue; } currentParent = parentBits.nextSetBit(childDoc); if (currentParent != DocIdSetIterator.NO_MORE_DOCS) { // only collect the parent the first time we skip to it collector.collect( currentParent ); } } return collector.getDocSet(); }
Example #3
Source File: SolrOwnerScorer.java From SearchServices with GNU Lesser General Public License v3.0 | 6 votes |
public static SolrOwnerScorer createOwnerScorer(Weight weight, LeafReaderContext context, SolrIndexSearcher searcher, String authority) throws IOException { if (AuthorityType.getAuthorityType(authority) == AuthorityType.USER) { DocSet ownedDocs = (DocSet) searcher.cacheLookup(CacheConstants.ALFRESCO_OWNERLOOKUP_CACHE, authority); if (ownedDocs == null) { // Cache miss: query the index for docs where the owner matches the authority. ownedDocs = searcher.getDocSet(new TermQuery(new Term(QueryConstants.FIELD_OWNER, authority))); searcher.cacheInsert(CacheConstants.ALFRESCO_OWNERLOOKUP_CACHE, authority, ownedDocs); } return new SolrOwnerScorer(weight, ownedDocs, context, searcher); } // Return an empty doc set, as the authority isn't a user. return new SolrOwnerScorer(weight, new BitDocSet(new FixedBitSet(0)), context, searcher); }
Example #4
Source File: ExpandComponent.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Creates one collector per set bit of {@code groupBits} (keyed by the bit index) and then
 * stores the constructor arguments. When {@code docValues} is a
 * {@link MultiDocValues.MultiSortedDocValues}, it and its {@code mapping} are kept as well.
 */
public GroupExpandCollector(SortedDocValues docValues, FixedBitSet groupBits, IntHashSet collapsedSet, int limit, Sort sort) throws IOException {
  groups = new LongObjectHashMap<>(collapsedSet.size());

  final DocIdSetIterator groupIt = new BitSetIterator(groupBits, 0); // cost is not useful here
  for (int g = groupIt.nextDoc(); g != DocIdSetIterator.NO_MORE_DOCS; g = groupIt.nextDoc()) {
    groups.put(g, getCollector(limit, sort));
  }

  this.collapsedSet = collapsedSet;
  this.groupBits = groupBits;
  this.docValues = docValues;
  if (docValues instanceof MultiDocValues.MultiSortedDocValues) {
    final MultiDocValues.MultiSortedDocValues multi = (MultiDocValues.MultiSortedDocValues) docValues;
    this.multiSortedDocValues = multi;
    this.ordinalMap = multi.mapping;
  }
}
Example #5
Source File: NumberRangeFacetsTest.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Runs {@code query} and returns a bit set, sized to the index reader's {@code maxDoc()}, in
 * which every collected document is set at index {@code docBase + doc}.
 */
private Bits searchForDocBits(Query query) throws IOException {
  final FixedBitSet hits = new FixedBitSet(indexSearcher.getIndexReader().maxDoc());

  final SimpleCollector bitCollector = new SimpleCollector() {
    /** docBase of the leaf currently being collected. */
    int currentDocBase;

    @Override
    protected void doSetNextReader(LeafReaderContext context) throws IOException {
      currentDocBase = context.docBase;
    }

    @Override
    public void collect(int doc) throws IOException {
      hits.set(currentDocBase + doc);
    }

    @Override
    public ScoreMode scoreMode() {
      return ScoreMode.COMPLETE_NO_SCORES;
    }
  };

  indexSearcher.search(query, bitCollector);
  return hits;
}
Example #6
Source File: BitSetHitStream.java From siren-join with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Allocates a fresh bit set sized to the leaf's {@code maxDoc()}, makes it the current one,
 * inserts it into {@code fixedBitSets} at the leaf's ordinal, and returns a collector that
 * sets each collected doc in the current bit set and increments {@code totalHits}.
 */
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
  final FixedBitSet leafBits = new FixedBitSet(context.reader().maxDoc());
  current = leafBits;
  fixedBitSets.add(context.ord, leafBits);

  return new LeafCollector() {
    @Override
    public void collect(int doc) throws IOException {
      current.set(doc);
      totalHits++;
    }

    @Override
    public void setScorer(Scorer scorer) throws IOException {
      // scores are not used
    }
  };
}
Example #7
Source File: BitDocSet.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Returns a new {@link BitDocSet} holding the bits of this set that are not in {@code other}.
 * This set's bits are cloned first and are not modified.
 */
@Override
public DocSet andNot(DocSet other) {
  final FixedBitSet remaining = bits.clone();
  if (!(other instanceof BitDocSet)) {
    // Generic path: clear each doc of the other set, ignoring ids beyond our length.
    for (DocIterator it = other.iterator(); it.hasNext(); ) {
      final int doc = it.nextDoc();
      if (doc < remaining.length()) {
        remaining.clear(doc);
      }
    }
  } else {
    // Both sides are bit sets: use the bulk operation.
    remaining.andNot(((BitDocSet) other).bits);
  }
  return new BitDocSet(remaining);
}
Example #8
Source File: TaggerRequestHandler.java From lucene-solr with Apache License 2.0 | 6 votes |
private DocList getDocList(int rows, FixedBitSet matchDocIdsBS) throws IOException { //Now we must supply a Solr DocList and add it to the response. // Typically this is gotten via a SolrIndexSearcher.search(), but in this case we // know exactly what documents to return, the order doesn't matter nor does // scoring. // Ideally an implementation of DocList could be directly implemented off // of a BitSet, but there are way too many methods to implement for a minor // payoff. int matchDocs = matchDocIdsBS.cardinality(); int[] docIds = new int[ Math.min(rows, matchDocs) ]; DocIdSetIterator docIdIter = new BitSetIterator(matchDocIdsBS, 1); for (int i = 0; i < docIds.length; i++) { docIds[i] = docIdIter.nextDoc(); } return new DocSlice(0, docIds.length, docIds, null, matchDocs, 1f, TotalHits.Relation.EQUAL_TO); }
Example #9
Source File: TermsIncludingScoreQuery.java From lucene-solr with Apache License 2.0 | 6 votes |
protected void fillDocsAndScores(FixedBitSet matchingDocs, TermsEnum termsEnum) throws IOException { BytesRef spare = new BytesRef(); PostingsEnum postingsEnum = null; for (int i = 0; i < terms.size(); i++) { if (termsEnum.seekExact(terms.get(ords[i], spare))) { postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE); float score = TermsIncludingScoreQuery.this.scores[ords[i]]; for (int doc = postingsEnum.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = postingsEnum.nextDoc()) { matchingDocs.set(doc); // In the case the same doc is also related to a another doc, a score might be overwritten. I think this // can only happen in a many-to-many relation scores[doc] = score; } } } }
Example #10
Source File: TestScorerPerf.java From lucene-solr with Apache License 2.0 | 6 votes |
public void testConjunctions() throws Exception { // test many small sets... the bugs will be found on boundary conditions try (Directory d = newDirectory()) { IndexWriter iw = new IndexWriter(d, newIndexWriterConfig(new MockAnalyzer(random()))); iw.addDocument(new Document()); iw.close(); try (DirectoryReader r = DirectoryReader.open(d)) { IndexSearcher s = newSearcher(r); s.setQueryCache(null); FixedBitSet[] sets = randBitSets(atLeast(1000), atLeast(10)); int iterations = TEST_NIGHTLY ? atLeast(10000) : atLeast(500); doConjunctions(s, sets, iterations, atLeast(5)); doNestedConjunctions(s, sets, iterations, atLeast(3), atLeast(3)); } } }
Example #11
Source File: TestTermAutomatonQuery.java From lucene-solr with Apache License 2.0 | 6 votes |
@Override public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new ConstantScoreWeight(this, boost) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { int maxDoc = context.reader().maxDoc(); FixedBitSet bits = new FixedBitSet(maxDoc); Random random = new Random(seed ^ context.docBase); for(int docID=0;docID<maxDoc;docID++) { if (random.nextFloat() <= density) { bits.set(docID); //System.out.println(" acc id=" + idSource.getInt(docID) + " docID=" + docID); } } return new ConstantScoreScorer(this, score(), scoreMode, new BitSetIterator(bits, bits.approximateCardinality())); } @Override public boolean isCacheable(LeafReaderContext ctx) { return false; } }; }
Example #12
Source File: IndexedDISI.java From lucene-solr with Apache License 2.0 | 6 votes |
private static void flush( int block, FixedBitSet buffer, int cardinality, byte denseRankPower, IndexOutput out) throws IOException { assert block >= 0 && block < 65536; out.writeShort((short) block); assert cardinality > 0 && cardinality <= 65536; out.writeShort((short) (cardinality - 1)); if (cardinality > MAX_ARRAY_LENGTH) { if (cardinality != 65536) { // all docs are set if (denseRankPower != -1) { final byte[] rank = createRank(buffer, denseRankPower); out.writeBytes(rank, rank.length); } for (long word : buffer.getBits()) { out.writeLong(word); } } } else { BitSetIterator it = new BitSetIterator(buffer, cardinality); for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) { out.writeShort((short) doc); } } }
Example #13
Source File: BinaryVectorUtils.java From semanticvectors with BSD 3-Clause "New" or "Revised" License | 6 votes |
/**
 * Builds a new binary vector of {@code v1}'s dimension in which each bit is set when a random
 * draw is at most the summed weight shares of the input vectors that have that bit set.
 * The share of a vector is its weight divided by {@code weight1 + weight2}; the random
 * generator is seeded from {@code v1}.
 */
public static Vector weightedSuperposition(
    BinaryVector v1, double weight1, BinaryVector v2, double weight2) {
  final BinaryVector conclusion =
      (BinaryVector) VectorFactory.createZeroVector(VectorType.BINARY, v1.getDimension());
  final FixedBitSet outBits = conclusion.bitSet;
  final FixedBitSet firstBits = v1.bitSet;
  final FixedBitSet secondBits = v2.bitSet;

  final Random random = new Random();
  random.setSeed(Bobcat.asLong(v1.writeLongToString()));

  final double firstShare = weight1 / (weight1 + weight2);
  final double secondShare = weight2 / (weight1 + weight2);
  for (int bit = 0; bit < v1.getDimension(); bit++) {
    double probability = 0;
    if (firstBits.get(bit)) {
      probability += firstShare;
    }
    if (secondBits.get(bit)) {
      probability += secondShare;
    }
    if (random.nextDouble() <= probability) {
      outBits.set(bit);
    }
  }
  return conclusion;
}
Example #14
Source File: HashQParserPlugin.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Runs a {@code SegmentPartitioner} for every leaf of the top reader context, stores each
 * partitioner's doc bit set at its leaf ordinal, and returns the weight of a constant-score
 * query filtering on those bit sets. Any exception raised while partitioning a segment is
 * rethrown wrapped in an {@link IOException}.
 */
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
  final SolrIndexSearcher solrSearcher = (SolrIndexSearcher) searcher;
  final List<LeafReaderContext> segments = solrSearcher.getTopReaderContext().leaves();
  final FixedBitSet[] bitsPerSegment = new FixedBitSet[segments.size()];

  for (LeafReaderContext segment : segments) {
    try {
      final SegmentPartitioner partitioner =
          new SegmentPartitioner(segment, worker, workers, keys, solrSearcher);
      partitioner.run();
      bitsPerSegment[partitioner.context.ord] = partitioner.docs;
    } catch (Exception e) {
      throw new IOException(e);
    }
  }

  final ConstantScoreQuery filterQuery = new ConstantScoreQuery(new BitsFilter(bitsPerSegment));
  return searcher.rewrite(filterQuery).createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, boost);
}
Example #15
Source File: SortingLeafReader.java From lucene-solr with Apache License 2.0 | 6 votes |
/**
 * Returns the binary doc values of {@code field} remapped through {@code docMap}, or
 * {@code null} when the wrapped reader has none. The remapped values are built once per field
 * and cached in {@code cachedBinaryDVs}, under that map's monitor.
 */
@Override
public BinaryDocValues getBinaryDocValues(String field) throws IOException {
  final BinaryDocValues source = in.getBinaryDocValues(field);
  if (source == null) {
    return null;
  }

  CachedBinaryDVs cached;
  synchronized (cachedBinaryDVs) {
    cached = cachedBinaryDVs.get(field);
    if (cached == null) {
      final FixedBitSet docsWithField = new FixedBitSet(maxDoc());
      final BytesRef[] values = new BytesRef[maxDoc()];
      for (int oldDoc = source.nextDoc(); oldDoc != NO_MORE_DOCS; oldDoc = source.nextDoc()) {
        final int newDoc = docMap.oldToNew(oldDoc);
        docsWithField.set(newDoc);
        // Deep copy of the value returned for the current doc.
        values[newDoc] = BytesRef.deepCopyOf(source.binaryValue());
      }
      cached = new CachedBinaryDVs(values, docsWithField);
      cachedBinaryDVs.put(field, cached);
    }
  }
  return new SortingBinaryDocValues(cached);
}
Example #16
Source File: DocSetBuilder.java From lucene-solr with Apache License 2.0 | 5 votes |
private static int dedup(int[] arr, int length, FixedBitSet acceptDocs) { int pos = 0; int previous = -1; for (int i = 0; i < length; ++i) { final int value = arr[i]; // assert value >= previous; if (value != previous && (acceptDocs == null || acceptDocs.get(value))) { arr[pos++] = value; previous = value; } } return pos; }
Example #17
Source File: BitDocSet.java From lucene-solr with Apache License 2.0 | 5 votes |
@Override public int intersectionSize(DocSet other) { if (other instanceof BitDocSet) { return (int) FixedBitSet.intersectionCount(this.bits, ((BitDocSet) other).bits); } else { // they had better not call us back! return other.intersectionSize(this); } }
Example #18
Source File: SolrAuthoritySetQuery.java From SearchServices with GNU Lesser General Public License v3.0 | 5 votes |
/**
 * Starts a new bit set sized to the leaf's {@code maxDoc()}, makes it the current {@code set},
 * appends it to {@code sets}, and returns this object as the leaf collector.
 */
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
  final FixedBitSet leafBits = new FixedBitSet(context.reader().maxDoc());
  set = leafBits;
  sets.add(leafBits);
  return this;
}
Example #19
Source File: BinaryVector.java From semanticvectors with BSD 3-Clause "New" or "Revised" License | 5 votes |
/** * Performs superposition from a particular row by sweeping a bitset across the voting record * such that for any column in which the incoming bitset contains a '1', 1's are changed * to 0's until a new 1 can be added, facilitating incrementation of the * binary number represented in this column. * * @param incomingBitSet the bitset to be added * @param rowfloor the index of the place in the voting record to start the sweep at */ protected synchronized void superposeBitSetFromRowFloor(FixedBitSet incomingBitSet, int rowfloor) { // Attempt to save space when minimum value across all columns > 0 // by decrementing across the board and raising the minimum where possible. int max = getMaximumSharedWeight(); if (max > 0) { decrement(max); } // Handle overflow: if any column that will be incremented // contains all 1's, add a new row to the voting record. tempSet.xor(tempSet); tempSet.xor(incomingBitSet); for (int x = rowfloor; x < votingRecord.size() && tempSet.cardinality() > 0; x++) { tempSet.and(votingRecord.get(x)); } if (tempSet.cardinality() > 0) { votingRecord.add(new FixedBitSet(dimension)); } // Sweep copy of bitset to be added across rows of voting record. // If a new '1' is added, this position in the copy is changed to zero // and will not affect future rows. // The xor step will transform 1's to 0's or vice versa for // dimension in which the temporary bitset contains a '1'. votingRecord.get(rowfloor).xor(incomingBitSet); tempSet.xor(tempSet); tempSet.xor(incomingBitSet); for (int x = rowfloor + 1; x < votingRecord.size(); x++) { tempSet.andNot(votingRecord.get(x-1)); //if 1 already added, eliminate dimension from tempSet votingRecord.get(x).xor(tempSet); // votingRecord.get(x).trimTrailingZeros(); //attempt to save in sparsely populated rows } }
Example #20
Source File: BitsFilter.java From SearchServices with GNU Lesser General Public License v3.0 | 5 votes |
/**
 * ORs each bit set of {@code bitsFilter} into the bit set at the same position of this filter.
 * The other filter must hold at least as many bit sets as this one.
 */
public void or(BitsFilter bitsFilter) {
  final List<FixedBitSet> otherSets = bitsFilter.bitSets;
  int position = 0;
  while (position < bitSets.size()) {
    final FixedBitSet target = bitSets.get(position);
    final FixedBitSet source = otherSets.get(position);
    target.or(source);
    position++;
  }
}
Example #21
Source File: DocSetUtil.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Copies the first {@code bitSet.size()} set bits of {@code bitSet}, in ascending order, into
 * an int array and wraps it in a {@link SortedIntDocSet}.
 */
public static DocSet toSmallSet(BitDocSet bitSet) {
  final int count = bitSet.size();
  final int[] ids = new int[count];
  final FixedBitSet bits = bitSet.getBits();

  int searchFrom = 0;
  for (int i = 0; i < count; i++) {
    final int doc = bits.nextSetBit(searchFrom);
    ids[i] = doc;
    searchFrom = doc + 1;
  }
  return new SortedIntDocSet(ids);
}
Example #22
Source File: UniqueSlotAcc.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Discards {@code counts} and clears every bit of each non-null bit set in {@code arr}.
 */
@Override
public void reset() throws IOException {
  counts = null;
  for (FixedBitSet slotBits : arr) {
    if (slotBits != null) {
      slotBits.clear(0, slotBits.length());
    }
  }
}
Example #23
Source File: BinaryVector.java From semanticvectors with BSD 3-Clause "New" or "Revised" License | 5 votes |
protected synchronized FixedBitSet concludeVote(long target) { long target2 = (long) Math.ceil((double) target / (double) 2); target2 = target2 - minimum; // Unlikely other than in testing: minimum more than half the votes if (target2 < 0) { FixedBitSet ans = new FixedBitSet(dimension); ans.set(0, dimension); return ans; } boolean even = (target % 2 == 0); FixedBitSet result = concludeVote(target2, votingRecord.size() - 1); if (even) { setTempSetToExactMatches(target2); boolean switcher = true; // 50% chance of being true with split vote. int q = tempSet.nextSetBit(0); while (q != DocIdSetIterator.NO_MORE_DOCS) { switcher = !switcher; if (switcher) tempSet.clear(q); if (q+1 >= tempSet.length()) q = DocIdSetIterator.NO_MORE_DOCS; else q = tempSet.nextSetBit(q+1); } result.andNot(tempSet); } return result; }
Example #24
Source File: UniqueSlotAcc.java From lucene-solr with Apache License 2.0 | 5 votes |
@SuppressWarnings({"unchecked", "rawtypes"}) private Object getShardValue(int slot) throws IOException { if (factory != null) return getShardHLL(slot); FixedBitSet ords = arr[slot]; int unique; if (counts != null) { unique = counts[slot]; } else { unique = ords==null ? 0 : ords.cardinality(); } SimpleOrderedMap map = new SimpleOrderedMap(); map.add("unique", unique); map.add("nTerms", nTerms); int maxExplicit=100; // TODO: make configurable // TODO: share values across buckets if (unique > 0) { List lst = new ArrayList( Math.min(unique, maxExplicit) ); int maxOrd = ords.length(); if (maxOrd > 0) { for (int ord=0; lst.size() < maxExplicit;) { ord = ords.nextSetBit(ord); if (ord == DocIdSetIterator.NO_MORE_DOCS) break; BytesRef val = lookupOrd(ord); Object o = field.getType().toObject(field, val); lst.add(o); if (++ord >= maxOrd) break; } } map.add("vals", lst); } return map; }
Example #25
Source File: TestSort.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Builds a random doc id set over the range {@code [0, sz)}.
 *
 * <p>A random number {@code n} below {@code sz} is drawn from {@code r}, then {@code n} random
 * positions (duplicates possible) are set.
 *
 * <p>A size of zero yields an empty set. Without the guard, {@code r.nextInt(0)} would throw
 * {@link IllegalArgumentException}, because {@link java.util.Random#nextInt(int)} requires a
 * positive bound.
 *
 * @param sz number of bits in the set
 * @return the bit set wrapped in a {@link BitDocIdSet}
 */
public DocIdSet randSet(int sz) {
  FixedBitSet obs = new FixedBitSet(sz);
  if (sz == 0) {
    // Nothing can be set in an empty range; skip the random draws entirely.
    return new BitDocIdSet(obs);
  }
  int n = r.nextInt(sz);
  for (int i = 0; i < n; i++) {
    obs.set(r.nextInt(sz));
  }
  return new BitDocIdSet(obs);
}
Example #26
Source File: PointInSetIncludingScoreQuery.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Stores the result bit set and score array, sizes {@code scratch} to {@code bytesPerDim},
 * opens iterators over the query points and the aggregated join scores, and pre-fetches the
 * first query point and, if there is one, the first score.
 */
private MergePointVisitor(PrefixCodedTerms sortedPackedPoints, FixedBitSet result, float[] scores) throws IOException {
  // Output targets.
  this.result = result;
  this.scores = scores;

  scratch.length = bytesPerDim;

  // Input cursors.
  this.iterator = sortedPackedPoints.iterator();
  this.scoreIterator = aggregatedJoinScores.iterator();

  // Prime the cursors with their first elements.
  nextQueryPoint = this.iterator.next();
  if (this.scoreIterator.hasNext()) {
    nextScore = this.scoreIterator.next();
  }
}
Example #27
Source File: GraphTermsQParserPlugin.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Intersects every packed query point with the point values of each segment and adds the
 * matching global doc ids to a builder, skipping any point whose running match count exceeds
 * {@code maxDocFreq}. The builder's result is finally restricted by the searcher's live docs.
 */
public DocSet getDocSet(IndexSearcher searcher) throws IOException {
  final IndexReaderContext top = ReaderUtil.getTopLevelContext(searcher.getTopReaderContext());
  final List<LeafReaderContext> segs = top.leaves();
  final DocSetBuilder builder =
      new DocSetBuilder(top.reader().maxDoc(), Math.min(64, (top.reader().maxDoc() >>> 10) + 4));

  // Point values per segment; an entry stays null when the segment has none for the field.
  final PointValues[] segPoints = new PointValues[segs.size()];
  for (int seg = 0; seg < segPoints.length; seg++) {
    segPoints[seg] = segs.get(seg).reader().getPointValues(field);
  }

  final int maxCollect = Math.min(maxDocFreq, top.reader().maxDoc());
  final PointSetQuery.CutoffPointVisitor visitor = new PointSetQuery.CutoffPointVisitor(maxCollect);
  final PrefixCodedTerms.TermIterator points = sortedPackedPoints.iterator();

  BytesRef point;
  while ((point = points.next()) != null) {
    visitor.setPoint(point);

    boolean tooFrequent = false;
    for (int seg = 0; seg < segs.size(); seg++) {
      if (segPoints[seg] == null) {
        continue;
      }
      visitor.setBase(segs.get(seg).docBase);
      segPoints[seg].intersect(visitor);
      if (visitor.getCount() > maxDocFreq) {
        tooFrequent = true;
        break;
      }
    }
    if (tooFrequent) {
      // Skip this point entirely; nothing is added for it.
      continue;
    }

    final int collected = visitor.getCount();
    final int[] ids = visitor.getGlobalIds();
    for (int k = 0; k < collected; k++) {
      builder.add(ids[k]);
    }
  }

  return builder.build(getLiveDocs(searcher));
}
Example #28
Source File: TestBlockJoinSelector.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Wraps a child "docs with value" bit set with {@link BlockJoinSelector} and checks which of
 * the parent docs 0, 5, 6, 10, 15 and 19 report a value: only parent 5 is expected to.
 */
public void testDocsWithValue() {
  final int[] parentDocs = {0, 5, 6, 10, 15, 19};

  final BitSet parents = new FixedBitSet(20);
  for (int doc : parentDocs) {
    parents.set(doc);
  }

  final BitSet children = new FixedBitSet(20);
  for (int doc : new int[] {2, 3, 4, 12, 17}) {
    children.set(doc);
  }

  final BitSet childDocsWithValue = new FixedBitSet(20);
  for (int doc : new int[] {2, 3, 4, 8, 16}) {
    childDocsWithValue.set(doc);
  }

  final Bits docsWithValue = BlockJoinSelector.wrap(childDocsWithValue, parents, children);
  for (int parent : parentDocs) {
    if (parent == 5) {
      assertTrue(docsWithValue.get(parent));
    } else {
      assertFalse(docsWithValue.get(parent));
    }
  }
}
Example #29
Source File: TestBlockJoinSelector.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Wraps canned sorted doc values with the MIN and MAX {@link BlockJoinSelector} types and
 * checks the parent docs returned (5, then 15, then none up to 20) and the ordinal reported
 * at each of them.
 */
public void testSortedSelector() throws IOException {
  final BitSet parents = new FixedBitSet(20);
  for (int doc : new int[] {0, 5, 6, 10, 15, 19}) {
    parents.set(doc);
  }

  final BitSet children = new FixedBitSet(20);
  for (int doc : new int[] {2, 3, 4, 12, 17}) {
    children.set(doc);
  }

  // -1 everywhere except the docs that are given an ordinal below.
  final int[] ords = new int[20];
  Arrays.fill(ords, -1);
  ords[2] = 5;
  ords[3] = 7;
  ords[4] = 3;
  ords[12] = 10;
  ords[18] = 10;

  final SortedDocValues mins =
      BlockJoinSelector.wrap(
          DocValues.singleton(new CannedSortedDocValues(ords)),
          BlockJoinSelector.Type.MIN,
          parents,
          toIter(children));
  assertEquals(5, nextDoc(mins, 5));
  assertEquals(3, mins.ordValue());
  assertEquals(15, nextDoc(mins, 15));
  assertEquals(10, mins.ordValue());
  assertNoMoreDoc(mins, 20);

  final SortedDocValues maxs =
      BlockJoinSelector.wrap(
          DocValues.singleton(new CannedSortedDocValues(ords)),
          BlockJoinSelector.Type.MAX,
          parents,
          toIter(children));
  assertEquals(5, nextDoc(maxs, 5));
  assertEquals(7, maxs.ordValue());
  assertEquals(15, nextDoc(maxs, 15));
  assertEquals(10, maxs.ordValue());
  assertNoMoreDoc(maxs, 20);
}
Example #30
Source File: BitDocSet.java From lucene-solr with Apache License 2.0 | 5 votes |
@Override public int unionSize(DocSet other) { if (other instanceof BitDocSet) { // if we don't know our current size, this is faster than // size + other.size - intersection_size return (int) FixedBitSet.unionCount(this.bits, ((BitDocSet) other).bits); } else { // they had better not call us back! return other.unionSize(this); } }