Java Code Examples for org.apache.lucene.index.SortedSetDocValues#getValueCount()

The following examples show how to use org.apache.lucene.index.SortedSetDocValues#getValueCount(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example 1

Source File: LegacyDocValuesIterables.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Converts {@link SortedSetDocValues} into an {@code Iterable<BytesRef>} for all the values.
 *
 * <p>Every iterator starts at ordinal 0 and advances one ordinal per call to {@code next()},
 * resolving each ordinal through {@code values.lookupOrd}, until
 * {@code values.getValueCount()} ordinals have been returned.
 *
 * @param values the doc values whose ordinals are looked up
 * @return an iterable producing one {@link BytesRef} per ordinal
 * @deprecated Consume {@link SortedSetDocValues} instead.
 */
@Deprecated
public static Iterable<BytesRef> valuesIterable(final SortedSetDocValues values) {
  return new Iterable<BytesRef>() {
    @Override
    public Iterator<BytesRef> iterator() {
      return new Iterator<BytesRef>() {
        // Next ordinal to hand out; starts at 0.
        private long nextOrd;

        @Override
        public boolean hasNext() {
          return nextOrd < values.getValueCount();
        }

        @Override
        public BytesRef next() {
          // Honor the Iterator contract rather than passing an out-of-range ordinal to lookupOrd.
          if (!hasNext()) {
            throw new java.util.NoSuchElementException();
          }
          try {
            return values.lookupOrd(nextOrd++);
          } catch (IOException e) {
            // Iterator.next() cannot declare a checked exception; rethrow with the cause attached.
            throw new RuntimeException(e);
          }
        }
      };
    }
  };
}

Example 2

Source File: SortedSetSelector.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Wraps a multi-valued SortedSetDocValues as a single-valued view, using the specified selector.
 *
 * @throws UnsupportedOperationException if the value count is {@code Integer.MAX_VALUE} or more
 */
public static SortedDocValues wrap(SortedSetDocValues sortedSet, Type selector) {
  if (sortedSet.getValueCount() >= Integer.MAX_VALUE) {
    throw new UnsupportedOperationException("fields containing more than " + (Integer.MAX_VALUE-1) + " unique terms are unsupported");
  }

  final SortedDocValues singleValued = DocValues.unwrapSingleton(sortedSet);
  if (singleValued != null) {
    // Indexed as multi-valued but single-valued in practice: sort on the underlying
    // single-valued dv directly. This shortcut is safe for every selector type.
    return singleValued;
  }

  switch (selector) {
    case MIN:
      return new MinValue(sortedSet);
    case MAX:
      return new MaxValue(sortedSet);
    case MIDDLE_MIN:
      return new MiddleMinValue(sortedSet);
    case MIDDLE_MAX:
      return new MiddleMaxValue(sortedSet);
    default:
      throw new AssertionError();
  }
}

Example 3

Source File: TermsQParserPlugin.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Binary-searches the ordinals of {@code docValues}, from {@code startOrd} through the last
 * ordinal, for a term equal to {@code key}.
 *
 * @return the matching ordinal, or {@code -(low + 1)} where {@code low} is the lower bound at
 *     the point the search range became empty
 */
private long lookupTerm(SortedSetDocValues docValues, BytesRef key, long startOrd) throws IOException {
  long lo = startOrd;
  long hi = docValues.getValueCount() - 1;

  while (lo <= hi) {
    final long probe = (lo + hi) >>> 1;
    final int order = docValues.lookupOrd(probe).compareTo(key);

    if (order == 0) {
      return probe; // key found
    }
    if (order < 0) {
      lo = probe + 1; // probed term compares below key: continue above it
    } else {
      hi = probe - 1; // probed term compares above key: continue below it
    }
  }

  return -(lo + 1);  // key not found.
}

Example 4

Source File: TopLevelJoinQuery.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Looks up {@code key} among the terms of {@code docValues} by bisecting the ordinal range
 * {@code [startOrd, getValueCount() - 1]}.
 *
 * @return the ordinal whose term compares equal to {@code key}; if there is none, the negative
 *     value {@code -(low + 1)}, with {@code low} being the final lower bound of the search
 */
private long lookupTerm(SortedSetDocValues docValues, BytesRef key, long startOrd) throws IOException {
  long floor = startOrd;
  long ceiling = docValues.getValueCount() - 1;

  while (floor <= ceiling) {
    final long middle = (floor + ceiling) >>> 1;
    final BytesRef candidate = docValues.lookupOrd(middle);
    final int comparison = candidate.compareTo(key);

    if (comparison > 0) {
      ceiling = middle - 1;
    } else if (comparison < 0) {
      floor = middle + 1;
    } else {
      return middle; // key found
    }
  }

  return -(floor + 1);  // key not found.
}

Example 5

Source File: FacetFieldProcessorByArrayDV.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Counts, per segment ordinal, how many times it occurs across the documents of {@code disi},
 * then forwards every non-zero count to {@code countAcc}.
 *
 * @param multiDv  multi-valued doc values of the segment
 * @param disi     iterator over the documents to count
 * @param toGlobal maps a segment ordinal to the slot passed to {@code countAcc}; when
 *                 {@code null} the segment ordinal itself is used as the slot
 */
private void collectPerSeg(SortedSetDocValues multiDv, DocIdSetIterator disi, LongValues toGlobal) throws IOException {
  final int segOrdCount = (int) multiDv.getValueCount();
  final int[] segCounts = getCountArr(segOrdCount);

  // Pass 1: tally occurrences in segment-ordinal space.
  for (int docId = disi.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = disi.nextDoc()) {
    if (!multiDv.advanceExact(docId)) {
      continue;
    }
    // A negative ordinal ends the ordinals of the current document.
    for (int segOrd = (int) multiDv.nextOrd(); segOrd >= 0; segOrd = (int) multiDv.nextOrd()) {
      segCounts[segOrd]++;
    }
  }

  // Pass 2: hand the non-zero tallies to the accumulator.
  for (int segOrd = 0; segOrd < segOrdCount; segOrd++) {
    final int hits = segCounts[segOrd];
    if (hits <= 0) {
      continue;
    }
    final int slot;
    if (toGlobal == null) {
      slot = segOrd;
    } else {
      slot = (int) toGlobal.get(segOrd);
    }
    countAcc.incrementCount(slot, hits);
  }
}

Example 6

Source File: DocValuesFacets.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Accumulates per-segment multi-valued facet counts, delegating to either
 * {@code accumMultiSeg} or {@code accumMultiGeneric}.
 */
static void accumMulti(int counts[], int startTermIndex, SortedSetDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
  final boolean noPrefixing = startTermIndex == -1;

  // lucene/facets heuristic: no prefixing, and not too many unique values wrt matching docs
  if (noPrefixing && (map == null || si.getValueCount() < disi.cost()*10)) {
    // collect separately per-segment, then map to global ords
    accumMultiSeg(counts, si, disi, subIndex, map);
    return;
  }

  // otherwise: do collect+map on the fly
  accumMultiGeneric(counts, startTermIndex, si, disi, subIndex, map);
}

Example 7

Source File: DocValuesFacets.java From lucene-solr with Apache License 2.0

5 votes

/**
 * "Typical" multi-valued faceting: not too many unique values, no prefixing. Counts in
 * segment-ordinal space first and maps to global ordinals as a separate step.
 *
 * <p>Slot 0 of {@code counts} is incremented for documents without a value; the count for
 * segment ordinal {@code n} is kept at index {@code n + 1}.
 */
static void accumMultiSeg(int counts[], SortedSetDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
  // Without an ordinal map, count straight into the caller's array; otherwise count into a
  // scratch array sized for this segment and migrate afterwards.
  final int[] segCounts = (map == null) ? counts : new int[1 + (int) si.getValueCount()];

  for (int docId = disi.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = disi.nextDoc()) {
    if (!si.advanceExact(docId)) {
      counts[0]++; // missing
      continue;
    }
    // The first ordinal is counted before any sign check, hence do/while.
    int ord = (int) si.nextOrd();
    do {
      segCounts[1 + ord]++;
    } while ((ord = (int) si.nextOrd()) >= 0);
  }

  // migrate to global ords (if necessary)
  if (map != null) {
    migrateGlobal(counts, segCounts, subIndex, map);
  }
}

Example 8

Source File: TopLevelJoinQuery.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Runs {@code q} against {@code searcher} with a {@code MultiValueTermOrdinalCollector} and
 * returns the bit set that was handed to that collector.
 *
 * @return a {@link LongBitSet} sized to {@code docValues.getValueCount()}; presumably the
 *     collector sets the bits of the ordinals it encounters (confirm against the collector)
 */
private static LongBitSet findFieldOrdinalsMatchingQuery(Query q, String field, SolrIndexSearcher searcher, SortedSetDocValues docValues) throws IOException {
  final long ordinalCount = docValues.getValueCount();
  final LongBitSet matchingOrds = new LongBitSet(ordinalCount);

  // Keep the static type Collector so the same search(Query, Collector) overload is chosen.
  final Collector ordinalCollector = new MultiValueTermOrdinalCollector(field, docValues, matchingOrds);
  searcher.search(q, ordinalCollector);

  return matchingOrds;
}

Example 9

Source File: TestFieldCacheVsDocValues.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Asserts that {@code actual} matches {@code expected}: same value count, same documents with
 * the same ordinals in the same order, same term for every ordinal, and equal terms enums.
 * The {@code maxDoc} argument is not read by this method.
 */
private void assertEquals(int maxDoc, SortedSetDocValues expected, SortedSetDocValues actual) throws Exception {
  // can be null for the segment if no docs actually had any SortedDocValues
  // in this case FC.getDocTermsOrds returns EMPTY
  if (actual == null) {
    assertEquals(expected.getValueCount(), 0);
    return;
  }
  assertEquals(expected.getValueCount(), actual.getValueCount());

  // advance both iterators in lockstep, comparing doc ids and then each document's ordinals
  int docID;
  do {
    docID = expected.nextDoc();
    assertEquals(docID, actual.nextDoc());
    if (docID != NO_MORE_DOCS) {
      for (long expectedOrd = expected.nextOrd(); expectedOrd != NO_MORE_ORDS; expectedOrd = expected.nextOrd()) {
        assertEquals(expectedOrd, actual.nextOrd());
      }
      // actual must be exhausted for this document as well
      assertEquals(NO_MORE_ORDS, actual.nextOrd());
    }
  } while (docID != NO_MORE_DOCS);

  // compare ord dictionary
  for (long ord = 0; ord < expected.getValueCount(); ord++) {
    // copy the expected term before the second lookup
    final BytesRef expectedBytes = BytesRef.deepCopyOf(expected.lookupOrd(ord));
    final BytesRef actualBytes = actual.lookupOrd(ord);
    assertEquals(expectedBytes, actualBytes);
  }

  // compare termsenum
  assertEquals(expected.getValueCount(), expected.termsEnum(), actual.termsEnum());
}

Example 10

Source File: DefaultSortedSetDocValuesReaderState.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Creates this, pulling doc values from the specified field, and records in
 * {@code prefixToOrdRange} the first and last ordinal of each run of consecutive terms that
 * share the same first path component.
 *
 * @throws IllegalArgumentException if the field has no SortedSetDocValues, if it has more than
 *     {@code Integer.MAX_VALUE} values, or if a term does not split into exactly two components
 */
public DefaultSortedSetDocValuesReaderState(IndexReader reader, String field) throws IOException {
  this.field = field;
  this.reader = reader;

  // We need this to create thread-safe MultiSortedSetDV
  // per collector:
  final SortedSetDocValues dv = getDocValues();
  if (dv == null) {
    throw new IllegalArgumentException("field \"" + field + "\" was not indexed with SortedSetDocValues");
  }
  if (dv.getValueCount() > Integer.MAX_VALUE) {
    throw new IllegalArgumentException("can only handle valueCount < Integer.MAX_VALUE; got " + dv.getValueCount());
  }
  valueCount = (int) dv.getValueCount();

  // TODO: we can make this more efficient if eg we can be
  // "involved" when OrdinalMap is being created?  Ie see
  // each term/ord it's assigning as it goes...
  String currentDim = null;
  int dimStartOrd = -1;

  // TODO: this approach can work for full hierarchy?;
  // TaxoReader can't do this since ords are not in
  // "sorted order" ... but we should generalize this to
  // support arbitrary hierarchy:
  for (int ord = 0; ord < valueCount; ord++) {
    final BytesRef term = dv.lookupOrd(ord);
    final String[] path = FacetsConfig.stringToPath(term.utf8ToString());
    if (path.length != 2) {
      throw new IllegalArgumentException("this class can only handle 2 level hierarchy (dim/value); got: " + Arrays.toString(path) + " " + term.utf8ToString());
    }

    final String dim = path[0];
    if (dim.equals(currentDim)) {
      continue; // still inside the current dimension's run
    }
    // A new dimension starts here: close the range of the previous one, if any.
    if (currentDim != null) {
      prefixToOrdRange.put(currentDim, new OrdRange(dimStartOrd, ord - 1));
    }
    dimStartOrd = ord;
    currentDim = dim;
  }

  // Close the range of the last dimension seen.
  if (currentDim != null) {
    prefixToOrdRange.put(currentDim, new OrdRange(dimStartOrd, valueCount - 1));
  }
}

Example 11

Source File: DocValuesTermsQuery.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Builds a constant-score weight whose per-segment scorer matches documents having at least
 * one ordinal that corresponds to a term of {@code termData}.
 */
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
  return new ConstantScoreWeight(this, boost) {

    @Override
    public Scorer scorer(LeafReaderContext context) throws IOException {
      final SortedSetDocValues values = DocValues.getSortedSet(context.reader(), field);
      final LongBitSet bits = new LongBitSet(values.getValueCount());

      // Mark the ordinal of every query term that is found in this segment.
      boolean anyTermPresent = false;
      final TermIterator terms = termData.iterator();
      BytesRef term;
      while ((term = terms.next()) != null) {
        final long ord = values.lookupTerm(term);
        if (ord < 0) {
          continue; // negative result: no ordinal to mark for this term
        }
        anyTermPresent = true;
        bits.set(ord);
      }
      if (!anyTermPresent) {
        return null;
      }

      return new ConstantScoreScorer(this, score(), scoreMode, new TwoPhaseIterator(values) {

        @Override
        public boolean matches() throws IOException {
          long ord;
          while ((ord = values.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
            if (bits.get(ord)) {
              return true;
            }
          }
          return false;
        }

        @Override
        public float matchCost() {
          return 3; // lookup in a bitset
        }

      });
    }

    @Override
    public boolean isCacheable(LeafReaderContext ctx) {
      return DocValues.isCacheable(ctx, field);
    }

  };
}

Example 12

Source File: Lucene80DocValuesConsumer.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Writes an index over the terms of {@code values}: for every term whose ordinal has all
 * {@code TERMS_DICT_REVERSE_INDEX_MASK} bits clear, a prefix of the term is appended to
 * {@code data} and the running byte offset is recorded through a {@link DirectMonotonicWriter}.
 * The shift, and the start/length of both the prefix bytes and the address data, are written
 * to {@code meta}.
 */
private void writeTermsIndex(SortedSetDocValues values) throws IOException {
  final long size = values.getValueCount();
  meta.writeInt(Lucene80DocValuesFormat.TERMS_DICT_REVERSE_INDEX_SHIFT);
  // Remember where the prefix bytes begin in the data output.
  long start = data.getFilePointer();

  // Number of offsets handed to the monotonic writer: one per sampled term plus the final one.
  long numBlocks = 1L + ((size + Lucene80DocValuesFormat.TERMS_DICT_REVERSE_INDEX_MASK) >>> Lucene80DocValuesFormat.TERMS_DICT_REVERSE_INDEX_SHIFT);
  // Addresses are buffered in memory and copied to data only after all prefix bytes are written.
  ByteBuffersDataOutput addressBuffer = new ByteBuffersDataOutput();
  DirectMonotonicWriter writer;
  try (ByteBuffersIndexOutput addressOutput = new ByteBuffersIndexOutput(addressBuffer, "temp", "temp")) {
    writer = DirectMonotonicWriter.getInstance(meta, addressOutput, numBlocks, DIRECT_MONOTONIC_BLOCK_SHIFT);
    TermsEnum iterator = values.termsEnum();
    BytesRefBuilder previous = new BytesRefBuilder();
    long offset = 0;
    long ord = 0;
    for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
      if ((ord & Lucene80DocValuesFormat.TERMS_DICT_REVERSE_INDEX_MASK) == 0) {
        // Sampled term: record the offset at which its prefix starts, then write the prefix.
        writer.add(offset);
        final int sortKeyLength;
        if (ord == 0) {
          // no previous term: no bytes to write
          sortKeyLength = 0;
        } else {
          // NOTE(review): presumably the shortest prefix of term that still sorts after
          // previous - confirm against StringHelper.sortKeyLength.
          sortKeyLength = StringHelper.sortKeyLength(previous.get(), term);
        }
        offset += sortKeyLength;
        data.writeBytes(term.bytes, term.offset, sortKeyLength);
      } else if ((ord & Lucene80DocValuesFormat.TERMS_DICT_REVERSE_INDEX_MASK) == Lucene80DocValuesFormat.TERMS_DICT_REVERSE_INDEX_MASK) {
        // All mask bits set: the next ordinal is a sampled one, so keep this term as its predecessor.
        previous.copyBytes(term);
      }
      ++ord;
    }
    // Final offset, i.e. the total number of prefix bytes written.
    writer.add(offset);
    writer.finish();
    // Start and length of the prefix bytes.
    meta.writeLong(start);
    meta.writeLong(data.getFilePointer() - start);
    start = data.getFilePointer();
    addressBuffer.copyTo(data);
    // Start and length of the address data just copied.
    meta.writeLong(start);
    meta.writeLong(data.getFilePointer() - start);
  }
}

Example 13

Source File: TopLevelJoinQuery.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Creates the weight for this join using top-level doc values of the "from" and "to" fields.
 *
 * <p>When {@code searcher} is not a {@link SolrIndexSearcher} the call is delegated to
 * {@code super.createWeight}. Otherwise the ordinals of the "from" field matching {@code q}
 * are collected, converted into ordinals of the "to" field, and the returned weight matches
 * documents that have at least one "to" ordinal set in the resulting bit set. If either field
 * has no values at all, a no-matches weight is returned.
 *
 * @throws RuntimeException wrapping any {@link IOException} raised while building the weight
 */
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
  if (! (searcher instanceof SolrIndexSearcher)) {
    log.debug("Falling back to JoinQueryWeight because searcher [{}] is not the required SolrIndexSearcher", searcher);
    return super.createWeight(searcher, scoreMode, boost);
  }

  final SolrIndexSearcher solrSearcher = (SolrIndexSearcher) searcher;
  // This weight is only used here as the source of the from/to searchers.
  final JoinQueryWeight weight = new JoinQueryWeight(solrSearcher, ScoreMode.COMPLETE_NO_SCORES, 1.0f);
  final SolrIndexSearcher fromSearcher = weight.fromSearcher;
  final SolrIndexSearcher toSearcher = weight.toSearcher;

  try {
    final SortedSetDocValues topLevelFromDocValues = validateAndFetchDocValues(fromSearcher, fromField, "from");
    final SortedSetDocValues topLevelToDocValues = validateAndFetchDocValues(toSearcher, toField, "to");
    if (topLevelFromDocValues.getValueCount() == 0 || topLevelToDocValues.getValueCount() == 0) {
      return createNoMatchesWeight(boost);
    }

    final LongBitSet fromOrdBitSet = findFieldOrdinalsMatchingQuery(q, fromField, fromSearcher, topLevelFromDocValues);
    final LongBitSet toOrdBitSet = new LongBitSet(topLevelToDocValues.getValueCount());
    final BitsetBounds toBitsetBounds = convertFromOrdinalsIntoToField(fromOrdBitSet, topLevelFromDocValues, toOrdBitSet, topLevelToDocValues);

    final boolean toMultivalued = toSearcher.getSchema().getFieldOrNull(toField).multiValued();
    return new ConstantScoreWeight(this, boost) {
      @Override
      public Scorer scorer(LeafReaderContext context) throws IOException {
        if (toBitsetBounds.lower == BitsetBounds.NO_MATCHES) {
          return null;
        }

        // Per-segment doc values serve only as the approximation (docs having the field);
        // the ordinal check in matches() goes through the top-level doc values.
        final DocIdSetIterator toApproximation = (toMultivalued) ? context.reader().getSortedSetDocValues(toField) :
            context.reader().getSortedDocValues(toField);
        if (toApproximation == null) {
          return null;
        }

        final int docBase = context.docBase;
        return new ConstantScoreScorer(this, this.score(), scoreMode, new TwoPhaseIterator(toApproximation) {
          @Override
          public boolean matches() throws IOException {
            // The approximation's doc id is segment-local; add docBase before addressing the top-level view.
            final boolean hasDoc = topLevelToDocValues.advanceExact(docBase + approximation.docID());
            if (hasDoc) {
              for (long ord = topLevelToDocValues.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = topLevelToDocValues.nextOrd()) {
                if (toOrdBitSet.get(ord)) {
                  return true;
                }
              }
            }
            return false;
          }

          @Override
          public float matchCost() {
            return 10.0F;
          }
        });

      }

      @Override
      public boolean isCacheable(LeafReaderContext ctx) {
        return false;
      }
    };
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}

Example 14

Source File: SecureAtomicReader.java From incubator-retired-blur with Apache License 2.0

4 votes

/**
 * Returns a view over the wrapped reader's sorted-set doc values for {@code field} that
 * consults {@code _accessControl} whenever a document is selected, or {@code null} when the
 * wrapped reader has none for the field.
 *
 * <p>While access to the current document is denied, {@code nextOrd()} returns
 * {@code NO_MORE_ORDS} and {@code lookupOrd} fills the result with an empty value.
 */
@Override
public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
  final SortedSetDocValues delegate = in.getSortedSetDocValues(field);
  if (delegate == null) {
    return null;
  }
  return new SortedSetDocValues() {

    // Outcome of the access check made by the most recent setDocument call.
    private boolean _accessGranted;

    @Override
    public void setDocument(int docID) {
      try {
        _accessGranted = _accessControl.hasAccess(ReadType.SORTED_SET_DOC_VALUE, docID);
        if (_accessGranted) {
          delegate.setDocument(docID);
        }
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }

    @Override
    public long nextOrd() {
      return _accessGranted ? delegate.nextOrd() : NO_MORE_ORDS;
    }

    @Override
    public void lookupOrd(long ord, BytesRef result) {
      if (!_accessGranted) {
        // Access denied: expose an empty value instead of the real term.
        result.bytes = BinaryDocValues.MISSING;
        result.length = 0;
        result.offset = 0;
        return;
      }
      delegate.lookupOrd(ord, result);
    }

    @Override
    public long getValueCount() {
      return delegate.getValueCount();
    }
  };
}