Java Code Examples for org.apache.lucene.index.SortedDocValues#getValueCount()

The following examples show how to use org.apache.lucene.index.SortedDocValues#getValueCount(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
/**
 * Converts {@link SortedDocValues} into an {@code Iterable<BytesRef>} for all the values.
 *
 * <p>Every iterator produced walks the ordinals {@code 0 .. getValueCount() - 1} in order and
 * resolves each one through {@link SortedDocValues#lookupOrd(int)}. An {@link IOException}
 * raised by the lookup is rethrown wrapped in a {@link RuntimeException}. Calling
 * {@code next()} after the last value throws {@link java.util.NoSuchElementException}, as the
 * {@link Iterator} contract requires.
 *
 * @param values the doc values whose value dictionary is exposed
 * @return an iterable over all values, in ordinal order
 * @deprecated Consume {@link SortedDocValues} instead. */
@Deprecated
public static Iterable<BytesRef> valuesIterable(final SortedDocValues values) {
  return new Iterable<BytesRef>() {
    @Override
    public Iterator<BytesRef> iterator() {
      return new Iterator<BytesRef>() {
        // next ordinal to hand out; starts at 0 for every new iterator
        private int nextOrd;

        @Override
        public boolean hasNext() {
          return nextOrd < values.getValueCount();
        }

        @Override
        public BytesRef next() {
          // Honor the Iterator contract instead of passing an out-of-range ordinal to lookupOrd.
          if (!hasNext()) {
            throw new java.util.NoSuchElementException();
          }
          try {
            return values.lookupOrd(nextOrd++);
          } catch (IOException e) {
            throw new RuntimeException(e);
          }
        }
      };
    }
  };
}
 
Example 2
Source Project: lucene-solr   File: DocValuesFacets.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Single-valued faceting for one segment: counts are gathered in segment-ordinal space first
 * and, when an {@link OrdinalMap} is supplied, migrated to global ordinals afterwards.
 *
 * <p>Slot 0 of the working array counts documents without a value; slot {@code 1 + ord} counts
 * documents whose ordinal is {@code ord}. When {@code map} is {@code null} the counts are
 * accumulated directly into {@code counts}.
 */
static void accumSingleSeg(int counts[], SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
  final boolean needsGlobalMapping = map != null;

  // Without a map the caller's array is already in the right ordinal space.
  final int[] perSegment = needsGlobalMapping ? new int[1 + si.getValueCount()] : counts;

  for (int docId = disi.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = disi.nextDoc()) {
    final int slot = si.advanceExact(docId) ? 1 + si.ordValue() : 0;
    perSegment[slot]++;
  }

  if (needsGlobalMapping) {
    // fold the segment-local counts into the global-ordinal array
    migrateGlobal(counts, perSegment, subIndex, map);
  }
}
 
Example 3
Source Project: lucene-solr   File: DocValuesFacets.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Accumulates per-segment single-valued facet counts, choosing between two strategies.
 *
 * <p>When a prefix is in play ({@code startTermIndex != -1}), or when an ordinal map is present
 * and the segment has at least ten times as many unique values as {@code disi.cost()}, counts
 * are collected and mapped on the fly. Otherwise they are collected per segment and mapped to
 * global ordinals as a separate step.
 */
static void accumSingle(int counts[], int startTermIndex, SortedDocValues si, DocIdSetIterator disi, int subIndex, OrdinalMap map) throws IOException {
  final boolean prefixing = startTermIndex != -1;
  if (prefixing || (map != null && si.getValueCount() >= disi.cost() * 10)) {
    // prefixing, or too many unique values relative to the matching docs: collect+map on the fly
    accumSingleGeneric(counts, startTermIndex, si, disi, subIndex, map);
    return;
  }
  // lucene/facets heuristic satisfied: collect separately per-segment, then map to global ords
  accumSingleSeg(counts, si, disi, subIndex, map);
}
 
Example 4
/**
 * Counts ordinals for the documents of one segment and feeds the non-zero counts into
 * {@code countAcc}.
 *
 * <p>Counts are kept in an array obtained from {@code getCountArr}, indexed by
 * {@code ordinal + 1}. Slot 0 is never read by the conversion step. When {@code toGlobal} is
 * non-null each segment ordinal is translated through it before the count is recorded;
 * otherwise the segment ordinal is used as the slot directly.
 */
private void collectPerSeg(SortedDocValues singleDv, DocIdSetIterator disi, LongValues toGlobal) throws IOException {
  final int slotCount = singleDv.getValueCount() + 1;
  final int[] segmentCounts = getCountArr(slotCount);

  // Step 1: segment-local counts.
  if (singleDv instanceof FieldCacheImpl.SortedDocValuesImpl.Iter) {
    // field-cache backed values: the ordinal is looked up directly by document id
    final FieldCacheImpl.SortedDocValuesImpl.Iter cached = (FieldCacheImpl.SortedDocValuesImpl.Iter) singleDv;
    for (int docId = disi.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = disi.nextDoc()) {
      segmentCounts[cached.getOrd(docId) + 1]++;
    }
  } else {
    for (int docId = disi.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = disi.nextDoc()) {
      if (!singleDv.advanceExact(docId)) {
        continue; // document has no value in this field
      }
      segmentCounts[singleDv.ordValue() + 1]++;
    }
  }

  // Step 2: convert segment-local counts to global counts, skipping slot 0.
  for (int ord = 0; ord + 1 < slotCount; ord++) {
    final int count = segmentCounts[ord + 1];
    if (count <= 0) {
      continue;
    }
    final int globalSlot = (toGlobal != null) ? (int) toGlobal.get(ord) : ord;
    countAcc.incrementCount(globalSlot, count);
  }
}
 
Example 5
/**
 * Asserts that two {@link SortedDocValues} instances are equivalent: same value count, same
 * sequence of documents with identical ordinal and binary value per document, same ordinal
 * dictionary, and same terms enum.
 *
 * <p>A {@code null} {@code actual} is accepted only when {@code expected} holds no values.
 */
private void assertEquals(int maxDoc, SortedDocValues expected, SortedDocValues actual) throws Exception {
  if (actual == null) {
    // can be null for the segment if no docs actually had any SortedDocValues;
    // in this case FC.getDocTermsOrds returns EMPTY
    assertEquals(expected.getValueCount(), 0);
    return;
  }
  assertEquals(expected.getValueCount(), actual.getValueCount());

  // per-document comparison: both iterators must visit the same docs in lockstep
  for (int expectedDoc = expected.nextDoc(); expectedDoc != NO_MORE_DOCS; expectedDoc = expected.nextDoc()) {
    assertEquals(expectedDoc, actual.nextDoc());
    assertEquals(expected.ordValue(), actual.ordValue());
    assertEquals(expected.binaryValue(), actual.binaryValue());
  }
  // expected is exhausted, so actual must be exhausted too
  assertEquals(NO_MORE_DOCS, actual.nextDoc());

  // dictionary comparison, ordinal by ordinal
  for (int ord = 0; ord < expected.getValueCount(); ord++) {
    // copy the expected bytes before performing the second lookup
    final BytesRef wanted = BytesRef.deepCopyOf(expected.lookupOrd(ord));
    final BytesRef found = actual.lookupOrd(ord);
    assertEquals(wanted, found);
  }

  // terms enum comparison
  assertEquals(expected.getValueCount(), expected.termsEnum(), actual.termsEnum());
}
 
Example 6
/**
 * Returns the wrapped reader's {@link SortedDocValues} for {@code field}, filtered through
 * {@code _accessControl}.
 *
 * <p>Value lookups and the value count are delegated unchanged. {@code getOrd} returns
 * {@code -1} for any document for which {@code hasAccess(ReadType.SORTED_DOC_VALUE, docID)} is
 * false. Returns {@code null} when the wrapped reader has no sorted doc values for the field.
 */
@Override
public SortedDocValues getSortedDocValues(String field) throws IOException {
  final SortedDocValues delegate = in.getSortedDocValues(field);
  if (delegate == null) {
    return null;
  }

  return new SortedDocValues() {

    @Override
    public int getOrd(int docID) {
      try {
        // -1 is the default "missing value" ordinal handed out when access is denied
        return _accessControl.hasAccess(ReadType.SORTED_DOC_VALUE, docID)
            ? delegate.getOrd(docID)
            : -1;
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }

    @Override
    public int getValueCount() {
      return delegate.getValueCount();
    }

    @Override
    public void lookupOrd(int ord, BytesRef result) {
      delegate.lookupOrd(ord, result);
    }
  };
}
 
Example 7
Source Project: lucene-solr   File: JoinUtil.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Creates a query-time join that uses global ordinals over a dedicated join field.
 *
 * <p>Restrictions and requirements of this join:
 * <ol>
 *   <li>A document can only refer to one other document (but can be referred to by one or more
 *       documents).</li>
 *   <li>Documents on each side of the join must be distinguishable. Typically this is done by
 *       adding an extra field that identifies the "from" and "to" side; the fromQuery and
 *       toQuery must then take this into account.</li>
 *   <li>There must be a single sorted doc values join field used by both the "from" and "to"
 *       documents. This join field should store the join values as UTF-8 strings.</li>
 *   <li>An ordinal map must be provided that is created on top of the join field.</li>
 * </ol>
 *
 * <p>Note: min and max filtering and the avg score mode require this join to keep track of the
 * number of times a document matches per join value. This increases the per-join cost in terms
 * of execution time and memory.
 *
 * @param joinField   The {@link SortedDocValues} field containing the join values
 * @param fromQuery   The query containing the actual user query. The fromQuery can only match
 *                    "from" documents.
 * @param toQuery     The query identifying all documents on the "to" side.
 * @param searcher    The index searcher used to execute the from query
 * @param scoreMode   Instructs how scores from the fromQuery are mapped to the returned query
 * @param ordinalMap  The ordinal map constructed over the joinField. For a single-segment index
 *                    no ordinal map needs to be provided (any supplied map is ignored).
 * @param min         Optionally the minimum number of "from" documents that are required to
 *                    match for a "to" document to be a match. The min is inclusive. Setting min
 *                    to 0 and max to <code>Integer.MAX_VALUE</code> disables the min and max
 *                    "from" documents filtering
 * @param max         Optionally the maximum number of "from" documents that are allowed to
 *                    match for a "to" document to be a match. The max is inclusive. Setting min
 *                    to 0 and max to <code>Integer.MAX_VALUE</code> disables the min and max
 *                    "from" documents filtering
 * @return a {@link Query} instance that can be used to join documents based on the join field
 * @throws IOException If I/O related errors occur
 * @throws IllegalArgumentException if the index has more than one segment and no ordinal map
 *                    was given, or if the score mode is not supported
 */
public static Query createJoinQuery(String joinField,
                                    Query fromQuery,
                                    Query toQuery,
                                    IndexSearcher searcher,
                                    ScoreMode scoreMode,
                                    OrdinalMap ordinalMap,
                                    int min,
                                    int max) throws IOException {
  final int segmentCount = searcher.getIndexReader().leaves().size();
  if (segmentCount == 0) {
    return new MatchNoDocsQuery("JoinUtil.createJoinQuery with no segments");
  }

  final long valueCount;
  if (segmentCount == 1) {
    // A single segment needs no ordinal map: segment ordinals are already global.
    ordinalMap = null;
    LeafReader onlyLeaf = searcher.getIndexReader().leaves().get(0).reader();
    SortedDocValues joinValues = onlyLeaf.getSortedDocValues(joinField);
    if (joinValues == null) {
      return new MatchNoDocsQuery("JoinUtil.createJoinQuery: no join values");
    }
    valueCount = joinValues.getValueCount();
  } else {
    if (ordinalMap == null) {
      throw new IllegalArgumentException("OrdinalMap is required, because there is more than 1 segment");
    }
    valueCount = ordinalMap.getValueCount();
  }

  final Query rewrittenFromQuery = searcher.rewrite(fromQuery);
  final Query rewrittenToQuery = searcher.rewrite(toQuery);

  // Fast path: no scoring and no min/max filtering, so the plain ordinals collector suffices.
  if (scoreMode == ScoreMode.None && min <= 0 && max == Integer.MAX_VALUE) {
    GlobalOrdinalsCollector ordinalsOnly = new GlobalOrdinalsCollector(joinField, ordinalMap, valueCount);
    searcher.search(rewrittenFromQuery, ordinalsOnly);
    return new GlobalOrdinalsQuery(ordinalsOnly.getCollectorOrdinals(), joinField, ordinalMap, rewrittenToQuery,
        rewrittenFromQuery, searcher.getTopReaderContext().id());
  }

  final GlobalOrdinalsWithScoreCollector scoringCollector;
  switch (scoreMode) {
    case Total:
      scoringCollector = new GlobalOrdinalsWithScoreCollector.Sum(joinField, ordinalMap, valueCount, min, max);
      break;
    case Min:
      scoringCollector = new GlobalOrdinalsWithScoreCollector.Min(joinField, ordinalMap, valueCount, min, max);
      break;
    case Max:
      scoringCollector = new GlobalOrdinalsWithScoreCollector.Max(joinField, ordinalMap, valueCount, min, max);
      break;
    case Avg:
      scoringCollector = new GlobalOrdinalsWithScoreCollector.Avg(joinField, ordinalMap, valueCount, min, max);
      break;
    case None:
      // reached only when min/max filtering is active (the unfiltered case returned above)
      scoringCollector = new GlobalOrdinalsWithScoreCollector.NoScore(joinField, ordinalMap, valueCount, min, max);
      break;
    default:
      throw new IllegalArgumentException(String.format(Locale.ROOT, "Score mode %s isn't supported.", scoreMode));
  }

  searcher.search(rewrittenFromQuery, scoringCollector);
  return new GlobalOrdinalsWithScoreQuery(scoringCollector, scoreMode, joinField, ordinalMap, rewrittenToQuery,
      rewrittenFromQuery, min, max, searcher.getTopReaderContext().id());
}
 
Example 8
/**
 * Writes one sorted doc-values field: first a description of which documents have a value,
 * then the per-document ordinals, and finally the terms dictionary (via {@code addTermsDict}).
 *
 * <p>Entries are written to {@code meta} in a fixed order, while bulk bytes go to {@code data};
 * the offsets and lengths recorded in {@code meta} are taken from {@code data}'s file pointer.
 * The field's values are fetched from {@code valuesProducer} anew before each pass, because
 * each pass iterates the instance to {@code NO_MORE_DOCS}.
 *
 * @param field the field being written
 * @param valuesProducer source of the field's {@link SortedDocValues}
 * @throws IOException if reading the values or writing the output fails
 */
private void doAddSortedField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
  SortedDocValues values = valuesProducer.getSorted(field);
  // Pass 1: count the documents that have a value for this field.
  int numDocsWithField = 0;
  for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
    numDocsWithField++;
  }

  if (numDocsWithField == 0) {
    // No document has a value: offset -2 is written and no bitset data follows.
    meta.writeLong(-2); // docsWithFieldOffset
    meta.writeLong(0L); // docsWithFieldLength
    meta.writeShort((short) -1); // jumpTableEntryCount
    meta.writeByte((byte) -1);   // denseRankPower
  } else if (numDocsWithField == maxDoc) {
    // Every document has a value: offset -1 is written and no bitset data follows.
    meta.writeLong(-1); // docsWithFieldOffset
    meta.writeLong(0L); // docsWithFieldLength
    meta.writeShort((short) -1); // jumpTableEntryCount
    meta.writeByte((byte) -1);   // denseRankPower
  } else {
    // Only some documents have a value: write the set of docs to data via IndexedDISI and
    // record where it starts and how long it is.
    long offset = data.getFilePointer();
    meta.writeLong(offset); // docsWithFieldOffset
    // the previous instance was consumed by the counting pass, so fetch a fresh one
    values = valuesProducer.getSorted(field);
    final short jumpTableentryCount = IndexedDISI.writeBitSet(values, data, IndexedDISI.DEFAULT_DENSE_RANK_POWER);
    meta.writeLong(data.getFilePointer() - offset); // docsWithFieldLength
    meta.writeShort(jumpTableentryCount);
    meta.writeByte(IndexedDISI.DEFAULT_DENSE_RANK_POWER);
  }

  meta.writeInt(numDocsWithField);
  if (values.getValueCount() <= 1) {
    // At most one distinct value: no per-document ordinals are written (bitsPerValue = 0).
    meta.writeByte((byte) 0); // bitsPerValue
    meta.writeLong(0L); // ordsOffset
    meta.writeLong(0L); // ordsLength
  } else {
    // Bit width is derived from the largest ordinal, getValueCount() - 1.
    int numberOfBitsPerOrd = DirectWriter.unsignedBitsRequired(values.getValueCount() - 1);
    meta.writeByte((byte) numberOfBitsPerOrd); // bitsPerValue
    long start = data.getFilePointer();
    meta.writeLong(start); // ordsOffset
    DirectWriter writer = DirectWriter.getInstance(data, numDocsWithField, numberOfBitsPerOrd);
    // Pass 2: fresh instance again, then write one ordinal per document that has a value.
    values = valuesProducer.getSorted(field);
    for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
      writer.add(values.ordValue());
    }
    writer.finish();
    meta.writeLong(data.getFilePointer() - start); // ordsLength
  }

  // The terms dictionary is written from yet another fresh instance, wrapped by DocValues.singleton.
  addTermsDict(DocValues.singleton(valuesProducer.getSorted(field)));
}
 
Example 9
/**
 * Produces per-document reverse-ordinal values for {@code field}.
 *
 * <p>Ordinals are resolved against a top-level reader, so the segment's {@code docBase} is added
 * to every incoming document id. For a document with a value the result is
 * {@code valueCount - ord - 1}; for a document without one it is {@code valueCount}.
 *
 * @throws SolrException with {@code BAD_REQUEST} when the context carries a
 *         {@code SolrIndexSearcher} and the field is a Points based field
 */
@Override
@SuppressWarnings({"rawtypes"})
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
  final int docBase = readerContext.docBase;
  final Object searcherObj = context.get("searcher");

  final LeafReader topLevelReader;
  if (searcherObj instanceof SolrIndexSearcher) {
    @SuppressWarnings("resource")  final SolrIndexSearcher solrSearcher = (SolrIndexSearcher) searcherObj;
    final SchemaField schemaField = solrSearcher.getSchema().getFieldOrNull(field);
    if (schemaField != null && schemaField.getType().isPointField()) {
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
          "rord() is not supported over Points based field " + field);
    }

    final boolean singleValuedNumericWithoutDocValues = schemaField != null
        && !schemaField.hasDocValues()
        && !schemaField.multiValued()
        && schemaField.getType().getNumberType() != null;

    if (singleValuedNumericWithoutDocValues) {
      // it's a single-valued numeric field: we must currently create insanity :(
      final List<LeafReaderContext> leaves = solrSearcher.getIndexReader().leaves();
      final LeafReader[] wrappedLeaves = new LeafReader[leaves.size()];
      for (int i = 0; i < wrappedLeaves.length; i++) {
        wrappedLeaves[i] = Insanity.wrapInsanity(leaves.get(i).reader(), field);
      }
      topLevelReader = SlowCompositeReaderWrapper.wrap(new MultiReader(wrappedLeaves));
    } else {
      // reuse ordinalmap
      topLevelReader = solrSearcher.getSlowAtomicReader();
    }
  } else {
    final IndexReader topReader = ReaderUtil.getTopLevelContext(readerContext).reader();
    topLevelReader = SlowCompositeReaderWrapper.wrap(topReader);
  }

  // if it's e.g. tokenized/multivalued, emulate old behavior of single-valued fc
  final SortedDocValues ordIndex =
      SortedSetSelector.wrap(DocValues.getSortedSet(topLevelReader, field), SortedSetSelector.Type.MIN);
  final int valueCount = ordIndex.getValueCount();

  return new IntDocValues(this) {
    @Override
    public int intVal(int doc) throws IOException {
      final int target = doc + docBase;
      // only move forward; the iterator may already be on or past the target
      if (target > ordIndex.docID()) {
        ordIndex.advance(target);
      }
      return (target == ordIndex.docID()) ? (valueCount - ordIndex.ordValue() - 1) : valueCount;
    }
  };
}