Java Code Examples for org.apache.lucene.index.DocValues#unwrapSingleton()

The following examples show how to use org.apache.lucene.index.DocValues#unwrapSingleton(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out related API usage in the sidebar.
Example 1
Source File: LongValueFacetCounts.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Counts every value of the given sorted-numeric doc-values field, visiting
 * each leaf (segment) of the reader in turn.
 *
 * <p>Leaves without doc values for the field are skipped. When a leaf's values
 * unwrap to a single-valued view, counting is delegated to
 * {@code countAllOneSegment}; otherwise every document is walked, its value
 * count is added to {@code totCount}, and each value is passed to
 * {@code increment}.
 *
 * @param reader reader whose leaves are scanned
 * @param field  name of the doc-values field to count
 * @throws IOException if reading doc values fails
 */
private void countAllMultiValued(IndexReader reader, String field) throws IOException {
  for (LeafReaderContext leaf : reader.leaves()) {
    SortedNumericDocValues multiValues = leaf.reader().getSortedNumericDocValues(field);
    if (multiValues == null) {
      // no doc values for this field in this segment
      continue;
    }

    NumericDocValues unwrapped = DocValues.unwrapSingleton(multiValues);
    if (unwrapped != null) {
      // single-valued in practice: take the dedicated single-valued path
      countAllOneSegment(unwrapped);
      continue;
    }

    while (multiValues.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      final int valueCount = multiValues.docValueCount();
      totCount += valueCount;
      for (int remaining = valueCount; remaining > 0; remaining--) {
        increment(multiValues.nextValue());
      }
    }
  }
}
 
Example 2
Source File: SortedSetSelector.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Exposes a multi-valued {@code SortedSetDocValues} as a single-valued
 * {@code SortedDocValues}, picking one value per document according to
 * {@code selector}.
 *
 * @param sortedSet the multi-valued doc values to wrap
 * @param selector  which of a document's values the view should expose
 * @return the underlying single-valued doc values when {@code sortedSet}
 *         unwraps to a singleton, otherwise a selector-specific view
 * @throws UnsupportedOperationException if the value count is at least
 *         {@code Integer.MAX_VALUE}
 */
public static SortedDocValues wrap(SortedSetDocValues sortedSet, Type selector) {
  if (sortedSet.getValueCount() >= Integer.MAX_VALUE) {
    throw new UnsupportedOperationException("fields containing more than " + (Integer.MAX_VALUE-1) + " unique terms are unsupported");
  }

  final SortedDocValues unwrapped = DocValues.unwrapSingleton(sortedSet);
  if (unwrapped != null) {
    // Indexed as multi-valued but single-valued in practice: the underlying
    // single-valued doc values can be used directly, whatever the selector.
    return unwrapped;
  }

  switch (selector) {
    case MIN:
      return new MinValue(sortedSet);
    case MAX:
      return new MaxValue(sortedSet);
    case MIDDLE_MIN:
      return new MiddleMinValue(sortedSet);
    case MIDDLE_MAX:
      return new MiddleMaxValue(sortedSet);
    default:
      throw new AssertionError();
  }
}
 
Example 3
Source File: SortedNumericDVIndexFieldData.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Loads the sorted-numeric doc values for {@code field} and exposes them as
 * double values.
 *
 * <p>If the doc values unwrap to a single-valued view, a singleton wrapper is
 * returned together with the unwrapped "docs with field" bits; otherwise a
 * multi-valued wrapper is returned.
 *
 * @throws IllegalStateException if the doc values cannot be loaded
 */
@Override
public SortedNumericDoubleValues getDoubleValues() {
    try {
        final SortedNumericDocValues multiValued = DocValues.getSortedNumeric(reader, field);
        final NumericDocValues singleValued = DocValues.unwrapSingleton(multiValued);

        if (singleValued == null) {
            return new MultiFloatValues(multiValued);
        }
        return FieldData.singleton(
                new SingleFloatValues(singleValued),
                DocValues.unwrapSingletonBits(multiValued));
    } catch (IOException e) {
        throw new IllegalStateException("Cannot load doc values", e);
    }
}
 
Example 4
Source File: GeoPointArrayAtomicFieldData.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a geo-point view over this field data's ordinals.
 *
 * <p>When the ordinals unwrap to a single-valued view, a singleton wrapper is
 * returned; otherwise a multi-valued view backed directly by the ordinals is
 * returned. Both views reset one shared {@code GeoPoint} scratch instance on
 * every lookup instead of allocating a new point.
 */
@Override
public MultiGeoPointValues getGeoPointValues() {
    final RandomAccessOrds ords = ordinals.ordinals();
    final SortedDocValues singleOrds = DocValues.unwrapSingleton(ords);
    // scratch point shared by whichever view is returned
    final GeoPoint scratch = new GeoPoint(Double.NaN, Double.NaN);

    if (singleOrds == null) {
        return new MultiGeoPointValues() {
            @Override
            public void setDocument(int docId) {
                ords.setDocument(docId);
            }

            @Override
            public int count() {
                return ords.cardinality();
            }

            @Override
            public GeoPoint valueAt(int index) {
                return scratch.resetFromIndexHash(indexedPoints.get(ords.ordAt(index)));
            }
        };
    }

    final GeoPointValues singleValues = new GeoPointValues() {
        @Override
        public GeoPoint get(int docID) {
            final int ord = singleOrds.getOrd(docID);
            if (ord < 0) {
                // negative ordinal: report a NaN/NaN point
                return scratch.reset(Double.NaN, Double.NaN);
            }
            return scratch.resetFromIndexHash(indexedPoints.get(ord));
        }
    };
    return FieldData.singleton(singleValues, DocValues.docsWithValue(singleOrds, maxDoc));
}
 
Example 5
Source File: LongValueFacetCounts.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Counts values directly from {@code SortedNumericDocValues}, restricted to
 * the matching documents of each segment.
 *
 * <p>Segments without doc values for the field are skipped. When a segment's
 * values unwrap to a single-valued view, counting is delegated to
 * {@code countOneSegment}; otherwise the hit iterator is intersected with the
 * doc values and, for every document in the intersection, its value count is
 * added to {@code totCount} and each value is passed to {@code increment}.
 *
 * @param field        name of the doc-values field to count
 * @param matchingDocs per-segment hits to count over
 * @throws IOException if reading doc values fails
 */
private void countMultiValued(String field, List<MatchingDocs> matchingDocs) throws IOException {
  for (MatchingDocs segmentHits : matchingDocs) {
    SortedNumericDocValues multiValues =
        segmentHits.context.reader().getSortedNumericDocValues(field);
    if (multiValues == null) {
      // no doc values for this field in this segment
      continue;
    }

    NumericDocValues unwrapped = DocValues.unwrapSingleton(multiValues);
    if (unwrapped != null) {
      countOneSegment(unwrapped, segmentHits);
      continue;
    }

    // visit only documents that are hits and also carry a value
    DocIdSetIterator matches = ConjunctionDISI.intersectIterators(
        Arrays.asList(segmentHits.bits.iterator(), multiValues));

    while (matches.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      final int valueCount = multiValues.docValueCount();
      totCount += valueCount;
      for (int remaining = valueCount; remaining > 0; remaining--) {
        increment(multiValues.nextValue());
      }
    }
  }
}
 
Example 6
Source File: IndexSortSortedNumericDocValuesRangeQuery.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a constant-score weight that uses a doc-id iterator obtained from
 * {@code getDocIdSetIterator} when a segment qualifies, and otherwise defers
 * to the fallback query's weight.
 *
 * <p>A segment qualifies when the field's sorted-numeric doc values unwrap to
 * a single-valued view and the segment's index sort has this field as its
 * first sort field.
 */
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {
  final Weight fallbackWeight = fallbackQuery.createWeight(searcher, scoreMode, boost);

  return new ConstantScoreWeight(this, boost) {
    @Override
    public Scorer scorer(LeafReaderContext context) throws IOException {
      SortedNumericDocValues multiValues = DocValues.getSortedNumeric(context.reader(), field);
      NumericDocValues singleValues = DocValues.unwrapSingleton(multiValues);
      if (singleValues == null) {
        return fallbackWeight.scorer(context);
      }

      Sort indexSort = context.reader().getMetaData().getSort();
      if (indexSort == null) {
        return fallbackWeight.scorer(context);
      }

      SortField[] sortFields = indexSort.getSort();
      if (sortFields.length == 0 || !sortFields[0].getField().equals(field)) {
        return fallbackWeight.scorer(context);
      }

      DocIdSetIterator disi = getDocIdSetIterator(sortFields[0], context, singleValues);
      return new ConstantScoreScorer(this, score(), scoreMode, disi);
    }

    @Override
    public boolean isCacheable(LeafReaderContext ctx) {
      // Both code paths are expected to produce the same results, so
      // cacheability is decided by the fallback query alone.
      return fallbackWeight.isCacheable(ctx);
    }
  };
}
 
Example 7
Source File: SortedNumericSelector.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Exposes a multi-valued {@code SortedNumericDocValues} as a single-valued
 * {@code NumericDocValues}, picking one value per document according to
 * {@code selector} and converting it according to {@code numericType}.
 *
 * @param sortedNumeric the multi-valued doc values to wrap
 * @param selector      which of a document's values the view should expose
 * @param numericType   one of INT, LONG, FLOAT or DOUBLE
 * @return a single-valued view; for FLOAT and DOUBLE each value is passed
 *         through the matching {@code NumericUtils} sortable-bits conversion
 * @throws IllegalArgumentException if {@code numericType} is not one of the
 *         four numeric types
 */
public static NumericDocValues wrap(SortedNumericDocValues sortedNumeric, Type selector, SortField.Type numericType) {
  final boolean isNumeric = numericType == SortField.Type.INT
      || numericType == SortField.Type.LONG
      || numericType == SortField.Type.FLOAT
      || numericType == SortField.Type.DOUBLE;
  if (!isNumeric) {
    throw new IllegalArgumentException("numericType must be a numeric type");
  }

  final NumericDocValues unwrapped = DocValues.unwrapSingleton(sortedNumeric);
  final NumericDocValues selected;
  if (unwrapped == null) {
    switch (selector) {
      case MIN:
        selected = new MinValue(sortedNumeric);
        break;
      case MAX:
        selected = new MaxValue(sortedNumeric);
        break;
      default:
        throw new AssertionError();
    }
  } else {
    // Indexed as multi-valued but single-valued in practice: the underlying
    // single-valued doc values can be used directly, whatever the selector.
    selected = unwrapped;
  }

  // undo the NumericUtils sortable encoding for floating-point types
  if (numericType == SortField.Type.FLOAT) {
    return new FilterNumericDocValues(selected) {
      @Override
      public long longValue() throws IOException {
        return NumericUtils.sortableFloatBits((int) in.longValue());
      }
    };
  }
  if (numericType == SortField.Type.DOUBLE) {
    return new FilterNumericDocValues(selected) {
      @Override
      public long longValue() throws IOException {
        return NumericUtils.sortableDoubleBits(in.longValue());
      }
    };
  }
  return selected;
}
 
Example 8
Source File: FacetFieldProcessorByArrayDV.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Walks every segment of the searcher's index and feeds the documents of the
 * base domain to one of the {@code collectPerSeg}, {@code collectCounts} or
 * {@code collectDocs} overloads.
 *
 * <p>Returns immediately when there are no terms to collect or the domain is
 * smaller than the effective mincount. Per-segment accumulation is used only
 * when counts alone are needed over the full term range, and is chosen either
 * by a hits-per-bucket heuristic or by the explicit {@code freq.perSeg}
 * override.
 *
 * @throws IOException if reading doc values or iterating the domain fails
 */
@Override
protected void collectDocs() throws IOException {
  final int domainSize = fcontext.base.size();

  if (nTerms <= 0 || domainSize < effectiveMincount) { // TODO: what about allBuckets? missing bucket?
    return;
  }

  // TODO: refactor some of this logic into a base class
  final boolean countOnly = collectAcc == null && allBucketsAcc == null;
  final boolean fullRange = startTermIndex == 0 && endTermIndex == si.getValueCount();

  // Are we expecting many hits per bucket?
  // FUTURE: pro-rate for nTerms?
  // FUTURE: better take into account number of values in multi-valued fields.  This info is available for indexed fields.
  // FUTURE: take into account that bigger ord maps are more expensive than smaller ones
  // One test: 5M doc index, faceting on a single-valued field with almost 1M unique values; the crossover point where
  // global counting became slower than per-segment counting was a domain of 658k docs.  At that point, the top 10
  // buckets had 6-7 matches each.  This was for heap docvalues produced by UninvertingReader.
  // Since those values were randomly distributed, the domain multiplier is rounded up to allow for less random
  // real-world data.
  final long domainMultiplier = multiValuedField ? 4L : 2L;
  final boolean manyHitsPerBucket = domainSize * domainMultiplier > (si.getValueCount() + 3);  // +3 to increase test coverage with small tests

  // If only counts are calculated, there is no prefixing, and many documents per unique value are expected,
  // collect per-segment before mapping to global ords at the end.  This saves redundant seg->global ord mappings.
  // FUTURE: there are probably some other non "countOnly" cases where this can be used as well (i.e. those where
  // the docid is not used)
  final boolean canDoPerSeg = countOnly && fullRange;

  final boolean accumSeg;
  if (freq.perSeg == null) {
    accumSeg = manyHitsPerBucket && canDoPerSeg;
  } else {
    accumSeg = canDoPerSeg && freq.perSeg;  // internal - override perSeg heuristic
  }

  final List<LeafReaderContext> leaves = fcontext.searcher.getIndexReader().leaves();
  final Filter filter = fcontext.base.getTopFilter();

  for (int subIdx = 0; subIdx < leaves.size(); subIdx++) {
    final LeafReaderContext subCtx = leaves.get(subIdx);

    setNextReaderFirstPhase(subCtx);

    final DocIdSet dis = filter.getDocIdSet(subCtx, null); // solr docsets already exclude any deleted docs
    final DocIdSetIterator disi = dis.iterator();

    SortedDocValues singleDv = null;
    SortedSetDocValues multiDv = null;
    if (multiValuedField) {
      // TODO: get sub from multi?
      final SortedSetDocValues segMulti = subCtx.reader().getSortedSetDocValues(sf.getName());
      multiDv = segMulti != null ? segMulti : DocValues.emptySortedSet();
      // some codecs may optimize SortedSet storage for single-valued fields;
      // the unwrap yields null if this is not a wrapped single-valued docvalues.
      if (unwrap_singleValued_multiDv) {
        singleDv = DocValues.unwrapSingleton(multiDv);
      }
    } else {
      final SortedDocValues segSingle = subCtx.reader().getSortedDocValues(sf.getName());
      singleDv = segSingle != null ? segSingle : DocValues.emptySorted();
    }

    final LongValues toGlobal = ordinalMap == null ? null : ordinalMap.getGlobalOrds(subIdx);

    if (singleDv != null) {
      if (accumSeg) {
        collectPerSeg(singleDv, disi, toGlobal);
      } else if (canDoPerSeg && toGlobal != null) {
        collectCounts(singleDv, disi, toGlobal);
      } else {
        collectDocs(singleDv, disi, toGlobal);
      }
    } else {
      if (accumSeg) {
        collectPerSeg(multiDv, disi, toGlobal);
      } else if (canDoPerSeg && toGlobal != null) {
        collectCounts(multiDv, disi, toGlobal);
      } else {
        collectDocs(multiDv, disi, toGlobal);
      }
    }
  }

  reuse = null;  // better GC
}