Java Code Examples for org.apache.lucene.index.DocValues#emptySorted()

The following examples show how to use org.apache.lucene.index.DocValues#emptySorted(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. Related API usage is listed in the sidebar.
Example 1
Source File: ToParentBlockJoinSortField.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the comparator used for string-valued sorting on this field.
 *
 * <p>The returned comparator overrides doc-values lookup so that values are
 * read from the field's sorted-set doc values and wrapped by
 * {@link BlockJoinSelector} using the parent and child bit sets of the
 * current leaf.
 *
 * @param numHits passed through to the underlying ordinal comparator
 * @return a term-ordinal comparator bound to {@code getField()}
 */
private FieldComparator<?> getStringComparator(int numHits) {
  return new FieldComparator.TermOrdValComparator(numHits, getField(), missingValue == STRING_LAST) {

    /**
     * Supplies the doc values for one leaf: MAX selection when {@code order}
     * is set, MIN otherwise. Falls back to empty doc values when the child
     * filter yields no bit set for this leaf.
     */
    @Override
    protected SortedDocValues getSortedDocValues(LeafReaderContext context, String field) throws IOException {
      final SortedSetDocValues childValues = DocValues.getSortedSet(context.reader(), field);

      final BlockJoinSelector.Type selection;
      if (order) {
        selection = BlockJoinSelector.Type.MAX;
      } else {
        selection = BlockJoinSelector.Type.MIN;
      }

      final BitSet parentBits = parentFilter.getBitSet(context);
      final BitSet childBits = childFilter.getBitSet(context);
      if (childBits != null) {
        return BlockJoinSelector.wrap(childValues, selection, parentBits, toIter(childBits));
      }
      // No child bit set for this leaf: nothing to select from.
      return DocValues.emptySorted();
    }

  };
}
 
Example 2
Source File: FieldCacheImpl.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Returns sorted doc values for {@code field} on the given reader.
 *
 * <p>Resolution order: the reader's own sorted doc values if present;
 * otherwise empty doc values when the field is unknown or not indexed;
 * otherwise an instance obtained from this object's cache.
 *
 * @param reader                  leaf reader to read from
 * @param field                   field name
 * @param acceptableOverheadRatio forwarded into the cache key
 * @return sorted doc values, never {@code null} on the paths shown here
 * @throws IllegalStateException if the field has doc values of a type other than sorted
 * @throws IOException           declared by the signature; thrown by the calls made here
 */
public SortedDocValues getTermsIndex(LeafReader reader, String field, float acceptableOverheadRatio) throws IOException {
  final SortedDocValues nativeValues = reader.getSortedDocValues(field);
  if (nativeValues != null) {
    // Not cached here by FieldCacheImpl (cached instead
    // per-thread by SegmentReader):
    return nativeValues;
  }

  final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
  if (fieldInfo == null) {
    // Field does not exist in this reader.
    return DocValues.emptySorted();
  }
  if (fieldInfo.getDocValuesType() != DocValuesType.NONE) {
    // we don't try to build a sorted instance from numeric/binary doc
    // values because dedup can be very costly
    throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + fieldInfo.getDocValuesType());
  }
  if (fieldInfo.getIndexOptions() == IndexOptions.NONE) {
    // Field is not indexed.
    return DocValues.emptySorted();
  }

  final SortedDocValuesImpl cached =
      (SortedDocValuesImpl) caches.get(SortedDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio));
  return cached.iterator();
}
 
Example 3
Source File: AbstractAtomicParentChildFieldData.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a field-data instance with no content: it reports zero RAM usage,
 * no child resources, no types, and empty ordinals for every type.
 *
 * @return a new empty {@link AtomicParentChildFieldData}
 */
public static AtomicParentChildFieldData empty() {
    final AtomicParentChildFieldData emptyFieldData = new AbstractAtomicParentChildFieldData() {

        /** No types are known to the empty instance. */
        @Override
        public Set<String> types() {
            return ImmutableSet.of();
        }

        /** Every type maps to empty sorted doc values. */
        @Override
        public SortedDocValues getOrdinalsValues(String type) {
            return DocValues.emptySorted();
        }

        /** Reports no memory usage. */
        @Override
        public long ramBytesUsed() {
            return 0;
        }

        /** Reports no nested accountable resources. */
        @Override
        public Collection<Accountable> getChildResources() {
            return Collections.emptyList();
        }

        /** Intentionally a no-op. */
        @Override
        public void close() {
        }
    };
    return emptyFieldData;
}
 
Example 4
Source File: ParentChildAtomicFieldData.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Looks up the ordinals for {@code type} in {@code typeToIds}.
 *
 * @param type key into {@code typeToIds}
 * @return the MIN-selected ordinals for the type, or empty doc values when
 *         no entry exists for it
 */
@Override
public SortedDocValues getOrdinalsValues(String type) {
    final AtomicOrdinalsFieldData perTypeData = typeToIds.get(type);
    if (perTypeData == null) {
        // Unknown type: hand back an empty instance rather than null.
        return DocValues.emptySorted();
    }
    return MultiValueMode.MIN.select(perTypeData.getOrdinalsValues());
}
 
Example 5
Source File: FieldUtil.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Fetches sorted doc values for a schema field from the searcher's slow atomic reader.
 *
 * @param context query context providing the searcher
 * @param field   schema field whose name is looked up
 * @param qparser not used by this method
 * @return the reader's doc values, or empty doc values when the reader returns {@code null}
 * @throws IOException declared by the signature; thrown by the reader lookup
 */
public static SortedDocValues getSortedDocValues(QueryContext context, SchemaField field, QParser qparser) throws IOException {
  final SortedDocValues fromReader =
      context.searcher().getSlowAtomicReader().getSortedDocValues(field.getName());
  if (fromReader != null) {
    return fromReader;
  }
  return DocValues.emptySorted();
}
 
Example 6
Source File: FacetFieldProcessorByArrayDV.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Iterates over every leaf of the searcher's index reader and feeds the
 * documents of the base domain into one of three collection routines
 * ({@code collectPerSeg}, {@code collectCounts} or {@code collectDocs}),
 * using either single-valued or multi-valued doc values for the field.
 *
 * <p>Returns immediately, without touching {@code reuse}, when there are no
 * terms or the domain is smaller than the effective mincount.
 *
 * @throws IOException declared by the signature; thrown by the doc-values and collection calls
 */
@Override
protected void collectDocs() throws IOException {
  final int domainSize = fcontext.base.size();

  // TODO: what about allBuckets? missing bucket?
  if (nTerms <= 0 || domainSize < effectiveMincount) {
    return;
  }

  // TODO: refactor some of this logic into a base class
  final boolean countOnly = collectAcc == null && allBucketsAcc == null;
  final boolean fullRange = startTermIndex == 0 && endTermIndex == si.getValueCount();

  // Heuristic: are we expecting many hits per bucket?
  // FUTURE: pro-rate for nTerms?
  // FUTURE: better take into account number of values in multi-valued fields.  This info is available for indexed fields.
  // FUTURE: take into account that bigger ord maps are more expensive than smaller ones
  // One test: 5M doc index, faceting on a single-valued field with almost 1M unique values, crossover point where
  // global counting was slower than per-segment counting was a domain of 658k docs.  At that point, top 10 buckets
  // had 6-7 matches each.  This was for heap docvalues produced by UninvertingReader.
  // Since these values were randomly distributed, round the domain multiplier up to account for less random
  // real world data.
  final long domainMultiplier = multiValuedField ? 4L : 2L;
  // +3 to increase test coverage with small tests
  final boolean manyHitsPerBucket = domainSize * domainMultiplier > (si.getValueCount() + 3);

  // If we're only calculating counts, we're not prefixing, and we expect to collect many documents per unique
  // value, then collect per-segment before mapping to global ords at the end.  This will save redundant
  // seg->global ord mappings.
  // FUTURE: there are probably some other non "countOnly" cases where we can use this as well (i.e. those where
  // the docid is not used)
  final boolean canDoPerSeg = countOnly && fullRange;
  final boolean accumSeg = freq.perSeg != null
      ? canDoPerSeg && freq.perSeg          // internal - override perSeg heuristic
      : manyHitsPerBucket && canDoPerSeg;

  final List<LeafReaderContext> segments = fcontext.searcher.getIndexReader().leaves();
  final Filter domainFilter = fcontext.base.getTopFilter();

  for (int segOrd = 0; segOrd < segments.size(); segOrd++) {
    final LeafReaderContext segment = segments.get(segOrd);

    setNextReaderFirstPhase(segment);

    // solr docsets already exclude any deleted docs
    final DocIdSetIterator docs = domainFilter.getDocIdSet(segment, null).iterator();

    SortedDocValues single = null;
    SortedSetDocValues multi = null;
    if (!multiValuedField) {
      single = segment.reader().getSortedDocValues(sf.getName());
      if (single == null) {
        single = DocValues.emptySorted();
      }
    } else {
      // TODO: get sub from multi?
      multi = segment.reader().getSortedSetDocValues(sf.getName());
      if (multi == null) {
        multi = DocValues.emptySortedSet();
      }
      // some codecs may optimize SortedSet storage for single-valued fields
      // this will be null if this is not a wrapped single valued docvalues.
      if (unwrap_singleValued_multiDv) {
        single = DocValues.unwrapSingleton(multi);
      }
    }

    final LongValues toGlobal = ordinalMap != null ? ordinalMap.getGlobalOrds(segOrd) : null;
    final boolean countsViaGlobalMap = canDoPerSeg && toGlobal != null;

    // Prefer the single-valued view whenever one is available for this segment.
    if (single != null) {
      if (accumSeg) {
        collectPerSeg(single, docs, toGlobal);
      } else if (countsViaGlobalMap) {
        collectCounts(single, docs, toGlobal);
      } else {
        collectDocs(single, docs, toGlobal);
      }
    } else {
      if (accumSeg) {
        collectPerSeg(multi, docs, toGlobal);
      } else if (countsViaGlobalMap) {
        collectCounts(multi, docs, toGlobal);
      } else {
        collectDocs(multi, docs, toGlobal);
      }
    }
  }

  reuse = null;  // better GC
}