Java Code Examples for org.apache.lucene.index.LeafReader#maxDoc()

The following examples show how to use org.apache.lucene.index.LeafReader#maxDoc(). They are drawn from open-source projects; the originating project and source file are noted above each example.
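
All of these snippets rely on the same contract: maxDoc() returns one greater than the largest document number in the reader, and that count includes deleted documents. The examples therefore use it either to size per-segment data structures or as the upper bound when scanning a segment. A minimal sketch of the scan pattern, with indexReader standing in for whatever IndexReader is at hand:

for (LeafReaderContext context : indexReader.leaves()) {
    LeafReader leafReader = context.reader();
    Bits liveDocs = leafReader.getLiveDocs(); // null when the segment has no deletions
    for (int docId = 0; docId < leafReader.maxDoc(); docId++) {
        if (liveDocs != null && !liveDocs.get(docId)) {
            continue; // deleted documents still count toward maxDoc()
        }
        // process the live document with ID docId
    }
}
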
Example 1
Source File: Lucene.java    From crate with Apache License 2.0
DirectoryReaderWithAllLiveDocs(DirectoryReader in) throws IOException {
    super(in, new SubReaderWrapper() {
        @Override
        public LeafReader wrap(LeafReader leaf) {
            SegmentReader segmentReader = segmentReader(leaf);
            Bits hardLiveDocs = segmentReader.getHardLiveDocs();
            if (hardLiveDocs == null) {
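                // no hard deletes in this segment, so all maxDoc() documents are live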
                return new LeafReaderWithLiveDocs(leaf, null, leaf.maxDoc());
            }
            // TODO: Can we avoid calculating numDocs by using SegmentReader#getSegmentInfo with LUCENE-8458?
            int numDocs = 0;
            for (int i = 0; i < hardLiveDocs.length(); i++) {
                if (hardLiveDocs.get(i)) {
                    numDocs++;
                }
            }
            return new LeafReaderWithLiveDocs(segmentReader, hardLiveDocs, numDocs);
        }
    });
}
 
Example 2
Source File: TestGeo3DPoint.java    From lucene-solr with Apache License 2.0
public static String explain(String fieldName, GeoShape shape, GeoPoint targetDocPoint, GeoPoint scaledDocPoint, IndexReader reader, int docID) throws Exception {

    final XYZBounds bounds = new XYZBounds();
    shape.getBounds(bounds);
    
    // First find the leaf reader that owns this doc:
    int subIndex = ReaderUtil.subIndex(docID, reader.leaves());
    LeafReader leafReader = reader.leaves().get(subIndex).reader();

    StringBuilder b = new StringBuilder();
    b.append("target is in leaf " + leafReader + " of full reader " + reader + "\n");

    DocIdSetBuilder hits = new DocIdSetBuilder(leafReader.maxDoc());
    ExplainingVisitor visitor = new ExplainingVisitor(shape, targetDocPoint, scaledDocPoint,
      new PointInShapeIntersectVisitor(hits, shape, bounds),
      docID - reader.leaves().get(subIndex).docBase, 3, Integer.BYTES, b);

    // Do first phase, where we just figure out the "path" that leads to the target docID:
    leafReader.getPointValues(fieldName).intersect(visitor);

    // Do second phase, where we see how the wrapped visitor responded along that path:
    visitor.startSecondPhase();
    leafReader.getPointValues(fieldName).intersect(visitor);

    return b.toString();
  }
 
Example 3
Source File: IndexSizeEstimator.java    From lucene-solr with Apache License 2.0
private void estimateTermVectors(Map<String, Object> result) throws IOException {
  log.info("- estimating term vectors...");
  Map<String, Map<String, Object>> stats = new HashMap<>();
  for (LeafReaderContext leafReaderContext : reader.leaves()) {
    LeafReader leafReader = leafReaderContext.reader();
    Bits liveDocs = leafReader.getLiveDocs();
    for (int docId = 0; docId < leafReader.maxDoc(); docId += samplingStep) {
      if (liveDocs != null && !liveDocs.get(docId)) {
        continue;
      }
      Fields termVectors = leafReader.getTermVectors(docId);
      if (termVectors == null) {
        continue;
      }
      for (String field : termVectors) {
        Terms terms = termVectors.terms(field);
        if (terms == null) {
          continue;
        }
        estimateTermStats(field, terms, stats, true);
      }
    }
  }
  result.put(TERM_VECTORS, stats);
}
 
Example 4
Source File: FieldCacheImpl.java    From lucene-solr with Apache License 2.0
@Override
public Bits getDocsWithField(LeafReader reader, String field, Parser parser) throws IOException {
  final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
  if (fieldInfo == null) {
    // field does not exist or has no value
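    // return an all-false Bits whose length() still equals maxDoc()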
    return new Bits.MatchNoBits(reader.maxDoc());
  } 
  
  if (fieldInfo.getDocValuesType() != DocValuesType.NONE) {
    // doc values case
  } else if (parser instanceof PointParser) {
    // points case
  } else {
    // postings case
    if (fieldInfo.getIndexOptions() == IndexOptions.NONE) {
      return new Bits.MatchNoBits(reader.maxDoc());
    }
  }
  BitsEntry bitsEntry = (BitsEntry) caches.get(DocsWithFieldCache.class).get(reader, new CacheKey(field, parser));
  return bitsEntry.bits;
}
 
Example 5
Source File: AbstractAuthoritySetQuery.java    From SearchServices with GNU Lesser General Public License v3.0
protected BitsFilter getACLFilter(String[] auths, String field, SolrIndexSearcher searcher) throws IOException
{
    HybridBitSet aclBits = getACLSet(auths, field, searcher);
    List<LeafReaderContext> leaves = searcher.getTopReaderContext().leaves();
    List<FixedBitSet> bitSets = new ArrayList<FixedBitSet>(leaves.size());

    for(LeafReaderContext readerContext :  leaves)
    {
        LeafReader reader = readerContext.reader();
        int maxDoc = reader.maxDoc();
        FixedBitSet bits = new FixedBitSet(maxDoc);
        bitSets.add(bits);

        NumericDocValues fieldValues = DocValuesCache.getNumericDocValues(QueryConstants.FIELD_ACLID, reader);
        if (fieldValues != null) {
            for (int i = 0; i < maxDoc; i++) {
                long aclID = fieldValues.get(i);
                if (aclBits.get(aclID)) {
                    bits.set(i);
                }
            }
        }
    }

    return new BitsFilter(bitSets);
}
 
Example 6
Source File: TestLucene80DocValuesFormat.java    From lucene-solr with Apache License 2.0
private void assertDVAdvance(Directory dir, int jumpStep) throws IOException {
  DirectoryReader ir = DirectoryReader.open(dir);
  TestUtil.checkReader(ir);
  for (LeafReaderContext context : ir.leaves()) {
    LeafReader r = context.reader();

    for (int jump = jumpStep; jump < r.maxDoc(); jump += jumpStep) {
      // Create a new instance each time to ensure jumps from the beginning
      NumericDocValues docValues = DocValues.getNumeric(r, "dv");
      for (int docID = 0; docID < r.maxDoc(); docID += jump) {
        String base = "document #" + docID + "/" + r.maxDoc() + ", jumping " + jump + " from #" + (docID-jump);
        String storedValue = r.document(docID).get("stored");
        if (storedValue == null) {
          assertFalse("There should be no DocValue for " + base,
              docValues.advanceExact(docID));
        } else {
          assertTrue("There should be a DocValue for " + base,
              docValues.advanceExact(docID));
          assertEquals("The doc value should be correct for " + base,
              Long.parseLong(storedValue), docValues.longValue());
        }
      }
    }
  }
  ir.close();
}
 
Example 7
Source File: IndexSizeEstimator.java    From lucene-solr with Apache License 2.0
private void estimateStoredFields(Map<String, Object> result) throws IOException {
  log.info("- estimating stored fields...");
  Map<String, Map<String, Object>> stats = new HashMap<>();
  for (LeafReaderContext context : reader.leaves()) {
    LeafReader leafReader = context.reader();
    EstimatingVisitor visitor = new EstimatingVisitor(stats, topN, maxLength, samplingStep);
    Bits liveDocs = leafReader.getLiveDocs();
    if (leafReader instanceof CodecReader) {
      CodecReader codecReader = (CodecReader)leafReader;
      StoredFieldsReader storedFieldsReader = codecReader.getFieldsReader();
      // this instance may be faster for a full sequential pass
      StoredFieldsReader mergeInstance = storedFieldsReader.getMergeInstance();
      for (int docId = 0; docId < leafReader.maxDoc(); docId += samplingStep) {
        if (liveDocs != null && !liveDocs.get(docId)) {
          continue;
        }
        mergeInstance.visitDocument(docId, visitor);
      }
      if (mergeInstance != storedFieldsReader) {
        mergeInstance.close();
      }
    } else {
      for (int docId = 0; docId < leafReader.maxDoc(); docId += samplingStep) {
        if (liveDocs != null && !liveDocs.get(docId)) {
          continue;
        }
        leafReader.document(docId, visitor);
      }
    }
  }
  result.put(STORED_FIELDS, stats);
}
 
Example 8
Source File: FieldCacheImpl.java    From lucene-solr with Apache License 2.0
private BitsEntry createValueDocValues(LeafReader reader, String field) throws IOException {
  FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
  
  DocValuesType dvType = fieldInfo.getDocValuesType();
  DocIdSetIterator iterator;
  switch(dvType) {
  case NUMERIC:
    iterator = reader.getNumericDocValues(field);
    break;
  case BINARY:
    iterator = reader.getBinaryDocValues(field);
    break;
  case SORTED:
    iterator = reader.getSortedDocValues(field);
    break;
  case SORTED_NUMERIC:
    iterator = reader.getSortedNumericDocValues(field);
    break;
  case SORTED_SET:
    iterator = reader.getSortedSetDocValues(field);
    break;
  default:
    throw new AssertionError();
  }

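  // one bit per document; a set bit marks a doc that has a value for the field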
  FixedBitSet bits = new FixedBitSet(reader.maxDoc());
  while (true) {
    int docID = iterator.nextDoc();
    if (docID == DocIdSetIterator.NO_MORE_DOCS) {
      break;
    }
    bits.set(docID);
  }

  return new BitsEntry(bits);
}
 
Example 9
Source File: ShapeQuery.java    From lucene-solr with Apache License 2.0
private Scorer getContainsDenseScorer(LeafReader reader, Weight weight, final float boost, ScoreMode scoreMode) throws IOException {
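  // candidate and excluded bitsets each hold one bit per document in the segment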
  final FixedBitSet result = new FixedBitSet(reader.maxDoc());
  final long[] cost = new long[]{0};
  // Get potential documents.
  final FixedBitSet excluded = new FixedBitSet(reader.maxDoc());
  values.intersect(getContainsDenseVisitor(query, result, excluded, cost));
  result.andNot(excluded);
  final DocIdSetIterator iterator = new BitSetIterator(result, cost[0]);
  return new ConstantScoreScorer(weight, boost, scoreMode, iterator);
}
 
Example 10
Source File: TermFilteredPresearcher.java    From lucene-solr with Apache License 2.0
private Query buildFilterClause(LeafReader reader, String field) throws IOException {

    Terms terms = reader.terms(field);
    if (terms == null)
      return null;

    BooleanQuery.Builder bq = new BooleanQuery.Builder();

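    // the batch reader contains no deletions, so maxDoc() is the number of documents in the batch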
    int docsInBatch = reader.maxDoc();

    BytesRef term;
    TermsEnum te = terms.iterator();
    while ((term = te.next()) != null) {
      // we need to check that every document in the batch has the same field values, otherwise
      // this filtering will not work
      if (te.docFreq() != docsInBatch)
        throw new IllegalArgumentException("Some documents in this batch do not have a term value of "
            + field + ":" + Term.toString(term));
      bq.add(new TermQuery(new Term(field, BytesRef.deepCopyOf(term))), BooleanClause.Occur.SHOULD);
    }

    BooleanQuery built = bq.build();

    if (built.clauses().size() == 0)
      return null;

    return built;
  }
 
Example 11
Source File: DirectoryTaxonomyWriter.java    From lucene-solr with Apache License 2.0
/**
 * Takes the categories from the given taxonomy directory, and adds the
 * missing ones to this taxonomy. Additionally, it fills the given
 * {@link OrdinalMap} with a mapping from the original ordinal to the new
 * ordinal.
 */
public void addTaxonomy(Directory taxoDir, OrdinalMap map) throws IOException {
  ensureOpen();
  DirectoryReader r = DirectoryReader.open(taxoDir);
  try {
    final int size = r.numDocs();
    final OrdinalMap ordinalMap = map;
    ordinalMap.setSize(size);
    int base = 0;
    PostingsEnum docs = null;
    for (final LeafReaderContext ctx : r.leaves()) {
      final LeafReader ar = ctx.reader();
      final Terms terms = ar.terms(Consts.FULL);
      // TODO: share per-segment TermsEnum here!
      TermsEnum te = terms.iterator();
      while (te.next() != null) {
        FacetLabel cp = new FacetLabel(FacetsConfig.stringToPath(te.term().utf8ToString()));
        final int ordinal = addCategory(cp);
        docs = te.postings(docs, PostingsEnum.NONE);
        ordinalMap.addMapping(docs.nextDoc() + base, ordinal);
      }
      base += ar.maxDoc(); // no deletions, so we're ok
    }
    ordinalMap.addDone();
  } finally {
    r.close();
  }
}
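
A hypothetical caller might merge one taxonomy into another like the sketch below; the Directory variables (sourceTaxoDir, destTaxoDir) and the choice of the bundled MemoryOrdinalMap implementation are illustrative assumptions, not part of the source above:

DirectoryTaxonomyWriter destWriter = new DirectoryTaxonomyWriter(destTaxoDir);
DirectoryTaxonomyWriter.OrdinalMap ordinalMap = new DirectoryTaxonomyWriter.MemoryOrdinalMap();
destWriter.addTaxonomy(sourceTaxoDir, ordinalMap); // fills ordinalMap while adding missing categories
int[] mapping = ordinalMap.getMap(); // index = ordinal in the source taxonomy, value = ordinal in destWriter
destWriter.commit();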
 
Example 12
Source File: AbstractPrefixTreeQuery.java    From lucene-solr with Apache License 2.0
public BaseTermsEnumTraverser(LeafReaderContext context) throws IOException {
  this.context = context;
  LeafReader reader = context.reader();
  this.maxDoc = reader.maxDoc();
  terms = reader.terms(fieldName);
  if (terms != null) {
    this.termsEnum = terms.iterator();
  } else {
    this.termsEnum = null;
  }
}
 
Example 13
Source File: TestLucene80DocValuesFormat.java    From lucene-solr with Apache License 2.0
private void doTestSortedNumericBlocksOfVariousBitsPerValue(LongSupplier counts) throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  conf.setMaxBufferedDocs(atLeast(Lucene80DocValuesFormat.NUMERIC_BLOCK_SIZE));
  conf.setRAMBufferSizeMB(-1);
  conf.setMergePolicy(newLogMergePolicy(random().nextBoolean()));
  IndexWriter writer = new IndexWriter(dir, conf);
  
  final int numDocs = atLeast(Lucene80DocValuesFormat.NUMERIC_BLOCK_SIZE*3);
  final LongSupplier values = blocksOfVariousBPV();
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    
    int valueCount = (int) counts.getAsLong();
    long valueArray[] = new long[valueCount];
    for (int j = 0; j < valueCount; j++) {
      long value = values.getAsLong();
      valueArray[j] = value;
      doc.add(new SortedNumericDocValuesField("dv", value));
    }
    Arrays.sort(valueArray);
    for (int j = 0; j < valueCount; j++) {
      doc.add(new StoredField("stored", Long.toString(valueArray[j])));
    }
    writer.addDocument(doc);
    if (random().nextInt(31) == 0) {
      writer.commit();
    }
  }
  writer.forceMerge(1);

  writer.close();
  
  // compare
  DirectoryReader ir = DirectoryReader.open(dir);
  TestUtil.checkReader(ir);
  for (LeafReaderContext context : ir.leaves()) {
    LeafReader r = context.reader();
    SortedNumericDocValues docValues = DocValues.getSortedNumeric(r, "dv");
    for (int i = 0; i < r.maxDoc(); i++) {
      if (i > docValues.docID()) {
        docValues.nextDoc();
      }
      String expected[] = r.document(i).getValues("stored");
      if (i < docValues.docID()) {
        assertEquals(0, expected.length);
      } else {
        String actual[] = new String[docValues.docValueCount()];
        for (int j = 0; j < actual.length; j++) {
          actual[j] = Long.toString(docValues.nextValue());
        }
        assertArrayEquals(expected, actual);
      }
    }
  }
  ir.close();
  dir.close();
}
 
Example 14
Source File: PointInSetQuery.java    From lucene-solr with Apache License 2.0
@Override
public final Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {

  // We don't use RandomAccessWeight here: it's no good to approximate with "match all docs".
  // This is an inverted structure and should be used in the first pass:

  return new ConstantScoreWeight(this, boost) {

    @Override
    public Scorer scorer(LeafReaderContext context) throws IOException {
      LeafReader reader = context.reader();

      PointValues values = reader.getPointValues(field);
      if (values == null) {
        // No docs in this segment/field indexed any points
        return null;
      }

      if (values.getNumIndexDimensions() != numDims) {
        throw new IllegalArgumentException("field=\"" + field + "\" was indexed with numIndexDims=" + values.getNumIndexDimensions() + " but this query has numIndexDims=" + numDims);
      }
      if (values.getBytesPerDimension() != bytesPerDim) {
        throw new IllegalArgumentException("field=\"" + field + "\" was indexed with bytesPerDim=" + values.getBytesPerDimension() + " but this query has bytesPerDim=" + bytesPerDim);
      }

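      // maxDoc() lets the builder switch to a FixedBitSet if the matching set grows dense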
      DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field);

      if (numDims == 1) {

        // We optimize this common case, effectively doing a merge sort of the indexed values vs the queried set:
        values.intersect(new MergePointVisitor(sortedPackedPoints, result));

      } else {
        // NOTE: this is a naive implementation, where for each point we re-walk the KD tree to intersect.  We could instead do a similar
        // optimization as the 1D case, but I think it'd mean building a query-time KD tree so we could efficiently intersect against the
        // index, which is probably tricky!
        SinglePointVisitor visitor = new SinglePointVisitor(result);
        TermIterator iterator = sortedPackedPoints.iterator();
        for (BytesRef point = iterator.next(); point != null; point = iterator.next()) {
          visitor.setPoint(point);
          values.intersect(visitor);
        }
      }

      return new ConstantScoreScorer(this, score(), scoreMode, result.build().iterator());
    }

    @Override
    public boolean isCacheable(LeafReaderContext ctx) {
      return true;
    }

  };
}
 
Example 15
Source File: FieldCacheImpl.java    From lucene-solr with Apache License 2.0
@Override
protected Accountable createValue(LeafReader reader, CacheKey key)
    throws IOException {

  final int maxDoc = reader.maxDoc();

  Terms terms = reader.terms(key.field);

  final float acceptableOverheadRatio = ((Float) key.custom).floatValue();

  final PagedBytes bytes = new PagedBytes(15);

  int startTermsBPV;

  // TODO: use Uninvert?
  if (terms != null) {
    // Try for coarse estimate for number of bits; this
    // should be an underestimate most of the time, which
    // is fine -- GrowableWriter will reallocate as needed
    long numUniqueTerms = terms.size();
    if (numUniqueTerms != -1L) {
      if (numUniqueTerms > maxDoc) {
        throw new IllegalStateException("Type mismatch: " + key.field + " was indexed with multiple values per document, use SORTED_SET instead");
      }

      startTermsBPV = PackedInts.bitsRequired(numUniqueTerms);
    } else {
      startTermsBPV = 1;
    }
  } else {
    startTermsBPV = 1;
  }

  PackedLongValues.Builder termOrdToBytesOffset = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
  final GrowableWriter docToTermOrd = new GrowableWriter(startTermsBPV, maxDoc, acceptableOverheadRatio);

  int termOrd = 0;

  // TODO: use Uninvert?

  if (terms != null) {
    final TermsEnum termsEnum = terms.iterator();
    PostingsEnum docs = null;

    while(true) {
      final BytesRef term = termsEnum.next();
      if (term == null) {
        break;
      }
      if (termOrd >= maxDoc) {
        throw new IllegalStateException("Type mismatch: " + key.field + " was indexed with multiple values per document, use SORTED_SET instead");
      }

      termOrdToBytesOffset.add(bytes.copyUsingLengthPrefix(term));
      docs = termsEnum.postings(docs, PostingsEnum.NONE);
      while (true) {
        final int docID = docs.nextDoc();
        if (docID == DocIdSetIterator.NO_MORE_DOCS) {
          break;
        }
        // Store 1+ ord into packed bits
        docToTermOrd.set(docID, 1+termOrd);
      }
      termOrd++;
    }
  }

  // maybe an int-only impl?
  return new SortedDocValuesImpl(bytes.freeze(true), termOrdToBytesOffset.build(), docToTermOrd.getMutable(), termOrd);
}
 
Example 16
Source File: FieldCacheImpl.java    From lucene-solr with Apache License 2.0
@Override
protected Accountable createValue(LeafReader reader, CacheKey key)
    throws IOException {

  // TODO: would be nice to first check if DocTermsIndex
  // was already cached for this field and then return
  // that instead, to avoid insanity

  final int maxDoc = reader.maxDoc();
  Terms terms = reader.terms(key.field);

  final float acceptableOverheadRatio = ((Float) key.custom).floatValue();

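  // a single-valued field can have at most one term per document, so maxDoc caps the term count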
  final int termCountHardLimit = maxDoc;

  // Holds the actual term data, expanded.
  final PagedBytes bytes = new PagedBytes(15);

  int startBPV;

  if (terms != null) {
    // Try for coarse estimate for number of bits; this
    // should be an underestimate most of the time, which
    // is fine -- GrowableWriter will reallocate as needed
    long numUniqueTerms = terms.size();
    if (numUniqueTerms != -1L) {
      if (numUniqueTerms > termCountHardLimit) {
        numUniqueTerms = termCountHardLimit;
      }
      startBPV = PackedInts.bitsRequired(numUniqueTerms*4);
    } else {
      startBPV = 1;
    }
  } else {
    startBPV = 1;
  }

  final GrowableWriter docToOffset = new GrowableWriter(startBPV, maxDoc, acceptableOverheadRatio);
  
  // pointer==0 means not set
  bytes.copyUsingLengthPrefix(new BytesRef());

  if (terms != null) {
    int termCount = 0;
    final TermsEnum termsEnum = terms.iterator();
    PostingsEnum docs = null;
    while(true) {
      if (termCount++ == termCountHardLimit) {
        // app is misusing the API (there is more than
        // one term per doc); in this case we make best
        // effort to load what we can (see LUCENE-2142)
        break;
      }

      final BytesRef term = termsEnum.next();
      if (term == null) {
        break;
      }
      final long pointer = bytes.copyUsingLengthPrefix(term);
      docs = termsEnum.postings(docs, PostingsEnum.NONE);
      while (true) {
        final int docID = docs.nextDoc();
        if (docID == DocIdSetIterator.NO_MORE_DOCS) {
          break;
        }
        docToOffset.set(docID, pointer);
      }
    }
  }

  final PackedInts.Reader offsetReader = docToOffset.getMutable();
  Bits docsWithField = new Bits() {
    @Override
    public boolean get(int index) {
      return offsetReader.get(index) != 0;
    }

    @Override
    public int length() {
      return maxDoc;
    }
  };

  wrapper.setDocsWithField(reader, key.field, docsWithField, null);
  // maybe an int-only impl?
  return new BinaryDocValuesImpl(bytes.freeze(true), offsetReader, docsWithField);
}
 
Example 17
Source File: DocTermOrds.java    From lucene-solr with Apache License 2.0
Iterator(LeafReader reader) throws IOException {
  this.reader = reader;
  this.maxDoc = reader.maxDoc();
  this.te = termsEnum();
}
 
Example 18
Source File: PointInGeo3DShapeQuery.java    From lucene-solr with Apache License 2.0
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {

  // I don't use RandomAccessWeight here: it's no good to approximate with "match all docs"; this is an inverted structure and should be
  // used in the first pass:

  return new ConstantScoreWeight(this, boost) {

    @Override
    public Scorer scorer(LeafReaderContext context) throws IOException {
      LeafReader reader = context.reader();
      PointValues values = reader.getPointValues(field);
      if (values == null) {
        return null;
      }

      /*
      XYZBounds bounds = new XYZBounds();
      shape.getBounds(bounds);

      final double planetMax = planetModel.getMaximumMagnitude();
      if (planetMax != treeDV.planetMax) {
        throw new IllegalStateException(planetModel + " is not the same one used during indexing: planetMax=" + planetMax + " vs indexing planetMax=" + treeDV.planetMax);
      }
      */

      /*
      GeoArea xyzSolid = GeoAreaFactory.makeGeoArea(planetModel,
                                                    bounds.getMinimumX(),
                                                    bounds.getMaximumX(),
                                                    bounds.getMinimumY(),
                                                    bounds.getMaximumY(),
                                                    bounds.getMinimumZ(),
                                                    bounds.getMaximumZ());

      assert xyzSolid.getRelationship(shape) == GeoArea.WITHIN || xyzSolid.getRelationship(shape) == GeoArea.OVERLAPS: "expected WITHIN (1) or OVERLAPS (2) but got " + xyzSolid.getRelationship(shape) + "; shape="+shape+"; XYZSolid="+xyzSolid;
      */

      DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field);

      values.intersect(new PointInShapeIntersectVisitor(result, shape, shapeBounds));

      return new ConstantScoreScorer(this, score(), scoreMode, result.build().iterator());
    }

    @Override
    public boolean isCacheable(LeafReaderContext ctx) {
      return true;
    }

  };
}
 
Example 19
Source File: IndexSizeEstimatorTest.java    From lucene-solr with Apache License 2.0
@Test
public void testEstimator() throws Exception {
  JettySolrRunner jetty = cluster.getRandomJetty(random());
  String randomCoreName = jetty.getCoreContainer().getAllCoreNames().iterator().next();
  SolrCore core = jetty.getCoreContainer().getCore(randomCoreName);
  RefCounted<SolrIndexSearcher> searcherRef = core.getSearcher();
  try {
    SolrIndexSearcher searcher = searcherRef.get();
    // limit the max length
    IndexSizeEstimator estimator = new IndexSizeEstimator(searcher.getRawReader(), 20, 50, true, true);
    IndexSizeEstimator.Estimate estimate = estimator.estimate();
    Map<String, Long> fieldsBySize = estimate.getFieldsBySize();
    assertFalse("empty fieldsBySize", fieldsBySize.isEmpty());
    assertEquals(fieldsBySize.toString(), fields.size(), fieldsBySize.size());
    fieldsBySize.forEach((k, v) -> assertTrue("unexpected size of " + k + ": " + v, v > 0));
    Map<String, Long> typesBySize = estimate.getTypesBySize();
    assertFalse("empty typesBySize", typesBySize.isEmpty());
    assertTrue("expected at least 8 types: " + typesBySize.toString(), typesBySize.size() >= 8);
    typesBySize.forEach((k, v) -> assertTrue("unexpected size of " + k + ": " + v, v > 0));
    Map<String, Object> summary = estimate.getSummary();
    assertNotNull("summary", summary);
    assertFalse("empty summary", summary.isEmpty());
    assertEquals(summary.keySet().toString(), fields.size(), summary.keySet().size());
    Map<String, Object> details = estimate.getDetails();
    assertNotNull("details", details);
    assertFalse("empty details", details.isEmpty());
    // by type
    assertEquals(details.keySet().toString(), 6, details.keySet().size());

    // check sampling
    estimator.setSamplingThreshold(searcher.getRawReader().maxDoc() / 2);
    IndexSizeEstimator.Estimate sampledEstimate = estimator.estimate();
    Map<String, Long> sampledFieldsBySize = sampledEstimate.getFieldsBySize();
    assertFalse("empty fieldsBySize", sampledFieldsBySize.isEmpty());
    // verify that the sampled values are within 50% of the original values
    fieldsBySize.forEach((field, size) -> {
      Long sampledSize = sampledFieldsBySize.get(field);
      assertNotNull("sampled size for " + field + " is missing in " + sampledFieldsBySize, sampledSize);
      double delta = (double) size * 0.5;
      assertEquals("sampled size of " + field + " is wildly off", (double)size, (double)sampledSize, delta);
    });
    // verify the reader is still usable - SOLR-13694
    IndexReader reader = searcher.getRawReader();
    for (LeafReaderContext context : reader.leaves()) {
      LeafReader leafReader = context.reader();
      assertTrue("unexpected LeafReader class: " + leafReader.getClass().getName(), leafReader instanceof CodecReader);
      Bits liveDocs = leafReader.getLiveDocs();
      CodecReader codecReader = (CodecReader) leafReader;
      StoredFieldsReader storedFieldsReader = codecReader.getFieldsReader();
      StoredFieldVisitor visitor = new DocumentStoredFieldVisitor();
      assertNotNull(storedFieldsReader);
      for (int docId = 0; docId < leafReader.maxDoc(); docId++) {
        if (liveDocs != null && !liveDocs.get(docId)) {
          continue;
        }
        storedFieldsReader.visitDocument(docId, visitor);
      }
    }
  } finally {
    searcherRef.decref();
    core.close();
  }
}
 
Example 20
Source File: DocValuesCache.java    From SearchServices with GNU Lesser General Public License v3.0
public static synchronized NumericDocValues getNumericDocValues(String field, LeafReader reader) throws IOException
{
    WeakHashMap<Object, NumericDocValues> fieldCache = cache.get(field);

    if(fieldCache == null)
    {
        fieldCache = new WeakHashMap<Object, NumericDocValues>();
        cache.put(field, fieldCache);
    }

    Object cacheKey = reader.getCoreCacheKey();
    NumericDocValues cachedValues = fieldCache.get(cacheKey);

    if(cachedValues == null)
    {
        NumericDocValues fieldValues = reader.getNumericDocValues(field);
        if(fieldValues == null)
        {
            return null;
        }
        else
        {
            int maxDoc = reader.maxDoc();
            boolean longs = false;
            int[] intValues = new int[maxDoc]; // Always start off with an int array.
            SettableDocValues settableValues = new IntValues(intValues);

            for(int i=0; i<maxDoc; i++)
            {
                long value = fieldValues.get(i);
                if(value > Integer.MAX_VALUE && !longs)
                {
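                    // value no longer fits in an int: switch to long-backed storage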
                    longs = true;
                    settableValues = new LongValues(intValues);
                }

                settableValues.set(i, value);
            }
            fieldCache.put(cacheKey, settableValues);
            return settableValues;
        }
    }
    else
    {
        return cachedValues;
    }
}