Java Code Examples for org.apache.lucene.index.IndexReader#leaves()

The following examples show how to use org.apache.lucene.index.IndexReader#leaves(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out the related API usage on the sidebar.
Example 1
Source Project: crate   File: VersionsAndSeqNoResolver.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Resolves the internal doc ID and version for the uid carried by {@code term}.
 * Leaves are probed from the last one to the first, and the first hit wins.
 *
 * @param reader    the reader whose leaves are searched
 * @param term      the uid term; its field selects the lookup state and its bytes are the key
 * @param loadSeqNo forwarded unchanged to every per-leaf lookup
 * @return the doc ID and version of the first match, or {@code null} if the uid wasn't found
 * @throws IOException if building the lookup state or a per-leaf lookup fails
 */
public static DocIdAndVersion loadDocIdAndVersion(IndexReader reader, Term term, boolean loadSeqNo) throws IOException {
    final PerThreadIDVersionAndSeqNoLookup[] perLeafLookups = getLookupState(reader, term.field());
    final List<LeafReaderContext> segments = reader.leaves();
    // Walk backwards: frequently updated documents are likely to live in the last segments.
    int idx = segments.size();
    while (--idx >= 0) {
        final LeafReaderContext segment = segments.get(idx);
        final DocIdAndVersion hit = perLeafLookups[segment.ord].lookupVersion(term.bytes(), loadSeqNo, segment);
        if (hit != null) {
            return hit;
        }
    }
    return null;
}
 
Example 2
/**
 * Builds an {@link IterableRow} for {@code rowId} out of every leaf whose
 * {@code BlurConstants.ROW_ID} terms contain exactly that id.
 *
 * @param rowId    the row id to look up
 * @param searcher the searcher providing the index reader
 * @return the row, or {@code null} when no leaf contains the row id
 * @throws IOException if reading fields or terms fails
 */
private IterableRow getIterableRow(String rowId, IndexSearcherCloseable searcher) throws IOException {
  final IndexReader topReader = searcher.getIndexReader();
  final BytesRef wanted = new BytesRef(rowId);
  final List<AtomicReaderTermsEnum> matches = new ArrayList<AtomicReaderTermsEnum>();
  for (AtomicReaderContext leaf : topReader.leaves()) {
    final AtomicReader segment = leaf.reader();
    final Fields segmentFields = segment.fields();
    // A leaf without fields, or without the row-id field, cannot hold the row.
    final Terms rowIdTerms = segmentFields == null ? null : segmentFields.terms(BlurConstants.ROW_ID);
    if (rowIdTerms == null) {
      continue;
    }
    final TermsEnum cursor = rowIdTerms.iterator(null);
    if (cursor.seekExact(wanted, true)) {
      // Keep the reader together with the positioned enum; both are needed later.
      matches.add(new AtomicReaderTermsEnum(segment, cursor));
    }
  }
  return matches.isEmpty() ? null : new IterableRow(rowId, getRecords(matches));
}
 
Example 3
/**
 * Counts every value of the sorted-numeric doc-values {@code field} across all leaves of
 * {@code reader}. Leaves whose values unwrap to a single-valued view are handed to
 * {@code countAllOneSegment}; otherwise each value is passed to {@code increment} and
 * {@code totCount} grows by the number of values per document.
 */
private void countAllMultiValued(IndexReader reader, String field) throws IOException {
  for (LeafReaderContext leaf : reader.leaves()) {
    final SortedNumericDocValues multi = leaf.reader().getSortedNumericDocValues(field);
    if (multi == null) {
      // No doc values for this field in this segment.
      continue;
    }

    final NumericDocValues single = DocValues.unwrapSingleton(multi);
    if (single != null) {
      countAllOneSegment(single);
      continue;
    }

    for (int doc = multi.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = multi.nextDoc()) {
      final int valueCount = multi.docValueCount();
      totCount += valueCount;
      for (int remaining = valueCount; remaining > 0; remaining--) {
        increment(multi.nextValue());
      }
    }
  }
}
 
Example 4
Source Project: crate   File: Engine.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Sums live-document count, deleted-document count and on-disk size over all leaves of
 * {@code indexReader}.
 *
 * @param indexReader the reader whose leaves are inspected
 * @return the aggregated statistics; a leaf whose size cannot be read contributes no bytes
 */
protected final DocsStats docsStats(IndexReader indexReader) {
    long liveTotal = 0;
    long deletedTotal = 0;
    long bytesTotal = 0;
    // Note carried over from the original author: this is a stats call, so it does not wait for
    // pending refreshes; the next scheduled refresh is expected to bring the stats up to date.
    for (LeafReaderContext leaf : indexReader.leaves()) {
        // Work at the segment level to get accurate numbers.
        final SegmentReader segment = Lucene.segmentReader(leaf.reader());
        final SegmentCommitInfo commitInfo = segment.getSegmentInfo();
        liveTotal += leaf.reader().numDocs();
        deletedTotal += leaf.reader().numDeletedDocs();
        try {
            bytesTotal += commitInfo.sizeInBytes();
        } catch (IOException e) {
            // Best effort: log at trace level and leave this segment's size out of the total.
            logger.trace(() -> new ParameterizedMessage("failed to get size for [{}]", commitInfo.info.name), e);
        }
    }
    return new DocsStats(liveTotal, deletedTotal, bytesTotal);
}
 
Example 5
Source Project: lucene-solr   File: NearestFuzzyQuery.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a {@link TermQuery} for {@code term} backed by an artificial {@link TermStates}
 * whose overall docFreq and totalTermFreq are meant to add up to 1.
 */
private Query newTermQuery(IndexReader reader, Term term) throws IOException {
  final TermStates syntheticStates = new TermStates(reader.getContext());
  for (LeafReaderContext leaf : reader.leaves()) {
    final Terms fieldTerms = leaf.reader().terms(term.field());
    if (fieldTerms == null) {
      continue;
    }
    final TermsEnum cursor = fieldTerms.iterator();
    if (!cursor.seekExact(term.bytes())) {
      continue;
    }
    // Register only what is still missing so the running total of df and ttf stays at 1.
    final int remaining = 1 - syntheticStates.docFreq();
    syntheticStates.register(cursor.termState(), leaf.ord, remaining, remaining);
  }
  return new TermQuery(term, syntheticStates);
}
 
Example 6
/**
 * Asserts that {@code reader} has at least one leaf, that every leaf reader is a
 * {@link SegmentReader}, and that each segment's compound-file flag equals
 * {@code compound}.
 */
private static void assertCompoundSegments(IndexReader reader,
                                           boolean compound) {
  assertNotNull("Null leaves", reader.leaves());
  assertTrue("no leaves", reader.leaves().size() > 0);

  for (LeafReaderContext leaf : reader.leaves()) {
    final boolean isSegmentReader = leaf.reader() instanceof SegmentReader;
    assertTrue("not a segment reader: " + leaf.reader().toString(), isSegmentReader);

    // The cast is safe here: the assertion above has already passed.
    final SegmentReader segment = (SegmentReader) leaf.reader();
    final String label = "Compound status incorrect for: " + leaf.reader().toString();
    assertEquals(label, compound, segment.getSegmentInfo().info.getUseCompoundFile());
  }
}
 
Example 7
/**
 * Looks up the Document identified by {@code idTerm} (a combination of resource and context) by asking each leaf
 * reader in turn.
 *
 * @param idTerm the term identifying the document
 * @return the first Document found, or null when no such Document exists yet
 * @throws IOException if a leaf lookup fails
 */
private Document getDocument(Term idTerm) throws IOException {
	for (LeafReaderContext leaf : getIndexReader().leaves()) {
		Document found = getDocument(leaf.reader(), idTerm);
		if (found != null) {
			return found;
		}
	}
	// nothing matched in any leaf
	return null;
}
 
Example 8
/**
 * Collects the Documents representing the specified Resource from every leaf reader. Each document represents a
 * set of statements with the specified Resource as a subject, stored in a specific context.
 *
 * @param uriTerm the term identifying the resource
 * @return the matching Documents; empty when no such Document exists yet
 * @throws IOException if a leaf lookup fails
 */
private List<Document> getDocuments(Term uriTerm) throws IOException {
	List<Document> collected = new ArrayList<>();
	for (LeafReaderContext leaf : getIndexReader().leaves()) {
		addDocuments(leaf.reader(), uriTerm, collected);
	}
	return collected;
}
 
Example 9
Source Project: lucene-solr   File: CommonTermsQuery.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Rewrites this query: no terms yields a {@link MatchNoDocsQuery}, a single term yields a
 * plain term query, and otherwise term states are collected across all leaves and handed
 * to {@code buildQuery}.
 */
@Override
public Query rewrite(IndexReader reader) throws IOException {
  if (this.terms.isEmpty()) {
    return new MatchNoDocsQuery("CommonTermsQuery with no terms");
  }
  if (this.terms.size() == 1) {
    return newTermQuery(this.terms.get(0), null);
  }

  final List<LeafReaderContext> segments = reader.leaves();
  final int docLimit = reader.maxDoc();
  // One TermStates slot per query term, filled in by collectTermStates.
  final TermStates[] statesPerTerm = new TermStates[terms.size()];
  final Term[] termArray = this.terms.toArray(new Term[0]);
  collectTermStates(reader, segments, statesPerTerm, termArray);
  return buildQuery(docLimit, statesPerTerm, termArray);
}
 
Example 10
/**
 * Visits every doc ID below {@code maxDoc} in each leaf of {@code reader} and, for each
 * document that has a value in {@code valueSource}, passes the value to {@code increment}
 * and bumps {@code totCount}. The {@code field} parameter is not used by this method.
 */
private void countAll(LongValuesSource valueSource, String field, IndexReader reader) throws IOException {
  for (LeafReaderContext leaf : reader.leaves()) {
    final LongValues values = valueSource.getValues(leaf, null);
    final int docLimit = leaf.reader().maxDoc();

    for (int docId = 0; docId < docLimit; docId++) {
      if (!values.advanceExact(docId)) {
        // No value for this document.
        continue;
      }
      increment(values.longValue());
      totCount++;
    }
  }
}
 
Example 11
Source Project: lucene-solr   File: CheckJoinIndex.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Check that the given index is good to use for block joins.
 * <p>
 * For every non-empty leaf this verifies that the parent bit set has at least one bit set,
 * that the last document of the leaf is a parent, and, when the leaf has deletions, that
 * every child document has the same live/deleted status as the parent that follows it.
 *
 * @param reader        the index to check
 * @param parentsFilter produces the per-leaf bit set marking parent documents
 * @throws IllegalStateException if the index does not have an appropriate structure
 * @throws IOException if reading the parent bit set fails
 */
public static void check(IndexReader reader, BitSetProducer parentsFilter) throws IOException {
  for (LeafReaderContext context : reader.leaves()) {
    final int maxDoc = context.reader().maxDoc();
    if (maxDoc == 0) {
      continue;
    }
    final BitSet parents = parentsFilter.getBitSet(context);
    if (parents == null || parents.cardinality() == 0) {
      throw new IllegalStateException("Every segment should have at least one parent, but " + context.reader() + " does not have any");
    }
    if (parents.get(maxDoc - 1) == false) {
      throw new IllegalStateException("The last document of a segment must always be a parent, but " + context.reader() + " has a child as a last doc");
    }
    final Bits liveDocs = context.reader().getLiveDocs();
    if (liveDocs == null) {
      // No deletions in this leaf, so parents and children are trivially consistent.
      continue;
    }
    int prevParentDoc = -1;
    final DocIdSetIterator it = new BitSetIterator(parents, 0L);
    for (int parentDoc = it.nextDoc(); parentDoc != DocIdSetIterator.NO_MORE_DOCS; parentDoc = it.nextDoc()) {
      final boolean parentIsLive = liveDocs.get(parentDoc);
      // The children of parentDoc are the documents between the previous parent and parentDoc.
      for (int child = prevParentDoc + 1; child != parentDoc; child++) {
        final boolean childIsLive = liveDocs.get(child);
        if (parentIsLive == childIsLive) {
          continue;
        }
        // The statuses differ, so exactly one of the two is live; report which one accurately.
        if (parentIsLive) {
          throw new IllegalStateException("Parent doc " + parentDoc + " of segment " + context.reader() + " is live but has a deleted child document " + child);
        } else {
          throw new IllegalStateException("Parent doc " + parentDoc + " of segment " + context.reader() + " is deleted but has a live child document " + child);
        }
      }
      prevParentDoc = parentDoc;
    }
  }
}
 
Example 12
Source Project: lucene-solr   File: TestBlockJoin.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns the stored document of the parent of {@code childDocID}: the leaf containing the
 * child is located, and the next set bit at or after the child's leaf-local id in the
 * {@code parents} bit set of that leaf is loaded.
 */
private Document getParentDoc(IndexReader reader, BitSetProducer parents, int childDocID) throws IOException {
  final List<LeafReaderContext> segments = reader.leaves();
  final LeafReaderContext owner = segments.get(ReaderUtil.subIndex(childDocID, segments));
  // Translate the top-level doc id into the owning leaf's local id space.
  final int localChild = childDocID - owner.docBase;
  final int localParent = parents.getBitSet(owner).nextSetBit(localChild);
  return owner.reader().document(localParent);
}
 
Example 13
Source Project: lucene-solr   File: QueryIndex.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Populates {@code termsHash} with one {@link BytesRefHash} per field found in any leaf of
 * {@code reader}, containing every term of that field. A field without terms still gets an
 * (empty) hash.
 */
QueryTermFilter(IndexReader reader) throws IOException {
  for (LeafReaderContext leaf : reader.leaves()) {
    for (FieldInfo fieldInfo : leaf.reader().getFieldInfos()) {
      final BytesRefHash fieldTerms = termsHash.computeIfAbsent(fieldInfo.name, key -> new BytesRefHash());
      final Terms indexed = leaf.reader().terms(fieldInfo.name);
      if (indexed == null) {
        continue;
      }
      final TermsEnum cursor = indexed.iterator();
      for (BytesRef next = cursor.next(); next != null; next = cursor.next()) {
        fieldTerms.add(next);
      }
    }
  }
}
 
Example 14
Source Project: crate   File: MultiPhrasePrefixQuery.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Adds to {@code terms} every term of {@code field} that starts with the bytes of
 * {@code prefix}, visiting the leaves of {@code reader} one by one and stopping as soon as
 * {@code terms} holds at least {@code maxExpansions} entries.
 */
private void getPrefixTerms(ObjectHashSet<Term> terms, final Term prefix, final IndexReader reader) throws IOException {
    // Per the original author: iterating leaf by leaf avoids the expensive merge of all
    // segments' terms that a SlowCompositeReaderWrapper would perform.
    for (LeafReaderContext segment : reader.leaves()) {
        final Terms fieldTerms = segment.reader().terms(field);
        if (fieldTerms == null) {
            continue;
        }

        final TermsEnum cursor = fieldTerms.iterator();
        if (cursor.seekCeil(prefix.bytes()) == TermsEnum.SeekStatus.END) {
            continue;
        }

        BytesRef current = cursor.term();
        while (current != null && StringHelper.startsWith(current, prefix.bytes())) {
            terms.add(new Term(field, BytesRef.deepCopyOf(current)));
            if (terms.size() >= maxExpansions) {
                return;
            }
            current = cursor.next();
        }
    }
}
 
Example 15
/**
 * Builds one {@code Holder} per leaf of {@code reader} that has terms for {@code field},
 * optionally restricted to the documents matching {@code filter}.
 *
 * @param reader       the reader whose leaves are scanned
 * @param field        the field whose terms are enumerated
 * @param docsEnumFlag must be {@code PostingsEnum.FREQS} or {@code PostingsEnum.NONE}
 * @param filter       optional query; when non-null, each leaf gets a bit set of its live matching docs
 * @throws IllegalArgumentException if {@code docsEnumFlag} is neither of the two accepted values
 * @throws IOException if reading terms or scoring the filter fails
 */
public FilterableTermsEnum(IndexReader reader, String field, int docsEnumFlag, @Nullable Query filter) throws IOException {
    if ((docsEnumFlag != PostingsEnum.FREQS) && (docsEnumFlag != PostingsEnum.NONE)) {
        throw new IllegalArgumentException("invalid docsEnumFlag of " + docsEnumFlag);
    }
    this.docsEnumFlag = docsEnumFlag;
    if (filter == null) {
        // Important - need to use the doc count that includes deleted docs
        // or we have this issue: https://github.com/elasticsearch/elasticsearch/issues/7951
        numDocs = reader.maxDoc();
    }
    List<LeafReaderContext> leaves = reader.leaves();
    List<Holder> enums = new ArrayList<>(leaves.size());
    // A weight is only needed when a filter is supplied; null means "no filtering".
    final Weight weight;
    if (filter == null) {
        weight = null;
    } else {
        final IndexSearcher searcher = new IndexSearcher(reader);
        // No query cache is installed on this throwaway searcher.
        searcher.setQueryCache(null);
        // NOTE(review): the second argument is presumably "needsScores" - confirm against the Lucene version in use.
        weight = searcher.createNormalizedWeight(filter, false);
    }
    for (LeafReaderContext context : leaves) {
        // Leaves without terms for the field contribute no Holder.
        Terms terms = context.reader().terms(field);
        if (terms == null) {
            continue;
        }
        TermsEnum termsEnum = terms.iterator();
        if (termsEnum == null) {
            continue;
        }
        // Stays null when no filter is in effect.
        BitSet bits = null;
        if (weight != null) {
            Scorer scorer = weight.scorer(context);
            if (scorer == null) {
                // fully filtered, none matching, no need to iterate on this
                continue;
            }
            DocIdSetIterator docs = scorer.iterator();

            // we want to force apply deleted docs
            final Bits liveDocs = context.reader().getLiveDocs();
            if (liveDocs != null) {
                docs = new FilteredDocIdSetIterator(docs) {
                    @Override
                    protected boolean match(int doc) {
                        return liveDocs.get(doc);
                    }
                };
            }

            // Materialize the (live) matching docs of this leaf into a bit set.
            BitDocIdSet.Builder builder = new BitDocIdSet.Builder(context.reader().maxDoc());
            builder.or(docs);
            bits = builder.build().bits();

            // Count how many docs are in our filtered set
            // TODO make this lazy-loaded only for those that need it?
            // NOTE(review): assumes numDocs starts at 0 when a filter is given - its declaration is not visible here.
            numDocs += bits.cardinality();
        }
        enums.add(new Holder(termsEnum, bits));
    }
    // The local list shadows the field of the same name; publish it as an array.
    this.enums = enums.toArray(new Holder[enums.size()]);
}
 
Example 16
Source Project: lucene-solr   File: CompletionQuery.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Inspects the {@link CompletionTerms} type of this query's field in every leaf and
 * rewrites accordingly: mixed types are rejected, a {@link ContextQuery} against a plain
 * {@code SuggestField} is rejected, and a non-context query against a
 * {@code ContextSuggestField} is wrapped in a {@link ContextQuery}. In every other case
 * the superclass rewrite is used.
 *
 * @throws IllegalStateException if the field has values of multiple types, or a context
 *         query is run against a non context-enabled field
 */
@Override
public Query rewrite(IndexReader reader) throws IOException {
  byte seenType = 0;
  boolean typeSeen = false;
  for (LeafReaderContext leaf : reader.leaves()) {
    final LeafReader segment = leaf.reader();
    Terms fieldTerms;
    try {
      fieldTerms = segment.terms(getField());
    } catch (IOException e) {
      // Leaves whose terms cannot be read are skipped.
      continue;
    }
    // Also covers the "no terms for this field" case, since null is never an instance.
    if (!(fieldTerms instanceof CompletionTerms)) {
      continue;
    }
    final byte leafType = ((CompletionTerms) fieldTerms).getType();
    if (!typeSeen) {
      seenType = leafType;
      typeSeen = true;
    } else if (seenType != leafType) {
      throw new IllegalStateException(getField() + " has values of multiple types");
    }
  }

  if (typeSeen) {
    final boolean contextAware = this instanceof ContextQuery;
    if (contextAware && seenType == SuggestField.TYPE) {
      throw new IllegalStateException(this.getClass().getSimpleName()
          + " can not be executed against a non context-enabled SuggestField: "
          + getField());
    }
    if (!contextAware && seenType == ContextSuggestField.TYPE) {
      return new ContextQuery(this);
    }
  }
  return super.rewrite(reader);
}
 
Example 17
Source Project: lucene-solr   File: SpellChecker.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Indexes the data from the given {@link Dictionary}.
 * <p>
 * Words shorter than three characters, and words already present in the current spell
 * index, are skipped. If anything fails before the writer has been closed, the writer is
 * rolled back so it is not left open, and the original failure is rethrown.
 *
 * @param dict Dictionary to index
 * @param config {@link IndexWriterConfig} to use
 * @param fullMerge whether or not the spellcheck index should be fully merged
 * @throws AlreadyClosedException if the Spellchecker is already closed
 * @throws IOException If there is a low-level I/O error.
 */
public final void indexDictionary(Dictionary dict, IndexWriterConfig config, boolean fullMerge) throws IOException {
  synchronized (modifyCurrentIndexLock) {
    ensureOpen();
    final Directory dir = this.spellIndex;
    final IndexWriter writer = new IndexWriter(dir, config);
    try {
      final IndexSearcher indexSearcher = obtainSearcher();
      try {
        // Read through the searcher acquired above, and do so inside the try block so that
        // it is released even if scanning the segments fails.
        final IndexReader reader = indexSearcher.getIndexReader();
        final List<TermsEnum> termsEnums = new ArrayList<>();
        if (reader.maxDoc() > 0) {
          for (final LeafReaderContext ctx : reader.leaves()) {
            final Terms terms = ctx.reader().terms(F_WORD);
            if (terms != null) {
              termsEnums.add(terms.iterator());
            }
          }
        }

        final BytesRefIterator iter = dict.getEntryIterator();
        BytesRef currentTerm;
        while ((currentTerm = iter.next()) != null) {
          final String word = currentTerm.utf8ToString();
          final int len = word.length();
          if (len < 3) {
            continue; // too short we bail but "too long" is fine...
          }
          if (isAlreadyIndexed(termsEnums, currentTerm)) {
            continue;
          }

          // ok index the word
          writer.addDocument(createDocument(word, getMin(len), getMax(len)));
        }
      } finally {
        releaseSearcher(indexSearcher);
      }
      if (fullMerge) {
        writer.forceMerge(1);
      }
      // TODO: this isn't that great, maybe in the future SpellChecker should take
      // IWC in its ctor / keep its writer open?
      writer.close();
    } catch (Throwable failure) {
      // Do not leave the writer open on failure: roll back (closes without committing, per
      // the IndexWriter documentation) and keep the original failure as the primary one.
      try {
        writer.rollback();
      } catch (Throwable cleanupFailure) {
        failure.addSuppressed(cleanupFailure);
      }
      throw failure;
    }

    // also re-open the spell index to see our own changes when the next suggestion
    // is fetched:
    swapSearcher(dir);
  }
}

/** Returns true if any of the given per-segment enums contains exactly {@code term}. */
private static boolean isAlreadyIndexed(List<TermsEnum> termsEnums, BytesRef term) throws IOException {
  for (TermsEnum te : termsEnums) {
    if (te.seekExact(term)) {
      return true;
    }
  }
  return false;
}
 
Example 18
Source Project: crate   File: VersionsAndSeqNoResolver.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Returns the calling thread's per-leaf lookup array for {@code reader}, creating and
 * caching it on first use. The cache is keyed by the reader's cache-helper key.
 *
 * @param reader   the top-level reader
 * @param uidField the uid field every lookup must have been built for
 * @return one lookup per leaf, indexed by leaf ordinal
 * @throws AssertionError if the cached array does not match the reader's leaf count or
 *         was built for a different uid field
 */
private static PerThreadIDVersionAndSeqNoLookup[] getLookupState(IndexReader reader, String uidField) throws IOException {
    // Per the original author: caching on the top-level reader gives entries a shorter
    // lifetime (possibly ~1s with the default refresh interval and steady indexing), but
    // proved cheaper than a CHM plus thread-local get for every segment.
    // See https://github.com/elastic/elasticsearch/pull/19856.
    final IndexReader.CacheHelper helper = reader.getReaderCacheHelper();
    CloseableThreadLocal<PerThreadIDVersionAndSeqNoLookup[]> perThread = LOOKUP_STATES.get(helper.getKey());
    if (perThread == null) {
        // First sighting of this reader: try to publish a fresh thread-local.
        final CloseableThreadLocal<PerThreadIDVersionAndSeqNoLookup[]> fresh = new CloseableThreadLocal<>();
        final CloseableThreadLocal<PerThreadIDVersionAndSeqNoLookup[]> winner = LOOKUP_STATES.putIfAbsent(helper.getKey(), fresh);
        if (winner != null) {
            // Another thread published first; use theirs.
            perThread = winner;
        } else {
            // Ours was published; make sure it is removed when the reader is closed.
            perThread = fresh;
            helper.addClosedListener(REMOVE_LOOKUP_STATE);
        }
    }

    PerThreadIDVersionAndSeqNoLookup[] state = perThread.get();
    if (state == null) {
        state = new PerThreadIDVersionAndSeqNoLookup[reader.leaves().size()];
        for (LeafReaderContext leaf : reader.leaves()) {
            state[leaf.ord] = new PerThreadIDVersionAndSeqNoLookup(leaf.reader(), uidField);
        }
        perThread.set(state);
    }

    final int leafCount = reader.leaves().size();
    if (state.length != leafCount) {
        throw new AssertionError("Mismatched numbers of leaves: " + state.length + " != " + leafCount);
    }

    if (state.length > 0 && !Objects.equals(state[0].uidField, uidField)) {
        throw new AssertionError("Index does not consistently use the same uid field: ["
                + uidField + "] != [" + state[0].uidField + "]");
    }

    return state;
}
 
Example 19
Source Project: lucene-solr   File: TestUtil.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Runs the CheckIndex tool on every leaf of the given reader by delegating to
 * {@code checkReader(leafReader, true)}. If any issues are hit, a RuntimeException
 * is thrown.
 *
 * @param reader the reader whose leaves are checked
 * @throws IOException if checking a leaf fails with an I/O error
 */
public static void checkReader(IndexReader reader) throws IOException {
  for (LeafReaderContext leaf : reader.leaves()) {
    checkReader(leaf.reader(), true);
  }
}
 
Example 20
/**
 * Runs {@code IndexSizeEstimator} against the raw reader of one core on a randomly chosen
 * Jetty node. It checks the full estimate (fields, types, summary, details), then a
 * sampled estimate against the full one, and finally that the reader's stored fields can
 * still be visited afterwards.
 */
@Test
public void testEstimator() throws Exception {
  JettySolrRunner jetty = cluster.getRandomJetty(random());
  // Takes whichever core name the container lists first.
  String randomCoreName = jetty.getCoreContainer().getAllCoreNames().iterator().next();
  SolrCore core = jetty.getCoreContainer().getCore(randomCoreName);
  RefCounted<SolrIndexSearcher> searcherRef = core.getSearcher();
  try {
    SolrIndexSearcher searcher = searcherRef.get();
    // limit the max length
    IndexSizeEstimator estimator = new IndexSizeEstimator(searcher.getRawReader(), 20, 50, true, true);
    IndexSizeEstimator.Estimate estimate = estimator.estimate();
    // Per-field sizes: one positive entry for each expected field.
    Map<String, Long> fieldsBySize = estimate.getFieldsBySize();
    assertFalse("empty fieldsBySize", fieldsBySize.isEmpty());
    assertEquals(fieldsBySize.toString(), fields.size(), fieldsBySize.size());
    fieldsBySize.forEach((k, v) -> assertTrue("unexpected size of " + k + ": " + v, v > 0));
    // Per-type sizes: at least 8 types, all positive.
    Map<String, Long> typesBySize = estimate.getTypesBySize();
    assertFalse("empty typesBySize", typesBySize.isEmpty());
    assertTrue("expected at least 8 types: " + typesBySize.toString(), typesBySize.size() >= 8);
    typesBySize.forEach((k, v) -> assertTrue("unexpected size of " + k + ": " + v, v > 0));
    // The summary is expected to have one key per field.
    Map<String, Object> summary = estimate.getSummary();
    assertNotNull("summary", summary);
    assertFalse("empty summary", summary.isEmpty());
    assertEquals(summary.keySet().toString(), fields.size(), summary.keySet().size());
    Map<String, Object> details = estimate.getDetails();
    assertNotNull("details", details);
    assertFalse("empty details", details.isEmpty());
    // by type
    assertEquals(details.keySet().toString(), 6, details.keySet().size());

    // check sampling
    estimator.setSamplingThreshold(searcher.getRawReader().maxDoc() / 2);
    IndexSizeEstimator.Estimate sampledEstimate = estimator.estimate();
    Map<String, Long> sampledFieldsBySize = sampledEstimate.getFieldsBySize();
    assertFalse("empty fieldsBySize", sampledFieldsBySize.isEmpty());
    // verify that the sampled values are within 50% of the original values
    fieldsBySize.forEach((field, size) -> {
      Long sampledSize = sampledFieldsBySize.get(field);
      assertNotNull("sampled size for " + field + " is missing in " + sampledFieldsBySize, sampledSize);
      double delta = (double) size * 0.5;
      assertEquals("sampled size of " + field + " is wildly off", (double)size, (double)sampledSize, delta);
    });
    // verify the reader is still usable - SOLR-13694
    IndexReader reader = searcher.getRawReader();
    for (LeafReaderContext context : reader.leaves()) {
      LeafReader leafReader = context.reader();
      assertTrue("unexpected LeafReader class: " + leafReader.getClass().getName(), leafReader instanceof CodecReader);
      Bits liveDocs = leafReader.getLiveDocs();
      CodecReader codecReader = (CodecReader) leafReader;
      StoredFieldsReader storedFieldsReader = codecReader.getFieldsReader();
      StoredFieldVisitor visitor = new DocumentStoredFieldVisitor();
      assertNotNull(storedFieldsReader);
      // Visit the stored fields of every live document; deleted documents are skipped.
      for (int docId = 0; docId < leafReader.maxDoc(); docId++) {
        if (liveDocs != null && !liveDocs.get(docId)) {
          continue;
        }
        storedFieldsReader.visitDocument(docId, visitor);
      }
    }
  } finally {
    // Release the searcher reference and the core obtained at the top of the test.
    searcherRef.decref();
    core.close();
  }
}