Java Code Examples for org.apache.lucene.index.IndexReader#numDocs()

The following examples show how to use org.apache.lucene.index.IndexReader#numDocs(). All of them are taken from open-source projects; the source file, project, and license are noted above each example.
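Before diving into the project examples, a quick orientation: numDocs() returns the number of live (non-deleted) documents in the index, while maxDoc() also counts deletions. A minimal, self-contained sketch (the index path is a placeholder):

import java.nio.file.Paths;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.store.FSDirectory;

public class NumDocsDemo {
    public static void main(String[] args) throws Exception {
        // Open a reader over an existing index; "/path/to/index" is a placeholder.
        try (FSDirectory dir = FSDirectory.open(Paths.get("/path/to/index"));
             DirectoryReader reader = DirectoryReader.open(dir)) {
            // numDocs() excludes deleted documents; maxDoc() is the upper bound
            // on document ids and therefore includes them.
            System.out.println("live docs:    " + reader.numDocs());
            System.out.println("maxDoc:       " + reader.maxDoc());
            System.out.println("deleted docs: " + reader.numDeletedDocs());
        }
    }
}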
Example 1
Source File: HasChildQueryParser.java    From Elasticsearch with Apache License 2.0
@Override
public Query rewrite(IndexReader reader) throws IOException {
    if (getBoost() != 1.0F) {
        return super.rewrite(reader);
    }
    if (reader instanceof DirectoryReader) {
        String joinField = ParentFieldMapper.joinField(parentType);
        IndexSearcher indexSearcher = new IndexSearcher(reader);
        indexSearcher.setQueryCache(null);
        indexSearcher.setSimilarity(similarity);
        IndexParentChildFieldData indexParentChildFieldData = parentChildIndexFieldData.loadGlobal((DirectoryReader) reader);
        MultiDocValues.OrdinalMap ordinalMap = ParentChildIndexFieldData.getOrdinalMap(indexParentChildFieldData, parentType);
        return JoinUtil.createJoinQuery(joinField, innerQuery, toQuery, indexSearcher, scoreMode, ordinalMap, minChildren, maxChildren);
    } else {
        if (reader.leaves().isEmpty() && reader.numDocs() == 0) {
            // An asserting reader passes down a MultiReader during rewrite, which would
            // blow up here because this query needs a DirectoryReader to load global
            // ordinals. If the reader has no leaves and no documents, we simply rewrite
            // to match nothing.
            return new MatchNoDocsQuery();
        }
        throw new IllegalStateException("can't load global ordinals for reader of type: " + reader.getClass() + " must be a DirectoryReader");
    }
}
 
Example 2
Source File: LuceneTranslationMemory.java    From modernmt with Apache License 2.0
@Override
public void dump(long memory, Consumer<Entry> consumer) throws IOException {
    IndexSearcher searcher = getIndexSearcher();
    IndexReader reader = getIndexReader();

    int size = reader.numDocs();
    if (size == 0)
        return;

    Query memoryQuery = new TermQuery(documentBuilder.makeMemoryTerm(memory));
    TopDocs docs = searcher.search(memoryQuery, size);

    for (ScoreDoc scoreDoc : docs.scoreDocs) {
        Document document = reader.document(scoreDoc.doc);
        if (documentBuilder.getMemory(document) > 0) {
            TranslationMemory.Entry entry = documentBuilder.asEntry(document);
            consumer.accept(entry);
        }
    }
}
 
Example 3
Source File: LuceneTranslationMemory.java    From modernmt with Apache License 2.0
@Override
public void dumpAll(Consumer<Entry> consumer) throws IOException {
    IndexSearcher searcher = getIndexSearcher();
    IndexReader reader = getIndexReader();

    int size = reader.numDocs();
    if (size == 0)
        return;

    TopDocs docs = searcher.search(new MatchAllDocsQuery(), size);

    for (ScoreDoc scoreDoc : docs.scoreDocs) {
        Document document = reader.document(scoreDoc.doc);
        if (documentBuilder.getMemory(document) > 0) {
            TranslationMemory.Entry entry = documentBuilder.asEntry(document);
            consumer.accept(entry);
        }
    }
}
 
Example 4
Source File: IndexInfo.java    From alfresco-repository with GNU Lesser General Public License v3.0
public int getNumberOfDocuments() throws IOException
{
    IndexReader reader = getMainIndexReferenceCountingReadOnlyIndexReader();
    try
    {
        return reader.numDocs();
    }
    finally
    {
        reader.close();
    }
}
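Since IndexReader implements Closeable, the same method can be written more compactly with try-with-resources; a minimal variant of the example above:

public int getNumberOfDocuments() throws IOException
{
    // try-with-resources closes the reader even if numDocs() were to fail
    try (IndexReader reader = getMainIndexReferenceCountingReadOnlyIndexReader())
    {
        return reader.numDocs();
    }
}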
 
Example 5
Source File: SignificantStringTermsAggregator.java    From Elasticsearch with Apache License 2.0
@Override
public SignificantStringTerms buildEmptyAggregation() {
    // We need to account for the significance of a miss in our global stats - provide corpus size as context
    ContextIndexSearcher searcher = context.searchContext().searcher();
    IndexReader topReader = searcher.getIndexReader();
    int supersetSize = topReader.numDocs();
    return new SignificantStringTerms(0, supersetSize, name, bucketCountThresholds.getRequiredSize(),
            bucketCountThresholds.getMinDocCount(), termsAggFactory.getSignificanceHeuristic(),
            Collections.<InternalSignificantTerms.Bucket> emptyList(), pipelineAggregators(), metaData());
}
 
Example 6
Source File: GlobalOrdinalsSignificantTermsAggregator.java    From Elasticsearch with Apache License 2.0
@Override
public SignificantStringTerms buildEmptyAggregation() {
    // We need to account for the significance of a miss in our global stats - provide corpus size as context
    ContextIndexSearcher searcher = context.searchContext().searcher();
    IndexReader topReader = searcher.getIndexReader();
    int supersetSize = topReader.numDocs();
    return new SignificantStringTerms(0, supersetSize, name, bucketCountThresholds.getRequiredSize(),
            bucketCountThresholds.getMinDocCount(), termsAggFactory.getSignificanceHeuristic(),
            Collections.<InternalSignificantTerms.Bucket> emptyList(), pipelineAggregators(), metaData());
}
 
Example 7
Source File: SignificantLongTermsAggregator.java    From Elasticsearch with Apache License 2.0
@Override
public SignificantLongTerms buildEmptyAggregation() {
    // We need to account for the significance of a miss in our global stats - provide corpus size as context
    ContextIndexSearcher searcher = context.searchContext().searcher();
    IndexReader topReader = searcher.getIndexReader();
    int supersetSize = topReader.numDocs();
    return new SignificantLongTerms(0, supersetSize, name, formatter, bucketCountThresholds.getRequiredSize(),
            bucketCountThresholds.getMinDocCount(), termsAggFactory.getSignificanceHeuristic(),
            Collections.<InternalSignificantTerms.Bucket> emptyList(), pipelineAggregators(), metaData());
}
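Examples 5 through 7 share one pattern: numDocs() on the top-level reader supplies the corpus ("superset") size that the significance heuristic uses as context, counting live documents across the entire index rather than a single segment.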
 
Example 8
Source File: TestSpellChecker.java    From lucene-solr with Apache License 2.0
private int numdoc() throws IOException {
  IndexReader rs = DirectoryReader.open(spellindex);
  int num = rs.numDocs();
  assertTrue(num != 0);
  //System.out.println("num docs: " + num);
  rs.close();
  return num;
}
 
Example 9
Source File: TestFieldCacheTermsFilter.java    From lucene-solr with Apache License 2.0
public void testMissingTerms() throws Exception {
  String fieldName = "field1";
  Directory rd = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), rd);
  for (int i = 0; i < 100; i++) {
    Document doc = new Document();
    int term = i * 10; //terms are units of 10;
    doc.add(newStringField(fieldName, "" + term, Field.Store.YES));
    doc.add(new SortedDocValuesField(fieldName, new BytesRef("" + term)));
    w.addDocument(doc);
  }
  IndexReader reader = w.getReader();
  w.close();

  IndexSearcher searcher = newSearcher(reader);
  int numDocs = reader.numDocs();
  ScoreDoc[] results;

  List<String> terms = new ArrayList<>();
  terms.add("5");
  results = searcher.search(new DocValuesTermsQuery(fieldName,  terms.toArray(new String[0])), numDocs).scoreDocs;
  assertEquals("Must match nothing", 0, results.length);

  terms = new ArrayList<>();
  terms.add("10");
  results = searcher.search(new DocValuesTermsQuery(fieldName,  terms.toArray(new String[0])), numDocs).scoreDocs;
  assertEquals("Must match 1", 1, results.length);

  terms = new ArrayList<>();
  terms.add("10");
  terms.add("20");
  results = searcher.search(new DocValuesTermsQuery(fieldName,  terms.toArray(new String[0])), numDocs).scoreDocs;
  assertEquals("Must match 2", 2, results.length);

  reader.close();
  rd.close();
}
 
Example 10
Source File: OLuceneIndexManagerAbstract.java    From orientdb-lucene with Apache License 2.0
public long size(final ValuesTransformer<V> transformer) {
    IndexReader reader;
    try {
        reader = getSearcher().getIndexReader();
    } catch (IOException e) {
        OLogManager.instance().error(this, "Error on getting size of Lucene index", e);
        // Without a reader there is nothing to count; report an empty index
        // rather than dereferencing a null reader below.
        return 0;
    }
    return reader.numDocs();
}
 
Example 11
Source File: DocFreq.java    From lumongo with Apache License 2.0
public DocFreq(IndexReader indexReader, String field) {
	this.indexReader = indexReader;
	this.field = field;
	this.docFreqMap = new HashMap<>();
	this.similarity = new ClassicSimilarity();
	this.numDocs = indexReader.numDocs();
}
 
Example 12
Source File: TermQuery.java    From alfresco-repository with GNU Lesser General Public License v3.0
public Explanation explain(IndexReader reader, int doc)
  throws IOException {

  ComplexExplanation result = new ComplexExplanation();
  result.setDescription("weight("+getQuery()+" in "+doc+"), product of:");

  Explanation idfExpl =
    new Explanation(idf, "idf(docFreq=" + reader.docFreq(term) +
        ", numDocs=" + reader.numDocs() + ")");

  // explain query weight
  Explanation queryExpl = new Explanation();
  queryExpl.setDescription("queryWeight(" + getQuery() + "), product of:");

  Explanation boostExpl = new Explanation(getBoost(), "boost");
  if (getBoost() != 1.0f)
    queryExpl.addDetail(boostExpl);
  queryExpl.addDetail(idfExpl);

  Explanation queryNormExpl = new Explanation(queryNorm,"queryNorm");
  queryExpl.addDetail(queryNormExpl);

  queryExpl.setValue(boostExpl.getValue() *
                     idfExpl.getValue() *
                     queryNormExpl.getValue());

  result.addDetail(queryExpl);

  // explain field weight
  String field = term.field();
  ComplexExplanation fieldExpl = new ComplexExplanation();
  fieldExpl.setDescription("fieldWeight("+term+" in "+doc+
                           "), product of:");

  Explanation tfExpl = scorer(reader).explain(doc);
  fieldExpl.addDetail(tfExpl);
  fieldExpl.addDetail(idfExpl);

  Explanation fieldNormExpl = new Explanation();
  byte[] fieldNorms = reader.norms(field);
  float fieldNorm =
    fieldNorms!=null ? Similarity.decodeNorm(fieldNorms[doc]) : 0.0f;
  fieldNormExpl.setValue(fieldNorm);
  fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")");
  fieldExpl.addDetail(fieldNormExpl);
  
  fieldExpl.setMatch(Boolean.valueOf(tfExpl.isMatch()));
  fieldExpl.setValue(tfExpl.getValue() *
                     idfExpl.getValue() *
                     fieldNormExpl.getValue());

  result.addDetail(fieldExpl);
  result.setMatch(fieldExpl.getMatch());
  
  // combine them
  result.setValue(queryExpl.getValue() * fieldExpl.getValue());

  if (queryExpl.getValue() == 1.0f)
    return fieldExpl;

  return result;
}
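For context, the idf explanation assembled above follows Lucene's classic TF-IDF scoring, in which numDocs() supplies the corpus size. A rough sketch of the classic formulation (the exact formula varies slightly between Lucene versions, so treat this as illustrative):

// Roughly the idf of the classic DefaultSimilarity: rarer terms (lower
// docFreq relative to numDocs) receive a larger weight.
static float idf(int docFreq, int numDocs) {
    return (float) (Math.log(numDocs / (double) (docFreq + 1)) + 1.0);
}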
 
Example 13
Source File: TermInSetQueryTest.java    From lucene-solr with Apache License 2.0
public void testDuel() throws IOException {
  final int iters = atLeast(2);
  final String field = "f";
  for (int iter = 0; iter < iters; ++iter) {
    final List<BytesRef> allTerms = new ArrayList<>();
    final int numTerms = TestUtil.nextInt(random(), 1, 1 << TestUtil.nextInt(random(), 1, 10));
    for (int i = 0; i < numTerms; ++i) {
      final String value = TestUtil.randomAnalysisString(random(), 10, true);
      allTerms.add(new BytesRef(value));
    }
    Directory dir = newDirectory();
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
    final int numDocs = atLeast(100);
    for (int i = 0; i < numDocs; ++i) {
      Document doc = new Document();
      final BytesRef term = allTerms.get(random().nextInt(allTerms.size()));
      doc.add(new StringField(field, term, Store.NO));
      iw.addDocument(doc);
    }
    if (numTerms > 1 && random().nextBoolean()) {
      iw.deleteDocuments(new TermQuery(new Term(field, allTerms.get(0))));
    }
    iw.commit();
    final IndexReader reader = iw.getReader();
    final IndexSearcher searcher = newSearcher(reader);
    iw.close();

    if (reader.numDocs() == 0) {
      // may occasionally happen if all documents got the same term
      IOUtils.close(reader, dir);
      continue;
    }

    for (int i = 0; i < 100; ++i) {
      final float boost = random().nextFloat() * 10;
      final int numQueryTerms = TestUtil.nextInt(random(), 1, 1 << TestUtil.nextInt(random(), 1, 8));
      List<BytesRef> queryTerms = new ArrayList<>();
      for (int j = 0; j < numQueryTerms; ++j) {
        queryTerms.add(allTerms.get(random().nextInt(allTerms.size())));
      }
      final BooleanQuery.Builder bq = new BooleanQuery.Builder();
      for (BytesRef t : queryTerms) {
        bq.add(new TermQuery(new Term(field, t)), Occur.SHOULD);
      }
      final Query q1 = new ConstantScoreQuery(bq.build());
      final Query q2 = new TermInSetQuery(field, queryTerms);
      assertSameMatches(searcher, new BoostQuery(q1, boost), new BoostQuery(q2, boost), true);
    }

    reader.close();
    dir.close();
  }
}
 
Example 14
Source File: TestMultiTermConstantScore.java    From lucene-solr with Apache License 2.0
@Test
public void testRangeQueryRand() throws IOException {
  // NOTE: uses index build in *super* setUp

  IndexReader reader = signedIndexReader;
  IndexSearcher search = newSearcher(reader);

  String minRP = pad(signedIndexDir.minR);
  String maxRP = pad(signedIndexDir.maxR);

  int numDocs = reader.numDocs();

  assertEquals("num of docs", numDocs, 1 + maxId - minId);

  ScoreDoc[] result;

  // test extremes, bounded on both ends

  result = search.search(csrq("rand", minRP, maxRP, T, T), numDocs).scoreDocs;
  assertEquals("find all", numDocs, result.length);

  result = search.search(csrq("rand", minRP, maxRP, T, F), numDocs).scoreDocs;
  assertEquals("all but biggest", numDocs - 1, result.length);

  result = search.search(csrq("rand", minRP, maxRP, F, T), numDocs).scoreDocs;
  assertEquals("all but smallest", numDocs - 1, result.length);

  result = search.search(csrq("rand", minRP, maxRP, F, F), numDocs).scoreDocs;
  assertEquals("all but extremes", numDocs - 2, result.length);

  // unbounded

  result = search.search(csrq("rand", minRP, null, T, F), numDocs).scoreDocs;
  assertEquals("smallest and up", numDocs, result.length);

  result = search.search(csrq("rand", null, maxRP, F, T), numDocs).scoreDocs;
  assertEquals("biggest and down", numDocs, result.length);

  result = search.search(csrq("rand", minRP, null, F, F), numDocs).scoreDocs;
  assertEquals("not smallest, but up", numDocs - 1, result.length);

  result = search.search(csrq("rand", null, maxRP, F, F), numDocs).scoreDocs;
  assertEquals("not biggest, but down", numDocs - 1, result.length);

  // very small sets

  result = search.search(csrq("rand", minRP, minRP, F, F), numDocs).scoreDocs;
  assertEquals("min,min,F,F", 0, result.length);
  result = search.search(csrq("rand", maxRP, maxRP, F, F), numDocs).scoreDocs;
  assertEquals("max,max,F,F", 0, result.length);

  result = search.search(csrq("rand", minRP, minRP, T, T), numDocs).scoreDocs;
  assertEquals("min,min,T,T", 1, result.length);
  result = search.search(csrq("rand", null, minRP, F, T), numDocs).scoreDocs;
  assertEquals("nul,min,F,T", 1, result.length);

  result = search.search(csrq("rand", maxRP, maxRP, T, T), numDocs).scoreDocs;
  assertEquals("max,max,T,T", 1, result.length);
  result = search.search(csrq("rand", maxRP, null, T, F), numDocs).scoreDocs;
  assertEquals("max,nul,T,T", 1, result.length);
}
 
Example 15
Source File: QueryAutoStopWordAnalyzer.java    From lucene-solr with Apache License 2.0
/**
 * Creates a new QueryAutoStopWordAnalyzer with stopwords calculated for the
 * given selection of fields from terms with a document frequency percentage
 * greater than the given maxPercentDocs
 *
 * @param delegate Analyzer whose TokenStream will be filtered
 * @param indexReader IndexReader to identify the stopwords from
 * @param fields Selection of fields to calculate stopwords for
 * @param maxPercentDocs The maximum percentage (between 0.0 and 1.0) of index documents which
 *                      contain a term, after which the word is considered to be a stop word
 * @throws IOException Can be thrown while reading from the IndexReader
 */
public QueryAutoStopWordAnalyzer(
    Analyzer delegate,
    IndexReader indexReader,
    Collection<String> fields,
    float maxPercentDocs) throws IOException {
  this(delegate, indexReader, fields, (int) (indexReader.numDocs() * maxPercentDocs));
}
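A brief usage sketch of this constructor; the StandardAnalyzer delegate, the "body" field, and the 0.4 threshold are illustrative assumptions:

// Terms occurring in more than 40% of the documents in the "body" field
// are treated as stop words by the wrapped analyzer.
Analyzer delegate = new StandardAnalyzer();
Analyzer analyzer = new QueryAutoStopWordAnalyzer(
    delegate, indexReader, Collections.singleton("body"), 0.4f);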
 
Example 16
Source File: LuceneHelper.java    From dexter with Apache License 2.0
/**
 * @return the number of documents indexed
 */
public int numDocs() {
	IndexReader reader = getReader();
	return reader.numDocs();
}