Java Code Examples for org.apache.lucene.index.Terms#getDocCount()

The following examples show how to use org.apache.lucene.index.Terms#getDocCount(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: SimpleNaiveBayesClassifier.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Counts the documents in the index that carry at least one value for the 'class' field.
 *
 * <p>The per-field document count is used when it is available; otherwise the count is
 * obtained by running a wildcard query over the class field (combined with {@code query}
 * when one is set).
 *
 * @return the number of documents having a value for the 'class' field
 * @throws IOException if reading the field's terms or running the fallback search fails
 */
protected int countDocsWithClass() throws IOException {
  final Terms classTerms = MultiTerms.getTerms(this.indexReader, this.classFieldName);
  if (classTerms != null) {
    final int storedCount = classTerms.getDocCount();
    if (storedCount != -1) {
      return storedCount;
    }
  }

  // No terms for the field, or getDocCount() reported -1: count hits of a wildcard query instead.
  final Term anyClassValue = new Term(classFieldName, String.valueOf(WildcardQuery.WILDCARD_STRING));
  final BooleanQuery.Builder fallbackQuery = new BooleanQuery.Builder();
  fallbackQuery.add(new BooleanClause(new WildcardQuery(anyClassValue), BooleanClause.Occur.MUST));
  if (query != null) {
    fallbackQuery.add(query, BooleanClause.Occur.MUST);
  }

  final TotalHitCountCollector hitCounter = new TotalHitCountCollector();
  indexSearcher.search(fallbackQuery.build(), hitCounter);
  return hitCounter.getTotalHits();
}
 
Example 2
Source File: DateFieldMapper.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@link FieldStats.Date} from the statistics exposed by {@code terms}.
 *
 * @param terms  the terms of the field to summarize
 * @param maxDoc passed through unchanged to the resulting stats object
 * @return date field statistics, including the min/max values read via {@code NumericUtils}
 * @throws IOException if reading any statistic from {@code terms} fails
 */
@Override
public FieldStats stats(Terms terms, int maxDoc) throws IOException {
    // Read every statistic into a named local first, in the same order the values are consumed.
    final long earliest = NumericUtils.getMinLong(terms);
    final long latest = NumericUtils.getMaxLong(terms);
    final int docCount = terms.getDocCount();
    final long sumDocFreq = terms.getSumDocFreq();
    final long sumTotalTermFreq = terms.getSumTotalTermFreq();
    return new FieldStats.Date(
        maxDoc, docCount, sumDocFreq, sumTotalTermFreq, earliest, latest, dateTimeFormatter()
    );
}
 
Example 3
Source File: ShortFieldMapper.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@link FieldStats.Long} from the statistics exposed by {@code terms}.
 *
 * @param terms  the terms of the field to summarize
 * @param maxDoc passed through unchanged to the resulting stats object
 * @return integral field statistics; min/max are read as ints and widened to long
 * @throws IOException if reading any statistic from {@code terms} fails
 */
@Override
public FieldStats stats(Terms terms, int maxDoc) throws IOException {
    final long lowest = NumericUtils.getMinInt(terms);   // int result widened to long
    final long highest = NumericUtils.getMaxInt(terms);  // int result widened to long
    final int docCount = terms.getDocCount();
    final long sumDocFreq = terms.getSumDocFreq();
    final long sumTotalTermFreq = terms.getSumTotalTermFreq();
    return new FieldStats.Long(maxDoc, docCount, sumDocFreq, sumTotalTermFreq, lowest, highest);
}
 
Example 4
Source File: FloatFieldMapper.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@link FieldStats.Float} from the statistics exposed by {@code terms}.
 *
 * @param terms  the terms of the field to summarize
 * @param maxDoc passed through unchanged to the resulting stats object
 * @return float field statistics
 * @throws IOException if reading any statistic from {@code terms} fails
 */
@Override
public FieldStats stats(Terms terms, int maxDoc) throws IOException {
    // Min/max are read as ints and then converted with sortableIntToFloat.
    final int encodedMin = NumericUtils.getMinInt(terms);
    final float lowest = NumericUtils.sortableIntToFloat(encodedMin);
    final int encodedMax = NumericUtils.getMaxInt(terms);
    final float highest = NumericUtils.sortableIntToFloat(encodedMax);

    final int docCount = terms.getDocCount();
    final long sumDocFreq = terms.getSumDocFreq();
    final long sumTotalTermFreq = terms.getSumTotalTermFreq();
    return new FieldStats.Float(maxDoc, docCount, sumDocFreq, sumTotalTermFreq, lowest, highest);
}
 
Example 5
Source File: DoubleFieldMapper.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@link FieldStats.Double} from the statistics exposed by {@code terms}.
 *
 * @param terms  the terms of the field to summarize
 * @param maxDoc passed through unchanged to the resulting stats object
 * @return double field statistics
 * @throws IOException if reading any statistic from {@code terms} fails
 */
@Override
public FieldStats stats(Terms terms, int maxDoc) throws IOException {
    // Min/max are read as longs and then converted with sortableLongToDouble.
    final long encodedMin = NumericUtils.getMinLong(terms);
    final double lowest = NumericUtils.sortableLongToDouble(encodedMin);
    final long encodedMax = NumericUtils.getMaxLong(terms);
    final double highest = NumericUtils.sortableLongToDouble(encodedMax);

    final int docCount = terms.getDocCount();
    final long sumDocFreq = terms.getSumDocFreq();
    final long sumTotalTermFreq = terms.getSumTotalTermFreq();
    return new FieldStats.Double(maxDoc, docCount, sumDocFreq, sumTotalTermFreq, lowest, highest);
}
 
Example 6
Source File: IntegerFieldMapper.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@link FieldStats.Long} from the statistics exposed by {@code terms}.
 *
 * @param terms  the terms of the field to summarize
 * @param maxDoc passed through unchanged to the resulting stats object
 * @return integral field statistics; min/max are read as ints and widened to long
 * @throws IOException if reading any statistic from {@code terms} fails
 */
@Override
public FieldStats stats(Terms terms, int maxDoc) throws IOException {
    final long smallest = NumericUtils.getMinInt(terms);  // int result widened to long
    final long largest = NumericUtils.getMaxInt(terms);   // int result widened to long
    final int docsWithField = terms.getDocCount();
    final long docFreqTotal = terms.getSumDocFreq();
    final long termFreqTotal = terms.getSumTotalTermFreq();
    return new FieldStats.Long(maxDoc, docsWithField, docFreqTotal, termFreqTotal, smallest, largest);
}
 
Example 7
Source File: ByteFieldMapper.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@link FieldStats.Long} from the statistics exposed by {@code terms}.
 *
 * @param terms  the terms of the field to summarize
 * @param maxDoc passed through unchanged to the resulting stats object
 * @return integral field statistics; min/max are read as ints and widened to long
 * @throws IOException if reading any statistic from {@code terms} fails
 */
@Override
public FieldStats stats(Terms terms, int maxDoc) throws IOException {
    final long floor = NumericUtils.getMinInt(terms);    // int result widened to long
    final long ceiling = NumericUtils.getMaxInt(terms);  // int result widened to long
    final int docCount = terms.getDocCount();
    final long sumDocFreq = terms.getSumDocFreq();
    final long sumTotalTermFreq = terms.getSumTotalTermFreq();
    return new FieldStats.Long(maxDoc, docCount, sumDocFreq, sumTotalTermFreq, floor, ceiling);
}
 
Example 8
Source File: LongFieldMapper.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@link FieldStats.Long} from the statistics exposed by {@code terms}.
 *
 * @param terms  the terms of the field to summarize
 * @param maxDoc passed through unchanged to the resulting stats object
 * @return long field statistics, including the min/max values read via {@code NumericUtils}
 * @throws IOException if reading any statistic from {@code terms} fails
 */
@Override
public FieldStats stats(Terms terms, int maxDoc) throws IOException {
    final long lowerBound = NumericUtils.getMinLong(terms);
    final long upperBound = NumericUtils.getMaxLong(terms);
    final int docCount = terms.getDocCount();
    final long sumDocFreq = terms.getSumDocFreq();
    final long sumTotalTermFreq = terms.getSumTotalTermFreq();
    return new FieldStats.Long(maxDoc, docCount, sumDocFreq, sumTotalTermFreq, lowerBound, upperBound);
}
 
Example 9
Source File: SimpleNaiveBayesDocumentClassifier.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the average number of unique terms per document in the given field, multiplied by
 * the number of docs belonging to the input class.
 *
 * @param term      the class term
 * @param fieldName the field whose term statistics are used for the average
 * @return the average number of unique terms times the number of docs with the class, or
 *         {@code 0} when the field has no terms or no usable document count
 * @throws java.io.IOException If there is a low-level I/O error
 */
private double getTextTermFreqForClass(Term term, String fieldName) throws IOException {
  Terms terms = MultiTerms.getTerms(indexReader, fieldName);
  if (terms == null) {
    // The field has no terms in this index: nothing to average, and dereferencing would NPE.
    return 0d;
  }
  int docCount = terms.getDocCount();
  if (docCount <= 0) {
    // 0 would yield NaN/Infinity and -1 (statistic unavailable) a negative average.
    return 0d;
  }
  long numPostings = terms.getSumDocFreq(); // number of term/doc pairs
  double avgNumberOfUniqueTerms = numPostings / (double) docCount; // avg # of unique terms per doc
  int docsWithC = indexReader.docFreq(term);
  return avgNumberOfUniqueTerms * docsWithC; // avg # of unique terms in text fields per doc * # docs with c
}
 
Example 10
Source File: SimpleNaiveBayesClassifier.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the average number of unique terms per document, summed over all text fields,
 * multiplied by the number of docs belonging to the input class.
 *
 * <p>Text fields that have no terms in the index, or that report no usable document count,
 * contribute nothing to the sum instead of failing or distorting the result.
 *
 * @param term the term representing the class
 * @return the summed average number of unique terms times the number of docs with the class
 * @throws IOException if a low level I/O problem happens
 */
private double getTextTermFreqForClass(Term term) throws IOException {
  double avgNumberOfUniqueTerms = 0;
  for (String textFieldName : textFieldNames) {
    Terms terms = MultiTerms.getTerms(indexReader, textFieldName);
    if (terms == null) {
      continue; // field has no terms in this index; dereferencing would NPE
    }
    int docCount = terms.getDocCount();
    if (docCount <= 0) {
      continue; // 0 would add NaN/Infinity, -1 (statistic unavailable) a negative average
    }
    long numPostings = terms.getSumDocFreq(); // number of term/doc pairs
    avgNumberOfUniqueTerms += numPostings / (double) docCount; // avg # of unique terms per doc
  }
  int docsWithC = indexReader.docFreq(term);
  return avgNumberOfUniqueTerms * docsWithC; // avg # of unique terms in text fields per doc * # docs with c
}
 
Example 11
Source File: MappedFieldType.java    From Elasticsearch with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the {@link FieldStats} for this field from the provided {@link Terms} instance.
 *
 * @param terms  the terms of the field to summarize
 * @param maxDoc passed through unchanged to the resulting stats object
 * @return a {@link FieldStats.Text} carrying the doc count, the two frequency sums and the
 *         min/max terms reported by {@code terms}
 * @throws IOException if reading any statistic from {@code terms} fails
 */
public FieldStats stats(Terms terms, int maxDoc) throws IOException {
    final int docCount = terms.getDocCount();
    final long sumDocFreq = terms.getSumDocFreq();
    final long sumTotalTermFreq = terms.getSumTotalTermFreq();
    // Min and max terms are fetched last, in that order, directly in the constructor call.
    return new FieldStats.Text(maxDoc, docCount, sumDocFreq, sumTotalTermFreq, terms.getMin(), terms.getMax());
}
 
Example 12
Source File: DocIdSetBuilder.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/** Create a {@link DocIdSetBuilder} instance that is optimized for
 *  accumulating docs that match the given {@link Terms}.
 *
 *  <p>This is a convenience constructor: it delegates to another constructor of this
 *  class, passing {@code maxDoc} together with {@code terms.getDocCount()} and
 *  {@code terms.getSumDocFreq()}.
 *
 *  @param maxDoc forwarded unchanged to the delegated constructor
 *  @param terms  source of the doc-count and sum-doc-freq statistics; it is dereferenced
 *                here, so it must not be {@code null}
 *  @throws IOException if reading the statistics from {@code terms} fails
 */
public DocIdSetBuilder(int maxDoc, Terms terms) throws IOException {
  // The delegation must stay the first statement of the constructor.
  this(maxDoc, terms.getDocCount(), terms.getSumDocFreq());
}