Java Code Examples for org.apache.lucene.index.Terms

The following examples show how to use org.apache.lucene.index.Terms. These examples are extracted from open source projects; where available, the source project, source file, and license are noted above each example.
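
All of the examples share the same basic access pattern: a Terms instance represents the term dictionary of a single field, it is obtained from an IndexReader (or a per-segment LeafReader), it must be null-checked because the field may not exist or may have no indexed terms, and it is walked with a TermsEnum. The minimal sketch below illustrates that pattern; it assumes a recent Lucene version where MultiTerms.getTerms is available (older code uses MultiFields.getTerms, as in Example 1), and the index path and field name are placeholders, not values taken from the examples.

import java.io.IOException;
import java.nio.file.Paths;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiTerms;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;

public class TermsWalkSketch {
  public static void main(String[] args) throws IOException {
    // "/path/to/index" and "myField" are placeholders for a real index and field name.
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("/path/to/index")))) {
      // Merge the per-segment term dictionaries of one field into a single view.
      Terms terms = MultiTerms.getTerms(reader, "myField");
      if (terms == null) {
        return; // field does not exist or has no indexed terms
      }
      TermsEnum termsEnum = terms.iterator();
      BytesRef term;
      while ((term = termsEnum.next()) != null) {
        System.out.println(term.utf8ToString() + " docFreq=" + termsEnum.docFreq());
      }
    }
  }
}
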
Example 1
@Override
protected FieldStatsShardResponse shardOperation(FieldStatsShardRequest request) {
    ShardId shardId = request.shardId();
    Map<String, FieldStats> fieldStats = new HashMap<>();
    IndexService indexServices = indicesService.indexServiceSafe(shardId.getIndex());
    MapperService mapperService = indexServices.mapperService();
    IndexShard shard = indexServices.shardSafe(shardId.id());
    try (Engine.Searcher searcher = shard.acquireSearcher("fieldstats")) {
        for (String field : request.getFields()) {
            MappedFieldType fieldType = mapperService.fullName(field);
            if (fieldType != null) {
                IndexReader reader = searcher.reader();
                Terms terms = MultiFields.getTerms(reader, field);
                if (terms != null) {
                    fieldStats.put(field, fieldType.stats(terms, reader.maxDoc()));
                }
            } else {
                throw new IllegalArgumentException("field [" + field + "] doesn't exist");
            }
        }
    } catch (IOException e) {
        throw ExceptionsHelper.convertToElastic(e);
    }
    return new FieldStatsShardResponse(shardId, fieldStats);
}
 
Example 2
Source Project: lucene-solr   Source File: SrndTermQuery.java    License: Apache License 2.0
@Override
public void visitMatchingTerms(
  IndexReader reader,
  String fieldName,
  MatchingTermVisitor mtv) throws IOException
{
  /* check term presence in index here for symmetry with other SimpleTerm's */
  Terms terms = MultiTerms.getTerms(reader, fieldName);
  if (terms != null) {
    TermsEnum termsEnum = terms.iterator();

    TermsEnum.SeekStatus status = termsEnum.seekCeil(new BytesRef(getTermText()));
    if (status == TermsEnum.SeekStatus.FOUND) {
      mtv.visitMatchingTerm(getLuceneTerm(fieldName));
    }
  }
}
 
Example 3
Source Project: lucene-solr   Source File: SimpleNaiveBayesClassifier.java    License: Apache License 2.0
/**
 * Count the number of documents in the index having at least one value for the 'class' field.
 *
 * @return the number of documents having a value for the 'class' field
 * @throws IOException if accessing term vectors or searching fails
 */
protected int countDocsWithClass() throws IOException {
  Terms terms = MultiTerms.getTerms(this.indexReader, this.classFieldName);
  int docCount;
  if (terms == null || terms.getDocCount() == -1) { // in case codec doesn't support getDocCount
    TotalHitCountCollector classQueryCountCollector = new TotalHitCountCollector();
    BooleanQuery.Builder q = new BooleanQuery.Builder();
    q.add(new BooleanClause(new WildcardQuery(new Term(classFieldName, String.valueOf(WildcardQuery.WILDCARD_STRING))), BooleanClause.Occur.MUST));
    if (query != null) {
      q.add(query, BooleanClause.Occur.MUST);
    }
    indexSearcher.search(q.build(),
        classQueryCountCollector);
    docCount = classQueryCountCollector.getTotalHits();
  } else {
    docCount = terms.getDocCount();
  }
  return docCount;
}
 
Example 4
Source Project: lucene-solr   Source File: FuzzyLikeThisQuery.java    License: Apache License 2.0
private Query newTermQuery(IndexReader reader, Term term) throws IOException {
  if (ignoreTF) {
    return new ConstantScoreQuery(new TermQuery(term));
  } else {
    // we build an artificial TermStates that will give an overall df and ttf
    // equal to 1
    TermStates context = new TermStates(reader.getContext());
    for (LeafReaderContext leafContext : reader.leaves()) {
      Terms terms = leafContext.reader().terms(term.field());
      if (terms != null) {
        TermsEnum termsEnum = terms.iterator();
        if (termsEnum.seekExact(term.bytes())) {
          int freq = 1 - context.docFreq(); // we want the total df and ttf to be 1
          context.register(termsEnum.termState(), leafContext.ord, freq, freq);
        }
      }
    }
    return new TermQuery(term, context);
  }
}
 
Example 5
@Override
public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException {
  Terms terms = ctx.reader().terms(field);
  if (terms == null)
    return null;
  if (terms.hasPositions() == false) {
    throw new IllegalArgumentException("Cannot create an IntervalIterator over field " + field + " because it has no indexed positions");
  }
  if (terms.hasPayloads() == false) {
    throw new IllegalArgumentException("Cannot create a payload-filtered iterator over field " + field + " because it has no indexed payloads");
  }
  TermsEnum te = terms.iterator();
  if (te.seekExact(term) == false) {
    return null;
  }
  return intervals(te);
}
 
Example 6
Source Project: Elasticsearch   Source File: VersionFieldUpgrader.java    License: Apache License 2.0
static CodecReader wrap(CodecReader reader) throws IOException {
    final FieldInfos fieldInfos = reader.getFieldInfos();
    final FieldInfo versionInfo = fieldInfos.fieldInfo(VersionFieldMapper.NAME);
    if (versionInfo != null && versionInfo.getDocValuesType() != DocValuesType.NONE) {
        // the reader is a recent one, it has versions and they are stored
        // in a numeric doc values field
        return reader;
    }
    // The segment is an old one, look at the _uid field
    final Terms terms = reader.terms(UidFieldMapper.NAME);
    if (terms == null || !terms.hasPayloads()) {
        // The segment doesn't have an _uid field or doesn't have payloads
        // don't try to do anything clever. If any other segment has versions
        // all versions of this segment will be initialized to 0
        return reader;
    }
    // convert _uid payloads -> _version docvalues
    return new VersionFieldUpgrader(reader);
}
 
Example 7
Source Project: Elasticsearch   Source File: DfsOnlyRequest.java    License: Apache License 2.0
public DfsOnlyRequest(Fields termVectorsFields, String[] indices, String[] types, Set<String> selectedFields) throws IOException {
    super(indices);

    // build a search request with a query of all the terms
    final BoolQueryBuilder boolBuilder = boolQuery();
    for (String fieldName : termVectorsFields) {
        if ((selectedFields != null) && (!selectedFields.contains(fieldName))) {
            continue;
        }
        Terms terms = termVectorsFields.terms(fieldName);
        TermsEnum iterator = terms.iterator();
        while (iterator.next() != null) {
            String text = iterator.term().utf8ToString();
            boolBuilder.should(QueryBuilders.termQuery(fieldName, text));
        }
    }
    // wrap a search request object
    this.searchRequest = new SearchRequest(indices).types(types).source(new SearchSourceBuilder().query(boolBuilder));
}
 
Example 8
Source Project: Elasticsearch   Source File: TermVectorsResponse.java    License: Apache License 2.0
private void buildTerm(XContentBuilder builder, final CharsRefBuilder spare, Terms curTerms, TermsEnum termIter, BoostAttribute boostAtt) throws IOException {
    // start term, optimized writing
    BytesRef term = termIter.next();
    spare.copyUTF8Bytes(term);
    builder.startObject(spare.toString());
    buildTermStatistics(builder, termIter);
    // finally write the term vectors
    PostingsEnum posEnum = termIter.postings(null, PostingsEnum.ALL);
    int termFreq = posEnum.freq();
    builder.field(FieldStrings.TERM_FREQ, termFreq);
    initMemory(curTerms, termFreq);
    initValues(curTerms, posEnum, termFreq);
    buildValues(builder, curTerms, termFreq);
    buildScore(builder, boostAtt);
    builder.endObject();
}
 
Example 9
Source Project: Elasticsearch   Source File: TermVectorsResponse.java    License: Apache License 2.0
private void buildFieldStatistics(XContentBuilder builder, Terms curTerms) throws IOException {
    long sumDocFreq = curTerms.getSumDocFreq();
    int docCount = curTerms.getDocCount();
    long sumTotalTermFrequencies = curTerms.getSumTotalTermFreq();
    if (docCount > 0) {
        assert ((sumDocFreq > 0)) : "docCount > 0 but sumDocFreq is not!";
        assert ((sumTotalTermFrequencies > 0)) : "docCount > 0 but sumTotalTermFrequencies is not!";
        builder.startObject(FieldStrings.FIELD_STATISTICS);
        builder.field(FieldStrings.SUM_DOC_FREQ, sumDocFreq);
        builder.field(FieldStrings.DOC_COUNT, docCount);
        builder.field(FieldStrings.SUM_TTF, sumTotalTermFrequencies);
        builder.endObject();
    } else if (docCount == -1) { // this should only be -1 if the field
        // statistics were not requested at all. In
        // this case all 3 values should be -1
        assert ((sumDocFreq == -1)) : "docCount was -1 but sumDocFreq was not!";
        assert ((sumTotalTermFrequencies == -1)) : "docCount was -1 but sumTotalTermFrequencies was not!";
    } else {
        throw new IllegalStateException(
                "Something is wrong with the field statistics of the term vector request: Values are " + "\n"
                        + FieldStrings.SUM_DOC_FREQ + " " + sumDocFreq + "\n" + FieldStrings.DOC_COUNT + " " + docCount + "\n"
                        + FieldStrings.SUM_TTF + " " + sumTotalTermFrequencies);
    }
}
 
Example 10
Source Project: liresolr   Source File: HashTermStatistics.java    License: GNU General Public License v2.0
public static void addToStatistics(SolrIndexSearcher searcher, String field) throws IOException {
    // check if this field is already in the stats.
    if (termstats.get(field) != null) return;
    // else add it to the stats.
    Terms terms = searcher.getSlowAtomicReader().terms(field);
    HashMap<String, Integer> term2docFreq = new HashMap<String, Integer>(1000);
    termstats.put(field, term2docFreq);
    if (terms != null) {
        TermsEnum termsEnum = terms.iterator();
        BytesRef term;
        while ((term = termsEnum.next()) != null) {
            term2docFreq.put(term.utf8ToString(), termsEnum.docFreq());
        }
    }
}
 
Example 11
Source Project: scava   Source File: ReadmeSimilarityCalculator.java    License: Eclipse Public License 2.0
private HashMap<String, Integer> getAllTerms() throws IOException {
	HashMap<String, Integer> allTerms = new HashMap<>();
	int pos = 0;
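	// First pass: record each distinct term across all documents (positions are renumbered below).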
	for (int docId = 0; docId < getTotalDocumentInIndex(); docId++) {
		Terms vector = getIndexReader().getTermVector(docId, FIELD_CONTENT);
		TermsEnum termsEnum = vector.iterator();
		BytesRef text;
		while ((text = termsEnum.next()) != null) {
			String term = text.utf8ToString();
			allTerms.put(term, pos++);
		}
	}

	// Update position
	pos = 0;
	for (Entry<String, Integer> s : allTerms.entrySet()) {
		s.setValue(pos++);
	}
	return allTerms;
}
 
Example 12
Source Project: scava   Source File: ReadmeSimilarityCalculator.java    License: Eclipse Public License 2.0
private DocVector[] getDocumentVectors() throws IOException {
	DocVector[] docVector = new DocVector[getTotalDocumentInIndex()];
	for (int docId = 0; docId < getTotalDocumentInIndex(); docId++) {
		Terms vector = getIndexReader().getTermVector(docId, FIELD_CONTENT);
		TermsEnum termsEnum = vector.iterator();
		BytesRef text;
		docVector[docId] = new DocVector(getAllTerms());
		while ((text = termsEnum.next()) != null) {
			String term = text.utf8ToString();
			int freq = (int) termsEnum.totalTermFreq();
			docVector[docId].setEntry(term, freq);
		}
		docVector[docId].normalize();
	}
	getIndexReader().close();
	return docVector;
}
 
Example 13
Source Project: lucene4ir   Source File: QERetrievalApp.java    License: Apache License 2.0
/**
 * Combines the individual term vectors of each document into a single map of combined term statistics.
 * @param terms the per-document term vectors to combine
 * @return a map from term text to its combined QETerm statistics
 */
public HashMap<String, QETerm> combineTerms(Vector<Terms> terms){
    HashMap<String, QETerm> combinedTerms = new HashMap<String, QETerm>();
    int numDocs = terms.size();
    for(Terms ts : terms){
        try {
            TermsEnum te = ts.iterator();
            BytesRef term;
            while ((term = te.next()) != null) {
                String tString = term.utf8ToString();
                QETerm qet = new QETerm(tString, te.totalTermFreq(),te.docFreq(),numDocs);
                if (combinedTerms.containsKey(tString)){
                    QETerm mergedTerm = qet.combine(combinedTerms.get(tString));
                    combinedTerms.replace(tString,mergedTerm);
                }
                else
                    combinedTerms.put(tString,qet);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    return combinedTerms;
}
 
Example 14
Source Project: lucene-solr   Source File: TestLegacyTerms.java    License: Apache License 2.0
public void testIntFieldMinMax() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), dir);
  int numDocs = atLeast(100);
  int minValue = Integer.MAX_VALUE;
  int maxValue = Integer.MIN_VALUE;
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    int num = random().nextInt();
    minValue = Math.min(num, minValue);
    maxValue = Math.max(num, maxValue);
    doc.add(new LegacyIntField("field", num, Field.Store.NO));
    w.addDocument(doc);
  }
  
  IndexReader r = w.getReader();
  Terms terms = MultiTerms.getTerms(r, "field");
  assertEquals(Integer.valueOf(minValue), LegacyNumericUtils.getMinInt(terms));
  assertEquals(Integer.valueOf(maxValue), LegacyNumericUtils.getMaxInt(terms));

  r.close();
  w.close();
  dir.close();
}
 
Example 15
Source Project: lucene-solr   Source File: TokenSources.java    License: Apache License 2.0
/**
 * A convenience method that tries a number of approaches to getting a token
 * stream. The cost of discovering that there are no term vectors in the index
 * is minimal (1000 invocations still register 0 ms), so this "lazy" approach
 * is probably acceptable.
 * 
 * @return null if the field is not stored correctly
 * @throws IOException If there is a low-level I/O error
 */
@Deprecated // maintenance reasons LUCENE-6445
public static TokenStream getAnyTokenStream(IndexReader reader, int docId,
    String field, Analyzer analyzer) throws IOException {
  TokenStream ts = null;

  Fields vectors = reader.getTermVectors(docId);
  if (vectors != null) {
    Terms vector = vectors.terms(field);
    if (vector != null) {
      ts = getTokenStream(vector);
    }
  }

  // No token info stored so fall back to analyzing raw content
  if (ts == null) {
    ts = getTokenStream(reader, docId, field, analyzer);
  }
  return ts;
}
 
Example 16
public void testNoOrds() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
  Document doc = new Document();
  FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  ft.setStoreTermVectors(true);
  doc.add(new Field("foo", "this is a test", ft));
  iw.addDocument(doc);
  LeafReader ir = getOnlyLeafReader(iw.getReader());
  Terms terms = ir.getTermVector(0, "foo");
  assertNotNull(terms);
  TermsEnum termsEnum = terms.iterator();
  assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(new BytesRef("this")));

  expectThrows(UnsupportedOperationException.class, termsEnum::ord);
  expectThrows(UnsupportedOperationException.class, () -> termsEnum.seekExact(0));

  ir.close();
  iw.close();
  dir.close();
}
 
Example 17
Source Project: lucene-solr   Source File: TermVectorOffsetStrategy.java    License: Apache License 2.0
@Override
public OffsetsEnum getOffsetsEnum(LeafReader reader, int docId, String content) throws IOException {
  Terms tvTerms = reader.getTermVector(docId, getField());
  if (tvTerms == null) {
    return OffsetsEnum.EMPTY;
  }

  LeafReader singleDocReader = new TermVectorLeafReader(getField(), tvTerms);
  return createOffsetsEnumFromReader(
      new OverlaySingleDocTermsLeafReader(
          reader,
          singleDocReader,
          getField(),
          docId),
      docId);
}
 
Example 18
Source Project: lucene-solr   Source File: TestLegacyTerms.java    License: Apache License 2.0
public void testFloatFieldMinMax() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), dir);
  int numDocs = atLeast(100);
  float minValue = Float.POSITIVE_INFINITY;
  float maxValue = Float.NEGATIVE_INFINITY;
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    float num = random().nextFloat();
    minValue = Math.min(num, minValue);
    maxValue = Math.max(num, maxValue);
    doc.add(new LegacyFloatField("field", num, Field.Store.NO));
    w.addDocument(doc);
  }
  
  IndexReader r = w.getReader();
  Terms terms = MultiTerms.getTerms(r, "field");
  assertEquals(minValue, NumericUtils.sortableIntToFloat(LegacyNumericUtils.getMinInt(terms)), 0.0f);
  assertEquals(maxValue, NumericUtils.sortableIntToFloat(LegacyNumericUtils.getMaxInt(terms)), 0.0f);

  r.close();
  w.close();
  dir.close();
}
 
Example 19
Source Project: lucene-solr   Source File: IndexToolsImpl.java    License: Apache License 2.0
public String exportTerms(String destDir, String field, String delimiter) {
  String filename = "terms_" + field + "_" + System.currentTimeMillis() + ".out";
  Path path = Paths.get(destDir, filename);
  try {
    Terms terms = MultiTerms.getTerms(reader, field);
    if (terms == null) {
      throw new LukeException(String.format(Locale.US, "Field %s does not contain any terms to be exported", field));
    }
    try (BufferedWriter writer = Files.newBufferedWriter(path, Charset.forName("UTF-8"))) {
      TermsEnum termsEnum = terms.iterator();
      BytesRef term;
      while (!Thread.currentThread().isInterrupted() && (term = termsEnum.next()) != null) {
        writer.write(String.format(Locale.US, "%s%s%d\n", term.utf8ToString(), delimiter, termsEnum.docFreq()));
      }
      return path.toString();
    }
  } catch (IOException e) {
    throw new LukeException("Terms file export for field [" + field + "] to file [" + filename + "] has failed.", e);
  }
}
 
Example 20
Source Project: lucene-solr   Source File: BlockTermsWriter.java    License: Apache License 2.0
@Override
public void write(Fields fields, NormsProducer norms) throws IOException {
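  // For each field, stream every term from its TermsEnum into a per-field TermsWriter.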

  for(String field : fields) {

    Terms terms = fields.terms(field);
    if (terms == null) {
      continue;
    }

    TermsEnum termsEnum = terms.iterator();

    TermsWriter termsWriter = addField(fieldInfos.fieldInfo(field));

    while (true) {
      BytesRef term = termsEnum.next();
      if (term == null) {
        break;
      }

      termsWriter.write(term, termsEnum, norms);
    }

    termsWriter.finish();
  }
}
 
Example 21
Source Project: lucene-solr   Source File: TestLegacyTerms.java    License: Apache License 2.0
public void testLongFieldMinMax() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), dir);
  int numDocs = atLeast(100);
  long minValue = Long.MAX_VALUE;
  long maxValue = Long.MIN_VALUE;
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    long num = random().nextLong();
    minValue = Math.min(num, minValue);
    maxValue = Math.max(num, maxValue);
    doc.add(new LegacyLongField("field", num, Field.Store.NO));
    w.addDocument(doc);
  }
  
  IndexReader r = w.getReader();

  Terms terms = MultiTerms.getTerms(r, "field");
  assertEquals(Long.valueOf(minValue), LegacyNumericUtils.getMinLong(terms));
  assertEquals(Long.valueOf(maxValue), LegacyNumericUtils.getMaxLong(terms));

  r.close();
  w.close();
  dir.close();
}
 
Example 22
Source Project: lucene-solr   Source File: GraphTermsQParserPlugin.java    License: Apache License 2.0
private void collectTermStates(IndexReader reader,
                               List<LeafReaderContext> leaves,
                               TermStates[] contextArray,
                               Term[] queryTerms) throws IOException {
  TermsEnum termsEnum = null;
  for (LeafReaderContext context : leaves) {

    Terms terms = context.reader().terms(this.field);
    if (terms == null) {
      // field does not exist
      continue;
    }

    termsEnum = terms.iterator();

    if (termsEnum == TermsEnum.EMPTY) continue;

    for (int i = 0; i < queryTerms.length; i++) {
      Term term = queryTerms[i];
      TermStates termStates = contextArray[i];

      if (termsEnum.seekExact(term.bytes())) {
        if (termStates == null) {
          contextArray[i] = new TermStates(reader.getContext(),
              termsEnum.termState(), context.ord, termsEnum.docFreq(),
              termsEnum.totalTermFreq());
        } else {
          termStates.register(termsEnum.termState(), context.ord,
              termsEnum.docFreq(), termsEnum.totalTermFreq());
        }
      }
    }
  }
}
 
Example 23
Source Project: lucene-solr   Source File: QueryIndex.java    License: Apache License 2.0
QueryTermFilter(IndexReader reader) throws IOException {
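  // Collect every indexed term of every field into a per-field BytesRefHash.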
  for (LeafReaderContext ctx : reader.leaves()) {
    for (FieldInfo fi : ctx.reader().getFieldInfos()) {
      BytesRefHash terms = termsHash.computeIfAbsent(fi.name, f -> new BytesRefHash());
      Terms t = ctx.reader().terms(fi.name);
      if (t != null) {
        TermsEnum te = t.iterator();
        BytesRef term;
        while ((term = te.next()) != null) {
          terms.add(term);
        }
      }
    }
  }
}
 
Example 24
Source Project: lucene-solr   Source File: DocToDoubleVectorUtilsTest.java    License: Apache License 2.0
@Test
public void testSparseFreqDoubleArrayConversion() throws Exception {
  Terms fieldTerms = MultiTerms.getTerms(index, "text");
  if (fieldTerms != null && fieldTerms.size() != -1) {
    IndexSearcher indexSearcher = new IndexSearcher(index);
    for (ScoreDoc scoreDoc : indexSearcher.search(new MatchAllDocsQuery(), Integer.MAX_VALUE).scoreDocs) {
      Terms docTerms = index.getTermVector(scoreDoc.doc, "text");
      Double[] vector = DocToDoubleVectorUtils.toSparseLocalFreqDoubleArray(docTerms, fieldTerms);
      assertNotNull(vector);
      assertTrue(vector.length > 0);
    }
  }
}
 
Example 25
Source Project: lucene-solr   Source File: MoreLikeThis.java    License: Apache License 2.0
/**
 * Find words for a more-like-this query former.
 *
 * @param docNum the id of the lucene document from which to find terms
 */
private PriorityQueue<ScoreTerm> retrieveTerms(int docNum) throws IOException {
  Map<String, Map<String, Int>> field2termFreqMap = new HashMap<>();
  for (String fieldName : fieldNames) {
    final Fields vectors = ir.getTermVectors(docNum);
    final Terms vector;
    if (vectors != null) {
      vector = vectors.terms(fieldName);
    } else {
      vector = null;
    }

    // field does not store term vector info
    if (vector == null) {
      Document d = ir.document(docNum);
      IndexableField[] fields = d.getFields(fieldName);
      for (IndexableField field : fields) {
        final String stringValue = field.stringValue();
        if (stringValue != null) {
          addTermFrequencies(new StringReader(stringValue), field2termFreqMap, fieldName);
        }
      }
    } else {
      addTermFrequencies(field2termFreqMap, vector, fieldName);
    }
  }

  return createQueue(field2termFreqMap);
}
 
Example 26
Source Project: Elasticsearch   Source File: WordScorer.java    License: Apache License 2.0
public WordScorer(IndexReader reader, Terms terms, String field, double realWordLikelyHood, BytesRef separator) throws IOException {
    this.field = field;
    if (terms == null) {
        throw new IllegalArgumentException("Field: [" + field + "] does not exist");
    }
    this.terms = terms;
    final long vocSize = terms.getSumTotalTermFreq();
    this.vocabularySize = vocSize == -1 ? reader.maxDoc() : vocSize;
    this.useTotalTermFreq = vocSize != -1;
    this.numTerms = terms.size();
    this.termsEnum = new FreqTermsEnum(reader, field, !useTotalTermFreq, useTotalTermFreq, null, BigArrays.NON_RECYCLING_INSTANCE); // non recycling for now
    this.reader = reader;
    this.realWordLikelyhood = realWordLikelyHood;
    this.separator = separator;
}
 
Example 27
Source Project: mtas   Source File: MtasSpanSequenceQuery.java    License: Apache License 2.0
@Override
public MtasSpans getSpans(LeafReaderContext context,
    Postings requiredPostings) throws IOException {
  if (field == null) {
    return null;
  } else {
    Terms terms = context.reader().terms(field);
    if (terms == null) {
      return null; // field does not exist
    }
    List<MtasSpanSequenceQuerySpans> setSequenceSpans = new ArrayList<>(
        items.size());
    Spans ignoreSpans = null;
    boolean allSpansEmpty = true;
    for (MtasSpanSequenceQueryWeight w : subWeights) {
      Spans sequenceSpans = w.spanWeight.getSpans(context,
          requiredPostings);
      if (sequenceSpans != null) {
        setSequenceSpans.add(new MtasSpanSequenceQuerySpans(
            MtasSpanSequenceQuery.this, sequenceSpans, w.optional));
        allSpansEmpty = false;
      } else {
        if (w.optional) {
          setSequenceSpans.add(new MtasSpanSequenceQuerySpans(
              MtasSpanSequenceQuery.this, null, w.optional));
        } else {
          return null;
        }
      }
    }
    if (allSpansEmpty) {
      return null; // at least one required
    } else if (ignoreWeight != null) {
      ignoreSpans = ignoreWeight.getSpans(context, requiredPostings);
    }
    return new MtasSpanSequenceSpans(MtasSpanSequenceQuery.this,
        setSequenceSpans, ignoreSpans, maximumIgnoreLength);
  }
}
 
Example 28
Source Project: lucene-solr   Source File: PerFieldMergeState.java    License: Apache License 2.0
@Override
public Terms terms(String field) throws IOException {
  if (!filtered.contains(field)) {
    throw new IllegalArgumentException("The field named '" + field + "' is not accessible in the current " +
        "merge context, available ones are: " + filtered);
  }
  return in.terms(field);
}
 
Example 29
Source Project: Elasticsearch   Source File: ShortFieldMapper.java    License: Apache License 2.0
@Override
public FieldStats stats(Terms terms, int maxDoc) throws IOException {
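    // Min and max are decoded from the field's term dictionary (legacy numeric term encoding).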
    long minValue = NumericUtils.getMinInt(terms);
    long maxValue = NumericUtils.getMaxInt(terms);
    return new FieldStats.Long(
        maxDoc, terms.getDocCount(), terms.getSumDocFreq(), terms.getSumTotalTermFreq(), minValue, maxValue
    );
}
 
Example 30
Source Project: incubator-retired-blur   Source File: BlurUtil.java    License: Apache License 2.0
private static void applyFamily(OpenBitSet bits, String family, AtomicReader atomicReader, int primeDocRowId,
    int numberOfDocsInRow, Bits liveDocs) throws IOException {
  Fields fields = atomicReader.fields();
  Terms terms = fields.terms(BlurConstants.FAMILY);
  TermsEnum iterator = terms.iterator(null);
  BytesRef text = new BytesRef(family);
  int lastDocId = primeDocRowId + numberOfDocsInRow;
  if (iterator.seekExact(text, true)) {
    DocsEnum docs = iterator.docs(liveDocs, null, DocsEnum.FLAG_NONE);
    int doc = primeDocRowId;
    while ((doc = docs.advance(doc)) < lastDocId) {
      bits.set(doc - primeDocRowId);
    }
  }
}