Java Code Examples for org.apache.lucene.index.MultiFields#getTerms()

The following examples show how to use org.apache.lucene.index.MultiFields#getTerms() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TransportFieldStatsTransportAction.java    From Elasticsearch with Apache License 2.0 7 votes vote down vote up
@Override
protected FieldStatsShardResponse shardOperation(FieldStatsShardRequest request) {
    ShardId shardId = request.shardId();
    Map<String, FieldStats> fieldStats = new HashMap<>();
    IndexService indexServices = indicesService.indexServiceSafe(shardId.getIndex());
    MapperService mapperService = indexServices.mapperService();
    IndexShard shard = indexServices.shardSafe(shardId.id());
    try (Engine.Searcher searcher = shard.acquireSearcher("fieldstats")) {
        for (String field : request.getFields()) {
            MappedFieldType fieldType = mapperService.fullName(field);
            if (fieldType != null) {
                IndexReader reader = searcher.reader();
                Terms terms = MultiFields.getTerms(reader, field);
                if (terms != null) {
                    fieldStats.put(field, fieldType.stats(terms, reader.maxDoc()));
                }
            } else {
                throw new IllegalArgumentException("field [" + field + "] doesn't exist");
            }
        }
    } catch (IOException e) {
        throw ExceptionsHelper.convertToElastic(e);
    }
    return new FieldStatsShardResponse(shardId, fieldStats);
}
 
Example 2
Source File: LuceneIndexCorpus.java    From word2vec-lucene with Apache License 2.0 6 votes vote down vote up
@Override
public void learnVocab() throws IOException {
  super.learnVocab();

  final String field = ((LuceneIndexConfig)config).getField();
  final Terms terms = MultiFields.getTerms(reader, field);
  final BytesRef maxTerm = terms.getMax();
  final BytesRef minTerm = terms.getMin();
  Query q = new TermRangeQuery(field, minTerm, maxTerm, true, true);
  IndexSearcher searcher = new IndexSearcher(reader);
  topDocs = searcher.search(q, Integer.MAX_VALUE);

  TermsEnum termsEnum = null;
  termsEnum = terms.iterator(termsEnum);

  termsEnum.seekCeil(new BytesRef());
  BytesRef term = termsEnum.term();
  while(term != null){
    int p = addWordToVocab(term.utf8ToString());
    vocab[p].setCn((int)termsEnum.totalTermFreq());
    term = termsEnum.next();
  }
}
 
Example 3
Source File: WordScorer.java    From Elasticsearch with Apache License 2.0 4 votes vote down vote up
public WordScorer(IndexReader reader, String field, double realWordLikelyHood, BytesRef separator) throws IOException {
    this(reader, MultiFields.getTerms(reader, field), field, realWordLikelyHood, separator);
}
 
Example 4
Source File: AlfrescoLukeRequestHandler.java    From SearchServices with GNU Lesser General Public License v3.0 4 votes vote down vote up
@SuppressWarnings("unchecked")
private static void getDetailedFieldInfo(SolrQueryRequest req,
		String field, SimpleOrderedMap<Object> fieldMap) throws IOException {

	SolrParams params = req.getParams();
	final int numTerms = params.getInt(NUMTERMS, DEFAULT_COUNT);

	TopTermQueue tiq = new TopTermQueue(numTerms + 1); // Something to
														// collect the top N
														// terms in.

	final CharsRefBuilder spare = new CharsRefBuilder();

	Terms terms = MultiFields.getTerms(req.getSearcher().getIndexReader(),
			field);
	if (terms == null) { // field does not exist
		return;
	}
	TermsEnum termsEnum = terms.iterator();
	BytesRef text;
	int[] buckets = new int[HIST_ARRAY_SIZE];
	while ((text = termsEnum.next()) != null) {
		++tiq.distinctTerms;
		int freq = termsEnum.docFreq(); // This calculation seems odd, but
										// it gives the same results as it
										// used to.
		int slot = 32 - Integer.numberOfLeadingZeros(Math.max(0, freq - 1));
		buckets[slot] = buckets[slot] + 1;
		if (numTerms > 0 && freq > tiq.minFreq) {
			spare.copyUTF8Bytes(text);
			String t = spare.toString();

			tiq.add(new TopTermQueue.TermInfo(new Term(field, t), termsEnum
					.docFreq()));
			if (tiq.size() > numTerms) { // if tiq full
				tiq.pop(); // remove lowest in tiq
				tiq.minFreq = tiq.getTopTermInfo().docFreq;
			}
		}
	}
	tiq.histogram.add(buckets);
	fieldMap.add("distinct", tiq.distinctTerms);

	// Include top terms
	fieldMap.add("topTerms", tiq.toNamedList(req.getSearcher().getSchema()));

	// Add a histogram
	fieldMap.add("histogram", tiq.histogram.toNamedList());
}