Java Code Examples for org.apache.lucene.util.PriorityQueue

The following examples show how to use org.apache.lucene.util.PriorityQueue. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: Elasticsearch   Source File: CandidateScorer.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Offers a completed candidate path to the corrections queue.
 * The incoming score is in the log domain and is exponentiated before comparison.
 */
private void updateTop(CandidateSet[] candidates, Candidate[] path, PriorityQueue<Correction> corrections, double cutoffScore, double score)
        throws IOException {
    score = Math.exp(score);
    assert Math.abs(score - score(path, candidates)) < 0.00001;
    if (!(score > cutoffScore)) {
        return; // below the cutoff: not worth keeping
    }
    if (corrections.size() < maxNumCorrections) {
        // Queue still has room: snapshot the path and enqueue a new correction.
        Candidate[] snapshot = new Candidate[candidates.length];
        System.arraycopy(path, 0, snapshot, 0, path.length);
        corrections.add(new Correction(score, snapshot));
    } else if (corrections.top().compareTo(score, path) < 0) {
        // Queue is full: overwrite the weakest entry in place and restore heap order.
        Correction weakest = corrections.top();
        System.arraycopy(path, 0, weakest.candidates, 0, path.length);
        weakest.score = score;
        corrections.updateTop();
    }
}
 
Example 2
Source Project: Elasticsearch   Source File: XMoreLikeThis.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Drains the scored-term queue into {@code query}, adding one SHOULD clause per
 * term. When boosting is enabled, each clause's boost is scaled relative to the
 * score of the first term popped.
 */
private void addToQuery(PriorityQueue<ScoreTerm> q, BooleanQuery query) {
    float bestScore = -1;

    for (ScoreTerm scoreTerm = q.pop(); scoreTerm != null; scoreTerm = q.pop()) {
        TermQuery tq = new TermQuery(new Term(scoreTerm.topField, scoreTerm.word));

        if (boost) {
            // First popped term fixes the reference score for relative boosting.
            if (bestScore == -1) {
                bestScore = (scoreTerm.score);
            }
            float myScore = (scoreTerm.score);
            tq.setBoost(boostFactor * myScore / bestScore);
        }

        try {
            query.add(tq, BooleanClause.Occur.SHOULD);
        } catch (BooleanQuery.TooManyClauses ignore) {
            // Clause budget exhausted: keep what we already added and stop.
            break;
        }
    }
}
 
Example 3
/**
 * Wires up the disjunctive approximation over all sub-iterators and the two
 * interval queues used when verifying that at least {@code minShouldMatch}
 * of them participate.
 */
MinimumShouldMatchIntervalIterator(Collection<IntervalIterator> subs, int minShouldMatch) {
  this.disiQueue = new DisiPriorityQueue(subs.size());
  float totalMatchCost = 0;
  for (IntervalIterator sub : subs) {
    this.disiQueue.add(new DisiWrapper(sub));
    totalMatchCost += sub.matchCost();
  }
  this.approximation = new DisjunctionDISIApproximation(disiQueue);
  this.matchCost = totalMatchCost;
  this.minShouldMatch = minShouldMatch;

  // Orders iterators by start position; ties prefer the larger end.
  this.proximityQueue = new PriorityQueue<IntervalIterator>(minShouldMatch) {
    @Override
    protected boolean lessThan(IntervalIterator a, IntervalIterator b) {
      return a.start() < b.start() || (a.start() == b.start() && a.end() >= b.end());
    }
  };
  // Orders iterators by end position; ties prefer the larger start.
  this.backgroundQueue = new PriorityQueue<IntervalIterator>(subs.size()) {
    @Override
    protected boolean lessThan(IntervalIterator a, IntervalIterator b) {
      return a.end() < b.end() || (a.end() == b.end() && a.start() >= b.start());
    }
  };
}
 
Example 4
Source Project: lucene-solr   Source File: MoreLikeThis.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Accumulates per-field term frequencies from the supplied field values and
 * builds the scored-term queue from them.
 */
private PriorityQueue<ScoreTerm> retrieveTerms(Map<String, Collection<Object>> field2fieldValues) throws
    IOException {
  Map<String, Map<String, Int>> field2termFreqMap = new HashMap<>();
  for (String fieldName : fieldNames) {
    Collection<Object> fieldValues = field2fieldValues.get(fieldName);
    if (fieldValues == null) {
      continue; // no values supplied for this field
    }
    for (Object fieldValue : fieldValues) {
      if (fieldValue == null) {
        continue;
      }
      addTermFrequencies(new StringReader(String.valueOf(fieldValue)), field2termFreqMap,
          fieldName);
    }
  }
  return createQueue(field2termFreqMap);
}
 
Example 5
Source Project: lucene-solr   Source File: MinShouldMatchSumScorer.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Estimates the cost of a min-should-match disjunction. Rewriting the query
 * recursively ((c1 AND rest|msm-1) OR (!c1 AND rest|msm)) shows the total cost
 * is the sum of the (numScorers - minShouldMatch + 1) least costly clauses;
 * a bounded heap retains exactly those.
 */
static long cost(LongStream costs, int numScorers, int minShouldMatch) {
  // Inverted lessThan makes this a max-heap over cost, so insertWithOverflow
  // evicts the most expensive entry and only the cheapest costs survive.
  final PriorityQueue<Long> pq = new PriorityQueue<Long>(numScorers - minShouldMatch + 1) {
    @Override
    protected boolean lessThan(Long a, Long b) {
      return a > b;
    }
  };
  costs.forEach(pq::insertWithOverflow);
  long total = 0;
  for (Long retained : pq) {
    total += retained;
  }
  return total;
}
 
Example 6
Source Project: Elasticsearch   Source File: CandidateScorer.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Recursively enumerates correction paths over the candidate sets, scoring every
 * completed path and offering it to the corrections queue via {@code updateTop}.
 * At most {@code numMissspellingsLeft} positions may deviate from the original term.
 *
 * @param candidates one candidate set per term position
 * @param path scratch array holding the candidate chosen at each position so far
 * @param ord the position currently being filled
 * @param numMissspellingsLeft how many more positions may use a non-original candidate
 * @param corrections queue collecting the best-scoring corrections
 * @param cutoffScore minimum score a completed path must exceed to be kept
 * @param pathScore accumulated score of the path so far (log domain — see updateTop)
 */
public void findCandidates(CandidateSet[] candidates, Candidate[] path, int ord, int numMissspellingsLeft,
        PriorityQueue<Correction> corrections, double cutoffScore, final double pathScore) throws IOException {
    CandidateSet current = candidates[ord];
    if (ord == candidates.length - 1) {
        // Last position: complete the path with the original term and score it.
        path[ord] = current.originalTerm;
        updateTop(candidates, path, corrections, cutoffScore, pathScore + scorer.score(path, candidates, ord, gramSize));
        if (numMissspellingsLeft > 0) {
            // Budget remains: also try each alternative candidate at the last position.
            for (int i = 0; i < current.candidates.length; i++) {
                path[ord] = current.candidates[i];
                updateTop(candidates, path, corrections, cutoffScore, pathScore + scorer.score(path, candidates, ord, gramSize));
            }
        }
    } else {
        if (numMissspellingsLeft > 0) {
            // Keep the original term here (does not consume misspelling budget)...
            path[ord] = current.originalTerm;
            findCandidates(candidates, path, ord + 1, numMissspellingsLeft, corrections, cutoffScore, pathScore + scorer.score(path, candidates, ord, gramSize));
            // ...then try each substitute candidate (each consumes one unit of budget).
            for (int i = 0; i < current.candidates.length; i++) {
                path[ord] = current.candidates[i];
                findCandidates(candidates, path, ord + 1, numMissspellingsLeft - 1, corrections, cutoffScore, pathScore + scorer.score(path, candidates, ord, gramSize));
            }
        } else {
            // No budget left: forced to take the original term at this position.
            path[ord] = current.originalTerm;
            findCandidates(candidates, path, ord + 1, 0, corrections, cutoffScore, pathScore + scorer.score(path, candidates, ord, gramSize));
        }
    }

}
 
Example 7
Source Project: Elasticsearch   Source File: XMoreLikeThis.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Find words for a more-like-this query former.
 *
 * @param docNum the id of the lucene document from which to find terms
 */
private PriorityQueue<ScoreTerm> retrieveTerms(int docNum) throws IOException {
    Map<String, Int> termFreqMap = new HashMap<>();
    // The term vectors depend only on the document, not on the field, so fetch
    // them once instead of once per field iteration (the original re-fetched
    // them inside the loop).
    final Fields vectors = ir.getTermVectors(docNum);
    for (String fieldName : fieldNames) {
        final Terms vector;
        if (vectors != null) {
            vector = vectors.terms(fieldName);
        } else {
            vector = null;
        }

        // field does not store term vector info: analyze the stored value instead
        if (vector == null) {
            Document d = ir.document(docNum);
            IndexableField fields[] = d.getFields(fieldName);
            for (IndexableField field : fields) {
                final String stringValue = field.stringValue();
                if (stringValue != null) {
                    addTermFrequencies(new FastStringReader(stringValue), termFreqMap, fieldName);
                }
            }
        } else {
            addTermFrequencies(termFreqMap, vector, fieldName);
        }
    }

    return createQueue(termFreqMap);
}
 
Example 8
Source Project: Elasticsearch   Source File: XMoreLikeThis.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns up to {@code maxQueryTerms} interesting words for the given document.
 *
 * @param docNum the id of the lucene document from which to find terms
 * @see #retrieveInterestingTerms(java.io.Reader, String)
 */
public String[] retrieveInterestingTerms(int docNum) throws IOException {
    // String-typed list (was ArrayList<Object>): matches what is actually stored
    // and is consistent with the lucene-solr variant of this method.
    ArrayList<String> al = new ArrayList<>(maxQueryTerms);
    PriorityQueue<ScoreTerm> pq = retrieveTerms(docNum);
    ScoreTerm scoreTerm;
    int lim = maxQueryTerms; // have to be careful, retrieveTerms returns all words but that's probably not useful to our caller...
    // we just want to return the top words
    while (((scoreTerm = pq.pop()) != null) && lim-- > 0) {
        al.add(scoreTerm.word); // the 1st entry is the interesting word
    }
    return al.toArray(new String[al.size()]);
}
 
Example 9
Source Project: Elasticsearch   Source File: XMoreLikeThis.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Convenience routine to make it easy to return the most interesting words in a document.
 * More advanced users will call {@link #retrieveTerms(Reader, String) retrieveTerms()} directly.
 *
 * @param r the source document
 * @param fieldName field passed to analyzer to use when analyzing the content
 * @return the most interesting words in the document
 * @see #retrieveTerms(java.io.Reader, String)
 * @see #setMaxQueryTerms
 */
public String[] retrieveInterestingTerms(Reader r, String fieldName) throws IOException {
    // String-typed list (was ArrayList<Object>): matches what is actually stored
    // and is consistent with the lucene-solr variant of this method.
    ArrayList<String> al = new ArrayList<>(maxQueryTerms);
    PriorityQueue<ScoreTerm> pq = retrieveTerms(r, fieldName);
    ScoreTerm scoreTerm;
    int lim = maxQueryTerms; // have to be careful, retrieveTerms returns all words but that's probably not useful to our caller...
    // we just want to return the top words
    while (((scoreTerm = pq.pop()) != null) && lim-- > 0) {
        al.add(scoreTerm.word); // the 1st entry is the interesting word
    }
    return al.toArray(new String[al.size()]);
}
 
Example 10
Source Project: lucene-solr   Source File: QualityQueriesFinder.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Collects up to {@code numTerms} terms of {@code field} (ranking delegated to
 * {@code TermsDfQueue}), skipping terms whose document frequency reaches 10% of
 * the index size — such terms are considered too common to be useful.
 *
 * @param field the field whose terms are examined
 * @param numTerms maximum number of terms to return
 * @return the retained terms, in queue pop order
 */
private String [] bestTerms(String field,int numTerms) throws IOException {
  PriorityQueue<TermDf> pq = new TermsDfQueue(numTerms);
  // try-with-resources replaces the original explicit try/finally close.
  try (IndexReader ir = DirectoryReader.open(dir)) {
    int threshold = ir.maxDoc() / 10; // ignore words too common.
    Terms terms = MultiTerms.getTerms(ir, field);
    if (terms != null) {
      TermsEnum termsEnum = terms.iterator();
      while (termsEnum.next() != null) {
        int df = termsEnum.docFreq();
        if (df < threshold) {
          String ttxt = termsEnum.term().utf8ToString();
          pq.insertWithOverflow(new TermDf(ttxt, df));
        }
      }
    }
  }
  String[] res = new String[pq.size()];
  int i = 0;
  while (pq.size() > 0) {
    TermDf tdf = pq.pop();
    res[i++] = tdf.word;
    System.out.println(i+".   word:  "+tdf.df+"   "+tdf.word);
  }
  return res;
}
 
Example 11
Source Project: lucene-solr   Source File: UnorderedIntervalsSource.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Sets up the start-position queue and snapshots the sub-iterators into an array.
 */
UnorderedIntervalIterator(List<IntervalIterator> subIterators) {
  super(subIterators);
  // Orders iterators by start position; ties prefer the larger end.
  this.queue = new PriorityQueue<IntervalIterator>(subIterators.size()) {
    @Override
    protected boolean lessThan(IntervalIterator a, IntervalIterator b) {
      return a.start() < b.start() || (a.start() == b.start() && a.end() >= b.end());
    }
  };
  // toArray replaces the original element-by-element copy loop.
  this.subIterators = subIterators.toArray(new IntervalIterator[0]);
}
 
Example 12
Source Project: lucene-solr   Source File: MoreLikeThis.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Find words for a more-like-this query former.
 *
 * @param docNum the id of the lucene document from which to find terms
 */
private PriorityQueue<ScoreTerm> retrieveTerms(int docNum) throws IOException {
  Map<String, Map<String, Int>> field2termFreqMap = new HashMap<>();
  // The term vectors depend only on the document, not on the field, so fetch
  // them once instead of once per field iteration (the original re-fetched
  // them inside the loop).
  final Fields vectors = ir.getTermVectors(docNum);
  for (String fieldName : fieldNames) {
    final Terms vector;
    if (vectors != null) {
      vector = vectors.terms(fieldName);
    } else {
      vector = null;
    }

    // field does not store term vector info: analyze the stored value instead
    if (vector == null) {
      Document d = ir.document(docNum);
      IndexableField[] fields = d.getFields(fieldName);
      for (IndexableField field : fields) {
        final String stringValue = field.stringValue();
        if (stringValue != null) {
          addTermFrequencies(new StringReader(stringValue), field2termFreqMap, fieldName);
        }
      }
    } else {
      addTermFrequencies(field2termFreqMap, vector, fieldName);
    }
  }

  return createQueue(field2termFreqMap);
}
 
Example 13
Source Project: lucene-solr   Source File: MoreLikeThis.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * @see #retrieveInterestingTerms(java.io.Reader, String)
 */
public String[] retrieveInterestingTerms(int docNum) throws IOException {
  ArrayList<String> interesting = new ArrayList<>(maxQueryTerms);
  PriorityQueue<ScoreTerm> pq = retrieveTerms(docNum);
  // retrieveTerms returns every word in the document; callers only want the
  // top ones, so stop after maxQueryTerms pops.
  int remaining = maxQueryTerms;
  for (ScoreTerm scoreTerm = pq.pop(); scoreTerm != null && remaining-- > 0; scoreTerm = pq.pop()) {
    interesting.add(scoreTerm.word);
  }
  return interesting.toArray(new String[interesting.size()]);
}
 
Example 14
Source Project: lucene-solr   Source File: MoreLikeThis.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Convenience routine to make it easy to return the most interesting words in a document.
 * More advanced users will call {@link #retrieveTerms(Reader, String) retrieveTerms()} directly.
 *
 * @param r the source document
 * @param fieldName field passed to analyzer to use when analyzing the content
 * @return the most interesting words in the document
 * @see #retrieveTerms(java.io.Reader, String)
 * @see #setMaxQueryTerms
 */
public String[] retrieveInterestingTerms(Reader r, String fieldName) throws IOException {
  ArrayList<String> interesting = new ArrayList<>(maxQueryTerms);
  PriorityQueue<ScoreTerm> pq = retrieveTerms(r, fieldName);
  // retrieveTerms returns every word in the document; callers only want the
  // top ones, so stop after maxQueryTerms pops.
  int remaining = maxQueryTerms;
  for (ScoreTerm scoreTerm = pq.pop(); scoreTerm != null && remaining-- > 0; scoreTerm = pq.pop()) {
    interesting.add(scoreTerm.word);
  }
  return interesting.toArray(new String[interesting.size()]);
}
 
Example 15
Source Project: lucene-solr   Source File: CommonTermsQueryTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Drains the queue into a list, preserving pop order.
 */
private static List<TermAndFreq> queueToList(PriorityQueue<TermAndFreq> queue) {
  List<TermAndFreq> terms = new ArrayList<>();
  for (int remaining = queue.size(); remaining > 0; remaining--) {
    terms.add(queue.pop());
  }
  return terms;
}
 
Example 16
Source Project: lucene-solr   Source File: DisjunctionScorer.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Two-phase iterator over the disjunction's approximation.
 *
 * @param approximation the un-verified disjunctive doc-id iterator
 * @param matchCost the cost reported for confirming a match
 */
private TwoPhase(DocIdSetIterator approximation, float matchCost) {
  super(approximation);
  this.matchCost = matchCost;
  // Orders pending sub-scorers cheapest-first by matchCost — presumably so the
  // least expensive confirmation is attempted first; confirm against matches().
  unverifiedMatches = new PriorityQueue<DisiWrapper>(DisjunctionScorer.this.subScorers.size()) {
    @Override
    protected boolean lessThan(DisiWrapper a, DisiWrapper b) {
      return a.matchCost < b.matchCost;
    }
  };
}
 
Example 17
Source Project: lucene-solr   Source File: MultiPhraseQuery.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds the position queue (ordered by current position) and wraps each
 * postings enum for position tracking.
 */
UnionFullPostingsEnum(List<PostingsEnum> subs) {
  super(subs);
  this.posQueue = new PriorityQueue<PostingsAndPosition>(subs.size()) {
    @Override
    protected boolean lessThan(PostingsAndPosition a, PostingsAndPosition b) {
      return a.pos < b.pos;
    }
  };
  // Presize the list: the number of wrappers is known up front.
  this.subs = new ArrayList<>(subs.size());
  for (PostingsEnum pe : subs) {
    this.subs.add(new PostingsAndPosition(pe));
  }
}
 
Example 18
Source Project: lucene-solr   Source File: DisjunctionMatchesIterator.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds a queue of sub-iterators ordered by (start, end) position and seeds it
 * with every iterator that has at least one match.
 */
private DisjunctionMatchesIterator(List<MatchesIterator> matches) throws IOException {
  // Order by start position, then by end position. The original expressed this
  // as three clauses (<, ==&&<, ==&&==); the last two fold into end <= end.
  queue = new PriorityQueue<MatchesIterator>(matches.size()) {
    @Override
    protected boolean lessThan(MatchesIterator a, MatchesIterator b) {
      return a.startPosition() < b.startPosition()
          || (a.startPosition() == b.startPosition() && a.endPosition() <= b.endPosition());
    }
  };
  // Iterators with no matches at all are excluded from the disjunction.
  for (MatchesIterator mi : matches) {
    if (mi.next()) {
      queue.add(mi);
    }
  }
}
 
Example 19
Source Project: Elasticsearch   Source File: XMoreLikeThis.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Builds the More Like This query from the given scored-term queue.
 */
private Query createQuery(PriorityQueue<ScoreTerm> q) {
    BooleanQuery booleanQuery = new BooleanQuery();
    addToQuery(q, booleanQuery);
    return booleanQuery;
}
 
Example 20
Source Project: Elasticsearch   Source File: XMoreLikeThis.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Create a PriorityQueue from a word-&gt;tf map.
 *
 * @param words a map of words keyed on the word(String) with Int objects as the values.
 * @param fieldNames an array of field names to override defaults.
 * @return a queue of the best-scoring terms, capped at {@code maxQueryTerms}
 * @throws IOException if index statistics cannot be read
 */
private PriorityQueue<ScoreTerm> createQueue(Map<String, Int> words, String... fieldNames) throws IOException {
    // have collected all words in doc and their freqs
    int numDocs = ir.numDocs();
    final int limit = Math.min(maxQueryTerms, words.size());
    FreqQ queue = new FreqQ(limit); // will order words by score

    // Iterate over entries rather than keySet() + get(): avoids a second hash
    // lookup per word.
    for (Map.Entry<String, Int> entry : words.entrySet()) { // for every word
        String word = entry.getKey();
        int tf = entry.getValue().x; // term freq in the source doc
        if (minTermFreq > 0 && tf < minTermFreq) {
            continue; // filter out words that don't occur enough times in the source
        }

        // go through all the fields and find the largest document frequency
        String topField = fieldNames[0];
        int docFreq = 0;
        for (String fieldName : fieldNames) {
            int freq = ir.docFreq(new Term(fieldName, word));
            topField = (freq > docFreq) ? fieldName : topField;
            docFreq = (freq > docFreq) ? freq : docFreq;
        }

        if (minDocFreq > 0 && docFreq < minDocFreq) {
            continue; // filter out words that don't occur in enough docs
        }

        if (docFreq > maxDocFreq) {
            continue; // filter out words that occur in too many docs
        }

        if (docFreq == 0) {
            continue; // index update problem?
        }

        float idf = similarity.idf(docFreq, numDocs);
        float score = tf * idf;

        if (queue.size() < limit) {
            // there is still space in the queue
            queue.add(new ScoreTerm(word, topField, score, idf, docFreq, tf));
        } else {
            // queue full: replace the current minimum in place if this word scores higher
            ScoreTerm term = queue.top();
            if (term.score < score) { // update the smallest in the queue in place and update the queue.
                term.update(word, topField, score, idf, docFreq, tf);
                queue.updateTop();
            }
        }
    }
    return queue;
}
 
Example 21
Source Project: lucene-solr   Source File: MoreLikeThis.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Create a PriorityQueue from a word-&gt;tf map.
 *
 * @param perFieldTermFrequencies a per field map of words keyed on the word(String) with Int objects as the values.
 * @return a queue of the best-scoring terms, capped at {@code maxQueryTerms}
 * @throws IOException if index statistics cannot be read
 */
private PriorityQueue<ScoreTerm> createQueue(Map<String, Map<String, Int>> perFieldTermFrequencies) throws IOException {
  // have collected all words in doc and their freqs
  final int limit = Math.min(maxQueryTerms, this.getTermsCount(perFieldTermFrequencies));
  FreqQ queue = new FreqQ(limit); // will order words by score
  for (Map.Entry<String, Map<String, Int>> entry : perFieldTermFrequencies.entrySet()) {
    Map<String, Int> perWordTermFrequencies = entry.getValue();
    String fieldName = entry.getKey();

    // Prefer the per-field doc count; fall back to the index-wide count when
    // the field reports no statistics (-1).
    long numDocs = ir.getDocCount(fieldName);
    if(numDocs == -1) {
      numDocs = ir.numDocs();
    }

    for (Map.Entry<String, Int> tfEntry : perWordTermFrequencies.entrySet()) { // for every word
      String word = tfEntry.getKey();
      int tf = tfEntry.getValue().x; // term freq in the source doc
      if (minTermFreq > 0 && tf < minTermFreq) {
        continue; // filter out words that don't occur enough times in the source
      }

      int docFreq = ir.docFreq(new Term(fieldName, word));

      if (minDocFreq > 0 && docFreq < minDocFreq) {
        continue; // filter out words that don't occur in enough docs
      }

      if (docFreq > maxDocFreq) {
        continue; // filter out words that occur in too many docs
      }

      if (docFreq == 0) {
        continue; // index update problem?
      }

      float idf = similarity.idf(docFreq, numDocs);
      float score = tf * idf;

      if (queue.size() < limit) {
        // there is still space in the queue
        queue.add(new ScoreTerm(word, fieldName, score, idf, docFreq, tf));
      } else {
        // queue full: replace the current minimum in place if this word scores higher
        ScoreTerm term = queue.top();
        if (term.score < score) { // update the smallest in the queue in place and update the queue.
          term.update(word, fieldName, score, idf, docFreq, tf);
          queue.updateTop();
        }
      }
    }
  }
  return queue;
}
 
Example 22
Source Project: lucene-solr   Source File: TopDocsCollector.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Sole constructor: stores the priority queue subclasses use to rank and
 * bound the collected hits.
 *
 * @param pq the hit queue; retained as-is (not copied)
 */
protected TopDocsCollector(PriorityQueue<T> pq) {
  this.pq = pq;
}
 
Example 23
Source Project: lucene-solr   Source File: JustCompileSearch.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Compile-only stub constructor: forwards the hit queue to the superclass.
 *
 * @param pq the hit queue passed through to {@code TopDocsCollector}
 */
protected JustCompileTopDocsCollector(PriorityQueue<ScoreDoc> pq) {
  super(pq);
}
 
Example 24
Source Project: lucene-solr   Source File: RankQueryTestPlugin.java    License: Apache License 2.0 4 votes vote down vote up
// Test collector: forwards the (raw-typed) queue to the superclass; the
// suppressions cover the deliberate use of a raw PriorityQueue in this test.
@SuppressWarnings({"unchecked"})
public TestCollector(@SuppressWarnings({"rawtypes"})PriorityQueue pq) {
  super(pq);
}
 
Example 25
Source Project: lucene-solr   Source File: RankQueryTestPlugin.java    License: Apache License 2.0 4 votes vote down vote up
// Second test collector variant: identical pass-through of a raw-typed queue;
// the suppressions cover the deliberate raw PriorityQueue usage in this test.
@SuppressWarnings({"unchecked"})
public TestCollector1(@SuppressWarnings({"rawtypes"})PriorityQueue pq) {
  super(pq);
}
 
Example 26
Source Project: Elasticsearch   Source File: XMoreLikeThis.java    License: Apache License 2.0 2 votes vote down vote up
/**
 * Create a PriorityQueue from a word-&gt;tf map, using the configured default
 * field names.
 *
 * @param words a map of words keyed on the word(String) with Int objects as the values.
 * @return the scored-term queue built by {@link #createQueue(Map, String...)}
 * @throws IOException if index statistics cannot be read
 */
private PriorityQueue<ScoreTerm> createQueue(Map<String, Int> words) throws IOException {
    return createQueue(words, this.fieldNames);
}
 
Example 27
Source Project: Elasticsearch   Source File: XMoreLikeThis.java    License: Apache License 2.0 2 votes vote down vote up
/**
 * Find words for a more-like-this query former.
 * The result is a priority queue with one {@code ScoreTerm} entry for <b>every word</b>
 * in the document, carrying the word, the top field it comes from, its score, its IDF
 * value, and its frequencies in the index and in the source document.
 * (The original javadoc described a queue of 6-element arrays; the method actually
 * returns {@code ScoreTerm} objects.)
 * This is a somewhat "advanced" routine, and in general only the word itself is of interest.
 * This method is exposed so that you can identify the "interesting words" in a document.
 * For an easier method to call see {@link #retrieveInterestingTerms retrieveInterestingTerms()}.
 *
 * @param r the reader that has the content of the document
 * @param fieldName field passed to the analyzer to use when analyzing the content
 * @return the most interesting words in the document ordered by score, with the highest scoring, or best entry, first
 * @see #retrieveInterestingTerms
 */
private PriorityQueue<ScoreTerm> retrieveTerms(Reader r, String fieldName) throws IOException {
    Map<String, Int> termFrequencies = new HashMap<>();
    addTermFrequencies(r, termFrequencies, fieldName);
    return createQueue(termFrequencies);
}
 
Example 28
Source Project: lucene-solr   Source File: MoreLikeThis.java    License: Apache License 2.0 2 votes vote down vote up
/**
 * Find words for a more-like-this query former.
 * The result is a priority queue with one {@code ScoreTerm} entry for <b>every word</b>
 * in the document, carrying the word, the top field it comes from, its score, its IDF
 * value, and its frequencies in the index and in the source document.
 * (The original javadoc described a queue of 6-element arrays; the method actually
 * returns {@code ScoreTerm} objects.)
 * This is a somewhat "advanced" routine, and in general only the word itself is of interest.
 * This method is exposed so that you can identify the "interesting words" in a document.
 * For an easier method to call see {@link #retrieveInterestingTerms retrieveInterestingTerms()}.
 *
 * @param r the reader that has the content of the document
 * @param fieldName field passed to the analyzer to use when analyzing the content
 * @return the most interesting words in the document ordered by score, with the highest scoring, or best entry, first
 * @see #retrieveInterestingTerms
 */
private PriorityQueue<ScoreTerm> retrieveTerms(Reader r, String fieldName) throws IOException {
  Map<String, Map<String, Int>> perFieldFrequencies = new HashMap<>();
  addTermFrequencies(r, perFieldFrequencies, fieldName);
  return createQueue(perFieldFrequencies);
}