org.apache.lucene.search.similarities.TFIDFSimilarity Java Examples

The following examples show how to use org.apache.lucene.search.similarities.TFIDFSimilarity. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: NormValueSource.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Produces per-document float values by scoring each document with the field's
 * TF-IDF similarity, using collection and term statistics that are all fixed at 1.
 *
 * @param context       map from which the {@code IndexSearcher} is read under the key {@code "searcher"}
 * @param readerContext leaf whose reader is handed to the per-leaf scorer
 * @return values whose {@code floatVal(docID)} is the per-leaf scorer's score for that document at frequency 1
 * @throws UnsupportedOperationException if {@code IDFValueSource.asTFIDF} yields {@code null} for the searcher's similarity
 * @throws IOException declared by this method and by {@code floatVal}
 */
@Override
public FunctionValues getValues(Map<Object, Object> context, LeafReaderContext readerContext) throws IOException {
  final IndexSearcher indexSearcher = (IndexSearcher) context.get("searcher");
  final TFIDFSimilarity tfidf = IDFValueSource.asTFIDF(indexSearcher.getSimilarity(), field);
  if (tfidf == null) {
    throw new UnsupportedOperationException("requires a TFIDFSimilarity (such as ClassicSimilarity)");
  }

  // Only works if the contribution of the tf is 1 when the freq is 1 and contribution of the idf
  // is 1 when docCount == docFreq == 1
  final CollectionStatistics unitCollectionStats = new CollectionStatistics(field, 1, 1, 1, 1);
  final TermStatistics unitTermStats = new TermStatistics(new BytesRef("bogus"), 1, 1);
  final SimScorer unitScorer = tfidf.scorer(1f, unitCollectionStats, unitTermStats);
  final LeafSimScorer perLeafScorer = new LeafSimScorer(unitScorer, readerContext.reader(), field, true);

  return new FloatDocValues(this) {
    // Highest doc id requested so far; requests must not go backwards.
    private int previousDocID = -1;

    @Override
    public float floatVal(int docID) throws IOException {
      if (docID >= previousDocID) {
        previousDocID = docID;
        return perLeafScorer.score(docID, 1f);
      }
      throw new AssertionError("docs out of order: lastDocID=" + previousDocID + " docID=" + docID);
    }
  };
}
 
Example #2
Source File: SweetSpotSimilarityTest.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies the hyperbolic tf function of {@code SweetSpotSimilarity}: for
 * frequencies 1..1000 the value stays within the configured min/max, the value
 * at frequency 5 is the midpoint of that range, and frequency 0 yields 0.
 */
public void testHyperbolicSweetSpot() {
  final float min = 3.3f;
  final float max = 7.7f;

  // Route tf() through the hyperbolic variant so it can be exercised via TFIDFSimilarity.
  SweetSpotSimilarity ss = new SweetSpotSimilarity() {
    @Override
    public float tf(float freq) {
      return hyperbolicTf(freq);
    }
  };
  ss.setHyperbolicTfFactors(min, max, Math.E, 5.0f);

  TFIDFSimilarity s = ss;

  for (int freq = 1; freq <= 1000; freq++) {
    final float tf = s.tf(freq);
    assertTrue("MIN tf: i=" + freq + " : s=" + tf, min <= tf);
    assertTrue("MAX tf: i=" + freq + " : s=" + tf, tf <= max);
  }

  final float midpoint = min + (max - min) / 2.0f;
  assertEquals("MID tf", midpoint, s.tf(5), 0.00001f);

  // stupidity
  assertEquals("tf zero", 0.0f, s.tf(0), 0.0f);
}
 
Example #3
Source File: TestFieldMaskingSpanQuery.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a span-near query over two field-masked sub-queries (both masked to
 * {@code "id"}), checks the matching documents, and then walks the resulting
 * spans of the first leaf, asserting each (doc, start, end) triple in order.
 * Skipped unless the searcher uses a {@code TFIDFSimilarity}.
 */
public void testSpans2() throws Exception {
  assumeTrue("Broken scoring: LUCENE-3723",
      searcher.getSimilarity() instanceof TFIDFSimilarity);

  SpanQuery femaleGender = new SpanTermQuery(new Term("gender", "female"));
  SpanQuery jamesFirst = new SpanTermQuery(new Term("first", "james"));
  SpanQuery femaleOrJames = new SpanOrQuery(femaleGender, new FieldMaskingSpanQuery(jamesFirst, "gender"));
  SpanQuery jonesLast = new SpanTermQuery(new Term("last", "jones"));

  SpanQuery[] clauses = {
      new FieldMaskingSpanQuery(femaleOrJames, "id"),
      new FieldMaskingSpanQuery(jonesLast, "id")
  };
  SpanQuery q = new SpanNearQuery(clauses, -1, false);
  check(q, new int[] { 0, 1, 2, 3 });

  SpanWeight weight = q.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f);
  Spans span = weight.getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS);

  // Expected spans as { doc, start, end }, in iteration order.
  int[][] expectedSpans = {
      { 0, 0, 1 },
      { 1, 1, 2 },
      { 2, 0, 1 },
      { 2, 2, 3 },
      { 3, 0, 1 },
  };
  for (int[] expected : expectedSpans) {
    assertNext(span, expected[0], expected[1], expected[2]);
  }
  assertFinished(span);
}
 
Example #4
Source File: LtrQueryTests.java    From elasticsearch-learning-to-rank with Apache License 2.0 6 votes vote down vote up
/**
 * Asserts that the score the query produced for a hit equals the score the
 * model predicts for that document, and (for similarities where explain is
 * precise enough) that the explanation reports the same score and carries the
 * expected feature names.
 *
 * @param features features whose names are verified against the explanation
 * @param scores   expected model scores, indexed by the integer value of the stored {@code "id"} field
 * @param ltrQuery the query being explained
 * @param scoreDoc the hit under test; {@code scoreDoc.doc} is the index-internal document id
 * @throws IOException declared for the searcher's document lookup and explain calls
 */
private void assertScoresMatch(List<PrebuiltFeature> features, float[] scores,
                               RankerQuery ltrQuery, ScoreDoc scoreDoc) throws IOException {
    Document d = searcherUnderTest.doc(scoreDoc.doc);
    String idVal = d.get("id");
    // Logical id taken from the stored field; it only indexes into the expected scores.
    int docId = Integer.decode(idVal);
    float modelScore = scores[docId];
    float queryScore = scoreDoc.score;

    assertEquals("Scores match with similarity " + similarity.getClass(), modelScore,
            queryScore, SCORE_NB_ULP_PREC *Math.ulp(modelScore));

    // The explain check is skipped for TFIDFSimilarity: there are precision issues with these
    // similarities when using explain.
    // It produces 0.56103003 for feat:0 in doc1 using score() but 0.5610301 using explain
    if (!(similarity instanceof TFIDFSimilarity)) {
        // explain() expects the index-internal doc id of the hit, not the stored "id" value;
        // the two are only equal when documents happen to be numbered in insertion order.
        Explanation expl = searcherUnderTest.explain(ltrQuery, scoreDoc.doc);

        assertEquals("Explain scores match with similarity " + similarity.getClass(), expl.getValue().floatValue(),
                queryScore, 5 * Math.ulp(modelScore));
        checkFeatureNames(expl, features);
    }
}
 
Example #5
Source File: IDFValueSource.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Returns constant values holding the idf of this source's indexed term,
 * computed by the field's TF-IDF similarity from the term's document frequency
 * and the reader's {@code maxDoc()}.
 *
 * @param context       map from which the {@code IndexSearcher} is read under the key {@code "searcher"}
 * @param readerContext not read by this method
 * @return constant doc values wrapping the computed idf
 * @throws UnsupportedOperationException if {@code asTFIDF} yields {@code null} for the searcher's similarity
 * @throws IOException declared for the reader's document-frequency lookup
 */
@Override
public FunctionValues getValues(Map<Object, Object> context, LeafReaderContext readerContext) throws IOException {
  final IndexSearcher indexSearcher = (IndexSearcher) context.get("searcher");
  final TFIDFSimilarity tfidf = asTFIDF(indexSearcher.getSimilarity(), field);
  if (tfidf == null) {
    throw new UnsupportedOperationException("requires a TFIDFSimilarity (such as ClassicSimilarity)");
  }

  final Term term = new Term(indexedField, indexedBytes);
  final int termDocFreq = indexSearcher.getIndexReader().docFreq(term);
  final int maxDoc = indexSearcher.getIndexReader().maxDoc();
  final float idfValue = tfidf.idf(termDocFreq, maxDoc);

  return new DocFreqValueSource.ConstDoubleDocValues(idfValue, this);
}
 
Example #6
Source File: IDFValueSource.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Unwraps any {@code PerFieldSimilarityWrapper} layers for the given field and
 * returns the resulting similarity if it is a {@code TFIDFSimilarity}.
 *
 * @param sim   the similarity to inspect
 * @param field the field passed to each wrapper's {@code get}
 * @return the unwrapped similarity cast to {@code TFIDFSimilarity}, or {@code null} if it is not one
 */
static TFIDFSimilarity asTFIDF(Similarity sim, String field) {
  Similarity unwrapped = sim;
  while (unwrapped instanceof PerFieldSimilarityWrapper) {
    unwrapped = ((PerFieldSimilarityWrapper) unwrapped).get(field);
  }
  return unwrapped instanceof TFIDFSimilarity ? (TFIDFSimilarity) unwrapped : null;
}
 
Example #7
Source File: TestFieldMaskingSpanQuery.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a span-near query over a "gender" term and a "last" term matches
 * documents 2 and 4, both when only the second clause is masked to
 * {@code "gender"} and when both clauses are masked to {@code "id"}.
 * Skipped unless the searcher uses a {@code TFIDFSimilarity}.
 */
public void testSimple2() throws Exception {
  assumeTrue("Broken scoring: LUCENE-3723",
      searcher.getSimilarity() instanceof TFIDFSimilarity);

  SpanQuery femaleGender = new SpanTermQuery(new Term("gender", "female"));
  SpanQuery smithLast = new SpanTermQuery(new Term("last", "smith"));

  // Variant 1: only the "last" clause is masked, onto the "gender" field.
  SpanQuery[] maskedToGender = { femaleGender, new FieldMaskingSpanQuery(smithLast, "gender") };
  SpanQuery nearOnGender = new SpanNearQuery(maskedToGender, -1, false);
  check(nearOnGender, new int[] { 2, 4 });

  // Variant 2: both clauses are masked onto the "id" field.
  SpanQuery[] maskedToId = {
      new FieldMaskingSpanQuery(femaleGender, "id"),
      new FieldMaskingSpanQuery(smithLast, "id")
  };
  SpanQuery nearOnId = new SpanNearQuery(maskedToId, -1, false);
  check(nearOnId, new int[] { 2, 4 });
}
 
Example #8
Source File: XMoreLikeThis.java    From Elasticsearch with Apache License 2.0 4 votes vote down vote up
/**
 * Creates an instance that stores the given reader and similarity.
 *
 * @param ir  the index reader kept on this instance
 * @param sim the TF-IDF similarity kept on this instance
 */
public XMoreLikeThis(IndexReader ir, TFIDFSimilarity sim) {
    this.similarity = sim;
    this.ir = ir;
}
 
Example #9
Source File: XMoreLikeThis.java    From Elasticsearch with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the similarity currently held by this instance.
 *
 * @return the stored {@code TFIDFSimilarity}
 */
public TFIDFSimilarity getSimilarity() {
    return this.similarity;
}
 
Example #10
Source File: XMoreLikeThis.java    From Elasticsearch with Apache License 2.0 4 votes vote down vote up
/**
 * Replaces the similarity held by this instance.
 *
 * @param similarity the {@code TFIDFSimilarity} to store
 */
public void setSimilarity(TFIDFSimilarity similarity) {
    this.similarity = similarity;
}
 
Example #11
Source File: MoreLikeThis.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Creates an instance that stores the given reader and similarity.
 *
 * @param ir  the index reader kept on this instance
 * @param sim the TF-IDF similarity kept on this instance
 */
public MoreLikeThis(IndexReader ir, TFIDFSimilarity sim) {
  this.similarity = sim;
  this.ir = ir;
}
 
Example #12
Source File: MoreLikeThis.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the similarity currently held by this instance.
 *
 * @return the stored {@code TFIDFSimilarity}
 */
public TFIDFSimilarity getSimilarity() {
  return this.similarity;
}
 
Example #13
Source File: MoreLikeThis.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Replaces the similarity held by this instance.
 *
 * @param similarity the {@code TFIDFSimilarity} to store
 */
public void setSimilarity(TFIDFSimilarity similarity) {
  this.similarity = similarity;
}
 
Example #14
Source File: SweetSpotSimilarityTest.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Compares the tf of {@code SweetSpotSimilarity} with that of
 * {@code ClassicSimilarity} under several baseline tf factor settings:
 * equal with factors (0, 0), strictly higher with (1, 0), flat at the base
 * value for frequencies 1..6 with (1, 6) and (2, 6), strictly lower than the
 * classic tf for frequencies 6..1000 with (2, 6), and 0 for frequency 0.
 */
public void testSweetSpotTf() {

  SweetSpotSimilarity sweetSpot = new SweetSpotSimilarity();

  TFIDFSimilarity d = new ClassicSimilarity();
  TFIDFSimilarity s = sweetSpot;

  // tf equal
  sweetSpot.setBaselineTfFactors(0.0f, 0.0f);
  for (int freq = 1; freq < 1000; freq++) {
    assertEquals("tf: i=" + freq, d.tf(freq), s.tf(freq), 0.0f);
  }

  // tf higher
  sweetSpot.setBaselineTfFactors(1.0f, 0.0f);
  for (int freq = 1; freq < 1000; freq++) {
    final float classicTf = d.tf(freq);
    final float sweetTf = s.tf(freq);
    assertTrue("tf: i=" + freq + " : d=" + classicTf + " < s=" + sweetTf,
               classicTf < sweetTf);
  }

  // tf flat
  sweetSpot.setBaselineTfFactors(1.0f, 6.0f);
  for (int freq = 1; freq <= 6; freq++) {
    assertEquals("tf flat1: i=" + freq, 1.0f, s.tf(freq), 0.0f);
  }
  sweetSpot.setBaselineTfFactors(2.0f, 6.0f);
  for (int freq = 1; freq <= 6; freq++) {
    assertEquals("tf flat2: i=" + freq, 2.0f, s.tf(freq), 0.0f);
  }
  for (int freq = 6; freq <= 1000; freq++) {
    final float sweetTf = s.tf(freq);
    final float classicTf = d.tf(freq);
    assertTrue("tf: i=" + freq + " : s=" + sweetTf + " < d=" + classicTf,
               sweetTf < classicTf);
  }

  // stupidity
  assertEquals("tf zero", 0.0f, s.tf(0), 0.0f);
}
 
Example #15
Source File: MoreLikeThisQuery.java    From Elasticsearch with Apache License 2.0 3 votes vote down vote up
/**
 * Stores the given similarity when it is {@code null} or a
 * {@code TFIDFSimilarity}; any other similarity is ignored and the current
 * value is left untouched.
 *
 * @param similarity the similarity to adopt, or {@code null} to clear it
 */
public void setSimilarity(Similarity similarity) {
    boolean accepted = similarity == null || similarity instanceof TFIDFSimilarity;
    if (!accepted) {
        // Not a TFIDF similarity: keep whatever is currently set.
        return;
    }
    //LUCENE 4 UPGRADE we need TFIDF similarity here so I only set it if it is an instance of it
    this.similarity = (TFIDFSimilarity) similarity;
}