Java Code Examples for org.apache.lucene.search.similarities.TFIDFSimilarity

The following examples show how to use org.apache.lucene.search.similarities.TFIDFSimilarity. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: lucene-solr   Source File: NormValueSource.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Produces per-document float values for one index segment by scoring every
 * document with a scorer that was built from unit statistics.
 *
 * @param context       lookup map; the entry under {@code "searcher"} is cast to an {@link IndexSearcher}
 * @param readerContext the segment whose reader backs the returned values
 * @return values whose {@code floatVal} is the leaf scorer's score for a frequency of 1
 * @throws UnsupportedOperationException if the searcher's similarity for the field is not a TFIDFSimilarity
 * @throws IOException declared by the signature; may be raised while building the leaf scorer
 */
@Override
public FunctionValues getValues(Map<Object, Object> context, LeafReaderContext readerContext) throws IOException {
  final IndexSearcher indexSearcher = (IndexSearcher) context.get("searcher");
  final TFIDFSimilarity tfidf = IDFValueSource.asTFIDF(indexSearcher.getSimilarity(), field);
  if (tfidf == null) {
    throw new UnsupportedOperationException("requires a TFIDFSimilarity (such as ClassicSimilarity)");
  }

  // The approach below is only valid when tf contributes 1 for freq == 1 and
  // idf contributes 1 for docCount == docFreq == 1, hence the all-ones statistics.
  final CollectionStatistics unitCollectionStats = new CollectionStatistics(field, 1, 1, 1, 1);
  final TermStatistics unitTermStats = new TermStatistics(new BytesRef("bogus"), 1, 1);
  final SimScorer unitScorer = tfidf.scorer(1f, unitCollectionStats, unitTermStats);
  final LeafSimScorer perLeafScorer = new LeafSimScorer(unitScorer, readerContext.reader(), field, true);

  return new FloatDocValues(this) {
    // Highest doc id seen so far; lookups must not go backwards.
    int previousDocID = -1;

    @Override
    public float floatVal(int docID) throws IOException {
      if (previousDocID > docID) {
        throw new AssertionError("docs out of order: lastDocID=" + previousDocID + " docID=" + docID);
      }
      previousDocID = docID;
      return perLeafScorer.score(docID, 1f);
    }
  };
}
 
Example 2
Source Project: lucene-solr   Source File: SweetSpotSimilarityTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks a {@link SweetSpotSimilarity} whose {@code tf} delegates to
 * {@code hyperbolicTf}: for frequencies 1..1000 the value must stay inside the
 * configured bounds, frequency 5 must land on the midpoint of those bounds,
 * and frequency 0 must yield 0.
 */
public void testHyperbolicSweetSpot() {
  final float lowerBound = 3.3f;
  final float upperBound = 7.7f;

  SweetSpotSimilarity sweetSpot = new SweetSpotSimilarity() {
    @Override
    public float tf(float freq) {
      return hyperbolicTf(freq);
    }
  };
  sweetSpot.setHyperbolicTfFactors(lowerBound, upperBound, Math.E, 5.0f);

  // Exercise the instance through its TFIDFSimilarity supertype.
  TFIDFSimilarity s = sweetSpot;

  for (int count = 1; count <= 1000; count++) {
    final float tfValue = s.tf(count);
    assertTrue("MIN tf: i=" + count + " : s=" + tfValue, lowerBound <= tfValue);
    assertTrue("MAX tf: i=" + count + " : s=" + tfValue, tfValue <= upperBound);
  }
  assertEquals("MID tf", lowerBound + (upperBound - lowerBound) / 2.0f, s.tf(5), 0.00001f);

  // Degenerate input: a frequency of zero must give exactly zero.
  assertEquals("tf zero", 0.0f, s.tf(0), 0.0f);
}
 
Example 3
Source Project: lucene-solr   Source File: TestFieldMaskingSpanQuery.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a span-near query over two field-masked sub-queries (both masked to
 * the "id" field), checks that it matches documents 0..3, and then walks the
 * spans of the first index leaf, asserting each expected triple in order.
 * Skipped unless the searcher uses a TFIDFSimilarity (see LUCENE-3723).
 */
public void testSpans2() throws Exception {
  assumeTrue("Broken scoring: LUCENE-3723",
      searcher.getSimilarity() instanceof TFIDFSimilarity);

  SpanQuery genderFemale = new SpanTermQuery(new Term("gender", "female"));
  SpanQuery firstJames = new SpanTermQuery(new Term("first",  "james"));
  SpanQuery femaleOrJames = new SpanOrQuery(genderFemale, new FieldMaskingSpanQuery(firstJames, "gender"));
  SpanQuery lastJones = new SpanTermQuery(new Term("last",   "jones"));

  SpanQuery[] maskedClauses = {
      new FieldMaskingSpanQuery(femaleOrJames, "id"),
      new FieldMaskingSpanQuery(lastJones, "id")
  };
  SpanQuery nearQuery = new SpanNearQuery(maskedClauses, -1, false);
  check(nearQuery, new int[] { 0, 1, 2, 3 });

  // Only the first leaf of the reader is inspected.
  SpanWeight weight = nearQuery.createWeight(searcher, ScoreMode.COMPLETE_NO_SCORES, 1f);
  Spans spans = weight.getSpans(searcher.getIndexReader().leaves().get(0), SpanWeight.Postings.POSITIONS);

  // NOTE(review): the three ints are presumably (doc, start, end) - confirm against assertNext.
  assertNext(spans, 0,0,1);
  assertNext(spans, 1,1,2);
  assertNext(spans, 2,0,1);
  assertNext(spans, 2,2,3);
  assertNext(spans, 3,0,1);
  assertFinished(spans);
}
 
Example 4
/**
 * Asserts that the score reported for one search hit equals the expected
 * model score and, when the similarity under test is not a TFIDFSimilarity,
 * that the explanation of the hit reports the same score and the expected
 * feature names.
 *
 * @param features features passed to {@code checkFeatureNames} together with the explanation
 * @param scores   expected model scores, indexed by the integer value of each document's stored "id" field
 * @param ltrQuery the query that is explained
 * @param scoreDoc the hit whose score is verified
 * @throws IOException if loading the stored document or explaining the hit fails
 */
private void assertScoresMatch(List<PrebuiltFeature> features, float[] scores,
                               RankerQuery ltrQuery, ScoreDoc scoreDoc) throws IOException {
    Document d = searcherUnderTest.doc(scoreDoc.doc);
    String idVal = d.get("id");
    // Value of the stored "id" field: used only to look up the expected score.
    // It is not the same thing as the internal Lucene doc id (scoreDoc.doc).
    int docId = Integer.decode(idVal);
    float modelScore = scores[docId];
    float queryScore = scoreDoc.score;

    assertEquals("Scores match with similarity " + similarity.getClass(), modelScore,
            queryScore, SCORE_NB_ULP_PREC *Math.ulp(modelScore));

    if (!(similarity instanceof TFIDFSimilarity)) {
        // Explain is skipped for TFIDFSimilarity because of precision issues there:
        // it produces 0.56103003 for feat:0 in doc1 using score() but 0.5610301 using explain
        //
        // explain() takes the internal doc id, so pass scoreDoc.doc (the document
        // queryScore belongs to) rather than the stored "id" value.
        Explanation expl = searcherUnderTest.explain(ltrQuery, scoreDoc.doc);

        assertEquals("Explain scores match with similarity " + similarity.getClass(), expl.getValue().floatValue(),
                queryScore, 5 * Math.ulp(modelScore));
        checkFeatureNames(expl, features);
    }
}
 
Example 5
Source Project: lucene-solr   Source File: IDFValueSource.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Computes a single idf value for the configured term and returns it as a
 * constant for every document.
 *
 * @param context       lookup map; the entry under {@code "searcher"} is cast to an {@link IndexSearcher}
 * @param readerContext not read by this implementation
 * @return constant values wrapping {@code idf(docFreq, maxDoc)} of the searcher's index reader
 * @throws UnsupportedOperationException if the searcher's similarity for the field is not a TFIDFSimilarity
 * @throws IOException declared by the signature; may be raised by the document-frequency lookup
 */
@Override
public FunctionValues getValues(Map<Object, Object> context, LeafReaderContext readerContext) throws IOException {
  final IndexSearcher indexSearcher = (IndexSearcher) context.get("searcher");
  final TFIDFSimilarity tfidf = asTFIDF(indexSearcher.getSimilarity(), field);
  if (tfidf == null) {
    throw new UnsupportedOperationException("requires a TFIDFSimilarity (such as ClassicSimilarity)");
  }

  final Term term = new Term(indexedField, indexedBytes);
  final int termDocFreq = indexSearcher.getIndexReader().docFreq(term);
  final int maxDoc = indexSearcher.getIndexReader().maxDoc();
  final float idfValue = tfidf.idf(termDocFreq, maxDoc);
  return new DocFreqValueSource.ConstDoubleDocValues(idfValue, this);
}
 
Example 6
Source Project: lucene-solr   Source File: IDFValueSource.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Resolves the similarity used for {@code field} and narrows it to a
 * {@link TFIDFSimilarity}.
 *
 * @param sim   similarity to inspect; per-field wrappers are unwrapped repeatedly
 * @param field field name handed to each {@link PerFieldSimilarityWrapper}
 * @return the unwrapped similarity if it is a TFIDFSimilarity, otherwise {@code null}
 */
static TFIDFSimilarity asTFIDF(Similarity sim, String field) {
  Similarity resolved = sim;
  // Wrappers may be nested, so keep unwrapping until a concrete similarity remains.
  while (resolved instanceof PerFieldSimilarityWrapper) {
    resolved = ((PerFieldSimilarityWrapper) resolved).get(field);
  }
  return (resolved instanceof TFIDFSimilarity) ? (TFIDFSimilarity) resolved : null;
}
 
Example 7
Source Project: lucene-solr   Source File: TestFieldMaskingSpanQuery.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Runs two span-near queries over the same pair of term queries: first with
 * only the second clause masked to "gender", then with both clauses masked to
 * "id". Both are expected to match documents 2 and 4.
 * Skipped unless the searcher uses a TFIDFSimilarity (see LUCENE-3723).
 */
public void testSimple2() throws Exception {
  assumeTrue("Broken scoring: LUCENE-3723", 
      searcher.getSimilarity() instanceof TFIDFSimilarity);

  SpanQuery genderFemale = new SpanTermQuery(new Term("gender", "female"));
  SpanQuery lastSmith = new SpanTermQuery(new Term("last", "smith"));

  // Variant 1: mask only the "last" clause so it lines up with "gender".
  SpanQuery[] genderClauses = { genderFemale, new FieldMaskingSpanQuery(lastSmith, "gender") };
  SpanQuery nearOnGender = new SpanNearQuery(genderClauses, -1, false);
  check(nearOnGender, new int[] { 2, 4 });

  // Variant 2: mask both clauses to "id".
  SpanQuery[] idClauses = {
      new FieldMaskingSpanQuery(genderFemale, "id"),
      new FieldMaskingSpanQuery(lastSmith, "id")
  };
  SpanQuery nearOnId = new SpanNearQuery(idClauses, -1, false);
  check(nearOnId, new int[] { 2, 4 });
}
 
Example 8
Source Project: Elasticsearch   Source File: XMoreLikeThis.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Creates an instance bound to the given reader and similarity.
 *
 * @param ir  index reader stored on this instance
 * @param sim TF-IDF similarity stored on this instance
 */
public XMoreLikeThis(IndexReader ir, TFIDFSimilarity sim) {
    this.similarity = sim;
    this.ir = ir;
}
 
Example 9
Source Project: Elasticsearch   Source File: XMoreLikeThis.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * @return the TF-IDF similarity currently held by this instance
 */
public TFIDFSimilarity getSimilarity() {
    return this.similarity;
}
 
Example 10
Source Project: Elasticsearch   Source File: XMoreLikeThis.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Replaces the TF-IDF similarity held by this instance.
 *
 * @param similarity the similarity to store; no validation is performed here
 */
public void setSimilarity(TFIDFSimilarity similarity) {
    this.similarity = similarity;
}
 
Example 11
Source Project: lucene-solr   Source File: MoreLikeThis.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Creates an instance bound to the given reader and similarity.
 *
 * @param ir  index reader stored on this instance
 * @param sim TF-IDF similarity stored on this instance
 */
public MoreLikeThis(IndexReader ir, TFIDFSimilarity sim) {
  this.similarity = sim;
  this.ir = ir;
}
 
Example 12
Source Project: lucene-solr   Source File: MoreLikeThis.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * @return the TF-IDF similarity currently held by this instance
 */
public TFIDFSimilarity getSimilarity() {
  return this.similarity;
}
 
Example 13
Source Project: lucene-solr   Source File: MoreLikeThis.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Replaces the TF-IDF similarity held by this instance.
 *
 * @param similarity the similarity to store; no validation is performed here
 */
public void setSimilarity(TFIDFSimilarity similarity) {
  this.similarity = similarity;
}
 
Example 14
Source Project: lucene-solr   Source File: SweetSpotSimilarityTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Compares the baseline tf of a {@link SweetSpotSimilarity} against
 * {@link ClassicSimilarity} under several baseline-factor settings:
 * equal with factors (0, 0), strictly higher with (1, 0), flat up to
 * frequency 6 with (1, 6) and (2, 6), strictly lower than classic from
 * frequency 6 upwards with (2, 6), and zero for a frequency of zero.
 */
public void testSweetSpotTf() {

  SweetSpotSimilarity sweetSpot = new SweetSpotSimilarity();

  // Both are driven through the TFIDFSimilarity type; "sweet" aliases sweetSpot.
  TFIDFSimilarity classic = new ClassicSimilarity();
  TFIDFSimilarity sweet = sweetSpot;

  // With factors (0, 0) the two implementations must agree exactly.
  sweetSpot.setBaselineTfFactors(0.0f, 0.0f);
  for (int freq = 1; freq < 1000; freq++) {
    assertEquals("tf: i=" + freq, classic.tf(freq), sweet.tf(freq), 0.0f);
  }

  // With factors (1, 0) the sweet-spot tf must be strictly above classic.
  sweetSpot.setBaselineTfFactors(1.0f, 0.0f);
  for (int freq = 1; freq < 1000; freq++) {
    assertTrue("tf: i=" + freq + " : d=" + classic.tf(freq) + " < s=" + sweet.tf(freq),
               classic.tf(freq) < sweet.tf(freq));
  }

  // With a second factor of 6 the tf is flat, at the first factor, for freq 1..6.
  sweetSpot.setBaselineTfFactors(1.0f, 6.0f);
  for (int freq = 1; freq <= 6; freq++) {
    assertEquals("tf flat1: i=" + freq, 1.0f, sweet.tf(freq), 0.0f);
  }
  sweetSpot.setBaselineTfFactors(2.0f, 6.0f);
  for (int freq = 1; freq <= 6; freq++) {
    assertEquals("tf flat2: i=" + freq, 2.0f, sweet.tf(freq), 0.0f);
  }
  // Still with (2, 6): from freq 6 upwards the sweet-spot tf is strictly below classic.
  for (int freq = 6; freq <= 1000; freq++) {
    assertTrue("tf: i=" + freq + " : s=" + sweet.tf(freq) + " < d=" + classic.tf(freq),
               sweet.tf(freq) < classic.tf(freq));
  }

  // Degenerate input: a frequency of zero must give exactly zero.
  assertEquals("tf zero", 0.0f, sweet.tf(0), 0.0f);
}
 
Example 15
Source Project: Elasticsearch   Source File: MoreLikeThisQuery.java    License: Apache License 2.0 3 votes vote down vote up
/**
 * Stores the given similarity only if it is {@code null} or a
 * {@link TFIDFSimilarity}; any other similarity is silently ignored and the
 * current value is kept.
 *
 * @param similarity candidate similarity; may be {@code null}
 */
public void setSimilarity(Similarity similarity) {
    // LUCENE 4 UPGRADE: a TFIDF similarity is required here, so anything else is not applied.
    if (similarity != null && !(similarity instanceof TFIDFSimilarity)) {
        return;
    }
    this.similarity = (TFIDFSimilarity) similarity;
}