Java Code Examples for org.apache.lucene.search.similarities.ClassicSimilarity

The following examples show how to use org.apache.lucene.search.similarities.ClassicSimilarity. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: scava   Source File: SORecommender.java    License: Eclipse Public License 2.0 6 votes vote down vote up
/**
 * Runs the given Lucene query against the index at {@code INDEX_DIRECTORY} and
 * returns the top {@code hitsPerPage} hits, or {@code null} if anything fails.
 *
 * <p>Uses {@link ClassicSimilarity} (TF-IDF) unless {@code isBm25} is set, in
 * which case the searcher's default BM25 scoring is kept.
 *
 * @param query the parsed Lucene query to execute
 * @return the top documents, or {@code null} on any failure (legacy contract)
 */
public TopDocs executeQuery(org.apache.lucene.search.Query query) throws IOException, ParseException {
	// try-with-resources: the original leaked both the Directory and the
	// IndexReader on every call.
	try (Directory indexDir = FSDirectory.open(Paths.get(INDEX_DIRECTORY));
			IndexReader reader = DirectoryReader.open(indexDir)) {
		IndexSearcher searcher = new IndexSearcher(reader);
		if (!isBm25) {
			searcher.setSimilarity(new ClassicSimilarity());
		}
		return searcher.search(query, hitsPerPage);
	} catch (Exception e) {
		// Log the full stack trace (the original logged only the message,
		// hiding the failure location); keep returning null for callers.
		logger.error("Query execution failed", e);
		return null;
	}
}
 
Example 2
Source Project: lucene-solr   Source File: TestValueSources.java    License: Apache License 2.0 6 votes vote down vote up
public void testNorm() throws Exception {
  // Remember the configured similarity so it can be restored afterwards.
  Similarity previous = searcher.getSimilarity();
  try {
    // no norm field (so agnostic to indexed similarity)
    searcher.setSimilarity(new ClassicSimilarity());

    ValueSource byteNorms = new NormValueSource("byte");
    assertHits(new FunctionQuery(byteNorms), new float[] { 1f, 1f });

    // regardless of whether norms exist, value source exists == 0
    assertAllExist(byteNorms);

    ValueSource textNorms = new NormValueSource("text");
    assertAllExist(textNorms);
  } finally {
    searcher.setSimilarity(previous);
  }
}
 
Example 3
Source Project: lucene-solr   Source File: TestValueSources.java    License: Apache License 2.0 6 votes vote down vote up
public void testTF() throws Exception {
  // Keep the active similarity so the searcher can be restored on exit.
  Similarity previous = searcher.getSimilarity();
  try {
    // no norm field (so agnostic to indexed similarity)
    searcher.setSimilarity(new ClassicSimilarity());

    ValueSource tf = new TFValueSource("bogus", "bogus", "text", new BytesRef("test"));
    float[] expectedTextTf = { (float) Math.sqrt(3d), (float) Math.sqrt(1d) };
    assertHits(new FunctionQuery(tf), expectedTextTf);
    assertAllExist(tf);

    tf = new TFValueSource("bogus", "bogus", "string", new BytesRef("bar"));
    assertHits(new FunctionQuery(tf), new float[] { 0f, 1f });
    assertAllExist(tf);

    // regardless of whether norms exist, value source exists == 0
    tf = new TFValueSource("bogus", "bogus", "bogus", new BytesRef("bogus"));
    assertHits(new FunctionQuery(tf), new float[] { 0F, 0F });
    assertAllExist(tf);
  } finally {
    searcher.setSimilarity(previous);
  }
}
 
Example 4
Source Project: lucene-solr   Source File: TestTaxonomyFacetCounts.java    License: Apache License 2.0 6 votes vote down vote up
public void testReallyNoNormsForDrillDown() throws Exception {
  Directory indexDir = newDirectory();
  Directory taxoDir = newDirectory();

  // Force ClassicSimilarity for the single expected field name.
  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
  iwc.setSimilarity(new PerFieldSimilarityWrapper() {
      final Similarity sim = new ClassicSimilarity();

      @Override
      public Similarity get(String name) {
        assertEquals("field", name);
        return sim;
      }
    });

  TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
  RandomIndexWriter writer = new RandomIndexWriter(random(), indexDir, iwc);
  FacetsConfig config = new FacetsConfig();

  // One document with both a text field and a facet dimension.
  Document doc = new Document();
  doc.add(newTextField("field", "text", Field.Store.NO));
  doc.add(new FacetField("a", "path"));
  writer.addDocument(config.build(taxoWriter, doc));
  writer.close();
  IOUtils.close(taxoWriter, indexDir, taxoDir);
}
 
Example 5
Source Project: lucene-solr   Source File: TestPayloadSpanUtil.java    License: Apache License 2.0 6 votes vote down vote up
public void testPayloadSpanUtil() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir,
      newIndexWriterConfig(new PayloadAnalyzer()).setSimilarity(new ClassicSimilarity()));

  // Single document with payload-bearing tokens.
  Document document = new Document();
  document.add(newTextField(FIELD, "xx rr yy mm  pp", Field.Store.YES));
  writer.addDocument(document);

  IndexReader reader = writer.getReader();
  writer.close();
  IndexSearcher searcher = newSearcher(reader);

  PayloadSpanUtil psu = new PayloadSpanUtil(searcher.getTopReaderContext());
  Collection<byte[]> payloads = psu.getPayloadsForQuery(new TermQuery(new Term(FIELD, "rr")));

  if (VERBOSE) {
    System.out.println("Num payloads:" + payloads.size());
    for (byte[] payload : payloads) {
      System.out.println(new String(payload, StandardCharsets.UTF_8));
    }
  }

  reader.close();
  dir.close();
}
 
Example 6
Source Project: lucene-solr   Source File: TestSimilarities.java    License: Apache License 2.0 6 votes vote down vote up
public void testNonStandardSimilarity() throws Exception {

  try (Monitor monitor = newMonitor()) {
    monitor.register(new MonitorQuery("1", MonitorTestBase.parse("test")));

    // A ClassicSimilarity whose tf() is a large constant, so every match's
    // score is scaled by 1000 relative to the stock implementation.
    Similarity boostedSim = new ClassicSimilarity() {
      @Override
      public float tf(float freq) {
        return 1000f;
      }
    };

    Document doc = new Document();
    doc.add(newTextField("field", "this is a test", Field.Store.NO));

    MatchingQueries<ScoringMatch> standard = monitor.match(doc, ScoringMatch.matchWithSimilarity(new ClassicSimilarity()));
    MatchingQueries<ScoringMatch> boosted = monitor.match(doc, ScoringMatch.matchWithSimilarity(boostedSim));

    float standardScore = standard.getMatches().iterator().next().getScore();
    float boostedScore = boosted.getMatches().iterator().next().getScore();
    assertEquals(standardScore, boostedScore / 1000, 0.1f);
  }
}
 
Example 7
@Override
public void setUp() throws Exception {
  super.setUp();
  analyzer = new MockAnalyzer(random());
  dir = newDirectory();

  IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
  iwc.setMergePolicy(newLogMergePolicy()); // we will use docids to validate

  // Three small documents with overlapping vocabulary.
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
  iw.addDocument(doc("lucene", "lucene is a very popular search engine library"));
  iw.addDocument(doc("solr", "solr is a very popular search server and is using lucene"));
  iw.addDocument(doc("nutch", "nutch is an internet search engine with web crawler and is using lucene and hadoop"));
  reader = iw.getReader();
  iw.close();

  // we do not use newSearcher because the assertingXXX layers break
  // the toString representations we are relying on
  // TODO: clean that up
  searcher = new IndexSearcher(reader);
  searcher.setSimilarity(new ClassicSimilarity());
  scorerSearcher = new ScorerIndexSearcher(reader);
  scorerSearcher.setSimilarity(new CountingSimilarity());
}
 
Example 8
Source Project: lucene-solr   Source File: TestTermScorer.java    License: Apache License 2.0 6 votes vote down vote up
@Override
public void setUp() throws Exception {
  super.setUp();
  directory = newDirectory();

  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()))
      .setMergePolicy(newLogMergePolicy())
      .setSimilarity(new ClassicSimilarity());
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory, iwc);

  // One document per entry in the shared values fixture.
  for (String value : values) {
    Document doc = new Document();
    doc.add(newTextField(FIELD, value, Field.Store.YES));
    writer.addDocument(doc);
  }
  writer.forceMerge(1);
  indexReader = getOnlyLeafReader(writer.getReader());
  writer.close();

  indexSearcher = newSearcher(indexReader, false);
  indexSearcher.setSimilarity(new ClassicSimilarity());
}
 
Example 9
Source Project: lucene-solr   Source File: TestMemoryIndex.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testFreezeAPI() {

  MemoryIndex index = new MemoryIndex();
  index.addField("f1", "some text", analyzer);

  assertThat(index.search(new MatchAllDocsQuery()), not(is(0.0f)));
  assertThat(index.search(new TermQuery(new Term("f1", "some"))), not(is(0.0f)));

  // check we can add a new field after searching
  index.addField("f2", "some more text", analyzer);
  assertThat(index.search(new TermQuery(new Term("f2", "some"))), not(is(0.0f)));

  // freeze!
  index.freeze();

  // Mutations must now fail until reset() is called.
  RuntimeException thrown = expectThrows(RuntimeException.class,
      () -> index.addField("f3", "and yet more", analyzer));
  assertThat(thrown.getMessage(), containsString("frozen"));

  thrown = expectThrows(RuntimeException.class,
      () -> index.setSimilarity(new BM25Similarity(1, 1)));
  assertThat(thrown.getMessage(), containsString("frozen"));

  // Searching a frozen index still works.
  assertThat(index.search(new TermQuery(new Term("f1", "some"))), not(is(0.0f)));

  index.reset();
  index.addField("f1", "wibble", analyzer);
  assertThat(index.search(new TermQuery(new Term("f1", "some"))), is(0.0f));
  assertThat(index.search(new TermQuery(new Term("f1", "wibble"))), not(is(0.0f)));

  // check we can set the Similarity again
  index.setSimilarity(new ClassicSimilarity());
}
 
Example 10
Source Project: lucene-solr   Source File: SearchImpl.java    License: Apache License 2.0 5 votes vote down vote up
/** Builds either a classic TF-IDF or a BM25 similarity from the given config. */
private Similarity createSimilarity(SimilarityConfig config) {
  if (config.isUseClassicSimilarity()) {
    ClassicSimilarity classic = new ClassicSimilarity();
    classic.setDiscountOverlaps(config.isDiscountOverlaps());
    return classic;
  }
  BM25Similarity bm25 = new BM25Similarity(config.getK1(), config.getB());
  bm25.setDiscountOverlaps(config.isDiscountOverlaps());
  return bm25;
}
 
Example 11
Source Project: lucene-solr   Source File: TestValueSources.java    License: Apache License 2.0 5 votes vote down vote up
public void testIDF() throws Exception {
  // Save the active similarity so it can be restored after the check.
  Similarity previous = searcher.getSimilarity();
  try {
    searcher.setSimilarity(new ClassicSimilarity());
    ValueSource idf = new IDFValueSource("bogus", "bogus", "text", new BytesRef("test"));
    assertHits(new FunctionQuery(idf), new float[] { 1.0f, 1.0f });
    assertAllExist(idf);
  } finally {
    searcher.setSimilarity(previous);
  }
}
 
Example 12
Source Project: lucene-solr   Source File: TestPayloadScoreQuery.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testNestedNearQuery() throws Exception {

  // (one OR hundred) NEAR (twenty two) ~ 1
  //  2    4        4    4
  // one hundred twenty two
  // two hundred twenty two

  SpanQuery oneOrHundred = new SpanOrQuery(
      new SpanTermQuery(new Term("field", "one")),
      new SpanTermQuery(new Term("field", "hundred")));
  SpanQuery twentyTwo = new SpanNearQuery(new SpanQuery[]{
      new SpanTermQuery(new Term("field", "twenty")),
      new SpanTermQuery(new Term("field", "two"))
  }, 0, true);
  SpanNearQuery q = new SpanNearQuery(new SpanQuery[]{ oneOrHundred, twentyTwo }, 1, true);

  // check includeSpanScore makes a difference here
  searcher.setSimilarity(new ClassicSimilarity());
  try {
    checkQuery(q, new MaxPayloadFunction(), new int[]{ 122, 222 }, new float[]{ 20.901256561279297f, 17.06580352783203f });
    checkQuery(q, new MinPayloadFunction(), new int[]{ 222, 122 }, new float[]{ 17.06580352783203f, 10.450628280639648f });
    checkQuery(q, new AveragePayloadFunction(), new int[] { 122, 222 }, new float[]{ 19.15948486328125f, 17.06580352783203f });
    checkQuery(q, new MaxPayloadFunction(), false, new int[]{122, 222}, new float[]{4.0f, 4.0f});
    checkQuery(q, new MinPayloadFunction(), false, new int[]{222, 122}, new float[]{4.0f, 2.0f});
    checkQuery(q, new AveragePayloadFunction(), false, new int[]{222, 122}, new float[]{4.0f, 3.666666f});
  } finally {
    searcher.setSimilarity(similarity);
  }
}
 
Example 13
Source Project: lucene-solr   Source File: TestMinShouldMatch2.java    License: Apache License 2.0 5 votes vote down vote up
@BeforeClass
public static void beforeClass() throws Exception {
  dir = newDirectory();
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir);

  // Term rarity is controlled by how often each bucket is added:
  // always / 90% / 50% / 10% of documents.
  final int numDocs = atLeast(300);
  for (int docNum = 0; docNum < numDocs; docNum++) {
    Document doc = new Document();
    addSome(doc, alwaysTerms);
    if (random().nextInt(100) < 90) {
      addSome(doc, commonTerms);
    }
    if (random().nextInt(100) < 50) {
      addSome(doc, mediumTerms);
    }
    if (random().nextInt(100) < 10) {
      addSome(doc, rareTerms);
    }
    iw.addDocument(doc);
  }
  iw.forceMerge(1);
  iw.close();

  r = DirectoryReader.open(dir);
  reader = getOnlyLeafReader(r);
  searcher = new IndexSearcher(reader);
  searcher.setSimilarity(new ClassicSimilarity());
}
 
Example 14
Source Project: lucene-solr   Source File: TestElevationComparator.java    License: Apache License 2.0 5 votes vote down vote up
public void testSorting() throws Throwable {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()))
      .setMaxBufferedDocs(2)
      .setMergePolicy(newLogMergePolicy(1000))
      .setSimilarity(new ClassicSimilarity());
  IndexWriter writer = new IndexWriter(dir, iwc);

  // Docs with increasing term frequency for "ipod" and "boosted".
  writer.addDocument(adoc(new String[] {"id", "a", "title", "ipod", "str_s", "a"}));
  writer.addDocument(adoc(new String[] {"id", "b", "title", "ipod ipod", "str_s", "b"}));
  writer.addDocument(adoc(new String[] {"id", "c", "title", "ipod ipod ipod", "str_s", "c"}));
  writer.addDocument(adoc(new String[] {"id", "x", "title", "boosted", "str_s", "x"}));
  writer.addDocument(adoc(new String[] {"id", "y", "title", "boosted boosted", "str_s", "y"}));
  writer.addDocument(adoc(new String[] {"id", "z", "title", "boosted boosted boosted", "str_s", "z"}));

  IndexReader reader = DirectoryReader.open(writer);
  writer.close();

  IndexSearcher searcher = newSearcher(reader);
  searcher.setSimilarity(new BM25Similarity());

  runTest(searcher, true);
  runTest(searcher, false);

  reader.close();
  dir.close();
}
 
Example 15
Source Project: lucene-solr   Source File: TestFuzzyQuery.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * See issue LUCENE-329 - IDF shouldn't wreck similarity ranking: an exact
 * match must outrank fuzzy matches even when the exact term is more common.
 */
public void testSingleQueryExactMatchScoresHighest() throws Exception {
  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
  addDoc("smith", writer);
  addDoc("smith", writer);
  addDoc("smith", writer);
  addDoc("smith", writer);
  addDoc("smith", writer);
  addDoc("smith", writer);
  addDoc("smythe", writer);
  addDoc("smdssasd", writer);

  IndexReader reader = writer.getReader();
  IndexSearcher searcher = newSearcher(reader);
  searcher.setSimilarity(new ClassicSimilarity()); //avoid randomisation of similarity algo by test framework
  writer.close();
  String[] searchTerms = { "smith", "smythe", "smdssasd" };
  for (String searchTerm : searchTerms) {
    FuzzyQuery query = new FuzzyQuery(new Term("field", searchTerm), 2, 1);
    ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;
    // Guard BEFORE dereferencing hits[0]: the original asserted
    // hits.length > 0 only after already reading hits[0], so an empty result
    // would surface as ArrayIndexOutOfBoundsException instead of a clear
    // assertion failure.
    assertTrue(hits.length > 0);
    Document bestDoc = searcher.doc(hits[0].doc);
    String topMatch = bestDoc.get("field");
    assertEquals(searchTerm, topMatch);
    if (hits.length > 1) {
      // The worst-ranked hit must not be the searched term itself.
      Document worstDoc = searcher.doc(hits[hits.length - 1].doc);
      String worstMatch = worstDoc.get("field");
      assertNotSame(searchTerm, worstMatch);
    }
  }
  reader.close();
  directory.close();
}
 
Example 16
Source Project: lucene-solr   Source File: TestPhraseQuery.java    License: Apache License 2.0 5 votes vote down vote up
public void testSlopScoring() throws IOException {
  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir,
      newIndexWriterConfig(new MockAnalyzer(random()))
        .setMergePolicy(newLogMergePolicy())
        .setSimilarity(new BM25Similarity()));

  // Three docs with increasing distance between "firstname" and "lastname".
  String[] contents = {
      "foo firstname lastname foo",
      "foo firstname zzz lastname foo",
      "foo firstname zzz yyy lastname foo"
  };
  for (String content : contents) {
    Document doc = new Document();
    doc.add(newTextField("field", content, Field.Store.YES));
    writer.addDocument(doc);
  }

  IndexReader reader = writer.getReader();
  writer.close();

  IndexSearcher searcher = newSearcher(reader);
  searcher.setSimilarity(new ClassicSimilarity());
  PhraseQuery query = new PhraseQuery(Integer.MAX_VALUE, "field", "firstname", "lastname");
  ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;
  assertEquals(3, hits.length);
  // Make sure that those matches where the terms appear closer to
  // each other get a higher score:
  assertEquals(1.0, hits[0].score, 0.01);
  assertEquals(0, hits[0].doc);
  assertEquals(0.63, hits[1].score, 0.01);
  assertEquals(1, hits[1].doc);
  assertEquals(0.47, hits[2].score, 0.01);
  assertEquals(2, hits[2].doc);
  QueryUtils.check(random(), query, searcher);
  reader.close();
  dir.close();
}
 
Example 17
Source Project: lucene-solr   Source File: TestQueryRescorer.java    License: Apache License 2.0 5 votes vote down vote up
/** Wraps the reader in a searcher pinned to classic TF-IDF scoring. */
private IndexSearcher getSearcher(IndexReader r) {
  IndexSearcher s = newSearcher(r);
  // We rely on more tokens = lower score:
  s.setSimilarity(new ClassicSimilarity());
  return s;
}
 
Example 18
Source Project: lucene-solr   Source File: TestBooleanQuery.java    License: Apache License 2.0 5 votes vote down vote up
public void testNullOrSubScorer() throws Throwable {
  Directory dir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), dir);
  Document doc = new Document();
  doc.add(newTextField("field", "a b c d", Field.Store.NO));
  w.addDocument(doc);

  IndexReader reader = w.getReader();
  IndexSearcher searcher = newSearcher(reader);
  // this test relies upon coord being the default implementation,
  // otherwise scores are different!
  searcher.setSimilarity(new ClassicSimilarity());

  TermQuery termA = new TermQuery(new Term("field", "a"));
  // PhraseQuery w/ no terms added returns a null scorer
  PhraseQuery emptyPhrase = new PhraseQuery("field", new String[0]);

  BooleanQuery.Builder builder = new BooleanQuery.Builder();
  builder.add(termA, BooleanClause.Occur.SHOULD);
  builder.add(emptyPhrase, BooleanClause.Occur.SHOULD);
  assertEquals(1, searcher.search(builder.build(), 10).totalHits.value);

  // A required clause which returns null scorer should return null scorer to
  // IndexSearcher.
  builder = new BooleanQuery.Builder();
  emptyPhrase = new PhraseQuery("field", new String[0]);
  builder.add(termA, BooleanClause.Occur.SHOULD);
  builder.add(emptyPhrase, BooleanClause.Occur.MUST);
  assertEquals(0, searcher.search(builder.build(), 10).totalHits.value);

  DisjunctionMaxQuery dmq = new DisjunctionMaxQuery(
      Arrays.asList(termA, emptyPhrase),
      1.0f);
  assertEquals(1, searcher.search(dmq, 10).totalHits.value);

  reader.close();
  w.close();
  dir.close();
}
 
Example 19
/** default parameters */
public void testDefaults() throws Exception {
  SweetSpotSimilarity sim = getSimilarity("text", SweetSpotSimilarity.class);

  // SSS tf w/defaults should behave just like DS
  ClassicSimilarity classic = new ClassicSimilarity();
  for (int freq = 0; freq <= 1000; freq++) {
    assertEquals("tf: i="+freq, classic.tf(freq), sim.tf(freq), 0.0F);
  }

  // default norm sanity check
  assertEquals("norm 1",  1.00F, computeNorm(sim, 1),  0.0F);
  assertEquals("norm 4",  0.50F, computeNorm(sim, 4),  0.0F);
  assertEquals("norm 16", 0.25F, computeNorm(sim, 16), 0.0F);
}
 
Example 20
/** baseline with parameters */
public void testBaselineParameters() throws Exception {
  SweetSpotSimilarity sim = getSimilarity("text_baseline", 
                                          SweetSpotSimilarity.class);
  
  ClassicSimilarity d = new ClassicSimilarity();

  // constant up to 6
  for (int i = 1; i <=6; i++) {
    assertEquals("tf i="+i, 1.5F, sim.tf(i), 0.0F);
  }
  // less than default sim above 6
  for (int i = 6; i <=1000; i++) {
    assertTrue("tf: i="+i+" : s="+sim.tf(i)+
               " < d="+d.tf(i),
               sim.tf(i) < d.tf(i));
  }

  // norms: plateau from 3-5, with symmetric falloff on either side
  assertEquals("norm 1 == 7", 
               computeNorm(sim, 1), computeNorm(sim, 7),  0.0F);
  // Fixed copy/paste bug: this assertion previously re-compared lengths
  // 1 and 7, so the "2 == 6" symmetry it names was never actually checked.
  assertEquals("norm 2 == 6",  
               computeNorm(sim, 2), computeNorm(sim, 6),  0.0F);
  assertEquals("norm 3",  1.00F, computeNorm(sim, 3),  0.0F);
  assertEquals("norm 4",  1.00F, computeNorm(sim, 4),  0.0F);
  assertEquals("norm 5",  1.00F, computeNorm(sim, 5),  0.0F);
  assertTrue("norm 6 too high: " + computeNorm(sim, 6),
             computeNorm(sim, 6) < 1.0F);
  assertTrue("norm 7 higher then norm 6", 
             computeNorm(sim, 7) < computeNorm(sim, 6));
  assertEquals("norm 20", 0.25F, computeNorm(sim, 20), 0.0F);
}
 
Example 21
@Before
public void setupIndex() throws IOException {
    dirUnderTest = newDirectory();

    // Pick one similarity at random so the suite covers many scoring models.
    List<Similarity> candidates = Arrays.asList(
            new ClassicSimilarity(),
            new SweetSpotSimilarity(), // extends Classic
            new BM25Similarity(),
            new LMDirichletSimilarity(),
            new BooleanSimilarity(),
            new LMJelinekMercerSimilarity(0.2F),
            new AxiomaticF3LOG(0.5F, 10),
            new DFISimilarity(new IndependenceChiSquared()),
            new DFRSimilarity(new BasicModelG(), new AfterEffectB(), new NormalizationH1()),
            new IBSimilarity(new DistributionLL(), new LambdaDF(), new NormalizationH3()));
    similarity = candidates.get(random().nextInt(candidates.size()));

    indexWriterUnderTest = new RandomIndexWriter(random(), dirUnderTest, newIndexWriterConfig().setSimilarity(similarity));
    for (int docId = 0; docId < docs.length; docId++) {
        Document doc = new Document();
        doc.add(newStringField("id", "" + docId, Field.Store.YES));
        doc.add(newField("field", docs[docId], Store.YES));
        indexWriterUnderTest.addDocument(doc);
    }
    indexWriterUnderTest.commit();
    indexWriterUnderTest.forceMerge(1);
    indexWriterUnderTest.flush();

    indexReaderUnderTest = indexWriterUnderTest.getReader();
    searcherUnderTest = newSearcher(indexReaderUnderTest);
    searcherUnderTest.setSimilarity(similarity);
}
 
Example 22
Source Project: querqy   Source File: LuceneTermQueryBuilderTest.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testThatQueryUsesTermButNoFieldBoost() throws Exception {

    // Build a tiny index: four docs containing "v1 v1" and one containing "v2".
    Analyzer analyzer = new StandardAnalyzer();
    Directory directory = new ByteBuffersDirectory();
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    config.setSimilarity(new ClassicSimilarity());
    IndexWriter indexWriter = new IndexWriter(directory, config);

    TestUtil.addNumDocsWithTextField("f1", "v1 v1", indexWriter, 4);
    TestUtil.addNumDocsWithTextField("f1", "v2", indexWriter, 1);
    indexWriter.close();

    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
    indexSearcher.setSimilarity(new ClassicSimilarity());

    // The builder should keep the term but drop the field boost entirely.
    final TermQuery termQuery = new LuceneTermQueryBuilder()
            .createTermQuery(new Term("f1", "v1"), new ConstantFieldBoost(3f));
    final Term term = termQuery.getTerm();
    assertEquals("f1", term.field());
    assertEquals("v1", term.text());

    TopDocs topDocs = indexSearcher.search(termQuery, 10);

    final Weight weight = termQuery.createWeight(indexSearcher, ScoreMode.COMPLETE, 4.5f);
    final Explanation explain = weight.explain(indexReader.getContext().leaves().get(0), topDocs.scoreDocs[0].doc);
    String explainText = explain.toString();

    assertTrue(explainText.contains("4.5 = boost")); // 4.5 (query) but ignore field boost
    assertTrue(explainText.contains("4 = docFreq")); // 4 * v1
    assertTrue(explainText.contains("2.0 = freq")); // 2 * v1 in field
}
 
Example 23
Source Project: lumongo   Source File: DocFreq.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Helper exposing per-term document-frequency statistics for a single field.
 *
 * @param indexReader reader for the index the statistics come from
 * @param field       the field whose term statistics are of interest
 */
public DocFreq(IndexReader indexReader, String field) {
	this.indexReader = indexReader;
	this.field = field;
	// term -> docFreq cache; presumably populated lazily by other methods
	// of this class (not visible here) — verify against the full source.
	this.docFreqMap = new HashMap<>();
	// Classic TF-IDF scoring helpers.
	this.similarity = new ClassicSimilarity();
	this.numDocs = indexReader.numDocs();
}
 
Example 24
Source Project: lumongo   Source File: LumongoSegment.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds a per-field similarity wrapper: the index-level default (BM25) is
 * overridden first by the field's analyzer settings and then by any
 * per-query override, and finally mapped to a concrete Similarity.
 */
private PerFieldSimilarityWrapper getSimilarity(final QueryWithFilters queryWithFilters) {
	return new PerFieldSimilarityWrapper() {
		@Override
		public Similarity get(String fieldName) {
			AnalyzerSettings.Similarity chosen = AnalyzerSettings.Similarity.BM25;

			AnalyzerSettings analyzerSettings = indexConfig.getAnalyzerSettingsForIndexField(fieldName);
			if (analyzerSettings != null) {
				chosen = analyzerSettings.getSimilarity();
			}

			// A per-query override has the highest precedence.
			AnalyzerSettings.Similarity override = queryWithFilters.getFieldSimilarityOverride(fieldName);
			if (override != null) {
				chosen = override;
			}

			if (AnalyzerSettings.Similarity.TFIDF.equals(chosen)) {
				return new ClassicSimilarity();
			}
			if (AnalyzerSettings.Similarity.BM25.equals(chosen)) {
				return new BM25Similarity();
			}
			if (AnalyzerSettings.Similarity.CONSTANT.equals(chosen)) {
				return new ConstantSimilarity();
			}
			if (AnalyzerSettings.Similarity.TF.equals(chosen)) {
				return new TFSimilarity();
			}
			throw new RuntimeException("Unknown similarity type <" + chosen + ">");
		}
	};
}
 
Example 25
Source Project: lucene-solr   Source File: MoreLikeThis.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Constructor requiring an IndexReader.
 * <p>
 * Delegates to the two-argument constructor, defaulting the similarity to
 * {@link ClassicSimilarity} (classic TF-IDF).
 *
 * @param ir the index reader to source term statistics from
 */
public MoreLikeThis(IndexReader ir) {
  this(ir, new ClassicSimilarity());
}
 
Example 26
Source Project: lucene-solr   Source File: SweetSpotSimilarityTest.java    License: Apache License 2.0 4 votes vote down vote up
public void testSweetSpotTf() {

  SweetSpotSimilarity ss = new SweetSpotSimilarity();

  // Compare sweet-spot tf against the classic implementation.
  TFIDFSimilarity classic = new ClassicSimilarity();
  TFIDFSimilarity sweet = ss;

  // tf equal
  ss.setBaselineTfFactors(0.0f, 0.0f);
  for (int i = 1; i < 1000; i++) {
    assertEquals("tf: i="+i,
                 classic.tf(i), sweet.tf(i), 0.0f);
  }

  // tf higher
  ss.setBaselineTfFactors(1.0f, 0.0f);
  for (int i = 1; i < 1000; i++) {
    assertTrue("tf: i="+i+" : d="+classic.tf(i)+
               " < s="+sweet.tf(i),
               classic.tf(i) < sweet.tf(i));
  }

  // tf flat
  ss.setBaselineTfFactors(1.0f, 6.0f);
  for (int i = 1; i <= 6; i++) {
    assertEquals("tf flat1: i="+i, 1.0f, sweet.tf(i), 0.0f);
  }
  ss.setBaselineTfFactors(2.0f, 6.0f);
  for (int i = 1; i <= 6; i++) {
    assertEquals("tf flat2: i="+i, 2.0f, sweet.tf(i), 0.0f);
  }
  for (int i = 6; i <= 1000; i++) {
    assertTrue("tf: i="+i+" : s="+sweet.tf(i)+
               " < d="+classic.tf(i),
               sweet.tf(i) < classic.tf(i));
  }

  // stupidity
  assertEquals("tf zero", 0.0f, sweet.tf(0), 0.0f);
}
 
Example 27
Source Project: lucene-solr   Source File: TestComplexExplanations.java    License: Apache License 2.0 4 votes vote down vote up
@Override
public void setUp() throws Exception {
  super.setUp();
  // Pin ClassicSimilarity — presumably to avoid the test framework's
  // similarity randomisation so explanation output is stable; confirm.
  // TODO: switch to BM25?
  searcher.setSimilarity(new ClassicSimilarity());
}
 
Example 28
Source Project: lucene-solr   Source File: TestFuzzyQuery.java    License: Apache License 2.0 4 votes vote down vote up
public void testMultipleQueriesIdfWorks() throws Exception {
  // With issue LUCENE-329 - it could be argued a MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite
  // is the solution as it disables IDF.
  // However - IDF is still useful as in this case where there are multiple FuzzyQueries.
  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

  for (String name : new String[] {
      "michael smith", "michael lucero", "doug cutting", "doug cuttin",
      "michael wardle", "micheal vegas", "michael lydon" }) {
    addDoc(name, writer);
  }

  IndexReader reader = writer.getReader();
  IndexSearcher searcher = newSearcher(reader);
  searcher.setSimilarity(new ClassicSimilarity()); //avoid randomisation of similarity algo by test framework

  writer.close();

  String commonSearchTerm = "michael";
  String rareSearchTerm = "cutting";
  BooleanQuery.Builder query = new BooleanQuery.Builder();
  query.add(new FuzzyQuery(new Term("field", commonSearchTerm), 2, 1), Occur.SHOULD);
  query.add(new FuzzyQuery(new Term("field", rareSearchTerm), 2, 1), Occur.SHOULD);
  ScoreDoc[] hits = searcher.search(query.build(), 1000).scoreDocs;

  // Matches on the rare surname should be worth more than matches on the common forename
  assertEquals(7, hits.length);
  Document bestDoc = searcher.doc(hits[0].doc);
  String topMatch = bestDoc.get("field");
  assertTrue(topMatch.contains(rareSearchTerm));

  Document runnerUpDoc = searcher.doc(hits[1].doc);
  String runnerUpMatch = runnerUpDoc.get("field");
  assertTrue(runnerUpMatch.contains("cuttin"));

  Document worstDoc = searcher.doc(hits[hits.length - 1].doc);
  String worstMatch = worstDoc.get("field");
  assertTrue(worstMatch.contains("micheal")); //misspelling of common name

  reader.close();
  directory.close();
}
 
Example 29
Source Project: lucene-solr   Source File: TestBoolean2.java    License: Apache License 2.0 4 votes vote down vote up
@Test
public void testRandomQueries() throws Exception {
  // Generates random boolean queries over this vocabulary and cross-checks
  // scorer consistency on both searcher and bigSearcher.
  String[] vals = {"w1","w2","w3","w4","w5","xx","yy","zzz"};

  int tot=0;

  // Kept outside the try so the failing query can be printed in the catch.
  BooleanQuery q1 = null;
  try {

    // increase number of iterations for more complete testing
    int num = atLeast(3);
    for (int i=0; i<num; i++) {
      int level = random().nextInt(3);
      q1 = randBoolQuery(new Random(random().nextLong()), random().nextBoolean(), level, field, vals, null).build();
      
      // Can't sort by relevance since floating point numbers may not quite
      // match up.
      Sort sort = Sort.INDEXORDER;

      QueryUtils.check(random(), q1,searcher); // baseline sim
      try {
        // a little hackish, QueryUtils.check is too costly to do on bigSearcher in this loop.
        searcher.setSimilarity(bigSearcher.getSimilarity()); // random sim
        QueryUtils.check(random(), q1, searcher);
      } finally {
        searcher.setSimilarity(new ClassicSimilarity()); // restore
      }

      // check diff (randomized) scorers (from AssertingSearcher) produce the same results
      TopFieldCollector collector = TopFieldCollector.create(sort, 1000, 1);
      searcher.search(q1, collector);
      ScoreDoc[] hits1 = collector.topDocs().scoreDocs;
      collector = TopFieldCollector.create(sort, 1000, 1);
      searcher.search(q1, collector);
      ScoreDoc[] hits2 = collector.topDocs().scoreDocs;
      tot+=hits2.length;
      CheckHits.checkEqual(q1, hits1, hits2);

      // Wrap q1 in a disjunction with a prefix query; the expected count on
      // bigSearcher scales by mulFactor, plus NUM_EXTRA_DOCS/2 — presumably
      // half the extra docs match the "b" prefix; verify against the setup.
      BooleanQuery.Builder q3 = new BooleanQuery.Builder();
      q3.add(q1, BooleanClause.Occur.SHOULD);
      q3.add(new PrefixQuery(new Term("field2", "b")), BooleanClause.Occur.SHOULD);
      assertEquals(mulFactor*collector.totalHits + NUM_EXTRA_DOCS/2, bigSearcher.count(q3.build()));

      // test diff (randomized) scorers produce the same results on bigSearcher as well
      collector = TopFieldCollector.create(sort, 1000 * mulFactor, 1);
      bigSearcher.search(q1, collector);
      hits1 = collector.topDocs().scoreDocs;
      collector = TopFieldCollector.create(sort, 1000 * mulFactor, 1);
      bigSearcher.search(q1, collector);
      hits2 = collector.topDocs().scoreDocs;
      CheckHits.checkEqual(q1, hits1, hits2);
      
    }

  } catch (Exception e) {
    // For easier debugging
    System.out.println("failed query: " + q1);
    throw e;
  }

  // System.out.println("Total hits:"+tot);
}
 
Example 30
Source Project: lucene-solr   Source File: TestQueryRescorer.java    License: Apache License 2.0 4 votes vote down vote up
/** Writer config pinned to classic TF-IDF scoring for this test. */
public static IndexWriterConfig newIndexWriterConfig() {
  // We rely on more tokens = lower score:
  IndexWriterConfig config = LuceneTestCase.newIndexWriterConfig();
  return config.setSimilarity(new ClassicSimilarity());
}