Java Code Examples for org.apache.lucene.search.similarities.ClassicSimilarity

The following examples show how to use org.apache.lucene.search.similarities.ClassicSimilarity. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: scava   Source File: SORecommender.java    License: Eclipse Public License 2.0 6 votes vote down vote up
/**
 * Runs the given Lucene query against the index at {@code INDEX_DIRECTORY} and
 * returns the top {@code hitsPerPage} hits, or {@code null} if anything fails.
 *
 * <p>Uses {@link ClassicSimilarity} (TF-IDF) unless {@code isBm25} is set, in
 * which case the searcher's default BM25 scoring is kept.
 *
 * @param query the parsed Lucene query to execute
 * @return the top documents, or {@code null} on any failure (legacy contract)
 */
public TopDocs executeQuery(org.apache.lucene.search.Query query) throws IOException, ParseException {
	// try-with-resources: the original leaked both the Directory and the
	// IndexReader on every call.
	try (Directory indexDir = FSDirectory.open(Paths.get(INDEX_DIRECTORY));
			IndexReader reader = DirectoryReader.open(indexDir)) {
		IndexSearcher searcher = new IndexSearcher(reader);
		if (!isBm25) {
			searcher.setSimilarity(new ClassicSimilarity());
		}
		return searcher.search(query, hitsPerPage);
	} catch (Exception e) {
		// Log the full stack trace (the original logged only the message,
		// hiding the failure location); keep returning null for callers.
		logger.error("Query execution failed", e);
		return null;
	}
}
 
Example 2
Source Project: lucene-solr   Source File: TestValueSources.java    License: Apache License 2.0 6 votes vote down vote up
public void testNorm() throws Exception {
  // Remember the configured similarity so it can be restored afterwards.
  Similarity previous = searcher.getSimilarity();
  try {
    // no norm field (so agnostic to indexed similarity)
    searcher.setSimilarity(new ClassicSimilarity());

    ValueSource byteNorms = new NormValueSource("byte");
    assertHits(new FunctionQuery(byteNorms), new float[] { 1f, 1f });

    // regardless of whether norms exist, value source exists == 0
    assertAllExist(byteNorms);

    ValueSource textNorms = new NormValueSource("text");
    assertAllExist(textNorms);
  } finally {
    searcher.setSimilarity(previous);
  }
}
 
Example 3
Source Project: lucene-solr   Source File: TestValueSources.java    License: Apache License 2.0 6 votes vote down vote up
public void testTF() throws Exception {
  // Keep the active similarity so the searcher can be restored on exit.
  Similarity previous = searcher.getSimilarity();
  try {
    // no norm field (so agnostic to indexed similarity)
    searcher.setSimilarity(new ClassicSimilarity());

    ValueSource tf = new TFValueSource("bogus", "bogus", "text", new BytesRef("test"));
    float[] expectedTextTf = { (float) Math.sqrt(3d), (float) Math.sqrt(1d) };
    assertHits(new FunctionQuery(tf), expectedTextTf);
    assertAllExist(tf);

    tf = new TFValueSource("bogus", "bogus", "string", new BytesRef("bar"));
    assertHits(new FunctionQuery(tf), new float[] { 0f, 1f });
    assertAllExist(tf);

    // regardless of whether norms exist, value source exists == 0
    tf = new TFValueSource("bogus", "bogus", "bogus", new BytesRef("bogus"));
    assertHits(new FunctionQuery(tf), new float[] { 0F, 0F });
    assertAllExist(tf);
  } finally {
    searcher.setSimilarity(previous);
  }
}
 
Example 4
Source Project: lucene-solr   Source File: TestTaxonomyFacetCounts.java    License: Apache License 2.0 6 votes vote down vote up
public void testReallyNoNormsForDrillDown() throws Exception {
  Directory indexDir = newDirectory();
  Directory taxoDir = newDirectory();

  // Force ClassicSimilarity for the single expected field name.
  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
  iwc.setSimilarity(new PerFieldSimilarityWrapper() {
      final Similarity sim = new ClassicSimilarity();

      @Override
      public Similarity get(String name) {
        assertEquals("field", name);
        return sim;
      }
    });

  TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
  RandomIndexWriter writer = new RandomIndexWriter(random(), indexDir, iwc);
  FacetsConfig config = new FacetsConfig();

  // One document with both a text field and a facet dimension.
  Document doc = new Document();
  doc.add(newTextField("field", "text", Field.Store.NO));
  doc.add(new FacetField("a", "path"));
  writer.addDocument(config.build(taxoWriter, doc));
  writer.close();
  IOUtils.close(taxoWriter, indexDir, taxoDir);
}
 
Example 5
Source Project: lucene-solr   Source File: TestPayloadSpanUtil.java    License: Apache License 2.0 6 votes vote down vote up
public void testPayloadSpanUtil() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir,
      newIndexWriterConfig(new PayloadAnalyzer()).setSimilarity(new ClassicSimilarity()));

  // Single document with payload-bearing tokens.
  Document document = new Document();
  document.add(newTextField(FIELD, "xx rr yy mm  pp", Field.Store.YES));
  writer.addDocument(document);

  IndexReader reader = writer.getReader();
  writer.close();
  IndexSearcher searcher = newSearcher(reader);

  PayloadSpanUtil psu = new PayloadSpanUtil(searcher.getTopReaderContext());
  Collection<byte[]> payloads = psu.getPayloadsForQuery(new TermQuery(new Term(FIELD, "rr")));

  if (VERBOSE) {
    System.out.println("Num payloads:" + payloads.size());
    for (byte[] payload : payloads) {
      System.out.println(new String(payload, StandardCharsets.UTF_8));
    }
  }

  reader.close();
  dir.close();
}
 
Example 6
Source Project: lucene-solr   Source File: TestSimilarities.java    License: Apache License 2.0 6 votes vote down vote up
public void testNonStandardSimilarity() throws Exception {

  try (Monitor monitor = newMonitor()) {
    monitor.register(new MonitorQuery("1", MonitorTestBase.parse("test")));

    // A ClassicSimilarity whose tf() is a large constant, so every match's
    // score is scaled by 1000 relative to the stock implementation.
    Similarity boostedSim = new ClassicSimilarity() {
      @Override
      public float tf(float freq) {
        return 1000f;
      }
    };

    Document doc = new Document();
    doc.add(newTextField("field", "this is a test", Field.Store.NO));

    MatchingQueries<ScoringMatch> standard = monitor.match(doc, ScoringMatch.matchWithSimilarity(new ClassicSimilarity()));
    MatchingQueries<ScoringMatch> boosted = monitor.match(doc, ScoringMatch.matchWithSimilarity(boostedSim));

    float standardScore = standard.getMatches().iterator().next().getScore();
    float boostedScore = boosted.getMatches().iterator().next().getScore();
    assertEquals(standardScore, boostedScore / 1000, 0.1f);
  }
}
 
Example 7
@Override
public void setUp() throws Exception {
  super.setUp();
  analyzer = new MockAnalyzer(random());
  dir = newDirectory();

  IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
  iwc.setMergePolicy(newLogMergePolicy()); // we will use docids to validate

  // Three small documents with overlapping vocabulary.
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
  iw.addDocument(doc("lucene", "lucene is a very popular search engine library"));
  iw.addDocument(doc("solr", "solr is a very popular search server and is using lucene"));
  iw.addDocument(doc("nutch", "nutch is an internet search engine with web crawler and is using lucene and hadoop"));
  reader = iw.getReader();
  iw.close();

  // we do not use newSearcher because the assertingXXX layers break
  // the toString representations we are relying on
  // TODO: clean that up
  searcher = new IndexSearcher(reader);
  searcher.setSimilarity(new ClassicSimilarity());
  scorerSearcher = new ScorerIndexSearcher(reader);
  scorerSearcher.setSimilarity(new CountingSimilarity());
}
 
Example 8
Source Project: lucene-solr   Source File: TestTermScorer.java    License: Apache License 2.0 6 votes vote down vote up
@Override
public void setUp() throws Exception {
  super.setUp();
  directory = newDirectory();

  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()))
      .setMergePolicy(newLogMergePolicy())
      .setSimilarity(new ClassicSimilarity());
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory, iwc);

  // One document per entry in the shared values fixture.
  for (String value : values) {
    Document doc = new Document();
    doc.add(newTextField(FIELD, value, Field.Store.YES));
    writer.addDocument(doc);
  }
  writer.forceMerge(1);
  indexReader = getOnlyLeafReader(writer.getReader());
  writer.close();

  indexSearcher = newSearcher(indexReader, false);
  indexSearcher.setSimilarity(new ClassicSimilarity());
}
 
Example 9
Source Project: lucene-solr   Source File: TestMemoryIndex.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testFreezeAPI() {

  MemoryIndex index = new MemoryIndex();
  index.addField("f1", "some text", analyzer);

  assertThat(index.search(new MatchAllDocsQuery()), not(is(0.0f)));
  assertThat(index.search(new TermQuery(new Term("f1", "some"))), not(is(0.0f)));

  // check we can add a new field after searching
  index.addField("f2", "some more text", analyzer);
  assertThat(index.search(new TermQuery(new Term("f2", "some"))), not(is(0.0f)));

  // freeze!
  index.freeze();

  // Mutations must now fail until reset() is called.
  RuntimeException thrown = expectThrows(RuntimeException.class,
      () -> index.addField("f3", "and yet more", analyzer));
  assertThat(thrown.getMessage(), containsString("frozen"));

  thrown = expectThrows(RuntimeException.class,
      () -> index.setSimilarity(new BM25Similarity(1, 1)));
  assertThat(thrown.getMessage(), containsString("frozen"));

  // Searching a frozen index still works.
  assertThat(index.search(new TermQuery(new Term("f1", "some"))), not(is(0.0f)));

  index.reset();
  index.addField("f1", "wibble", analyzer);
  assertThat(index.search(new TermQuery(new Term("f1", "some"))), is(0.0f));
  assertThat(index.search(new TermQuery(new Term("f1", "wibble"))), not(is(0.0f)));

  // check we can set the Similarity again
  index.setSimilarity(new ClassicSimilarity());
}
 
Example 10
Source Project: lucene-solr   Source File: SearchImpl.java    License: Apache License 2.0 5 votes vote down vote up
/** Builds either a classic TF-IDF or a BM25 similarity from the given config. */
private Similarity createSimilarity(SimilarityConfig config) {
  if (config.isUseClassicSimilarity()) {
    ClassicSimilarity classic = new ClassicSimilarity();
    classic.setDiscountOverlaps(config.isDiscountOverlaps());
    return classic;
  }
  BM25Similarity bm25 = new BM25Similarity(config.getK1(), config.getB());
  bm25.setDiscountOverlaps(config.isDiscountOverlaps());
  return bm25;
}
 
Example 11
Source Project: lucene-solr   Source File: TestValueSources.java    License: Apache License 2.0 5 votes vote down vote up
public void testIDF() throws Exception {
  // Save the active similarity so it can be restored after the check.
  Similarity previous = searcher.getSimilarity();
  try {
    searcher.setSimilarity(new ClassicSimilarity());
    ValueSource idf = new IDFValueSource("bogus", "bogus", "text", new BytesRef("test"));
    assertHits(new FunctionQuery(idf), new float[] { 1.0f, 1.0f });
    assertAllExist(idf);
  } finally {
    searcher.setSimilarity(previous);
  }
}
 
Example 12
Source Project: lucene-solr   Source File: TestPayloadScoreQuery.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testNestedNearQuery() throws Exception {

  // (one OR hundred) NEAR (twenty two) ~ 1
  //  2    4        4    4
  // one hundred twenty two
  // two hundred twenty two

  SpanQuery oneOrHundred = new SpanOrQuery(
      new SpanTermQuery(new Term("field", "one")),
      new SpanTermQuery(new Term("field", "hundred")));
  SpanQuery twentyTwo = new SpanNearQuery(new SpanQuery[]{
      new SpanTermQuery(new Term("field", "twenty")),
      new SpanTermQuery(new Term("field", "two"))
  }, 0, true);
  SpanNearQuery q = new SpanNearQuery(new SpanQuery[]{ oneOrHundred, twentyTwo }, 1, true);

  // check includeSpanScore makes a difference here
  searcher.setSimilarity(new ClassicSimilarity());
  try {
    checkQuery(q, new MaxPayloadFunction(), new int[]{ 122, 222 }, new float[]{ 20.901256561279297f, 17.06580352783203f });
    checkQuery(q, new MinPayloadFunction(), new int[]{ 222, 122 }, new float[]{ 17.06580352783203f, 10.450628280639648f });
    checkQuery(q, new AveragePayloadFunction(), new int[] { 122, 222 }, new float[]{ 19.15948486328125f, 17.06580352783203f });
    checkQuery(q, new MaxPayloadFunction(), false, new int[]{122, 222}, new float[]{4.0f, 4.0f});
    checkQuery(q, new MinPayloadFunction(), false, new int[]{222, 122}, new float[]{4.0f, 2.0f});
    checkQuery(q, new AveragePayloadFunction(), false, new int[]{222, 122}, new float[]{4.0f, 3.666666f});
  } finally {
    searcher.setSimilarity(similarity);
  }
}
 
Example 13
Source Project: lucene-solr   Source File: TestMinShouldMatch2.java    License: Apache License 2.0 5 votes vote down vote up
@BeforeClass
public static void beforeClass() throws Exception {
  dir = newDirectory();
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir);

  // Term rarity is controlled by how often each bucket is added:
  // always / 90% / 50% / 10% of documents.
  final int numDocs = atLeast(300);
  for (int docNum = 0; docNum < numDocs; docNum++) {
    Document doc = new Document();
    addSome(doc, alwaysTerms);
    if (random().nextInt(100) < 90) {
      addSome(doc, commonTerms);
    }
    if (random().nextInt(100) < 50) {
      addSome(doc, mediumTerms);
    }
    if (random().nextInt(100) < 10) {
      addSome(doc, rareTerms);
    }
    iw.addDocument(doc);
  }
  iw.forceMerge(1);
  iw.close();

  r = DirectoryReader.open(dir);
  reader = getOnlyLeafReader(r);
  searcher = new IndexSearcher(reader);
  searcher.setSimilarity(new ClassicSimilarity());
}
 
Example 14
Source Project: lucene-solr   Source File: TestElevationComparator.java    License: Apache License 2.0 5 votes vote down vote up
public void testSorting() throws Throwable {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()))
      .setMaxBufferedDocs(2)
      .setMergePolicy(newLogMergePolicy(1000))
      .setSimilarity(new ClassicSimilarity());
  IndexWriter writer = new IndexWriter(dir, iwc);

  // Docs with increasing term frequency for "ipod" and "boosted".
  writer.addDocument(adoc(new String[] {"id", "a", "title", "ipod", "str_s", "a"}));
  writer.addDocument(adoc(new String[] {"id", "b", "title", "ipod ipod", "str_s", "b"}));
  writer.addDocument(adoc(new String[] {"id", "c", "title", "ipod ipod ipod", "str_s", "c"}));
  writer.addDocument(adoc(new String[] {"id", "x", "title", "boosted", "str_s", "x"}));
  writer.addDocument(adoc(new String[] {"id", "y", "title", "boosted boosted", "str_s", "y"}));
  writer.addDocument(adoc(new String[] {"id", "z", "title", "boosted boosted boosted", "str_s", "z"}));

  IndexReader reader = DirectoryReader.open(writer);
  writer.close();

  IndexSearcher searcher = newSearcher(reader);
  searcher.setSimilarity(new BM25Similarity());

  runTest(searcher, true);
  runTest(searcher, false);

  reader.close();
  dir.close();
}
 
Example 15
Source Project: lucene-solr   Source File: TestFuzzyQuery.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * See issue LUCENE-329 - IDF shouldn't wreck similarity ranking: an exact
 * match must outrank fuzzy matches even when the exact term is more common.
 */
public void testSingleQueryExactMatchScoresHighest() throws Exception {
  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
  addDoc("smith", writer);
  addDoc("smith", writer);
  addDoc("smith", writer);
  addDoc("smith", writer);
  addDoc("smith", writer);
  addDoc("smith", writer);
  addDoc("smythe", writer);
  addDoc("smdssasd", writer);

  IndexReader reader = writer.getReader();
  IndexSearcher searcher = newSearcher(reader);
  searcher.setSimilarity(new ClassicSimilarity()); //avoid randomisation of similarity algo by test framework
  writer.close();
  String[] searchTerms = { "smith", "smythe", "smdssasd" };
  for (String searchTerm : searchTerms) {
    FuzzyQuery query = new FuzzyQuery(new Term("field", searchTerm), 2, 1);
    ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;
    // Guard BEFORE dereferencing hits[0]: the original asserted
    // hits.length > 0 only after already reading hits[0], so an empty result
    // would surface as ArrayIndexOutOfBoundsException instead of a clear
    // assertion failure.
    assertTrue(hits.length > 0);
    Document bestDoc = searcher.doc(hits[0].doc);
    String topMatch = bestDoc.get("field");
    assertEquals(searchTerm, topMatch);
    if (hits.length > 1) {
      // The worst-ranked hit must not be the searched term itself.
      Document worstDoc = searcher.doc(hits[hits.length - 1].doc);
      String worstMatch = worstDoc.get("field");
      assertNotSame(searchTerm, worstMatch);
    }
  }
  reader.close();
  directory.close();
}
 
Example 16
Source Project: lucene-solr   Source File: TestPhraseQuery.java    License: Apache License 2.0 5 votes vote down vote up
public void testSlopScoring() throws IOException {
  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir,
      newIndexWriterConfig(new MockAnalyzer(random()))
        .setMergePolicy(newLogMergePolicy())
        .setSimilarity(new BM25Similarity()));

  // Three docs with increasing distance between "firstname" and "lastname".
  String[] contents = {
      "foo firstname lastname foo",
      "foo firstname zzz lastname foo",
      "foo firstname zzz yyy lastname foo"
  };
  for (String content : contents) {
    Document doc = new Document();
    doc.add(newTextField("field", content, Field.Store.YES));
    writer.addDocument(doc);
  }

  IndexReader reader = writer.getReader();
  writer.close();

  IndexSearcher searcher = newSearcher(reader);
  searcher.setSimilarity(new ClassicSimilarity());
  PhraseQuery query = new PhraseQuery(Integer.MAX_VALUE, "field", "firstname", "lastname");
  ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;
  assertEquals(3, hits.length);
  // Make sure that those matches where the terms appear closer to
  // each other get a higher score:
  assertEquals(1.0, hits[0].score, 0.01);
  assertEquals(0, hits[0].doc);
  assertEquals(0.63, hits[1].score, 0.01);
  assertEquals(1, hits[1].doc);
  assertEquals(0.47, hits[2].score, 0.01);
  assertEquals(2, hits[2].doc);
  QueryUtils.check(random(), query, searcher);
  reader.close();
  dir.close();
}
 
Example 17
Source Project: lucene-solr   Source File: TestQueryRescorer.java    License: Apache License 2.0 5 votes vote down vote up
/** Wraps the reader in a searcher pinned to classic TF-IDF scoring. */
private IndexSearcher getSearcher(IndexReader r) {
  IndexSearcher s = newSearcher(r);
  // We rely on more tokens = lower score:
  s.setSimilarity(new ClassicSimilarity());
  return s;
}
 
Example 18
Source Project: lucene-solr   Source File: TestBooleanQuery.java    License: Apache License 2.0 5 votes vote down vote up
public void testNullOrSubScorer() throws Throwable {
  Directory dir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), dir);
  Document doc = new Document();
  doc.add(newTextField("field", "a b c d", Field.Store.NO));
  w.addDocument(doc);

  IndexReader reader = w.getReader();
  IndexSearcher searcher = newSearcher(reader);
  // this test relies upon coord being the default implementation,
  // otherwise scores are different!
  searcher.setSimilarity(new ClassicSimilarity());

  TermQuery termA = new TermQuery(new Term("field", "a"));
  // PhraseQuery w/ no terms added returns a null scorer
  PhraseQuery emptyPhrase = new PhraseQuery("field", new String[0]);

  BooleanQuery.Builder builder = new BooleanQuery.Builder();
  builder.add(termA, BooleanClause.Occur.SHOULD);
  builder.add(emptyPhrase, BooleanClause.Occur.SHOULD);
  assertEquals(1, searcher.search(builder.build(), 10).totalHits.value);

  // A required clause which returns null scorer should return null scorer to
  // IndexSearcher.
  builder = new BooleanQuery.Builder();
  emptyPhrase = new PhraseQuery("field", new String[0]);
  builder.add(termA, BooleanClause.Occur.SHOULD);
  builder.add(emptyPhrase, BooleanClause.Occur.MUST);
  assertEquals(0, searcher.search(builder.build(), 10).totalHits.value);

  DisjunctionMaxQuery dmq = new DisjunctionMaxQuery(
      Arrays.asList(termA, emptyPhrase),
      1.0f);
  assertEquals(1, searcher.search(dmq, 10).totalHits.value);

  reader.close();
  w.close();
  dir.close();
}
 
Example 19
/** default parameters */
public void testDefaults() throws Exception {
  SweetSpotSimilarity sim = getSimilarity("text", SweetSpotSimilarity.class);

  // SSS tf w/defaults should behave just like DS
  ClassicSimilarity classic = new ClassicSimilarity();
  for (int freq = 0; freq <= 1000; freq++) {
    assertEquals("tf: i="+freq, classic.tf(freq), sim.tf(freq), 0.0F);
  }

  // default norm sanity check
  assertEquals("norm 1",  1.00F, computeNorm(sim, 1),  0.0F);
  assertEquals("norm 4",  0.50F, computeNorm(sim, 4),  0.0F);
  assertEquals("norm 16", 0.25F, computeNorm(sim, 16), 0.0F);
}
 
Example 20
/** baseline with parameters */
public void testBaselineParameters() throws Exception {
  SweetSpotSimilarity sim = getSimilarity("text_baseline", 
                                          SweetSpotSimilarity.class);
  
  ClassicSimilarity d = new ClassicSimilarity();

  // constant up to 6
  for (int i = 1; i <=6; i++) {
    assertEquals("tf i="+i, 1.5F, sim.tf(i), 0.0F);
  }
  // less than default sim above 6
  for (int i = 6; i <=1000; i++) {
    assertTrue("tf: i="+i+" : s="+sim.tf(i)+
               " < d="+d.tf(i),
               sim.tf(i) < d.tf(i));
  }

  // norms: plateau from 3-5, with symmetric falloff on either side
  assertEquals("norm 1 == 7", 
               computeNorm(sim, 1), computeNorm(sim, 7),  0.0F);
  // Fixed copy/paste bug: this assertion previously re-compared lengths
  // 1 and 7, so the "2 == 6" symmetry it names was never actually checked.
  assertEquals("norm 2 == 6",  
               computeNorm(sim, 2), computeNorm(sim, 6),  0.0F);
  assertEquals("norm 3",  1.00F, computeNorm(sim, 3),  0.0F);
  assertEquals("norm 4",  1.00F, computeNorm(sim, 4),  0.0F);
  assertEquals("norm 5",  1.00F, computeNorm(sim, 5),  0.0F);
  assertTrue("norm 6 too high: " + computeNorm(sim, 6),
             computeNorm(sim, 6) < 1.0F);
  assertTrue("norm 7 higher then norm 6", 
             computeNorm(sim, 7) < computeNorm(sim, 6));
  assertEquals("norm 20", 0.25F, computeNorm(sim, 20), 0.0F);
}
 
Example 21
@Before
public void setupIndex() throws IOException {
    dirUnderTest = newDirectory();

    // Pick one similarity at random so the suite covers many scoring models.
    List<Similarity> candidates = Arrays.asList(
            new ClassicSimilarity(),
            new SweetSpotSimilarity(), // extends Classic
            new BM25Similarity(),
            new LMDirichletSimilarity(),
            new BooleanSimilarity(),
            new LMJelinekMercerSimilarity(0.2F),
            new AxiomaticF3LOG(0.5F, 10),
            new DFISimilarity(new IndependenceChiSquared()),
            new DFRSimilarity(new BasicModelG(), new AfterEffectB(), new NormalizationH1()),
            new IBSimilarity(new DistributionLL(), new LambdaDF(), new NormalizationH3()));
    similarity = candidates.get(random().nextInt(candidates.size()));

    indexWriterUnderTest = new RandomIndexWriter(random(), dirUnderTest, newIndexWriterConfig().setSimilarity(similarity));
    for (int docId = 0; docId < docs.length; docId++) {
        Document doc = new Document();
        doc.add(newStringField("id", "" + docId, Field.Store.YES));
        doc.add(newField("field", docs[docId], Store.YES));
        indexWriterUnderTest.addDocument(doc);
    }
    indexWriterUnderTest.commit();
    indexWriterUnderTest.forceMerge(1);
    indexWriterUnderTest.flush();

    indexReaderUnderTest = indexWriterUnderTest.getReader();
    searcherUnderTest = newSearcher(indexReaderUnderTest);
    searcherUnderTest.setSimilarity(similarity);
}
 
Example 22
Source Project: querqy   Source File: LuceneTermQueryBuilderTest.java    License: Apache License 2.0 5 votes vote down vote up
@Test
public void testThatQueryUsesTermButNoFieldBoost() throws Exception {

    // Build a tiny index: four docs containing "v1 v1" and one containing "v2".
    Analyzer analyzer = new StandardAnalyzer();
    Directory directory = new ByteBuffersDirectory();
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    config.setSimilarity(new ClassicSimilarity());
    IndexWriter indexWriter = new IndexWriter(directory, config);

    TestUtil.addNumDocsWithTextField("f1", "v1 v1", indexWriter, 4);
    TestUtil.addNumDocsWithTextField("f1", "v2", indexWriter, 1);
    indexWriter.close();

    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
    indexSearcher.setSimilarity(new ClassicSimilarity());

    // The builder should keep the term but drop the field boost entirely.
    final TermQuery termQuery = new LuceneTermQueryBuilder()
            .createTermQuery(new Term("f1", "v1"), new ConstantFieldBoost(3f));
    final Term term = termQuery.getTerm();
    assertEquals("f1", term.field());
    assertEquals("v1", term.text());

    TopDocs topDocs = indexSearcher.search(termQuery, 10);

    final Weight weight = termQuery.createWeight(indexSearcher, ScoreMode.COMPLETE, 4.5f);
    final Explanation explain = weight.explain(indexReader.getContext().leaves().get(0), topDocs.scoreDocs[0].doc);
    String explainText = explain.toString();

    assertTrue(explainText.contains("4.5 = boost")); // 4.5 (query) but ignore field boost
    assertTrue(explainText.contains("4 = docFreq")); // 4 * v1
    assertTrue(explainText.contains("2.0 = freq")); // 2 * v1 in field
}
 
Example 23
Source Project: lumongo   Source File: DocFreq.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Helper exposing per-term document-frequency statistics for a single field.
 *
 * @param indexReader reader for the index the statistics come from
 * @param field       the field whose term statistics are of interest
 */
public DocFreq(IndexReader indexReader, String field) {
	this.indexReader = indexReader;
	this.field = field;
	// term -> docFreq cache; presumably populated lazily by other methods
	// of this class (not visible here) — verify against the full source.
	this.docFreqMap = new HashMap<>();
	// Classic TF-IDF scoring helpers.
	this.similarity = new ClassicSimilarity();
	this.numDocs = indexReader.numDocs();
}
 
Example 24
Source Project: lumongo   Source File: LumongoSegment.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds a per-field similarity wrapper: the index-level default (BM25) is
 * overridden first by the field's analyzer settings and then by any
 * per-query override, and finally mapped to a concrete Similarity.
 */
private PerFieldSimilarityWrapper getSimilarity(final QueryWithFilters queryWithFilters) {
	return new PerFieldSimilarityWrapper() {
		@Override
		public Similarity get(String fieldName) {
			AnalyzerSettings.Similarity chosen = AnalyzerSettings.Similarity.BM25;

			AnalyzerSettings analyzerSettings = indexConfig.getAnalyzerSettingsForIndexField(fieldName);
			if (analyzerSettings != null) {
				chosen = analyzerSettings.getSimilarity();
			}

			// A per-query override has the highest precedence.
			AnalyzerSettings.Similarity override = queryWithFilters.getFieldSimilarityOverride(fieldName);
			if (override != null) {
				chosen = override;
			}

			if (AnalyzerSettings.Similarity.TFIDF.equals(chosen)) {
				return new ClassicSimilarity();
			}
			if (AnalyzerSettings.Similarity.BM25.equals(chosen)) {
				return new BM25Similarity();
			}
			if (AnalyzerSettings.Similarity.CONSTANT.equals(chosen)) {
				return new ConstantSimilarity();
			}
			if (AnalyzerSettings.Similarity.TF.equals(chosen)) {
				return new TFSimilarity();
			}
			throw new RuntimeException("Unknown similarity type <" + chosen + ">");
		}
	};
}
 
Example 25
Source Project: lucene-solr   Source File: MoreLikeThis.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Constructor requiring an IndexReader.
 * <p>
 * Delegates to the two-argument constructor, defaulting the similarity to
 * {@link ClassicSimilarity} (classic TF-IDF).
 *
 * @param ir the index reader to source term statistics from
 */
public MoreLikeThis(IndexReader ir) {
  this(ir, new ClassicSimilarity());
}
 
Example 26
Source Project: lucene-solr   Source File: SweetSpotSimilarityTest.java    License: Apache License 2.0 4 votes vote down vote up
public void testSweetSpotTf() {

  SweetSpotSimilarity ss = new SweetSpotSimilarity();

  // Compare sweet-spot tf against the classic implementation.
  TFIDFSimilarity classic = new ClassicSimilarity();
  TFIDFSimilarity sweet = ss;

  // tf equal
  ss.setBaselineTfFactors(0.0f, 0.0f);
  for (int i = 1; i < 1000; i++) {
    assertEquals("tf: i="+i,
                 classic.tf(i), sweet.tf(i), 0.0f);
  }

  // tf higher
  ss.setBaselineTfFactors(1.0f, 0.0f);
  for (int i = 1; i < 1000; i++) {
    assertTrue("tf: i="+i+" : d="+classic.tf(i)+
               " < s="+sweet.tf(i),
               classic.tf(i) < sweet.tf(i));
  }

  // tf flat
  ss.setBaselineTfFactors(1.0f, 6.0f);
  for (int i = 1; i <= 6; i++) {
    assertEquals("tf flat1: i="+i, 1.0f, sweet.tf(i), 0.0f);
  }
  ss.setBaselineTfFactors(2.0f, 6.0f);
  for (int i = 1; i <= 6; i++) {
    assertEquals("tf flat2: i="+i, 2.0f, sweet.tf(i), 0.0f);
  }
  for (int i = 6; i <= 1000; i++) {
    assertTrue("tf: i="+i+" : s="+sweet.tf(i)+
               " < d="+classic.tf(i),
               sweet.tf(i) < classic.tf(i));
  }

  // stupidity
  assertEquals("tf zero", 0.0f, sweet.tf(0), 0.0f);
}
 
Example 27
Source Project: lucene-solr   Source File: TestComplexExplanations.java    License: Apache License 2.0 4 votes vote down vote up
@Override
public void setUp() throws Exception {
  super.setUp();
  // Pin ClassicSimilarity — presumably to avoid the test framework's
  // similarity randomisation so explanation output is stable; confirm.
  // TODO: switch to BM25?
  searcher.setSimilarity(new ClassicSimilarity());
}
 
Example 28
Source Project: lucene-solr   Source File: TestFuzzyQuery.java    License: Apache License 2.0 4 votes vote down vote up
public void testMultipleQueriesIdfWorks() throws Exception {
  // With issue LUCENE-329 - it could be argued a MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite
  // is the solution as it disables IDF.
  // However - IDF is still useful as in this case where there are multiple FuzzyQueries.
  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

  for (String name : new String[] {
      "michael smith", "michael lucero", "doug cutting", "doug cuttin",
      "michael wardle", "micheal vegas", "michael lydon" }) {
    addDoc(name, writer);
  }

  IndexReader reader = writer.getReader();
  IndexSearcher searcher = newSearcher(reader);
  searcher.setSimilarity(new ClassicSimilarity()); //avoid randomisation of similarity algo by test framework

  writer.close();

  String commonSearchTerm = "michael";
  String rareSearchTerm = "cutting";
  BooleanQuery.Builder query = new BooleanQuery.Builder();
  query.add(new FuzzyQuery(new Term("field", commonSearchTerm), 2, 1), Occur.SHOULD);
  query.add(new FuzzyQuery(new Term("field", rareSearchTerm), 2, 1), Occur.SHOULD);
  ScoreDoc[] hits = searcher.search(query.build(), 1000).scoreDocs;

  // Matches on the rare surname should be worth more than matches on the common forename
  assertEquals(7, hits.length);
  Document bestDoc = searcher.doc(hits[0].doc);
  String topMatch = bestDoc.get("field");
  assertTrue(topMatch.contains(rareSearchTerm));

  Document runnerUpDoc = searcher.doc(hits[1].doc);
  String runnerUpMatch = runnerUpDoc.get("field");
  assertTrue(runnerUpMatch.contains("cuttin"));

  Document worstDoc = searcher.doc(hits[hits.length - 1].doc);
  String worstMatch = worstDoc.get("field");
  assertTrue(worstMatch.contains("micheal")); //misspelling of common name

  reader.close();
  directory.close();
}
 
Example 29
Source Project: lucene-solr   Source File: TestBoolean2.java    License: Apache License 2.0 4 votes vote down vote up
@Test
public void testRandomQueries() throws Exception {
  // Generates random boolean queries over this vocabulary and cross-checks
  // scorer consistency on both searcher and bigSearcher.
  String[] vals = {"w1","w2","w3","w4","w5","xx","yy","zzz"};

  int tot=0;

  // Kept outside the try so the failing query can be printed in the catch.
  BooleanQuery q1 = null;
  try {

    // increase number of iterations for more complete testing
    int num = atLeast(3);
    for (int i=0; i<num; i++) {
      int level = random().nextInt(3);
      q1 = randBoolQuery(new Random(random().nextLong()), random().nextBoolean(), level, field, vals, null).build();
      
      // Can't sort by relevance since floating point numbers may not quite
      // match up.
      Sort sort = Sort.INDEXORDER;

      QueryUtils.check(random(), q1,searcher); // baseline sim
      try {
        // a little hackish, QueryUtils.check is too costly to do on bigSearcher in this loop.
        searcher.setSimilarity(bigSearcher.getSimilarity()); // random sim
        QueryUtils.check(random(), q1, searcher);
      } finally {
        searcher.setSimilarity(new ClassicSimilarity()); // restore
      }

      // check diff (randomized) scorers (from AssertingSearcher) produce the same results
      TopFieldCollector collector = TopFieldCollector.create(sort, 1000, 1);
      searcher.search(q1, collector);
      ScoreDoc[] hits1 = collector.topDocs().scoreDocs;
      collector = TopFieldCollector.create(sort, 1000, 1);
      searcher.search(q1, collector);
      ScoreDoc[] hits2 = collector.topDocs().scoreDocs;
      tot+=hits2.length;
      CheckHits.checkEqual(q1, hits1, hits2);

      // Wrap q1 in a disjunction with a prefix query; the expected count on
      // bigSearcher scales by mulFactor, plus NUM_EXTRA_DOCS/2 — presumably
      // half the extra docs match the "b" prefix; verify against the setup.
      BooleanQuery.Builder q3 = new BooleanQuery.Builder();
      q3.add(q1, BooleanClause.Occur.SHOULD);
      q3.add(new PrefixQuery(new Term("field2", "b")), BooleanClause.Occur.SHOULD);
      assertEquals(mulFactor*collector.totalHits + NUM_EXTRA_DOCS/2, bigSearcher.count(q3.build()));

      // test diff (randomized) scorers produce the same results on bigSearcher as well
      collector = TopFieldCollector.create(sort, 1000 * mulFactor, 1);
      bigSearcher.search(q1, collector);
      hits1 = collector.topDocs().scoreDocs;
      collector = TopFieldCollector.create(sort, 1000 * mulFactor, 1);
      bigSearcher.search(q1, collector);
      hits2 = collector.topDocs().scoreDocs;
      CheckHits.checkEqual(q1, hits1, hits2);
      
    }

  } catch (Exception e) {
    // For easier debugging
    System.out.println("failed query: " + q1);
    throw e;
  }

  // System.out.println("Total hits:"+tot);
}
 
Example 30
Source Project: lucene-solr   Source File: TestQueryRescorer.java    License: Apache License 2.0 4 votes vote down vote up
/** Writer config pinned to classic TF-IDF scoring for this test. */
public static IndexWriterConfig newIndexWriterConfig() {
  // We rely on more tokens = lower score:
  IndexWriterConfig config = LuceneTestCase.newIndexWriterConfig();
  return config.setSimilarity(new ClassicSimilarity());
}