org.apache.lucene.search.similarities.BM25Similarity Java Examples

The following examples show how to use org.apache.lucene.search.similarities.BM25Similarity. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: BM25FQuery.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Stores the similarity, field weights and terms, and expands them into one
 * {@link Term} per (field, term) combination.
 *
 * <p>Note that {@code terms} is sorted in place, so the caller's array is reordered.
 *
 * @param similarity      the similarity kept on this query
 * @param fieldAndWeights the fields, keyed by name, whose key set drives the expansion
 * @param terms           the terms to combine with every field
 * @throws IndexSearcher.TooManyClauses if the number of (field, term) pairs is greater than
 *         {@link IndexSearcher#getMaxClauseCount()}
 */
private BM25FQuery(BM25Similarity similarity, TreeMap<String, FieldAndWeight> fieldAndWeights, BytesRef[] terms) {
  this.similarity = similarity;
  this.fieldAndWeights = fieldAndWeights;
  this.terms = terms;

  // Reject the query before doing any work if the expansion would be too large.
  final int expandedSize = fieldAndWeights.size() * terms.length;
  if (expandedSize > IndexSearcher.getMaxClauseCount()) {
    throw new IndexSearcher.TooManyClauses();
  }

  final Term[] expanded = new Term[expandedSize];
  Arrays.sort(terms);

  // Fill the expansion field by field, each field receiving the terms in sorted order.
  int slot = 0;
  for (String fieldName : fieldAndWeights.keySet()) {
    for (int i = 0; i < terms.length; i++) {
      expanded[slot] = new Term(fieldName, terms[i]);
      slot++;
    }
  }
  this.fieldTerms = expanded;

  this.ramBytesUsed = BASE_RAM_BYTES
      + RamUsageEstimator.sizeOfObject(fieldAndWeights)
      + RamUsageEstimator.sizeOfObject(fieldTerms)
      + RamUsageEstimator.sizeOfObject(terms);
}
 
Example #2
Source File: KNearestNeighborClassifier.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a {@link KNearestNeighborClassifier}.
 *
 * @param indexReader    the reader on the index to be used for classification
 * @param similarity     the {@link Similarity} to be used by the underlying {@link IndexSearcher}, or {@code null}
 *                       to fall back to a new {@link org.apache.lucene.search.similarities.BM25Similarity}
 * @param analyzer       an {@link Analyzer} used to analyze unseen text
 * @param query          a {@link Query} to eventually filter the docs used for training the classifier, or {@code null}
 *                       if all the indexed docs should be used
 * @param k              the no. of docs to select in the MLT results to find the nearest neighbor
 * @param minDocsFreq    {@link MoreLikeThis#minDocFreq} parameter; only applied when greater than zero
 * @param minTermFreq    {@link MoreLikeThis#minTermFreq} parameter; only applied when greater than zero
 * @param classFieldName the name of the field used as the output for the classifier
 * @param textFieldNames the name of the fields used as the inputs for the classifier, they can contain boosting indication e.g. title^10
 */
public KNearestNeighborClassifier(IndexReader indexReader, Similarity similarity, Analyzer analyzer, Query query, int k, int minDocsFreq,
                                  int minTermFreq, String classFieldName, String... textFieldNames) {
  this.query = query;
  this.k = k;
  this.classFieldName = classFieldName;
  this.textFieldNames = textFieldNames;

  // MoreLikeThis setup: analyzer and fields first, optional thresholds further below.
  this.mlt = new MoreLikeThis(indexReader);
  this.mlt.setAnalyzer(analyzer);
  this.mlt.setFieldNames(textFieldNames);

  // The searcher uses the caller's similarity when given, BM25 otherwise.
  this.indexSearcher = new IndexSearcher(indexReader);
  this.indexSearcher.setSimilarity(similarity == null ? new BM25Similarity() : similarity);

  // Non-positive thresholds leave the MoreLikeThis settings untouched.
  if (minDocsFreq > 0) {
    this.mlt.setMinDocFreq(minDocsFreq);
  }
  if (minTermFreq > 0) {
    this.mlt.setMinTermFreq(minTermFreq);
  }
}
 
Example #3
Source File: KNearestNeighborClassifierTest.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Classifies the technology sample with several classifier configurations (default similarity,
 * LM Dirichlet, BM25) and checks that BM25 and LM Dirichlet produce different scores when run
 * with the same k and frequency thresholds.
 */
@Test
public void testBasicUsage() throws Exception {
  LeafReader reader = null;
  try {
    MockAnalyzer mockAnalyzer = new MockAnalyzer(random());
    reader = getSampleIndex(mockAnalyzer);

    // k = 1, no frequency thresholds: null similarity first, then LM Dirichlet.
    KNearestNeighborClassifier defaultSimClassifier =
        new KNearestNeighborClassifier(reader, null, mockAnalyzer, null, 1, 0, 0, categoryFieldName, textFieldName);
    checkCorrectClassification(defaultSimClassifier, TECHNOLOGY_INPUT, TECHNOLOGY_RESULT);

    KNearestNeighborClassifier lmClassifier =
        new KNearestNeighborClassifier(reader, new LMDirichletSimilarity(), mockAnalyzer, null, 1, 0, 0, categoryFieldName, textFieldName);
    checkCorrectClassification(lmClassifier, TECHNOLOGY_INPUT, TECHNOLOGY_RESULT);

    // k = 3 with thresholds: compare BM25 against LM Dirichlet.
    KNearestNeighborClassifier bm25K3 =
        new KNearestNeighborClassifier(reader, new BM25Similarity(), mockAnalyzer, null, 3, 2, 1, categoryFieldName, textFieldName);
    ClassificationResult<BytesRef> bm25Result = checkCorrectClassification(bm25K3, TECHNOLOGY_INPUT, TECHNOLOGY_RESULT);

    KNearestNeighborClassifier lmK3 =
        new KNearestNeighborClassifier(reader, new LMDirichletSimilarity(), mockAnalyzer, null, 3, 2, 1, categoryFieldName, textFieldName);
    ClassificationResult<BytesRef> lmResult = checkCorrectClassification(lmK3, TECHNOLOGY_INPUT, TECHNOLOGY_RESULT);

    assertTrue(bm25Result.getScore() != lmResult.getScore());
  } finally {
    if (reader != null) {
      reader.close();
    }
  }
}
 
Example #4
Source File: TestPhraseQuery.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Indexes three documents in which "firstname" and "lastname" are separated by zero, one and
 * two extra words, then checks that a sloppy phrase query ranks them in that order with the
 * expected scores.
 */
public void testSlopScoring() throws IOException {
  Directory dir = newDirectory();
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), dir,
      newIndexWriterConfig(new MockAnalyzer(random()))
        .setMergePolicy(newLogMergePolicy())
        .setSimilarity(new BM25Similarity()));

  // Each successive document puts one more word between the two phrase terms.
  String[] contents = {
      "foo firstname lastname foo",
      "foo firstname zzz lastname foo",
      "foo firstname zzz yyy lastname foo"
  };
  for (String content : contents) {
    Document document = new Document();
    document.add(newTextField("field", content, Field.Store.YES));
    indexWriter.addDocument(document);
  }

  IndexReader indexReader = indexWriter.getReader();
  indexWriter.close();

  IndexSearcher indexSearcher = newSearcher(indexReader);
  indexSearcher.setSimilarity(new ClassicSimilarity());
  PhraseQuery phrase = new PhraseQuery(Integer.MAX_VALUE, "field", "firstname", "lastname");
  ScoreDoc[] results = indexSearcher.search(phrase, 1000).scoreDocs;
  assertEquals(3, results.length);

  // Matches whose terms sit closer together must score higher, so the
  // hits come back in insertion order (doc 0, 1, 2) with decreasing scores.
  double[] expectedScores = {1.0, 0.63, 0.47};
  for (int rank = 0; rank < expectedScores.length; rank++) {
    assertEquals(expectedScores[rank], results[rank].score, 0.01);
    assertEquals(rank, results[rank].doc);
  }

  QueryUtils.check(random(), phrase, indexSearcher);
  indexReader.close();
  dir.close();
}
 
Example #5
Source File: LumongoSegment.java    From lumongo with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a per-field similarity wrapper. For each field, the similarity comes from the query's
 * override when one is present, else from the field's analyzer settings, else it is BM25.
 *
 * @param queryWithFilters source of the optional per-field similarity overrides
 * @return a wrapper that resolves the similarity for a field on demand
 */
private PerFieldSimilarityWrapper getSimilarity(final QueryWithFilters queryWithFilters) {
	return new PerFieldSimilarityWrapper() {
		@Override
		public Similarity get(String name) {
			// Index-level configuration, with BM25 as the fallback when the field has no settings.
			AnalyzerSettings fieldSettings = indexConfig.getAnalyzerSettingsForIndexField(name);
			AnalyzerSettings.Similarity configured =
					(fieldSettings == null) ? AnalyzerSettings.Similarity.BM25 : fieldSettings.getSimilarity();

			// A query-level override takes precedence over the index configuration.
			AnalyzerSettings.Similarity override = queryWithFilters.getFieldSimilarityOverride(name);
			AnalyzerSettings.Similarity effective = (override == null) ? configured : override;

			if (AnalyzerSettings.Similarity.TFIDF.equals(effective)) {
				return new ClassicSimilarity();
			}
			if (AnalyzerSettings.Similarity.BM25.equals(effective)) {
				return new BM25Similarity();
			}
			if (AnalyzerSettings.Similarity.CONSTANT.equals(effective)) {
				return new ConstantSimilarity();
			}
			if (AnalyzerSettings.Similarity.TF.equals(effective)) {
				return new TFSimilarity();
			}
			throw new RuntimeException("Unknown similarity type <" + effective + ">");
		}
	};
}
 
Example #6
Source File: Lucene.java    From uncc2014watsonsim with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Opens an {@link IndexWriter} on the index directory at {@code path}, using a
 * {@link StandardAnalyzer} and {@link BM25Similarity}.
 *
 * @param path the filesystem location of the index
 * @throws IOException if the directory or the index writer cannot be opened
 */
public Lucene(Path path) throws IOException {
	/* Setup Lucene */
	Directory dir = FSDirectory.open(path);
	// A standard analyzer is used here; many other analyzers are available.
	Analyzer analyzer = new StandardAnalyzer();
	IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
	// CREATE_OR_APPEND keeps an existing index and appends to it; a new index is
	// created only when none exists yet. (OpenMode.CREATE is the overwriting mode.)
	iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
	iwc.setSimilarity(new BM25Similarity());
	try {
		index = new IndexWriter(dir, iwc);
	} catch (IOException | RuntimeException e) {
		// The writer never took ownership of the directory, so release it here
		// instead of leaking it; a failure to close must not hide the original error.
		try {
			dir.close();
		} catch (IOException closeFailure) {
			e.addSuppressed(closeFailure);
		}
		throw e;
	}
}
 
Example #7
Source File: LtrQueryTests.java    From elasticsearch-learning-to-rank with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the test index: picks one similarity at random from a fixed list, indexes every
 * entry of {@code docs} with it, merges down to a single segment and opens a searcher that
 * uses the same similarity.
 */
@Before
public void setupIndex() throws IOException {
    dirUnderTest = newDirectory();

    // Candidate similarities; one is chosen at random per test run.
    List<Similarity> candidates = Arrays.asList(
            new ClassicSimilarity(),
            new SweetSpotSimilarity(), // extends Classic
            new BM25Similarity(),
            new LMDirichletSimilarity(),
            new BooleanSimilarity(),
            new LMJelinekMercerSimilarity(0.2F),
            new AxiomaticF3LOG(0.5F, 10),
            new DFISimilarity(new IndependenceChiSquared()),
            new DFRSimilarity(new BasicModelG(), new AfterEffectB(), new NormalizationH1()),
            new IBSimilarity(new DistributionLL(), new LambdaDF(), new NormalizationH3())
        );
    int chosen = random().nextInt(candidates.size());
    similarity = candidates.get(chosen);

    indexWriterUnderTest = new RandomIndexWriter(random(), dirUnderTest, newIndexWriterConfig().setSimilarity(similarity));

    // One document per entry of docs; the array position doubles as the stored id.
    for (int docId = 0; docId < docs.length; docId++) {
        Document document = new Document();
        document.add(newStringField("id", String.valueOf(docId), Field.Store.YES));
        document.add(newField("field", docs[docId], Store.YES));
        indexWriterUnderTest.addDocument(document);
    }
    indexWriterUnderTest.commit();
    indexWriterUnderTest.forceMerge(1);
    indexWriterUnderTest.flush();

    indexReaderUnderTest = indexWriterUnderTest.getReader();
    searcherUnderTest = newSearcher(indexReaderUnderTest);
    searcherUnderTest.setSimilarity(similarity);
}
 
Example #8
Source File: TestBM25SimilarityFactory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * BM25 with explicit parameters: the similarity looked up for {@code "text_params"} must be
 * exactly a {@link BM25Similarity} with k1 close to 1.2 and b close to 0.76.
 */
public void testParameters() throws Exception {
  final float tolerance = 0.01f;
  Similarity configured = getSimilarity("text_params");
  assertEquals(BM25Similarity.class, configured.getClass());

  BM25Similarity typed = (BM25Similarity) configured;
  assertEquals(1.2f, typed.getK1(), tolerance);
  assertEquals(0.76f, typed.getB(), tolerance);
}
 
Example #9
Source File: SchemaSimilarityFactory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the schema-wide similarity, building it on first use.
 *
 * <p>The default similarity wrapped by the result is either an implicit BM25 variant chosen
 * from {@code coreVersion}, or the similarity of the field type named by
 * {@code defaultSimFromFieldType} when that option is set.
 *
 * @throws IllegalStateException if no core has been set yet
 * @throws SolrException if the configured field type does not exist or defines no similarity
 */
@Override
public Similarity getSimilarity() {
  if (null == core) {
    throw new IllegalStateException("SchemaSimilarityFactory can not be used until SolrCoreAware.inform has been called");
  }
  if (null != similarity) {
    return similarity;
  }

  // Need to instantiate lazily, can't do this in inform(SolrCore) because of chicken/egg
  // circular initialization hell with core.getLatestSchema() to lookup defaultSimFromFieldType
  final Similarity fallback;
  if (null == defaultSimFromFieldType) {
    // nothing configured, choose a sensible implicit default...
    if (coreVersion.onOrAfter(Version.LUCENE_8_0_0)) {
      fallback = new BM25Similarity();
    } else {
      fallback = new LegacyBM25Similarity();
    }
  } else {
    FieldType configuredType = core.getLatestSchema().getFieldTypeByName(defaultSimFromFieldType);
    if (null == configuredType) {
      throw new SolrException(ErrorCode.SERVER_ERROR,
                              "SchemaSimilarityFactory configured with " + INIT_OPT + "='" +
                              defaultSimFromFieldType + "' but that <fieldType> does not exist");
    }
    fallback = configuredType.getSimilarity();
    if (null == fallback) {
      throw new SolrException(ErrorCode.SERVER_ERROR,
                              "SchemaSimilarityFactory configured with " + INIT_OPT + "='" +
                              defaultSimFromFieldType +
                              "' but that <fieldType> does not define a <similarity>");
    }
  }

  similarity = new SchemaSimilarity(fallback);
  return similarity;
}
 
Example #10
Source File: TestQueryRescorer.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Rescores the same term-query hits twice with a phrase query, once asking for all hits and
 * once for a random smaller topN, and checks that the smaller result equals the prefix of the
 * full result.
 */
public void testRescoreIsIdempotent() throws Exception {
  Directory directory = newDirectory();
  int docCount = 100;
  String field = "field";
  IndexReader indexReader = publishDocs(docCount, field, directory);

  // Construct a query that will get docCount hits.
  String firstWord = dictionary.get(0);
  TermQuery firstPass = new TermQuery(new Term(field, firstWord));
  IndexSearcher indexSearcher = getSearcher(indexReader);
  indexSearcher.setSimilarity(new BM25Similarity());
  TopDocs firstPassHitsA = indexSearcher.search(firstPass, docCount);
  TopDocs firstPassHitsB = indexSearcher.search(firstPass, docCount);

  // Next, use a more specific phrase query that will return different scores
  // from the above term query
  String secondWord = RandomPicks.randomFrom(random(), dictionary);
  PhraseQuery secondPass = new PhraseQuery(1, field, firstWord, secondWord);

  // rescore, requesting the same hits as topN
  int fullTopN = docCount;
  TopDocs fullRescore = QueryRescorer.rescore(indexSearcher, firstPassHitsA, secondPass, 2.0, fullTopN);

  // now rescore again, where topN is less than docCount
  int reducedTopN = random().nextInt(docCount - 1);
  ScoreDoc[] reducedRescore = QueryRescorer.rescore(indexSearcher, firstPassHitsB, secondPass, 2.0, reducedTopN).scoreDocs;
  ScoreDoc[] expectedPrefix = ArrayUtil.copyOfSubArray(fullRescore.scoreDocs, 0, reducedTopN);
  CheckHits.checkEqual(secondPass, expectedPrefix, reducedRescore);

  indexReader.close();
  directory.close();
}
 
Example #11
Source File: TestQueryRescorer.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Rescores term-query hits with a phrase query while asking for a random, smaller topN, then
 * checks that exactly topN hits come back and that their scores never increase down the list.
 */
public void testRescoreOfASubsetOfHits() throws Exception {
  Directory directory = newDirectory();
  int docCount = 100;
  String field = "field";
  IndexReader indexReader = publishDocs(docCount, field, directory);

  // Construct a query that will get docCount hits.
  String firstWord = dictionary.get(0);
  TermQuery firstPass = new TermQuery(new Term(field, firstWord));
  IndexSearcher indexSearcher = getSearcher(indexReader);
  indexSearcher.setSimilarity(new BM25Similarity());
  TopDocs firstPassHits = indexSearcher.search(firstPass, docCount);

  // Next, use a more specific phrase query that will return different scores
  // from the above term query
  String secondWord = RandomPicks.randomFrom(random(), dictionary);
  PhraseQuery secondPass = new PhraseQuery(1, field, firstWord, secondWord);

  // rescore, requesting a smaller topN
  int reducedTopN = random().nextInt(docCount - 1);
  TopDocs rescored = QueryRescorer.rescore(indexSearcher, firstPassHits, secondPass, 2.0, reducedTopN);
  assertEquals(reducedTopN, rescored.scoreDocs.length);

  // Every hit must score no higher than the one ranked just before it.
  for (int prev = 0; prev + 1 < rescored.scoreDocs.length; prev++) {
    assertTrue(rescored.scoreDocs[prev + 1].score <= rescored.scoreDocs[prev].score);
  }

  indexReader.close();
  directory.close();
}
 
Example #12
Source File: BM25SimilarityProvider.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the BM25 similarity from settings, reading {@code k1} (default 1.2), {@code b}
 * (default 0.75) and {@code discount_overlaps} (default true).
 *
 * @param name     the provider name passed to the superclass
 * @param settings the settings to read the BM25 parameters from
 */
@Inject
public BM25SimilarityProvider(@Assisted String name, @Assisted Settings settings) {
    super(name);

    // Read every setting up front, before the similarity is created.
    final float saturation = settings.getAsFloat("k1", 1.2f);
    final float lengthNormalization = settings.getAsFloat("b", 0.75f);
    final boolean ignoreOverlaps = settings.getAsBoolean("discount_overlaps", true);

    BM25Similarity bm25 = new BM25Similarity(saturation, lengthNormalization);
    bm25.setDiscountOverlaps(ignoreOverlaps);
    this.similarity = bm25;
}
 
Example #13
Source File: TestElevationComparator.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Indexes six small documents with {@link ClassicSimilarity}, then runs the sorting checks in
 * {@code runTest} twice (flag {@code true}, then {@code false}) against a searcher that uses
 * {@link BM25Similarity}.
 */
public void testSorting() throws Throwable {
  Directory dir = newDirectory();
  IndexWriter indexWriter = new IndexWriter(
      dir,
      newIndexWriterConfig(new MockAnalyzer(random())).
          setMaxBufferedDocs(2).
          setMergePolicy(newLogMergePolicy(1000)).
          setSimilarity(new ClassicSimilarity())
  );

  // Field name/value pairs for each document, in indexing order.
  String[][] rows = {
      {"id", "a", "title", "ipod", "str_s", "a"},
      {"id", "b", "title", "ipod ipod", "str_s", "b"},
      {"id", "c", "title", "ipod ipod ipod", "str_s", "c"},
      {"id", "x", "title", "boosted", "str_s", "x"},
      {"id", "y", "title", "boosted boosted", "str_s", "y"},
      {"id", "z", "title", "boosted boosted boosted", "str_s", "z"}
  };
  for (String[] row : rows) {
    indexWriter.addDocument(adoc(row));
  }

  IndexReader indexReader = DirectoryReader.open(indexWriter);
  indexWriter.close();

  IndexSearcher indexSearcher = newSearcher(indexReader);
  indexSearcher.setSimilarity(new BM25Similarity());

  runTest(indexSearcher, true);
  runTest(indexSearcher, false);

  indexReader.close();
  dir.close();
}
 
Example #14
Source File: TestMemoryIndex.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Exercises the freeze lifecycle of {@link MemoryIndex}: fields can be added between searches,
 * a frozen index rejects new fields and a new similarity with a "frozen" message, and
 * {@code reset()} makes the index writable again.
 */
@Test
public void testFreezeAPI() {

  MemoryIndex index = new MemoryIndex();
  index.addField("f1", "some text", analyzer);

  assertThat(index.search(new MatchAllDocsQuery()), not(is(0.0f)));
  assertThat(index.search(new TermQuery(new Term("f1", "some"))), not(is(0.0f)));

  // check we can add a new field after searching
  index.addField("f2", "some more text", analyzer);
  assertThat(index.search(new TermQuery(new Term("f2", "some"))), not(is(0.0f)));

  // freeze!
  index.freeze();

  // Both kinds of mutation must now fail and mention that the index is frozen.
  RuntimeException addFieldFailure =
      expectThrows(RuntimeException.class, () -> index.addField("f3", "and yet more", analyzer));
  assertThat(addFieldFailure.getMessage(), containsString("frozen"));

  RuntimeException setSimilarityFailure =
      expectThrows(RuntimeException.class, () -> index.setSimilarity(new BM25Similarity(1, 1)));
  assertThat(setSimilarityFailure.getMessage(), containsString("frozen"));

  // Searching a frozen index still works.
  assertThat(index.search(new TermQuery(new Term("f1", "some"))), not(is(0.0f)));

  // After a reset the old content is gone and new content can be added.
  index.reset();
  index.addField("f1", "wibble", analyzer);
  assertThat(index.search(new TermQuery(new Term("f1", "some"))), is(0.0f));
  assertThat(index.search(new TermQuery(new Term("f1", "wibble"))), not(is(0.0f)));

  // check we can set the Similarity again
  index.setSimilarity(new ClassicSimilarity());

}
 
Example #15
Source File: SearchImpl.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the similarity described by {@code config}: a {@link ClassicSimilarity} when the
 * config asks for it, otherwise a {@link BM25Similarity} built from the configured k1 and b.
 * The discount-overlaps flag is applied in both cases.
 *
 * @param config the similarity settings to apply
 * @return the configured similarity
 */
private Similarity createSimilarity(SimilarityConfig config) {
  if (config.isUseClassicSimilarity()) {
    ClassicSimilarity classic = new ClassicSimilarity();
    classic.setDiscountOverlaps(config.isDiscountOverlaps());
    return classic;
  }

  BM25Similarity okapi = new BM25Similarity(config.getK1(), config.getB());
  okapi.setDiscountOverlaps(config.isDiscountOverlaps());
  return okapi;
}
 
Example #16
Source File: KNearestFuzzyClassifier.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a {@link KNearestFuzzyClassifier}.
 *
 * @param indexReader    the reader on the index to be used for classification
 * @param similarity     the {@link Similarity} to be used by the underlying {@link IndexSearcher}, or {@code null}
 *                       to fall back to a new {@link BM25Similarity}
 * @param analyzer       an {@link Analyzer} used to analyze unseen text
 * @param query          a {@link Query} to eventually filter the docs used for training the classifier, or {@code null}
 *                       if all the indexed docs should be used
 * @param k              the no. of docs to select in the MLT results to find the nearest neighbor
 * @param classFieldName the name of the field used as the output for the classifier
 * @param textFieldNames the name of the fields used as the inputs for the classifier, they can contain boosting indication e.g. title^10
 */
public KNearestFuzzyClassifier(IndexReader indexReader, Similarity similarity, Analyzer analyzer, Query query, int k,
                               String classFieldName, String... textFieldNames) {
  this.analyzer = analyzer;
  this.query = query;
  this.k = k;
  this.classFieldName = classFieldName;
  this.textFieldNames = textFieldNames;

  // The searcher uses the caller's similarity when given, BM25 otherwise.
  this.indexSearcher = new IndexSearcher(indexReader);
  this.indexSearcher.setSimilarity(similarity == null ? new BM25Similarity() : similarity);
}
 
Example #17
Source File: TestNonDefinedSimilarityFactory.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * With the basic config and the tiny schema, the similarity for {@code "text"} must be a
 * {@link BM25Similarity} whose b is exactly 0.75.
 */
public void testCurrentBM25FromV8() throws Exception {
  // no sys prop set, rely on LATEST
  initCore("solrconfig-basic.xml","schema-tiny.xml");

  final float expectedB = 0.75F;
  BM25Similarity resolved = getSimilarity("text", BM25Similarity.class);
  assertEquals(expectedB, resolved.getB(), 0.0F);
}
 
Example #18
Source File: TestBM25SimilarityFactory.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/** BM25 with default parameters: the similarity for {@code "text"} is exactly {@link BM25Similarity}. */
public void test() throws Exception {
  Class<?> actualType = getSimilarity("text").getClass();
  assertEquals(BM25Similarity.class, actualType);
}
 
Example #19
Source File: TestPerFieldSimilarity.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/** A field name that does not exist must still resolve to exactly {@link BM25Similarity}. */
public void testNonexistent() throws Exception {
  Class<?> actualType = getSimilarity("sdfdsfdsfdswr5fsdfdsfdsfs").getClass();
  assertEquals(BM25Similarity.class, actualType);
}
 
Example #20
Source File: TestPerFieldSimilarity.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/** ... and the same default applies to a dynamic field: {@code "text_sim3"} resolves to {@link BM25Similarity}. */
public void testDefaultsDynamic() throws Exception {
  Class<?> actualType = getSimilarity("text_sim3").getClass();
  assertEquals(BM25Similarity.class, actualType);
}
 
Example #21
Source File: TestPerFieldSimilarity.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/** A field with no similarity specified ({@code "sim3text"}) must resolve to exactly {@link BM25Similarity}. */
public void testDefaults() throws Exception {
  Class<?> actualType = getSimilarity("sim3text").getClass();
  assertEquals(BM25Similarity.class, actualType);
}
 
Example #22
Source File: TestLegacyBM25Similarity.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * A default-constructed {@link LegacyBM25Similarity} must report the same b and k1 as a
 * default-constructed {@link BM25Similarity}.
 */
public void testDefaults() {
  LegacyBM25Similarity legacy = new LegacyBM25Similarity();
  BM25Similarity current = new BM25Similarity();

  final float exact = 0f;
  assertEquals(current.getB(), legacy.getB(), exact);
  assertEquals(current.getK1(), legacy.getK1(), exact);
}
 
Example #23
Source File: BM25SimilarityFactory.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a new {@link BM25Similarity} from this factory's {@code k1} and {@code b} and applies
 * its {@code discountOverlaps} flag.
 *
 * @return a freshly built BM25 similarity
 */
@Override
public Similarity getSimilarity() {
  final BM25Similarity configured = new BM25Similarity(k1, b);
  configured.setDiscountOverlaps(discountOverlaps);
  return configured;
}
 
Example #24
Source File: TestFeatureField.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Demo of boosting a text query with a {@link FeatureField}: four documents are indexed with a
 * "pagerank" feature and a body, and a saturation query on the feature is added as an optional
 * clause to a two-term body query. The expected ranking is asserted by doc id.
 */
public void testDemo() throws IOException {
  Directory directory = newDirectory();
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, newIndexWriterConfig()
      .setMergePolicy(newLogMergePolicy(random().nextBoolean())));

  // A single Document instance is reused; only the field values change per document.
  Document document = new Document();
  FeatureField pagerank = new FeatureField("features", "pagerank", 1);
  document.add(pagerank);
  TextField body = new TextField("body", "", Store.NO);
  document.add(body);

  float[] pagerankValues = {10, 1000, 1, 42};
  String[] bodyTexts = {
      "Apache Lucene",
      "Apache Web HTTP server",
      "Lucene is a search engine",
      "Lucene in the sky with diamonds"
  };
  for (int i = 0; i < bodyTexts.length; i++) {
    pagerank.setFeatureValue(pagerankValues[i]);
    body.setStringValue(bodyTexts[i]);
    indexWriter.addDocument(document);
  }

  DirectoryReader directoryReader = indexWriter.getReader();
  indexWriter.close();

  // NOTE: If you need to make changes below, then you likely also need to
  // update javadocs of FeatureField.

  IndexSearcher indexSearcher = new IndexSearcher(directoryReader);
  indexSearcher.setSimilarity(new BM25Similarity());
  Query textQuery = new BooleanQuery.Builder()
      .add(new TermQuery(new Term("body", "apache")), Occur.SHOULD)
      .add(new TermQuery(new Term("body", "lucene")), Occur.SHOULD)
      .build();
  Query pagerankBoost = FeatureField.newSaturationQuery("features", "pagerank");
  Query combined = new BooleanQuery.Builder()
      .add(textQuery, Occur.MUST)
      .add(pagerankBoost, Occur.SHOULD)
      .build();
  TopDocs top = indexSearcher.search(combined, 10);

  // Expected doc ids from best to worst hit.
  int[] expectedDocs = {1, 0, 3, 2};
  assertEquals(4, top.scoreDocs.length);
  for (int rank = 0; rank < expectedDocs.length; rank++) {
    assertEquals(expectedDocs[rank], top.scoreDocs[rank].doc);
  }

  directoryReader.close();
  directory.close();
}
 
Example #25
Source File: TestLegacyBM25Similarity.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * A default-constructed {@link LegacyBM25Similarity} must have the same string form as a
 * default-constructed {@link BM25Similarity}.
 */
public void testToString() {
  LegacyBM25Similarity legacy = new LegacyBM25Similarity();
  BM25Similarity current = new BM25Similarity();

  String expectedText = current.toString();
  assertEquals(expectedText, legacy.toString());
}
 
Example #26
Source File: BM25FQuery.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Default builder: uses a {@link BM25Similarity} created with its no-argument constructor.
 */
public Builder() {
  similarity = new BM25Similarity();
}
 
Example #27
Source File: BM25NBClassifier.java    From lucene-solr with Apache License 2.0 3 votes vote down vote up
/**
 * Creates a new NaiveBayes classifier.
 *
 * @param indexReader    the reader on the index to be used for classification
 * @param analyzer       an {@link Analyzer} used to analyze unseen text
 * @param query          a {@link Query} to eventually filter the docs used for training the classifier, or {@code null}
 *                       if all the indexed docs should be used
 * @param classFieldName the name of the field used as the output for the classifier NOTE: must not be heavely analyzed
 *                       as the returned class will be a token indexed for this field
 * @param textFieldNames the name of the fields used as the inputs for the classifier, NO boosting supported per field
 */
public BM25NBClassifier(IndexReader indexReader, Analyzer analyzer, Query query, String classFieldName, String... textFieldNames) {
  this.indexReader = indexReader;
  this.indexSearcher = new IndexSearcher(this.indexReader);
  this.indexSearcher.setSimilarity(new BM25Similarity());
  this.textFieldNames = textFieldNames;
  this.classFieldName = classFieldName;
  this.analyzer = analyzer;
  this.query = query;
}
 
Example #28
Source File: LegacyBM25Similarity.java    From lucene-solr with Apache License 2.0 2 votes vote down vote up
/**
 * Creates a legacy BM25 similarity that wraps a {@link BM25Similarity} built from the given
 * parameters.
 *
 * @param k1 Controls non-linear term frequency normalization (saturation).
 * @param b Controls to what degree document length normalizes tf values.
 * @throws IllegalArgumentException if {@code k1} is infinite or negative, or if {@code b} is
 *         not within the range {@code [0..1]}
 */
public LegacyBM25Similarity(float k1, float b) {
  bm25Similarity = new BM25Similarity(k1, b);
}
 
Example #29
Source File: LegacyBM25Similarity.java    From lucene-solr with Apache License 2.0 2 votes vote down vote up
/**
 * Creates a legacy BM25 similarity that wraps a default-constructed {@link BM25Similarity},
 * i.e. BM25 with these default values:
 * <ul>
 *   <li>{@code k1 = 1.2}</li>
 *   <li>{@code b = 0.75}</li>
 * </ul>
 */
public LegacyBM25Similarity() {
  bm25Similarity = new BM25Similarity();
}
 
Example #30
Source File: BM25FQuery.java    From lucene-solr with Apache License 2.0 2 votes vote down vote up
/**
 * Builder whose {@link BM25Similarity} is created from the supplied parameter values.
 *
 * @param k1 Controls non-linear term frequency normalization (saturation).
 * @param b Controls to what degree document length normalizes tf values.
 */
public Builder(float k1, float b) {
  similarity = new BM25Similarity(k1, b);
}