Java Code Examples for org.apache.lucene.search.similarities.BM25Similarity

The following examples show how to use org.apache.lucene.search.similarities.BM25Similarity. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: lucene-solr   Source File: BM25FQuery.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates the query and precomputes one {@link Term} per (field, term) combination.
 *
 * <p>Note that {@code terms} is sorted in place, so the caller's array is reordered.
 *
 * @param similarity      the BM25 similarity stored on this query
 * @param fieldAndWeights the fields to search, keyed by field name
 * @param terms           the terms to look up in every field
 * @throws IndexSearcher.TooManyClauses if the number of (field, term) pairs exceeds
 *                                      {@link IndexSearcher#getMaxClauseCount()}
 */
private BM25FQuery(BM25Similarity similarity, TreeMap<String, FieldAndWeight> fieldAndWeights, BytesRef[] terms) {
  this.similarity = similarity;
  this.fieldAndWeights = fieldAndWeights;
  this.terms = terms;
  // Multiply in 64 bits: an int product could wrap around, slip past the
  // clause-count limit below and then fail on the array allocation instead.
  long numFieldTerms = (long) fieldAndWeights.size() * terms.length;
  if (numFieldTerms > IndexSearcher.getMaxClauseCount()) {
    throw new IndexSearcher.TooManyClauses();
  }
  // Safe narrowing: the value was just checked against an int limit.
  this.fieldTerms = new Term[(int) numFieldTerms];
  Arrays.sort(terms);
  int pos = 0;
  for (String field : fieldAndWeights.keySet()) {
    for (BytesRef term : terms) {
      fieldTerms[pos++] = new Term(field, term);
    }
  }

  this.ramBytesUsed = BASE_RAM_BYTES +
      RamUsageEstimator.sizeOfObject(fieldAndWeights) +
      RamUsageEstimator.sizeOfObject(fieldTerms) +
      RamUsageEstimator.sizeOfObject(terms);
}
 
Example 2
Source Project: lucene-solr   Source File: KNearestNeighborClassifier.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@link KNearestNeighborClassifier} backed by a {@link MoreLikeThis} instance and an
 * {@link IndexSearcher}, both created over the given reader.
 *
 * @param indexReader    the reader on the index to be used for classification
 * @param similarity     the {@link Similarity} to install on the underlying {@link IndexSearcher}; when
 *                       {@code null} a new {@link org.apache.lucene.search.similarities.BM25Similarity} is used
 * @param analyzer       an {@link Analyzer} used to analyze unseen text
 * @param query          a {@link Query} to eventually filter the docs used for training the classifier, or
 *                       {@code null} if all the indexed docs should be used
 * @param k              the no. of docs to select in the MLT results to find the nearest neighbor
 * @param minDocsFreq    {@link MoreLikeThis#minDocFreq} parameter; only applied when greater than zero
 * @param minTermFreq    {@link MoreLikeThis#minTermFreq} parameter; only applied when greater than zero
 * @param classFieldName the name of the field used as the output for the classifier
 * @param textFieldNames the name of the fields used as the inputs for the classifier, they can contain
 *                       boosting indication e.g. title^10
 */
public KNearestNeighborClassifier(IndexReader indexReader, Similarity similarity, Analyzer analyzer, Query query, int k, int minDocsFreq,
                                  int minTermFreq, String classFieldName, String... textFieldNames) {
  // Plain state first.
  this.textFieldNames = textFieldNames;
  this.classFieldName = classFieldName;
  this.query = query;
  this.k = k;

  // MoreLikeThis is configured with the analyzer and the input fields.
  this.mlt = new MoreLikeThis(indexReader);
  this.mlt.setAnalyzer(analyzer);
  this.mlt.setFieldNames(textFieldNames);

  // The searcher falls back to BM25 when no similarity was supplied.
  this.indexSearcher = new IndexSearcher(indexReader);
  this.indexSearcher.setSimilarity(similarity != null ? similarity : new BM25Similarity());

  // Non-positive frequencies leave the MoreLikeThis settings untouched.
  if (minDocsFreq > 0) {
    this.mlt.setMinDocFreq(minDocsFreq);
  }
  if (minTermFreq > 0) {
    this.mlt.setMinTermFreq(minTermFreq);
  }
}
 
Example 3
/**
 * Classifies the technology sample with several classifier configurations and checks that the
 * BM25-based and LMDirichlet-based classifiers (k=3) report different scores.
 */
@Test
public void testBasicUsage() throws Exception {
  LeafReader reader = null;
  try {
    MockAnalyzer mockAnalyzer = new MockAnalyzer(random());
    reader = getSampleIndex(mockAnalyzer);

    // k=1, no similarity given
    KNearestNeighborClassifier defaultSimClassifier =
        new KNearestNeighborClassifier(reader, null, mockAnalyzer, null, 1, 0, 0, categoryFieldName, textFieldName);
    checkCorrectClassification(defaultSimClassifier, TECHNOLOGY_INPUT, TECHNOLOGY_RESULT);

    // k=1, explicit LMDirichlet similarity
    KNearestNeighborClassifier lmClassifier =
        new KNearestNeighborClassifier(reader, new LMDirichletSimilarity(), mockAnalyzer, null, 1, 0, 0, categoryFieldName, textFieldName);
    checkCorrectClassification(lmClassifier, TECHNOLOGY_INPUT, TECHNOLOGY_RESULT);

    // k=3 with frequency thresholds, BM25 vs. LMDirichlet
    KNearestNeighborClassifier bm25TunedClassifier =
        new KNearestNeighborClassifier(reader, new BM25Similarity(), mockAnalyzer, null, 3, 2, 1, categoryFieldName, textFieldName);
    ClassificationResult<BytesRef> bm25Outcome =
        checkCorrectClassification(bm25TunedClassifier, TECHNOLOGY_INPUT, TECHNOLOGY_RESULT);

    KNearestNeighborClassifier lmTunedClassifier =
        new KNearestNeighborClassifier(reader, new LMDirichletSimilarity(), mockAnalyzer, null, 3, 2, 1, categoryFieldName, textFieldName);
    ClassificationResult<BytesRef> lmOutcome =
        checkCorrectClassification(lmTunedClassifier, TECHNOLOGY_INPUT, TECHNOLOGY_RESULT);

    assertTrue(bm25Outcome.getScore() != lmOutcome.getScore());
  } finally {
    if (reader != null) {
      reader.close();
    }
  }
}
 
Example 4
Source Project: Elasticsearch   Source File: BM25SimilarityProvider.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads the BM25 parameters from the given settings and builds the similarity.
 *
 * <p>Settings read: {@code k1} (default 1.2), {@code b} (default 0.75) and
 * {@code discount_overlaps} (default {@code true}).
 *
 * @param name     the name passed on to the superclass
 * @param settings the settings holding the BM25 parameters
 */
@Inject
public BM25SimilarityProvider(@Assisted String name, @Assisted Settings settings) {
    super(name);

    // All three settings are read before the similarity is constructed.
    final float saturation = settings.getAsFloat("k1", 1.2f);
    final float lengthNorm = settings.getAsFloat("b", 0.75f);
    final boolean ignoreOverlaps = settings.getAsBoolean("discount_overlaps", true);

    this.similarity = new BM25Similarity(saturation, lengthNorm);
    this.similarity.setDiscountOverlaps(ignoreOverlaps);
}
 
Example 5
Source Project: lucene-solr   Source File: TestMemoryIndex.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Exercises the freeze/reset life cycle of a {@link MemoryIndex}: fields can be added until the
 * index is frozen, mutations on a frozen index fail with a message containing "frozen", and
 * after {@code reset()} the index accepts new fields and a new similarity again.
 */
@Test
public void testFreezeAPI() {
  MemoryIndex memoryIndex = new MemoryIndex();
  memoryIndex.addField("f1", "some text", analyzer);

  assertThat(memoryIndex.search(new MatchAllDocsQuery()), not(is(0.0f)));
  assertThat(memoryIndex.search(new TermQuery(new Term("f1", "some"))), not(is(0.0f)));

  // adding another field after a search is still allowed
  memoryIndex.addField("f2", "some more text", analyzer);
  assertThat(memoryIndex.search(new TermQuery(new Term("f2", "some"))), not(is(0.0f)));

  // from here on the index is frozen
  memoryIndex.freeze();

  RuntimeException addAfterFreeze = expectThrows(RuntimeException.class,
      () -> memoryIndex.addField("f3", "and yet more", analyzer));
  assertThat(addAfterFreeze.getMessage(), containsString("frozen"));

  RuntimeException similarityAfterFreeze = expectThrows(RuntimeException.class,
      () -> memoryIndex.setSimilarity(new BM25Similarity(1, 1)));
  assertThat(similarityAfterFreeze.getMessage(), containsString("frozen"));

  // searching a frozen index keeps working
  assertThat(memoryIndex.search(new TermQuery(new Term("f1", "some"))), not(is(0.0f)));

  // reset, then re-populate f1 with different content
  memoryIndex.reset();
  memoryIndex.addField("f1", "wibble", analyzer);
  assertThat(memoryIndex.search(new TermQuery(new Term("f1", "some"))), is(0.0f));
  assertThat(memoryIndex.search(new TermQuery(new Term("f1", "wibble"))), not(is(0.0f)));

  // the similarity can be replaced again after the reset
  memoryIndex.setSimilarity(new ClassicSimilarity());
}
 
Example 6
Source Project: lucene-solr   Source File: SearchImpl.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds the {@link Similarity} described by the given configuration.
 *
 * @param config the similarity settings
 * @return a {@link ClassicSimilarity} when the config asks for it, otherwise a
 *         {@link BM25Similarity} built from the configured k1 and b; in both cases the
 *         configured discount-overlaps flag is applied
 */
private Similarity createSimilarity(SimilarityConfig config) {
  if (config.isUseClassicSimilarity()) {
    ClassicSimilarity classic = new ClassicSimilarity();
    classic.setDiscountOverlaps(config.isDiscountOverlaps());
    return classic;
  }

  BM25Similarity okapi = new BM25Similarity(config.getK1(), config.getB());
  okapi.setDiscountOverlaps(config.isDiscountOverlaps());
  return okapi;
}
 
Example 7
Source Project: lucene-solr   Source File: KNearestFuzzyClassifier.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@link KNearestFuzzyClassifier} with an {@link IndexSearcher} created over the given reader.
 *
 * @param indexReader    the reader on the index to be used for classification
 * @param similarity     the {@link Similarity} to install on the underlying {@link IndexSearcher}; when
 *                       {@code null} a new {@link BM25Similarity} is used
 * @param analyzer       an {@link Analyzer} used to analyze unseen text
 * @param query          a {@link Query} to eventually filter the docs used for training the classifier, or
 *                       {@code null} if all the indexed docs should be used
 * @param k              the no. of docs to select in the MLT results to find the nearest neighbor
 * @param classFieldName the name of the field used as the output for the classifier
 * @param textFieldNames the name of the fields used as the inputs for the classifier, they can contain
 *                       boosting indication e.g. title^10
 */
public KNearestFuzzyClassifier(IndexReader indexReader, Similarity similarity, Analyzer analyzer, Query query, int k,
                               String classFieldName, String... textFieldNames) {
  // Plain state first.
  this.textFieldNames = textFieldNames;
  this.classFieldName = classFieldName;
  this.analyzer = analyzer;
  this.query = query;
  this.k = k;

  // The searcher falls back to BM25 when no similarity was supplied.
  this.indexSearcher = new IndexSearcher(indexReader);
  this.indexSearcher.setSimilarity(similarity == null ? new BM25Similarity() : similarity);
}
 
Example 8
Source Project: lucene-solr   Source File: TestElevationComparator.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Indexes six small documents with {@link ClassicSimilarity}, then runs {@code runTest} in both
 * modes against a searcher that uses {@link BM25Similarity}.
 */
public void testSorting() throws Throwable {
  Directory dir = newDirectory();
  IndexWriter indexWriter = new IndexWriter(
      dir,
      newIndexWriterConfig(new MockAnalyzer(random()))
          .setMaxBufferedDocs(2)
          .setMergePolicy(newLogMergePolicy(1000))
          .setSimilarity(new ClassicSimilarity())
  );

  // One row per document, as alternating field-name/value pairs.
  String[][] docSpecs = {
      {"id", "a", "title", "ipod", "str_s", "a"},
      {"id", "b", "title", "ipod ipod", "str_s", "b"},
      {"id", "c", "title", "ipod ipod ipod", "str_s", "c"},
      {"id", "x", "title", "boosted", "str_s", "x"},
      {"id", "y", "title", "boosted boosted", "str_s", "y"},
      {"id", "z", "title", "boosted boosted boosted", "str_s", "z"},
  };
  for (String[] spec : docSpecs) {
    indexWriter.addDocument(adoc(spec));
  }

  IndexReader indexReader = DirectoryReader.open(indexWriter);
  indexWriter.close();

  IndexSearcher bm25Searcher = newSearcher(indexReader);
  bm25Searcher.setSimilarity(new BM25Similarity());

  runTest(bm25Searcher, true);
  runTest(bm25Searcher, false);

  indexReader.close();
  dir.close();
}
 
Example 9
Source Project: lucene-solr   Source File: TestPhraseQuery.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Indexes three documents in which "firstname" and "lastname" are increasingly far apart and
 * checks that a sloppy {@link PhraseQuery} ranks and scores them by proximity.
 */
public void testSlopScoring() throws IOException {
  Directory dir = newDirectory();
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), dir,
      newIndexWriterConfig(new MockAnalyzer(random()))
        .setMergePolicy(newLogMergePolicy())
        .setSimilarity(new BM25Similarity()));

  // Each text becomes its own single-field document, in this order.
  String[] texts = {
      "foo firstname lastname foo",
      "foo firstname zzz lastname foo",
      "foo firstname zzz yyy lastname foo",
  };
  for (String text : texts) {
    Document document = new Document();
    document.add(newTextField("field", text, Field.Store.YES));
    indexWriter.addDocument(document);
  }

  IndexReader indexReader = indexWriter.getReader();
  indexWriter.close();

  IndexSearcher indexSearcher = newSearcher(indexReader);
  indexSearcher.setSimilarity(new ClassicSimilarity());
  PhraseQuery sloppyPhrase = new PhraseQuery(Integer.MAX_VALUE, "field", "firstname", "lastname");
  ScoreDoc[] hits = indexSearcher.search(sloppyPhrase, 1000).scoreDocs;
  assertEquals(3, hits.length);

  // Matches whose terms are closer together must score higher, so the hits
  // are expected in insertion order with decreasing scores.
  double[] expectedScores = {1.0, 0.63, 0.47};
  for (int rank = 0; rank < expectedScores.length; rank++) {
    assertEquals(expectedScores[rank], hits[rank].score, 0.01);
    assertEquals(rank, hits[rank].doc);
  }

  QueryUtils.check(random(), sloppyPhrase, indexSearcher);
  indexReader.close();
  dir.close();
}
 
Example 10
Source Project: lucene-solr   Source File: TestQueryRescorer.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Rescores the hits of a term query with a phrase query while asking for fewer results than
 * were originally returned, and checks the size and ordering of the rescored hits.
 */
public void testRescoreOfASubsetOfHits() throws Exception {
  Directory directory = newDirectory();
  int docCount = 100;
  String field = "field";
  IndexReader indexReader = publishDocs(docCount, field, directory);

  // First pass: a term query expected to hit docCount documents.
  String firstWord = dictionary.get(0);
  TermQuery firstPass = new TermQuery(new Term(field, firstWord));
  IndexSearcher indexSearcher = getSearcher(indexReader);
  indexSearcher.setSimilarity(new BM25Similarity());
  TopDocs firstPassHits = indexSearcher.search(firstPass, docCount);

  // Second pass: a more specific phrase query that yields different scores
  // than the term query above.
  String secondWord = RandomPicks.randomFrom(random(), dictionary);
  PhraseQuery secondPass = new PhraseQuery(1, field, firstWord, secondWord);

  // Rescore while requesting a smaller topN.
  int topN = random().nextInt(docCount - 1);
  TopDocs rescored = QueryRescorer.rescore(indexSearcher, firstPassHits, secondPass, 2.0, topN);
  ScoreDoc[] rescoredDocs = rescored.scoreDocs;
  assertEquals(topN, rescoredDocs.length);

  // Scores must be non-increasing from one hit to the next.
  for (int next = 1; next < rescoredDocs.length; next++) {
    int previous = next - 1;
    assertTrue(rescoredDocs[next].score <= rescoredDocs[previous].score);
  }

  indexReader.close();
  directory.close();
}
 
Example 11
Source Project: lucene-solr   Source File: TestQueryRescorer.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Rescores the same term-query hits twice, once for all hits and once for a smaller topN, and
 * checks that the second result equals the leading entries of the first.
 */
public void testRescoreIsIdempotent() throws Exception {
  Directory directory = newDirectory();
  int docCount = 100;
  String field = "field";
  IndexReader indexReader = publishDocs(docCount, field, directory);

  // First pass: a term query expected to hit docCount documents, run twice.
  String firstWord = dictionary.get(0);
  TermQuery firstPass = new TermQuery(new Term(field, firstWord));
  IndexSearcher indexSearcher = getSearcher(indexReader);
  indexSearcher.setSimilarity(new BM25Similarity());
  TopDocs firstRun = indexSearcher.search(firstPass, docCount);
  TopDocs secondRun = indexSearcher.search(firstPass, docCount);

  // Second pass: a more specific phrase query that yields different scores
  // than the term query above.
  String secondWord = RandomPicks.randomFrom(random(), dictionary);
  PhraseQuery secondPass = new PhraseQuery(1, field, firstWord, secondWord);

  // Rescore everything: topN equals the number of documents.
  TopDocs fullRescore = QueryRescorer.rescore(indexSearcher, firstRun, secondPass, 2.0, docCount);

  // Rescore again with a topN below the number of documents.
  int reducedTopN = random().nextInt(docCount - 1);
  TopDocs partialRescore = QueryRescorer.rescore(indexSearcher, secondRun, secondPass, 2.0, reducedTopN);
  ScoreDoc[] actualTopN = partialRescore.scoreDocs;
  ScoreDoc[] expectedTopN = ArrayUtil.copyOfSubArray(fullRescore.scoreDocs, 0, reducedTopN);
  CheckHits.checkEqual(secondPass, expectedTopN, actualTopN);

  indexReader.close();
  directory.close();
}
 
Example 12
Source Project: lucene-solr   Source File: SchemaSimilarityFactory.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns the schema-level similarity, creating it on first use.
 *
 * <p>When no default field type is configured, the wrapped default is {@link BM25Similarity}
 * for core versions on or after Lucene 8.0.0 and {@link LegacyBM25Similarity} otherwise.
 * When one is configured, that field type's similarity is used as the default.
 *
 * @return the {@link SchemaSimilarity} held by this factory
 * @throws IllegalStateException if no core has been set yet
 * @throws SolrException if the configured default field type does not exist or defines no similarity
 */
@Override
public Similarity getSimilarity() {
  if (core == null) {
    throw new IllegalStateException("SchemaSimilarityFactory can not be used until SolrCoreAware.inform has been called");
  }
  if (similarity != null) {
    return similarity;
  }

  // Instantiated lazily: doing this in inform(SolrCore) would run into a chicken/egg
  // circular initialization problem with core.getLatestSchema(), which is needed to
  // look up defaultSimFromFieldType.
  final Similarity fallback;
  if (defaultSimFromFieldType == null) {
    // Nothing configured, so pick an implicit default based on the core version.
    if (coreVersion.onOrAfter(Version.LUCENE_8_0_0)) {
      fallback = new BM25Similarity();
    } else {
      fallback = new LegacyBM25Similarity();
    }
  } else {
    FieldType configuredType = core.getLatestSchema().getFieldTypeByName(defaultSimFromFieldType);
    if (configuredType == null) {
      throw new SolrException(ErrorCode.SERVER_ERROR,
          "SchemaSimilarityFactory configured with " + INIT_OPT + "='" +
          defaultSimFromFieldType + "' but that <fieldType> does not exist");
    }
    fallback = configuredType.getSimilarity();
    if (fallback == null) {
      throw new SolrException(ErrorCode.SERVER_ERROR,
          "SchemaSimilarityFactory configured with " + INIT_OPT + "='" +
          defaultSimFromFieldType +
          "' but that <fieldType> does not define a <similarity>");
    }
  }

  similarity = new SchemaSimilarity(fallback);
  return similarity;
}
 
Example 13
Source Project: lucene-solr   Source File: TestBM25SimilarityFactory.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * BM25 with explicit parameters: the similarity returned for "text_params" must be a
 * {@link BM25Similarity} whose k1 is about 1.2 and whose b is about 0.76.
 */
public void testParameters() throws Exception {
  Similarity configured = getSimilarity("text_params");
  assertEquals(BM25Similarity.class, configured.getClass());

  // The exact-class check above makes this cast safe.
  BM25Similarity withParams = (BM25Similarity) configured;
  assertEquals(1.2f, withParams.getK1(), 0.01f);
  assertEquals(0.76f, withParams.getB(), 0.01f);
}
 
Example 14
/**
 * Builds the index under test: picks one similarity at random, indexes every entry of
 * {@code docs} with it, merges down to a single segment and opens a searcher that uses the
 * same similarity.
 */
@Before
public void setupIndex() throws IOException {
    dirUnderTest = newDirectory();

    // Pool of similarities; exactly one is chosen at random below.
    List<Similarity> candidates = Arrays.asList(
        new ClassicSimilarity(),
        new SweetSpotSimilarity(), // extends Classic
        new BM25Similarity(),
        new LMDirichletSimilarity(),
        new BooleanSimilarity(),
        new LMJelinekMercerSimilarity(0.2F),
        new AxiomaticF3LOG(0.5F, 10),
        new DFISimilarity(new IndependenceChiSquared()),
        new DFRSimilarity(new BasicModelG(), new AfterEffectB(), new NormalizationH1()),
        new IBSimilarity(new DistributionLL(), new LambdaDF(), new NormalizationH3())
    );
    int choice = random().nextInt(candidates.size());
    similarity = candidates.get(choice);

    indexWriterUnderTest = new RandomIndexWriter(random(), dirUnderTest, newIndexWriterConfig().setSimilarity(similarity));

    // One document per entry in docs; the array position is stored as the id.
    for (int position = 0; position < docs.length; position++) {
        Document document = new Document();
        document.add(newStringField("id", Integer.toString(position), Field.Store.YES));
        document.add(newField("field", docs[position], Store.YES));
        indexWriterUnderTest.addDocument(document);
    }
    indexWriterUnderTest.commit();
    indexWriterUnderTest.forceMerge(1);
    indexWriterUnderTest.flush();

    indexReaderUnderTest = indexWriterUnderTest.getReader();
    searcherUnderTest = newSearcher(indexReaderUnderTest);
    searcherUnderTest.setSimilarity(similarity);
}
 
Example 15
Source Project: uncc2014watsonsim   Source File: Lucene.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Opens (or creates) the Lucene index stored at the given path and prepares an
 * {@link IndexWriter} for it that uses {@link BM25Similarity}.
 *
 * @param path the file-system location of the index
 * @throws IOException if the directory or the index writer cannot be opened
 */
public Lucene(Path path) throws IOException {
	/* Setup Lucene */
	Directory dir = FSDirectory.open(path);
	try {
		// A standard analyzer is used here; many other analyzers are available.
		Analyzer analyzer = new StandardAnalyzer();
		IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
		// CREATE_OR_APPEND is requested, so this constructor asks for append-or-create
		// rather than the overwrite-only CREATE mode.
		iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
		iwc.setSimilarity(new BM25Similarity());
		index = new IndexWriter(dir, iwc);
	} catch (IOException | RuntimeException e) {
		// The writer never took over the directory, so release it here
		// instead of leaking the open handle.
		try {
			dir.close();
		} catch (IOException closeFailure) {
			e.addSuppressed(closeFailure);
		}
		throw e;
	}
}
 
Example 16
Source Project: lumongo   Source File: LumongoSegment.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a per-field similarity wrapper for the given query.
 *
 * <p>For each field the similarity type starts as BM25, is replaced by the field's analyzer
 * settings when those exist, and is finally replaced by the query's per-field override when
 * one is present.
 *
 * @param queryWithFilters the query supplying optional per-field similarity overrides
 * @return a wrapper that resolves the {@link Similarity} for a field name on demand
 */
private PerFieldSimilarityWrapper getSimilarity(final QueryWithFilters queryWithFilters) {
	return new PerFieldSimilarityWrapper() {
		@Override
		public Similarity get(String name) {
			// Default, possibly replaced by the index-level analyzer settings.
			AnalyzerSettings.Similarity chosen = AnalyzerSettings.Similarity.BM25;
			AnalyzerSettings fieldSettings = indexConfig.getAnalyzerSettingsForIndexField(name);
			if (fieldSettings != null) {
				chosen = fieldSettings.getSimilarity();
			}

			// A query-level override takes precedence over everything else.
			AnalyzerSettings.Similarity queryOverride = queryWithFilters.getFieldSimilarityOverride(name);
			if (queryOverride != null) {
				chosen = queryOverride;
			}

			// equals() is called on the constants so a null type falls through to the error below.
			if (AnalyzerSettings.Similarity.TFIDF.equals(chosen)) {
				return new ClassicSimilarity();
			}
			if (AnalyzerSettings.Similarity.BM25.equals(chosen)) {
				return new BM25Similarity();
			}
			if (AnalyzerSettings.Similarity.CONSTANT.equals(chosen)) {
				return new ConstantSimilarity();
			}
			if (AnalyzerSettings.Similarity.TF.equals(chosen)) {
				return new TFSimilarity();
			}
			throw new RuntimeException("Unknown similarity type <" + chosen + ">");
		}
	};
}
 
Example 17
Source Project: lucene-solr   Source File: TestLegacyBM25Similarity.java    License: Apache License 2.0 4 votes vote down vote up
/** The default b and k1 of {@link LegacyBM25Similarity} must equal those of {@link BM25Similarity}. */
public void testDefaults() {
  BM25Similarity reference = new BM25Similarity();
  LegacyBM25Similarity legacy = new LegacyBM25Similarity();

  float expectedB = reference.getB();
  assertEquals(expectedB, legacy.getB(), 0f);

  float expectedK1 = reference.getK1();
  assertEquals(expectedK1, legacy.getK1(), 0f);
}
 
Example 18
Source Project: lucene-solr   Source File: TestLegacyBM25Similarity.java    License: Apache License 2.0 4 votes vote down vote up
/** Default instances of both BM25 implementations must produce the same {@code toString()}. */
public void testToString() {
  String expected = new BM25Similarity().toString();
  String actual = new LegacyBM25Similarity().toString();
  assertEquals(expected, actual);
}
 
Example 19
Source Project: lucene-solr   Source File: BM25FQuery.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Creates a builder whose similarity is a {@link BM25Similarity} constructed with its
 * no-argument constructor.
 */
public Builder() {
  BM25Similarity defaults = new BM25Similarity();
  this.similarity = defaults;
}
 
Example 20
Source Project: lucene-solr   Source File: TestFeatureField.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Indexes four documents that share a "pagerank" feature and a "body" text field, then checks
 * the ranking of a text query boosted by a saturation query over the feature.
 */
public void testDemo() throws IOException {
  Directory directory = newDirectory();
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, newIndexWriterConfig()
      .setMergePolicy(newLogMergePolicy(random().nextBoolean())));

  // A single Document instance is reused; only the field values change per add.
  Document document = new Document();
  FeatureField pagerank = new FeatureField("features", "pagerank", 1);
  document.add(pagerank);
  TextField body = new TextField("body", "", Store.NO);
  document.add(body);

  float[] pagerankValues = {10, 1000, 1, 42};
  String[] bodyTexts = {
      "Apache Lucene",
      "Apache Web HTTP server",
      "Lucene is a search engine",
      "Lucene in the sky with diamonds",
  };
  for (int i = 0; i < bodyTexts.length; i++) {
    pagerank.setFeatureValue(pagerankValues[i]);
    body.setStringValue(bodyTexts[i]);
    indexWriter.addDocument(document);
  }

  DirectoryReader directoryReader = indexWriter.getReader();
  indexWriter.close();

  // NOTE: If you need to make changes below, then you likely also need to
  // update javadocs of FeatureField.

  IndexSearcher indexSearcher = new IndexSearcher(directoryReader);
  indexSearcher.setSimilarity(new BM25Similarity());

  BooleanQuery.Builder textQueryBuilder = new BooleanQuery.Builder();
  textQueryBuilder.add(new TermQuery(new Term("body", "apache")), Occur.SHOULD);
  textQueryBuilder.add(new TermQuery(new Term("body", "lucene")), Occur.SHOULD);
  Query textQuery = textQueryBuilder.build();

  Query pagerankBoost = FeatureField.newSaturationQuery("features", "pagerank");

  BooleanQuery.Builder boostedBuilder = new BooleanQuery.Builder();
  boostedBuilder.add(textQuery, Occur.MUST);
  boostedBuilder.add(pagerankBoost, Occur.SHOULD);
  Query boostedQuery = boostedBuilder.build();

  TopDocs topDocs = indexSearcher.search(boostedQuery, 10);
  int[] expectedDocOrder = {1, 0, 3, 2};
  assertEquals(4, topDocs.scoreDocs.length);
  for (int rank = 0; rank < expectedDocOrder.length; rank++) {
    assertEquals(expectedDocOrder[rank], topDocs.scoreDocs[rank].doc);
  }

  directoryReader.close();
  directory.close();
}
 
Example 21
Source Project: lucene-solr   Source File: BM25SimilarityFactory.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Creates a new {@link BM25Similarity} from this factory's {@code k1} and {@code b} and applies
 * its {@code discountOverlaps} flag.
 *
 * @return the newly created similarity
 */
@Override
public Similarity getSimilarity() {
  final BM25Similarity configured = new BM25Similarity(k1, b);
  configured.setDiscountOverlaps(discountOverlaps);
  return configured;
}
 
Example 22
/** With the basic config and tiny schema, the "text" similarity must be BM25 with b = 0.75. */
public void testCurrentBM25FromV8() throws Exception {
  // no sys prop set, rely on LATEST
  initCore("solrconfig-basic.xml","schema-tiny.xml");
  float actualB = getSimilarity("text", BM25Similarity.class).getB();
  assertEquals(0.75F, actualB, 0.0F);
}
 
Example 23
Source Project: lucene-solr   Source File: TestPerFieldSimilarity.java    License: Apache License 2.0 4 votes vote down vote up
/** A field for which no similarity is specified must resolve to {@link BM25Similarity}. */
public void testDefaults() throws Exception {
  assertEquals(BM25Similarity.class, getSimilarity("sim3text").getClass());
}
 
Example 24
Source Project: lucene-solr   Source File: TestPerFieldSimilarity.java    License: Apache License 2.0 4 votes vote down vote up
/** Same default check as for a regular field, this time for a dynamic field. */
public void testDefaultsDynamic() throws Exception {
  assertEquals(BM25Similarity.class, getSimilarity("text_sim3").getClass());
}
 
Example 25
Source Project: lucene-solr   Source File: TestPerFieldSimilarity.java    License: Apache License 2.0 4 votes vote down vote up
/** A field name that does not exist must still resolve to {@link BM25Similarity}. */
public void testNonexistent() throws Exception {
  assertEquals(BM25Similarity.class, getSimilarity("sdfdsfdsfdswr5fsdfdsfdsfs").getClass());
}
 
Example 26
Source Project: lucene-solr   Source File: TestBM25SimilarityFactory.java    License: Apache License 2.0 4 votes vote down vote up
/** BM25 with default parameters: the similarity for "text" must be exactly {@link BM25Similarity}. */
public void test() throws Exception {
  Class<?> actualType = getSimilarity("text").getClass();
  assertEquals(BM25Similarity.class, actualType);
}
 
Example 27
Source Project: lucene-solr   Source File: BM25NBClassifier.java    License: Apache License 2.0 3 votes vote down vote up
/**
 * Creates a new NaiveBayes classifier whose {@link IndexSearcher} uses {@link BM25Similarity}.
 *
 * @param indexReader    the reader on the index to be used for classification
 * @param analyzer       an {@link Analyzer} used to analyze unseen text
 * @param query          a {@link Query} to eventually filter the docs used for training the classifier, or {@code null}
 *                       if all the indexed docs should be used
 * @param classFieldName the name of the field used as the output for the classifier NOTE: must not be heavily analyzed
 *                       as the returned class will be a token indexed for this field
 * @param textFieldNames the name of the fields used as the inputs for the classifier, NO boosting supported per field
 */
public BM25NBClassifier(IndexReader indexReader, Analyzer analyzer, Query query, String classFieldName, String... textFieldNames) {
  // Plain state first.
  this.indexReader = indexReader;
  this.analyzer = analyzer;
  this.query = query;
  this.classFieldName = classFieldName;
  this.textFieldNames = textFieldNames;

  // The searcher over the same reader always scores with BM25.
  this.indexSearcher = new IndexSearcher(indexReader);
  this.indexSearcher.setSimilarity(new BM25Similarity());
}
 
Example 28
Source Project: lucene-solr   Source File: LegacyBM25Similarity.java    License: Apache License 2.0 2 votes vote down vote up
/**
 * Wraps a {@link BM25Similarity} created with its no-argument constructor, i.e. BM25 with
 * these default values:
 * <ul>
 *   <li>{@code k1 = 1.2}</li>
 *   <li>{@code b = 0.75}</li>
 * </ul>
 */
public LegacyBM25Similarity() {
  BM25Similarity delegate = new BM25Similarity();
  this.bm25Similarity = delegate;
}
 
Example 29
Source Project: lucene-solr   Source File: LegacyBM25Similarity.java    License: Apache License 2.0 2 votes vote down vote up
/**
 * Wraps a {@link BM25Similarity} created with the supplied parameter values.
 *
 * @param k1 Controls non-linear term frequency normalization (saturation).
 * @param b Controls to what degree document length normalizes tf values.
 * @throws IllegalArgumentException if {@code k1} is infinite or negative, or if {@code b} is
 *         not within the range {@code [0..1]}
 */
public LegacyBM25Similarity(float k1, float b) {
  BM25Similarity delegate = new BM25Similarity(k1, b);
  this.bm25Similarity = delegate;
}
 
Example 30
Source Project: lucene-solr   Source File: BM25FQuery.java    License: Apache License 2.0 2 votes vote down vote up
/**
 * Creates a builder whose similarity is a {@link BM25Similarity} with the supplied parameter values.
 *
 * @param k1 Controls non-linear term frequency normalization (saturation).
 * @param b Controls to what degree document length normalizes tf values.
 */
public Builder(float k1, float b) {
  BM25Similarity custom = new BM25Similarity(k1, b);
  this.similarity = custom;
}