Java Code Examples for org.apache.lucene.index.IndexWriterConfig#setSimilarity()

The following examples show how to use org.apache.lucene.index.IndexWriterConfig#setSimilarity() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestTaxonomyFacetCounts.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
public void testReallyNoNormsForDrillDown() throws Exception {
  Directory dir = newDirectory();
  Directory taxoDir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
  iwc.setSimilarity(new PerFieldSimilarityWrapper() {
      final Similarity sim = new ClassicSimilarity();

      @Override
      public Similarity get(String name) {
        assertEquals("field", name);
        return sim;
      }
    });
  TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
  FacetsConfig config = new FacetsConfig();

  Document doc = new Document();
  doc.add(newTextField("field", "text", Field.Store.NO));
  doc.add(new FacetField("a", "path"));
  writer.addDocument(config.build(taxoWriter, doc));
  writer.close();
  IOUtils.close(taxoWriter, dir, taxoDir);
}
 
Example 2
Source File: test.java    From vscode-extension with MIT License 5 votes vote down vote up
private IndexWriterConfig getIndexWriterConfig() {
    final IndexWriterConfig iwc = new IndexWriterConfig(engineConfig.getAnalyzer());
    iwc.setCommitOnClose(false); // we by default don't commit on close
    iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
    iwc.setIndexDeletionPolicy(combinedDeletionPolicy);
    // with tests.verbose, lucene sets this up: plumb to align with filesystem stream
    boolean verbose = false;
    try {
        verbose = Boolean.parseBoolean(System.getProperty("tests.verbose"));
    } catch (Exception ignore) {
    }
    iwc.setInfoStream(verbose ? InfoStream.getDefault() : new LoggerInfoStream(logger));
    iwc.setMergeScheduler(mergeScheduler);
    // Give us the opportunity to upgrade old segments while performing
    // background merges
    MergePolicy mergePolicy = config().getMergePolicy();
    // always configure soft-deletes field so an engine with soft-deletes disabled can open a Lucene index with soft-deletes.
    iwc.setSoftDeletesField(Lucene.SOFT_DELETES_FIELD);
    if (softDeleteEnabled) {
        mergePolicy = new RecoverySourcePruneMergePolicy(SourceFieldMapper.RECOVERY_SOURCE_NAME, softDeletesPolicy::getRetentionQuery,
            new SoftDeletesRetentionMergePolicy(Lucene.SOFT_DELETES_FIELD, softDeletesPolicy::getRetentionQuery, mergePolicy));
    }
    iwc.setMergePolicy(new ElasticsearchMergePolicy(mergePolicy));
    iwc.setSimilarity(engineConfig.getSimilarity());
    iwc.setRAMBufferSizeMB(engineConfig.getIndexingBufferSize().getMbFrac());
    iwc.setCodec(engineConfig.getCodec());
    iwc.setUseCompoundFile(true); // always use compound on flush - reduces # of file-handles on refresh
    if (config().getIndexSort() != null) {
        iwc.setIndexSort(config().getIndexSort());
    }
    return iwc;
}
 
Example 3
Source File: SolrIndexConfig.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
public IndexWriterConfig toIndexWriterConfig(SolrCore core) throws IOException {
  IndexSchema schema = core.getLatestSchema();
  IndexWriterConfig iwc = new IndexWriterConfig(new DelayedSchemaAnalyzer(core));
  if (maxBufferedDocs != -1)
    iwc.setMaxBufferedDocs(maxBufferedDocs);

  if (ramBufferSizeMB != -1)
    iwc.setRAMBufferSizeMB(ramBufferSizeMB);

  if (ramPerThreadHardLimitMB != -1) {
    iwc.setRAMPerThreadHardLimitMB(ramPerThreadHardLimitMB);
  }

  iwc.setSimilarity(schema.getSimilarity());
  MergePolicy mergePolicy = buildMergePolicy(core.getResourceLoader(), schema);
  iwc.setMergePolicy(mergePolicy);
  MergeScheduler mergeScheduler = buildMergeScheduler(core.getResourceLoader());
  iwc.setMergeScheduler(mergeScheduler);
  iwc.setInfoStream(infoStream);

  if (mergePolicy instanceof SortingMergePolicy) {
    Sort indexSort = ((SortingMergePolicy) mergePolicy).getSort();
    iwc.setIndexSort(indexSort);
  }

  iwc.setUseCompoundFile(useCompoundFile);

  if (mergedSegmentWarmerInfo != null) {
    // TODO: add infostream -> normal logging system (there is an issue somewhere)
    @SuppressWarnings({"rawtypes"})
    IndexReaderWarmer warmer = core.getResourceLoader().newInstance(mergedSegmentWarmerInfo.className,
                                                                      IndexReaderWarmer.class,
                                                                      null,
                                                                      new Class[] { InfoStream.class },
                                                                      new Object[] { iwc.getInfoStream() });
    iwc.setMergedSegmentWarmer(warmer);
  }

  return iwc;
}
 
Example 4
Source File: Lucene.java    From uncc2014watsonsim with GNU General Public License v2.0 5 votes vote down vote up
public Lucene(Path path) throws IOException {
	/* Setup Lucene */
       Directory dir = FSDirectory.open(path);
       // here we are using a standard analyzer, there are a lot of analyzers available to our use.
       Analyzer analyzer = new StandardAnalyzer();
       IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
       //this mode by default overwrites the previous index, not a very good option in real usage
       iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
       iwc.setSimilarity(new BM25Similarity());
       index = new IndexWriter(dir, iwc);
}
 
Example 5
Source File: LuceneTermQueryBuilderTest.java    From querqy with Apache License 2.0 5 votes vote down vote up
@Test
public void testThatQueryUsesTermButNoFieldBoost() throws Exception {

    Analyzer analyzer = new StandardAnalyzer();

    Directory directory = new ByteBuffersDirectory();
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    config.setSimilarity(new ClassicSimilarity());
    IndexWriter indexWriter = new IndexWriter(directory, config);


    TestUtil.addNumDocsWithTextField("f1", "v1 v1", indexWriter, 4);
    TestUtil.addNumDocsWithTextField("f1", "v2", indexWriter, 1);

    indexWriter.close();

    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
    indexSearcher.setSimilarity(new ClassicSimilarity());


    final TermQuery termQuery = new LuceneTermQueryBuilder()
            .createTermQuery(new Term("f1", "v1"), new ConstantFieldBoost(3f));
    final Term term = termQuery.getTerm();
    assertEquals("f1", term.field());
    assertEquals("v1", term.text());

    TopDocs topDocs = indexSearcher.search(termQuery, 10);

    final Weight weight = termQuery.createWeight(indexSearcher, ScoreMode.COMPLETE, 4.5f);
    final Explanation explain = weight.explain(indexReader.getContext().leaves().get(0), topDocs.scoreDocs[0].doc);

    String explainText = explain.toString();

    assertTrue(explainText.contains("4.5 = boost")); // 4.5 (query) but ignore field boost
    assertTrue(explainText.contains("4 = docFreq")); // 4 * v1
    assertTrue(explainText.contains("2.0 = freq")); // 2 * v1 in field
}
 
Example 6
Source File: LuceneIndex.java    From rdf4j with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 * Method produces {@link IndexWriterConfig} using settings.
 *
 * @return
 */
private IndexWriterConfig getIndexWriterConfig() {
	IndexWriterConfig cnf = new IndexWriterConfig(analyzer);
	cnf.setSimilarity(similarity);
	return cnf;
}
 
Example 7
Source File: LuceneTranslationMemory.java    From modernmt with Apache License 2.0 4 votes vote down vote up
public LuceneTranslationMemory(Directory directory, DocumentBuilder documentBuilder, QueryBuilder queryBuilder, Rescorer rescorer, AnalyzerFactory analyzerFactory, int minQuerySize) throws IOException {
    this.indexDirectory = directory;
    this.queryBuilder = queryBuilder;
    this.rescorer = rescorer;
    this.documentBuilder = documentBuilder;
    this.analyzerFactory = analyzerFactory;
    this.shortQueryAnalyzer = analyzerFactory.createShortQueryAnalyzer();
    this.longQueryAnalyzer = analyzerFactory.createLongQueryAnalyzer();
    this.minQuerySize = minQuerySize;

    // Index writer setup
    IndexWriterConfig indexConfig = new IndexWriterConfig(Version.LUCENE_4_10_4, new DelegatingAnalyzerWrapper(PER_FIELD_REUSE_STRATEGY) {
        @Override
        protected Analyzer getWrappedAnalyzer(String fieldName) {
            if (documentBuilder.isHashField(fieldName))
                return analyzerFactory.createHashAnalyzer();
            else
                return analyzerFactory.createContentAnalyzer();
        }
    });

    indexConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    indexConfig.setSimilarity(analyzerFactory.createSimilarity());

    this.indexWriter = new IndexWriter(this.indexDirectory, indexConfig);

    // Ensure index exists
    if (!DirectoryReader.indexExists(directory))
        this.indexWriter.commit();

    // Read channels status
    IndexSearcher searcher = this.getIndexSearcher();

    Query query = this.queryBuilder.getChannels(this.documentBuilder);
    TopDocs docs = searcher.search(query, 1);

    if (docs.scoreDocs.length > 0) {
        Document channelsDocument = searcher.doc(docs.scoreDocs[0].doc);
        this.channels = this.documentBuilder.asChannels(channelsDocument);
    } else {
        this.channels = new HashMap<>();
    }
}
 
Example 8
Source File: DependentTermQueryBuilderTest.java    From querqy with Apache License 2.0 4 votes vote down vote up
@Test
public void testCreateWeight() throws Exception {

    Analyzer analyzer = new StandardAnalyzer();

    Directory directory = new ByteBuffersDirectory();
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    config.setSimilarity(new ClassicSimilarity());
    IndexWriter indexWriter = new IndexWriter(directory, config);

    TestUtil.addNumDocsWithTextField("f1", "v1", indexWriter, 4);
    TestUtil.addNumDocsWithTextField("f2", "v1 v1", indexWriter, 1);

    indexWriter.close();

    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
    indexSearcher.setSimilarity(new ClassicSimilarity());


    DocumentFrequencyCorrection dfc = new DocumentFrequencyCorrection();

    Term qTerm1 = new Term("f1", "v1");
    Term qTerm2 = new Term("f2", "v1");
    dfc.newClause();
    dfc.prepareTerm(qTerm1);
    dfc.prepareTerm(qTerm2);
    dfc.finishedUserQuery();

    DependentTermQueryBuilder.DependentTermQuery query1 = new DependentTermQueryBuilder(dfc)
            .createTermQuery(qTerm1, fieldBoost1);
    DependentTermQueryBuilder.DependentTermQuery query2 = new DependentTermQueryBuilder(dfc)
            .createTermQuery(qTerm2, fieldBoost2);


    TopDocs topDocs = indexSearcher.search(query2, 10);

    final Weight weight2 = query2.createWeight(indexSearcher, ScoreMode.COMPLETE, 4.5f);
    final Explanation explain = weight2.explain(indexReader.leaves().get(0), topDocs.scoreDocs[0].doc);

    String explainText = explain.toString();
    assertTrue(explainText.contains("9.0 = boost")); // 4.5 (query) * 2.0 (field)
    assertTrue(explainText.contains("4 = docFreq")); // 4 * df of f1:v1
    assertTrue(explainText.contains("2.0 = freq")); // don't use tf

    indexReader.close();
    directory.close();
    analyzer.close();

}
 
Example 9
Source File: SimilarityTermQueryBuilderTest.java    From querqy with Apache License 2.0 2 votes vote down vote up
@Test
public void testCreateWeight() throws Exception {

    Analyzer analyzer = new StandardAnalyzer();

    Directory directory = new ByteBuffersDirectory();
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    config.setSimilarity(new ClassicSimilarity());
    IndexWriter indexWriter = new IndexWriter(directory, config);


    TestUtil.addNumDocsWithTextField("f1", "v1 v1", indexWriter, 4);
    TestUtil.addNumDocsWithTextField("f1", "v2", indexWriter, 1);

    indexWriter.close();

    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
    indexSearcher.setSimilarity(new ClassicSimilarity());


    Term term = new Term("f1", "v1");

    SimilarityTermQuery query = new SimilarityTermQueryBuilder().createTermQuery(term, fieldBoost2);

    TopDocs topDocs = indexSearcher.search(query, 10);

    final Weight weight = query.createWeight(indexSearcher, ScoreMode.COMPLETE, 4.5f);
    final Explanation explain = weight.explain(indexReader.getContext().leaves().get(0), topDocs.scoreDocs[0].doc);

    String explainText = explain.toString();

    assertTrue(explainText.contains("9.0 = boost")); // 4.5 (query) * 2.0 (field)
    assertTrue(explainText.contains("4 = docFreq")); // 4 * v1
    assertTrue(explainText.contains("2.0 = freq")); // 2 * v1 in field

    indexReader.close();
    directory.close();
    analyzer.close();

}
 
Example 10
Source File: DependentTermQueryBuilderTest.java    From querqy with Apache License 2.0 2 votes vote down vote up
@Test
public void testPostingsVsMaxScore() throws Exception {

    Analyzer analyzer = new StandardAnalyzer();

    Directory directory = new ByteBuffersDirectory();
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    config.setSimilarity(new ClassicSimilarity());
    IndexWriter indexWriter = new IndexWriter(directory, config);

    TestUtil.addNumDocsWithTextField("f1", "v1", indexWriter, 1);
    TestUtil.addNumDocsWithTextField("f2", "v1 v2", indexWriter, 1);


    indexWriter.close();

    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
    indexSearcher.setSimilarity(new ClassicSimilarity());


    DocumentFrequencyCorrection dfc = new DocumentFrequencyCorrection();

    Term qTerm1 = new Term("f2", "v1");
    Term qTerm2 = new Term("f2", "v2");
    dfc.newClause();
    dfc.prepareTerm(qTerm1);
    dfc.newClause();
    dfc.prepareTerm(qTerm2);
    dfc.finishedUserQuery();


    DependentTermQueryBuilder.DependentTermQuery query1 = new DependentTermQueryBuilder(dfc)
            .createTermQuery(qTerm1, fieldBoost1);
    DependentTermQueryBuilder.DependentTermQuery query2 = new DependentTermQueryBuilder(dfc)
            .createTermQuery(qTerm2, fieldBoost2);


    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    builder.add(query1, BooleanClause.Occur.SHOULD);
    builder.add(query2, BooleanClause.Occur.SHOULD);
    builder.setMinimumNumberShouldMatch(2);

    BooleanQuery bq = builder.build();

    // Query execution will call org.apache.lucene.search.Scorer.getMaxScore which might consume
    // the postingsEnum so that we don't get any hit
    TopDocs topDocs = indexSearcher.search(bq, 10);
    assertEquals(1, topDocs.scoreDocs.length);



}