Java Code Examples for org.apache.lucene.index.IndexWriterConfig#setIndexSort()

The following examples show how to use org.apache.lucene.index.IndexWriterConfig#setIndexSort(). You can vote up the examples you like or vote down the ones you don't, and you can go to the original project or source file by following the links above each example. Related API usage is listed in the sidebar.
Example 1
Source File: TestIndexSortSortedNumericDocValuesRangeQuery.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Indexes one document holding the value 42 in "field", with the index sorted
 * on that same field, and checks the hit counts of queries whose bounds
 * include or exclude 42.
 *
 * @param reverse whether the index sort on "field" is reversed
 */
private void testIndexSortDocValuesWithSingleValue(boolean reverse) throws IOException {
  final Directory directory = newDirectory();

  final IndexWriterConfig config = new IndexWriterConfig(new MockAnalyzer(random()));
  config.setIndexSort(new Sort(new SortedNumericSortField("field", SortField.Type.LONG, reverse)));
  final RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, config);

  indexWriter.addDocument(createDocument("field", 42));

  final DirectoryReader indexReader = indexWriter.getReader();
  final IndexSearcher indexSearcher = newSearcher(indexReader);

  // Bounds that include 42 match the single document.
  assertEquals(1, indexSearcher.count(createQuery("field", 42, 43)));
  assertEquals(1, indexSearcher.count(createQuery("field", 42, 42)));
  // Bounds on either side of 42 match nothing.
  assertEquals(0, indexSearcher.count(createQuery("field", 41, 41)));
  assertEquals(0, indexSearcher.count(createQuery("field", 43, 43)));

  indexWriter.close();
  indexReader.close();
  directory.close();
}
 
Example 2
Source File: TestIndexSortSortedNumericDocValuesRangeQuery.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that the index sort optimization stays off when the index is
 * sorted on a field other than the one the documents are indexed under.
 */
public void testIndexSortOnWrongField() throws Exception {
  final Directory directory = newDirectory();

  final IndexWriterConfig config = new IndexWriterConfig(new MockAnalyzer(random()));
  // Sort on "other-field" while the only document carries "field".
  config.setIndexSort(new Sort(new SortedNumericSortField("other-field", SortField.Type.LONG)));

  final RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, config);
  indexWriter.addDocument(createDocument("field", 0));

  testIndexSortOptimizationDeactivated(indexWriter);

  indexWriter.close();
  directory.close();
}
 
Example 3
Source File: TestIndexSortSortedNumericDocValuesRangeQuery.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that the index sort optimization stays off when a document
 * carries more than one value for the sorted field.
 */
public void testMultiDocValues() throws Exception {
  final Directory directory = newDirectory();

  final IndexWriterConfig config = new IndexWriterConfig(new MockAnalyzer(random()));
  config.setIndexSort(new Sort(new SortedNumericSortField("field", SortField.Type.LONG)));
  final RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, config);

  // A single document with two values in the sorted field.
  final Document multiValued = new Document();
  multiValued.add(new SortedNumericDocValuesField("field", 0));
  multiValued.add(new SortedNumericDocValuesField("field", 10));
  indexWriter.addDocument(multiValued);

  testIndexSortOptimizationDeactivated(indexWriter);

  indexWriter.close();
  directory.close();
}
 
Example 4
Source File: test.java    From vscode-extension with MIT License 5 votes vote down vote up
/**
 * Assembles the {@link IndexWriterConfig} used to open the writer: deletion policy,
 * info stream, merge scheduler and policy, soft-deletes field, similarity, RAM buffer,
 * codec, compound-file usage and, when one is configured, the index sort.
 *
 * @return the fully populated writer configuration
 */
private IndexWriterConfig getIndexWriterConfig() {
    final IndexWriterConfig writerConfig = new IndexWriterConfig(engineConfig.getAnalyzer());
    writerConfig.setCommitOnClose(false); // closing the writer does not commit by default
    writerConfig.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
    writerConfig.setIndexDeletionPolicy(combinedDeletionPolicy);

    // When tests.verbose is set, use Lucene's default info stream; otherwise route to the logger.
    boolean verboseTests = false;
    try {
        verboseTests = Boolean.parseBoolean(System.getProperty("tests.verbose"));
    } catch (Exception ignore) {
        // best effort: an unreadable property leaves verbose output off
    }
    final InfoStream stream;
    if (verboseTests) {
        stream = InfoStream.getDefault();
    } else {
        stream = new LoggerInfoStream(logger);
    }
    writerConfig.setInfoStream(stream);
    writerConfig.setMergeScheduler(mergeScheduler);

    // Gives background merges the opportunity to upgrade old segments.
    MergePolicy policy = config().getMergePolicy();
    // The soft-deletes field is always configured so that an engine with soft-deletes
    // disabled can still open a Lucene index that has soft-deletes.
    writerConfig.setSoftDeletesField(Lucene.SOFT_DELETES_FIELD);
    if (softDeleteEnabled) {
        final MergePolicy retaining =
            new SoftDeletesRetentionMergePolicy(Lucene.SOFT_DELETES_FIELD, softDeletesPolicy::getRetentionQuery, policy);
        policy = new RecoverySourcePruneMergePolicy(
            SourceFieldMapper.RECOVERY_SOURCE_NAME, softDeletesPolicy::getRetentionQuery, retaining);
    }
    writerConfig.setMergePolicy(new ElasticsearchMergePolicy(policy));

    writerConfig.setSimilarity(engineConfig.getSimilarity());
    writerConfig.setRAMBufferSizeMB(engineConfig.getIndexingBufferSize().getMbFrac());
    writerConfig.setCodec(engineConfig.getCodec());
    // Compound files on flush reduce the number of file handles needed on refresh.
    writerConfig.setUseCompoundFile(true);

    if (config().getIndexSort() != null) {
        writerConfig.setIndexSort(config().getIndexSort());
    }
    return writerConfig;
}
 
Example 5
Source File: AnalyzingInfixSuggester.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the writer configuration for the given analyzer and open mode.
 * Override this to customize index settings, e.g. which codec to use.
 *
 * @param indexAnalyzer analyzer handed to the new {@link IndexWriterConfig}
 * @param openMode open mode applied to the configuration
 * @return the configuration, with {@code SORT} installed as its index sort
 */
protected IndexWriterConfig getIndexWriterConfig(Analyzer indexAnalyzer, IndexWriterConfig.OpenMode openMode) {
  // With an index sort, all merged segments are sorted at merge time,
  // which allows per-segment early termination when they are searched.
  return new IndexWriterConfig(indexAnalyzer)
      .setOpenMode(openMode)
      .setIndexSort(SORT);
}
 
Example 6
Source File: TestIndexSortSortedNumericDocValuesRangeQuery.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Indexes four documents with a value in "field" and three documents that only
 * carry "other-field", under an index sort on "field" with a random missing
 * value, then checks that queries on "field" count only the first four.
 */
public void testIndexSortMissingValues() throws Exception {
  final Directory directory = newDirectory();

  final IndexWriterConfig config = new IndexWriterConfig(new MockAnalyzer(random()));
  final SortField fieldSort = new SortedNumericSortField("field", SortField.Type.LONG);
  fieldSort.setMissingValue(random().nextLong());
  config.setIndexSort(new Sort(fieldSort));
  final RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, config);

  // Documents that have a value in the sorted field.
  for (int value : new int[] {-80, -5, 0, 35}) {
    indexWriter.addDocument(createDocument("field", value));
  }

  // Documents that have no value in the sorted field.
  for (int value : new int[] {0, 10, 20}) {
    indexWriter.addDocument(createDocument("other-field", value));
  }

  final DirectoryReader indexReader = indexWriter.getReader();
  final IndexSearcher indexSearcher = newSearcher(indexReader);

  assertEquals(2, indexSearcher.count(createQuery("field", -70, 0)));
  assertEquals(2, indexSearcher.count(createQuery("field", -2, 35)));

  assertEquals(4, indexSearcher.count(createQuery("field", -80, 35)));
  assertEquals(4, indexSearcher.count(createQuery("field", Long.MIN_VALUE, Long.MAX_VALUE)));

  indexWriter.close();
  indexReader.close();
  directory.close();
}
 
Example 7
Source File: SolrIndexConfig.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Translates this index configuration into a Lucene {@link IndexWriterConfig} for the given core.
 * Numeric limits left at -1 are not applied to the writer configuration.
 *
 * @param core the core supplying the latest schema and the resource loader
 * @return the populated writer configuration
 * @throws IOException declared by this method; the visible body does not show which call raises it
 */
public IndexWriterConfig toIndexWriterConfig(SolrCore core) throws IOException {
  final IndexSchema latestSchema = core.getLatestSchema();
  final IndexWriterConfig writerConfig = new IndexWriterConfig(new DelayedSchemaAnalyzer(core));

  // -1 means "not configured": the setter is skipped in that case.
  if (maxBufferedDocs != -1) {
    writerConfig.setMaxBufferedDocs(maxBufferedDocs);
  }
  if (ramBufferSizeMB != -1) {
    writerConfig.setRAMBufferSizeMB(ramBufferSizeMB);
  }
  if (ramPerThreadHardLimitMB != -1) {
    writerConfig.setRAMPerThreadHardLimitMB(ramPerThreadHardLimitMB);
  }

  writerConfig.setSimilarity(latestSchema.getSimilarity());

  final MergePolicy policy = buildMergePolicy(core.getResourceLoader(), latestSchema);
  writerConfig.setMergePolicy(policy);
  writerConfig.setMergeScheduler(buildMergeScheduler(core.getResourceLoader()));
  writerConfig.setInfoStream(infoStream);

  // A sorting merge policy also dictates the writer's index sort.
  if (policy instanceof SortingMergePolicy) {
    writerConfig.setIndexSort(((SortingMergePolicy) policy).getSort());
  }

  writerConfig.setUseCompoundFile(useCompoundFile);

  if (mergedSegmentWarmerInfo != null) {
    // TODO: add infostream -> normal logging system (there is an issue somewhere)
    @SuppressWarnings({"rawtypes"})
    IndexReaderWarmer segmentWarmer = core.getResourceLoader().newInstance(
        mergedSegmentWarmerInfo.className,
        IndexReaderWarmer.class,
        null,
        new Class[] { InfoStream.class },
        new Object[] { writerConfig.getInfoStream() });
    writerConfig.setMergedSegmentWarmer(segmentWarmer);
  }

  return writerConfig;
}
 
Example 8
Source File: TestIndexSortSortedNumericDocValuesRangeQuery.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Randomized check that queries built by {@code createQuery} on the "dv" doc values
 * field return the same hits as {@link LongPoint} range queries on the "idx" field,
 * which is indexed with identical values. The index is sorted on "dv".
 */
public void testSameHitsAsPointRangeQuery() throws IOException {
  final int rounds = atLeast(10);
  for (int round = 0; round < rounds; ++round) {
    final Directory directory = newDirectory();

    final IndexWriterConfig config = new IndexWriterConfig(new MockAnalyzer(random()));
    final boolean reverse = random().nextBoolean();
    final SortField dvSort = new SortedNumericSortField("dv", SortField.Type.LONG, reverse);
    dvSort.setMissingValue(random().nextLong());
    config.setIndexSort(new Sort(dvSort));

    final RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, config);

    final int docCount = atLeast(100);
    for (int docIndex = 0; docIndex < docCount; ++docIndex) {
      final Document document = new Document();
      // Each document gets zero or one value, stored both as doc values and as a point.
      final int valueCount = TestUtil.nextInt(random(), 0, 1);
      for (int valueIndex = 0; valueIndex < valueCount; ++valueIndex) {
        final long value = TestUtil.nextLong(random(), -100, 10000);
        document.add(new SortedNumericDocValuesField("dv", value));
        document.add(new LongPoint("idx", value));
      }
      indexWriter.addDocument(document);
    }

    // Randomly delete a slice of the documents.
    if (random().nextBoolean()) {
      indexWriter.deleteDocuments(LongPoint.newRangeQuery("idx", 0L, 10L));
    }

    final IndexReader indexReader = indexWriter.getReader();
    final IndexSearcher indexSearcher = newSearcher(indexReader, false);
    indexWriter.close();

    for (int queryIndex = 0; queryIndex < 100; ++queryIndex) {
      // Each bound is either open-ended or a random value.
      final long min;
      if (random().nextBoolean()) {
        min = Long.MIN_VALUE;
      } else {
        min = TestUtil.nextLong(random(), -100, 10000);
      }
      final long max;
      if (random().nextBoolean()) {
        max = Long.MAX_VALUE;
      } else {
        max = TestUtil.nextLong(random(), -100, 10000);
      }

      final Query pointQuery = LongPoint.newRangeQuery("idx", min, max);
      final Query docValuesQuery = createQuery("dv", min, max);
      assertSameHits(indexSearcher, pointQuery, docValuesQuery, false);
    }

    indexReader.close();
    directory.close();
  }
}
 
Example 9
Source File: TestIndexSortSortedNumericDocValuesRangeQuery.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Indexes six documents (values -80, -5, 0, 0, 30, 35) with the index sorted on
 * "field" and checks hit counts for single-value ranges, ranges at both ends of
 * the value space, ranges around the duplicated value, and ranges covering everything.
 *
 * @param reverse whether the index sort on "field" is reversed
 */
public void testIndexSortDocValuesWithEvenLength(boolean reverse) throws Exception {
  final Directory directory = newDirectory();

  final IndexWriterConfig config = new IndexWriterConfig(new MockAnalyzer(random()));
  config.setIndexSort(new Sort(new SortedNumericSortField("field", SortField.Type.LONG, reverse)));
  final RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, config);

  for (int value : new int[] {-80, -5, 0, 0, 30, 35}) {
    indexWriter.addDocument(createDocument("field", value));
  }

  final DirectoryReader indexReader = indexWriter.getReader();
  final IndexSearcher indexSearcher = newSearcher(indexReader);

  // Single-value ranges that hit an indexed value.
  assertEquals(1, indexSearcher.count(createQuery("field", -80, -80)));
  assertEquals(1, indexSearcher.count(createQuery("field", -5, -5)));
  assertEquals(2, indexSearcher.count(createQuery("field", 0, 0)));
  assertEquals(1, indexSearcher.count(createQuery("field", 30, 30)));
  assertEquals(1, indexSearcher.count(createQuery("field", 35, 35)));

  // Single-value ranges that hit nothing.
  assertEquals(0, indexSearcher.count(createQuery("field", -90, -90)));
  assertEquals(0, indexSearcher.count(createQuery("field", 5, 5)));
  assertEquals(0, indexSearcher.count(createQuery("field", 40, 40)));

  // Lower end of the document value range.
  assertEquals(2, indexSearcher.count(createQuery("field", -90, -4)));
  assertEquals(2, indexSearcher.count(createQuery("field", -80, -4)));
  assertEquals(1, indexSearcher.count(createQuery("field", -70, -4)));
  assertEquals(2, indexSearcher.count(createQuery("field", -80, -5)));

  // Upper end of the document value range.
  assertEquals(1, indexSearcher.count(createQuery("field", 25, 34)));
  assertEquals(2, indexSearcher.count(createQuery("field", 25, 35)));
  assertEquals(2, indexSearcher.count(createQuery("field", 25, 36)));
  assertEquals(2, indexSearcher.count(createQuery("field", 30, 35)));

  // Ranges touching the value that occurs twice.
  assertEquals(2, indexSearcher.count(createQuery("field", -4, 4)));
  assertEquals(2, indexSearcher.count(createQuery("field", -4, 0)));
  assertEquals(2, indexSearcher.count(createQuery("field", 0, 4)));
  assertEquals(3, indexSearcher.count(createQuery("field", 0, 30)));

  // Ranges covering every document.
  assertEquals(6, indexSearcher.count(createQuery("field", -80, 35)));
  assertEquals(6, indexSearcher.count(createQuery("field", -90, 40)));

  indexWriter.close();
  indexReader.close();
  directory.close();
}
 
Example 10
Source File: TestIndexSortSortedNumericDocValuesRangeQuery.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Indexes seven documents (values -80, -5, 0, 0, 5, 30, 35) with the index sorted on
 * "field" and checks hit counts for single-value ranges, ranges at both ends of
 * the value space, ranges around the duplicated value, and ranges covering everything.
 *
 * @param reverse whether the index sort on "field" is reversed
 */
public void testIndexSortDocValuesWithOddLength(boolean reverse) throws Exception {
  final Directory directory = newDirectory();

  final IndexWriterConfig config = new IndexWriterConfig(new MockAnalyzer(random()));
  config.setIndexSort(new Sort(new SortedNumericSortField("field", SortField.Type.LONG, reverse)));
  final RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, config);

  for (int value : new int[] {-80, -5, 0, 0, 5, 30, 35}) {
    indexWriter.addDocument(createDocument("field", value));
  }

  final DirectoryReader indexReader = indexWriter.getReader();
  final IndexSearcher indexSearcher = newSearcher(indexReader);

  // Single-value ranges that hit an indexed value.
  assertEquals(1, indexSearcher.count(createQuery("field", -80, -80)));
  assertEquals(1, indexSearcher.count(createQuery("field", -5, -5)));
  assertEquals(2, indexSearcher.count(createQuery("field", 0, 0)));
  assertEquals(1, indexSearcher.count(createQuery("field", 5, 5)));
  assertEquals(1, indexSearcher.count(createQuery("field", 30, 30)));
  assertEquals(1, indexSearcher.count(createQuery("field", 35, 35)));

  // Single-value ranges that hit nothing.
  assertEquals(0, indexSearcher.count(createQuery("field", -90, -90)));
  assertEquals(0, indexSearcher.count(createQuery("field", 6, 6)));
  assertEquals(0, indexSearcher.count(createQuery("field", 40, 40)));

  // Lower end of the document value range.
  assertEquals(2, indexSearcher.count(createQuery("field", -90, -4)));
  assertEquals(2, indexSearcher.count(createQuery("field", -80, -4)));
  assertEquals(1, indexSearcher.count(createQuery("field", -70, -4)));
  assertEquals(2, indexSearcher.count(createQuery("field", -80, -5)));

  // Upper end of the document value range.
  assertEquals(1, indexSearcher.count(createQuery("field", 25, 34)));
  assertEquals(2, indexSearcher.count(createQuery("field", 25, 35)));
  assertEquals(2, indexSearcher.count(createQuery("field", 25, 36)));
  assertEquals(2, indexSearcher.count(createQuery("field", 30, 35)));

  // Ranges touching the value that occurs twice.
  assertEquals(2, indexSearcher.count(createQuery("field", -4, 4)));
  assertEquals(2, indexSearcher.count(createQuery("field", -4, 0)));
  assertEquals(2, indexSearcher.count(createQuery("field", 0, 4)));
  assertEquals(4, indexSearcher.count(createQuery("field", 0, 30)));

  // Ranges covering every document.
  assertEquals(7, indexSearcher.count(createQuery("field", -80, 35)));
  assertEquals(7, indexSearcher.count(createQuery("field", -90, 40)));

  indexWriter.close();
  indexReader.close();
  directory.close();
}
 
Example 11
Source File: TestTopFieldCollectorEarlyTermination.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a randomized index sorted by {@code sort} and opens a reader on it.
 * Assigns {@code dir}, {@code numDocs}, {@code terms}, {@code iw} and {@code reader},
 * none of which are declared in this method.
 *
 * @param singleSortedSegment if {@code true}, the index is force-merged down to one segment;
 *        otherwise it is force-merged to {@code FORCE_MERGE_MAX_SEGMENT_COUNT} segments
 *        on a random coin flip, or left as is
 * @throws IOException declared by this method; presumably raised by the index operations
 */
private void createRandomIndex(boolean singleSortedSegment) throws IOException {
  dir = newDirectory();
  numDocs = atLeast(150);
  // Build a pool of distinct random terms, at most one per five documents.
  final int numTerms = TestUtil.nextInt(random(), 1, numDocs / 5);
  Set<String> randomTerms = new HashSet<>();
  while (randomTerms.size() < numTerms) {
    randomTerms.add(TestUtil.randomSimpleString(random()));
  }
  terms = new ArrayList<>(randomTerms);
  // One seed is shared by the analyzer and the writer, each getting its own Random.
  final long seed = random().nextLong();
  final IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(new Random(seed)));
  if (iwc.getMergePolicy() instanceof MockRandomMergePolicy) {
    // MockRandomMP randomly wraps the leaf readers which makes merging angry
    iwc.setMergePolicy(newTieredMergePolicy());
  }
  iwc.setMergeScheduler(new SerialMergeScheduler()); // for reproducible tests
  iwc.setIndexSort(sort);
  iw = new RandomIndexWriter(new Random(seed), dir, iwc);
  iw.setDoRandomForceMerge(false); // don't do this, it may happen anyway with MockRandomMP
  for (int i = 0; i < numDocs; ++i) {
    final Document doc = randomDocument();
    iw.addDocument(doc);
    // Always commit at the midpoint; otherwise commit with a 1-in-8 chance,
    // except right after the last document.
    if (i == numDocs / 2 || (i != numDocs - 1 && random().nextInt(8) == 0)) {
      iw.commit();
    }
    // With a 1-in-15 chance, delete the documents matching a random term in field "s".
    if (random().nextInt(15) == 0) {
      final String term = RandomPicks.randomFrom(random(), terms);
      iw.deleteDocuments(new Term("s", term));
    }
  }
  if (singleSortedSegment) {
    iw.forceMerge(1);
  }
  else if (random().nextBoolean()) {
    iw.forceMerge(FORCE_MERGE_MAX_SEGMENT_COUNT);
  }
  reader = iw.getReader();
  // If the reader reports zero documents, add an empty document and reopen
  // so the reader is never empty.
  if (reader.numDocs() == 0) {
    iw.addDocument(new Document());
    reader.close();
    reader = iw.getReader();
  }
}