Java Code Examples for org.apache.lucene.index.IndexWriterConfig#setMaxBufferedDocs()

The following examples show how to use org.apache.lucene.index.IndexWriterConfig#setMaxBufferedDocs(). All of the examples below are taken from the lucene-solr project; the originating source file and license are noted above each one.
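Before the individual examples, here is a minimal, self-contained sketch of the method in context. setMaxBufferedDocs() sets how many documents IndexWriter buffers in memory before flushing them as a new segment; a flush can also be triggered by RAM usage (setRAMBufferSizeMB()), and at least one of the two triggers must stay enabled. The class name, directory, analyzer, and field below are illustrative placeholders, not taken from any example on this page:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class SetMaxBufferedDocsSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = new ByteBuffersDirectory(); // in-memory directory, for illustration only
    IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
    // Flush a new segment once 100 documents are buffered in memory...
    iwc.setMaxBufferedDocs(100);
    // ...and disable the RAM-based trigger so that the doc-count trigger
    // is the only one in effect (at least one of the two must stay enabled).
    iwc.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH);
    try (IndexWriter w = new IndexWriter(dir, iwc)) {
      for (int i = 0; i < 1000; i++) {
        Document doc = new Document();
        doc.add(new TextField("content", "value " + i, Field.Store.NO));
        w.addDocument(doc); // roughly ten segments are flushed before the commit
      }
      w.commit();
    }
  }
}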
Example 1
Source File: BaseShapeTestCase.java    From lucene-solr with Apache License 2.0
private void verify(Object... shapes) throws Exception {
  IndexWriterConfig iwc = newIndexWriterConfig();
  iwc.setMergeScheduler(new SerialMergeScheduler());
  int mbd = iwc.getMaxBufferedDocs();
  if (mbd != -1 && mbd < shapes.length / 100) {
    iwc.setMaxBufferedDocs(shapes.length / 100);
  }
  Directory dir;
  if (shapes.length > 1000) {
    dir = newFSDirectory(createTempDir(getClass().getSimpleName()));
  } else {
    dir = newDirectory();
  }
  IndexWriter w = new IndexWriter(dir, iwc);

  // index random shapes
  indexRandomShapes(w, shapes);

  // query testing
  final IndexReader reader = DirectoryReader.open(w);
  // test random bbox queries
  verifyRandomQueries(reader, shapes);
  IOUtils.close(w, reader, dir);
}
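A note on the getMaxBufferedDocs() guard above: a return value of -1 means the doc-count flush trigger is disabled, so it is left alone; otherwise the randomized test configuration may have picked a very small value, and raising it to shapes.length / 100 caps the run at roughly one hundred flushes.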
 
Example 2
Source File: FunctionTestSetup.java    From lucene-solr with Apache License 2.0
protected static void createIndex(boolean doMultiSegment) throws Exception {
  if (VERBOSE) {
    System.out.println("TEST: setUp");
  }
  // prepare a small index with just a few documents.
  dir = newDirectory();
  anlzr = new MockAnalyzer(random());
  IndexWriterConfig iwc = newIndexWriterConfig(anlzr).setMergePolicy(newLogMergePolicy());
  if (doMultiSegment) {
    iwc.setMaxBufferedDocs(TestUtil.nextInt(random(), 2, 7));
  }
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
  // add docs out of natural ID order, to verify that result ordering really is by score
  int remaining = N_DOCS;
  boolean[] done = new boolean[N_DOCS];
  int i = 0;
  while (remaining > 0) {
    if (done[i]) {
      throw new Exception("to set up this test correctly N_DOCS=" + N_DOCS + " must be prime and greater than 2!");
    }
    addDoc(iw, i);
    done[i] = true;
    i = (i + 4) % N_DOCS;
    remaining--;
  }
  if (!doMultiSegment) {
    if (VERBOSE) {
      System.out.println("TEST: setUp full merge");
    }
    iw.forceMerge(1);
  }
  iw.close();
  if (VERBOSE) {
    System.out.println("TEST: setUp done close");
  }
}
 
Example 3
Source File: TestLucene80DocValuesFormat.java    From lucene-solr with Apache License 2.0
private IndexWriter createFastIndexWriter(Directory dir, int maxBufferedDocs) throws IOException {
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  conf.setMaxBufferedDocs(maxBufferedDocs);
  conf.setRAMBufferSizeMB(-1);
  conf.setMergePolicy(newLogMergePolicy(random().nextBoolean()));
  return new IndexWriter(dir, conf);
}
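The pairing in this helper is deliberate: setRAMBufferSizeMB(-1), i.e. IndexWriterConfig.DISABLE_AUTO_FLUSH, turns off the memory-based flush trigger, so segments are flushed strictly by document count. Lucene requires that at least one of the two triggers stays enabled, which is why only the RAM trigger is disabled here.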
 
Example 4
Source File: TestLucene80DocValuesFormat.java    From lucene-solr with Apache License 2.0
private void doTestSparseNumericBlocksOfVariousBitsPerValue(double density) throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  conf.setMaxBufferedDocs(atLeast(Lucene80DocValuesFormat.NUMERIC_BLOCK_SIZE));
  conf.setRAMBufferSizeMB(-1);
  conf.setMergePolicy(newLogMergePolicy(random().nextBoolean()));
  IndexWriter writer = new IndexWriter(dir, conf);
  Document doc = new Document();
  Field storedField = newStringField("stored", "", Field.Store.YES);
  Field dvField = new NumericDocValuesField("dv", 0);
  doc.add(storedField);
  doc.add(dvField);

  final int numDocs = atLeast(Lucene80DocValuesFormat.NUMERIC_BLOCK_SIZE*3);
  final LongSupplier longs = blocksOfVariousBPV();
  for (int i = 0; i < numDocs; i++) {
    if (random().nextDouble() > density) {
      writer.addDocument(new Document());
      continue;
    }
    long value = longs.getAsLong();
    storedField.setStringValue(Long.toString(value));
    dvField.setLongValue(value);
    writer.addDocument(doc);
  }

  writer.forceMerge(1);

  writer.close();
  
  // compare
  assertDVIterate(dir);
  assertDVAdvance(dir, 1); // Tests all jump-lengths from 1 to maxDoc (quite slow ~= 1 minute for 200K docs)

  dir.close();
}
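Setting maxBufferedDocs to atLeast(NUMERIC_BLOCK_SIZE) presumably keeps each flushed segment at least one full block of numeric doc values wide, so the block encoding under test is exercised even before the final forceMerge(1).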
 
Example 5
Source File: SolrIndexConfig.java    From lucene-solr with Apache License 2.0
public IndexWriterConfig toIndexWriterConfig(SolrCore core) throws IOException {
  IndexSchema schema = core.getLatestSchema();
  IndexWriterConfig iwc = new IndexWriterConfig(new DelayedSchemaAnalyzer(core));
  if (maxBufferedDocs != -1)
    iwc.setMaxBufferedDocs(maxBufferedDocs);

  if (ramBufferSizeMB != -1)
    iwc.setRAMBufferSizeMB(ramBufferSizeMB);

  if (ramPerThreadHardLimitMB != -1) {
    iwc.setRAMPerThreadHardLimitMB(ramPerThreadHardLimitMB);
  }

  iwc.setSimilarity(schema.getSimilarity());
  MergePolicy mergePolicy = buildMergePolicy(core.getResourceLoader(), schema);
  iwc.setMergePolicy(mergePolicy);
  MergeScheduler mergeScheduler = buildMergeScheduler(core.getResourceLoader());
  iwc.setMergeScheduler(mergeScheduler);
  iwc.setInfoStream(infoStream);

  if (mergePolicy instanceof SortingMergePolicy) {
    Sort indexSort = ((SortingMergePolicy) mergePolicy).getSort();
    iwc.setIndexSort(indexSort);
  }

  iwc.setUseCompoundFile(useCompoundFile);

  if (mergedSegmentWarmerInfo != null) {
    // TODO: add infostream -> normal logging system (there is an issue somewhere)
    @SuppressWarnings({"rawtypes"})
    IndexReaderWarmer warmer = core.getResourceLoader().newInstance(mergedSegmentWarmerInfo.className,
                                                                      IndexReaderWarmer.class,
                                                                      null,
                                                                      new Class[] { InfoStream.class },
                                                                      new Object[] { iwc.getInfoStream() });
    iwc.setMergedSegmentWarmer(warmer);
  }

  return iwc;
}
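In this Solr example the -1 comparisons act as "not configured" sentinels: each setter is invoked only when a value was actually supplied in the <indexConfig> section of solrconfig.xml; otherwise Lucene's own defaults are left in place.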
 
Example 6
Source File: TestOrdValues.java    From lucene-solr with Apache License 2.0
protected static void createIndex(boolean doMultiSegment) throws Exception {
  if (VERBOSE) {
    System.out.println("TEST: setUp");
  }
  // prepare a small index with just a few documents.
  dir = newDirectory();
  anlzr = new MockAnalyzer(random());
  IndexWriterConfig iwc = newIndexWriterConfig(anlzr).setMergePolicy(newLogMergePolicy());
  if (doMultiSegment) {
    iwc.setMaxBufferedDocs(TestUtil.nextInt(random(), 2, 7));
  }
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
  // add docs out of natural ID order, to verify that result ordering really is by score
  int remaining = N_DOCS;
  boolean[] done = new boolean[N_DOCS];
  int i = 0;
  while (remaining > 0) {
    if (done[i]) {
      throw new Exception("to set up this test correctly N_DOCS=" + N_DOCS + " must be prime and greater than 2!");
    }
    addDoc(iw, i);
    done[i] = true;
    i = (i + 4) % N_DOCS;
    remaining--;
  }
  if (!doMultiSegment) {
    if (VERBOSE) {
      System.out.println("TEST: setUp full merge");
    }
    iw.forceMerge(1);
  }
  iw.close();
  if (VERBOSE) {
    System.out.println("TEST: setUp done close");
  }
}
 
Example 7
Source File: TestSearcherTaxonomyManager.java    From lucene-solr with Apache License 2.0
public void testNRT() throws Exception {
  Directory dir = newDirectory();
  Directory taxoDir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
  // Don't allow tiny maxBufferedDocs; it can make this
  // test too slow:
  iwc.setMaxBufferedDocs(Math.max(500, iwc.getMaxBufferedDocs()));

  // MockRandom/AlcoholicMergePolicy are too slow:
  TieredMergePolicy tmp = new TieredMergePolicy();
  tmp.setFloorSegmentMB(.001);
  iwc.setMergePolicy(tmp);
  final IndexWriter w = new IndexWriter(dir, iwc);
  final DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(taxoDir);
  final FacetsConfig config = new FacetsConfig();
  config.setMultiValued("field", true);
  final AtomicBoolean stop = new AtomicBoolean();

  // How many unique facets to index before stopping:
  final int ordLimit = TEST_NIGHTLY ? 100000 : 6000;

  Thread indexer = new IndexerThread(w, config, tw, null, ordLimit, stop);

  final SearcherTaxonomyManager mgr = new SearcherTaxonomyManager(w, true, null, tw);

  Thread reopener = new Thread() {
      @Override
      public void run() {
        while (!stop.get()) {
          try {
            // Sleep for up to 20 msec:
            Thread.sleep(random().nextInt(20));

            if (VERBOSE) {
              System.out.println("TEST: reopen");
            }

            mgr.maybeRefresh();

            if (VERBOSE) {
              System.out.println("TEST: reopen done");
            }
          } catch (Exception ioe) {
            throw new RuntimeException(ioe);
          }
        }
      }
    };

  reopener.setName("reopener");
  reopener.start();

  indexer.setName("indexer");
  indexer.start();

  try {
    while (!stop.get()) {
      SearcherAndTaxonomy pair = mgr.acquire();
      try {
        //System.out.println("search maxOrd=" + pair.taxonomyReader.getSize());
        FacetsCollector sfc = new FacetsCollector();
        pair.searcher.search(new MatchAllDocsQuery(), sfc);
        Facets facets = getTaxonomyFacetCounts(pair.taxonomyReader, config, sfc);
        FacetResult result = facets.getTopChildren(10, "field");
        if (pair.searcher.getIndexReader().numDocs() > 0) { 
          //System.out.println(pair.taxonomyReader.getSize());
          assertTrue(result.childCount > 0);
          assertTrue(result.labelValues.length > 0);
        }

        //if (VERBOSE) {
        //System.out.println("TEST: facets=" + FacetTestUtils.toString(results.get(0)));
        //}
      } finally {
        mgr.release(pair);
      }
    }
  } finally {
    indexer.join();
    reopener.join();
  }

  if (VERBOSE) {
    System.out.println("TEST: now stop");
  }

  w.close();
  IOUtils.close(mgr, tw, taxoDir, dir);
}
 
Example 8
Source File: TestCompressingStoredFieldsFormat.java    From lucene-solr with Apache License 2.0
public void testDeletePartiallyWrittenFilesIfAbort() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig iwConf = newIndexWriterConfig(new MockAnalyzer(random()));
  iwConf.setMaxBufferedDocs(RandomNumbers.randomIntBetween(random(), 2, 30));
  iwConf.setCodec(getCodec());
  // disable CFS because this test checks file names
  iwConf.setMergePolicy(newLogMergePolicy(false));
  iwConf.setUseCompoundFile(false);

  // Cannot use RIW because this test wants CFS to stay off:
  IndexWriter iw = new IndexWriter(dir, iwConf);

  final Document validDoc = new Document();
  validDoc.add(new IntPoint("id", 0));
  validDoc.add(new StoredField("id", 0));
  iw.addDocument(validDoc);
  iw.commit();
  
  // make sure that #writeField will fail, to trigger an abort
  final Document invalidDoc = new Document();
  FieldType fieldType = new FieldType();
  fieldType.setStored(true);
  invalidDoc.add(new Field("invalid", fieldType) {
    
    @Override
    public String stringValue() {
      // TODO: really bad & scary that this causes IW to
      // abort the segment!!  We should fix this.
      return null;
    }
    
  });
  
  try {
    iw.addDocument(invalidDoc);
    iw.commit();
  } catch (IllegalArgumentException iae) {
    // expected
    assertEquals(iae, iw.getTragicException());
  }
  // Writer should be closed by tragedy
  assertFalse(iw.isOpen());
  dir.close();
}
 
Example 9
Source File: TestLucene80DocValuesFormat.java    From lucene-solr with Apache License 2.0
private void doTestSortedNumericBlocksOfVariousBitsPerValue(LongSupplier counts) throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  conf.setMaxBufferedDocs(atLeast(Lucene80DocValuesFormat.NUMERIC_BLOCK_SIZE));
  conf.setRAMBufferSizeMB(-1);
  conf.setMergePolicy(newLogMergePolicy(random().nextBoolean()));
  IndexWriter writer = new IndexWriter(dir, conf);
  
  final int numDocs = atLeast(Lucene80DocValuesFormat.NUMERIC_BLOCK_SIZE*3);
  final LongSupplier values = blocksOfVariousBPV();
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    
    int valueCount = (int) counts.getAsLong();
    long[] valueArray = new long[valueCount];
    for (int j = 0; j < valueCount; j++) {
      long value = values.getAsLong();
      valueArray[j] = value;
      doc.add(new SortedNumericDocValuesField("dv", value));
    }
    Arrays.sort(valueArray);
    for (int j = 0; j < valueCount; j++) {
      doc.add(new StoredField("stored", Long.toString(valueArray[j])));
    }
    writer.addDocument(doc);
    if (random().nextInt(31) == 0) {
      writer.commit();
    }
  }
  writer.forceMerge(1);

  writer.close();
  
  // compare
  DirectoryReader ir = DirectoryReader.open(dir);
  TestUtil.checkReader(ir);
  for (LeafReaderContext context : ir.leaves()) {
    LeafReader r = context.reader();
    SortedNumericDocValues docValues = DocValues.getSortedNumeric(r, "dv");
    for (int i = 0; i < r.maxDoc(); i++) {
      if (i > docValues.docID()) {
        docValues.nextDoc();
      }
      String[] expected = r.document(i).getValues("stored");
      if (i < docValues.docID()) {
        assertEquals(0, expected.length);
      } else {
        String[] actual = new String[docValues.docValueCount()];
        for (int j = 0; j < actual.length; j++) {
          actual[j] = Long.toString(docValues.nextValue());
        }
        assertArrayEquals(expected, actual);
      }
    }
  }
  ir.close();
  dir.close();
}
 
Example 10
Source File: TestPerFieldPostingsFormat2.java    From lucene-solr with Apache License 2.0
@Test
public void testChangeCodecAndMerge() throws IOException {
  Directory dir = newDirectory();
  if (VERBOSE) {
    System.out.println("TEST: make new index");
  }
  IndexWriterConfig iwconf = newIndexWriterConfig(new MockAnalyzer(random()))
                               .setOpenMode(OpenMode.CREATE).setCodec(new MockCodec());
  iwconf.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
  //((LogMergePolicy) iwconf.getMergePolicy()).setMergeFactor(10);
  IndexWriter writer = newWriter(dir, iwconf);

  addDocs(writer, 10);
  writer.commit();
  assertQuery(new Term("content", "aaa"), dir, 10);
  if (VERBOSE) {
    System.out.println("TEST: addDocs3");
  }
  addDocs3(writer, 10);
  writer.commit();
  writer.close();

  assertQuery(new Term("content", "ccc"), dir, 10);
  assertQuery(new Term("content", "aaa"), dir, 10);
  Codec codec = iwconf.getCodec();

  iwconf = newIndexWriterConfig(new MockAnalyzer(random()))
      .setOpenMode(OpenMode.APPEND).setCodec(codec);
  //((LogMergePolicy) iwconf.getMergePolicy()).setNoCFSRatio(0.0);
  //((LogMergePolicy) iwconf.getMergePolicy()).setMergeFactor(10);
  iwconf.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);

  iwconf.setCodec(new MockCodec()); // uses standard for field content
  writer = newWriter(dir, iwconf);
  // swap in new codec for currently written segments
  if (VERBOSE) {
    System.out.println("TEST: add docs w/ Standard codec for content field");
  }
  addDocs2(writer, 10);
  writer.commit();
  codec = iwconf.getCodec();
  assertEquals(30, writer.getDocStats().maxDoc);
  assertQuery(new Term("content", "bbb"), dir, 10);
  assertQuery(new Term("content", "ccc"), dir, 10);
  assertQuery(new Term("content", "aaa"), dir, 10);

  if (VERBOSE) {
    System.out.println("TEST: add more docs w/ new codec");
  }
  addDocs2(writer, 10);
  writer.commit();
  assertQuery(new Term("content", "ccc"), dir, 10);
  assertQuery(new Term("content", "bbb"), dir, 20);
  assertQuery(new Term("content", "aaa"), dir, 10);
  assertEquals(40, writer.getDocStats().maxDoc);

  if (VERBOSE) {
    System.out.println("TEST: now optimize");
  }
  writer.forceMerge(1);
  assertEquals(40, writer.getDocStats().maxDoc);
  writer.close();
  assertQuery(new Term("content", "ccc"), dir, 10);
  assertQuery(new Term("content", "bbb"), dir, 20);
  assertQuery(new Term("content", "aaa"), dir, 10);

  dir.close();
}
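As this example shows, IndexWriterConfig.DISABLE_AUTO_FLUSH (-1) is itself a legal argument to setMaxBufferedDocs(): with the doc-count trigger switched off, new segments are created only by the explicit commit() calls and by the still-enabled RAM-based trigger.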
 
Example 11
Source File: TestLatLonShape.java    From lucene-solr with Apache License 2.0
/** test we can index and search a polygon with a large number of vertices */
public void testLargeVertexPolygon() throws Exception {
  int numVertices = TEST_NIGHTLY ? TestUtil.nextInt(random(), 200000, 500000) : TestUtil.nextInt(random(), 20000, 50000);
  IndexWriterConfig iwc = newIndexWriterConfig();
  iwc.setMergeScheduler(new SerialMergeScheduler());
  int mbd = iwc.getMaxBufferedDocs();
  if (mbd != -1 && mbd < numVertices/100) {
    iwc.setMaxBufferedDocs(numVertices/100);
  }
  Directory dir = newFSDirectory(createTempDir(getClass().getSimpleName()));
  IndexWriter writer = new IndexWriter(dir, iwc);

  // add a random polygon without a hole
  Polygon p = GeoTestUtil.createRegularPolygon(0, 90, atLeast(1000000), numVertices);
  Document document = new Document();
  addPolygonsToDoc(FIELDNAME, document, p);
  writer.addDocument(document);

  // add a random polygon with a hole
  Polygon inner = new Polygon(new double[] {-1d, -1d, 1d, 1d, -1d},
      new double[] {-91d, -89d, -89d, -91.0, -91.0});
  Polygon outer = GeoTestUtil.createRegularPolygon(0, -90, atLeast(1000000), numVertices);

  document = new Document();
  addPolygonsToDoc(FIELDNAME, document, new Polygon(outer.getPolyLats(), outer.getPolyLons(), inner));
  writer.addDocument(document);

  ////// search /////
  // search an intersecting bbox
  IndexReader reader = DirectoryReader.open(writer);
  writer.close();
  IndexSearcher searcher = newSearcher(reader);
  Query q = newRectQuery(FIELDNAME, -1d, 1d, p.minLon, p.maxLon);
  assertEquals(1, searcher.count(q));

  // search a disjoint bbox
  q = newRectQuery(FIELDNAME, p.minLat-1d, p.minLat+1, p.minLon-1d, p.minLon+1d);
  assertEquals(0, searcher.count(q));

  // search a bbox in the hole
  q = newRectQuery(FIELDNAME, inner.minLat + 1e-6, inner.maxLat - 1e-6, inner.minLon + 1e-6, inner.maxLon - 1e-6);
  assertEquals(0, searcher.count(q));

  IOUtils.close(reader, dir);
}