Java Code Examples for org.apache.lucene.index.LogMergePolicy

The following examples show how to use org.apache.lucene.index.LogMergePolicy. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: lucene-solr   Source File: SimplePrimaryNode.java    License: Apache License 2.0 6 votes vote down vote up
private static IndexWriter initWriter(int id, Random random, Path indexPath, boolean doCheckIndexOnClose) throws IOException {
  Directory dir = SimpleReplicaNode.getDirectory(random, id, indexPath, doCheckIndexOnClose);

  MockAnalyzer analyzer = new MockAnalyzer(random);
  analyzer.setMaxTokenLength(TestUtil.nextInt(random, 1, IndexWriter.MAX_TERM_LENGTH));
  IndexWriterConfig iwc = LuceneTestCase.newIndexWriterConfig(random, analyzer);

  MergePolicy mp = iwc.getMergePolicy();
  //iwc.setInfoStream(new PrintStreamInfoStream(System.out));

  // Force more frequent merging so we stress merge warming:
  if (mp instanceof TieredMergePolicy) {
    TieredMergePolicy tmp = (TieredMergePolicy) mp;
    tmp.setSegmentsPerTier(3);
    tmp.setMaxMergeAtOnce(3);
  } else if (mp instanceof LogMergePolicy) {
    LogMergePolicy lmp = (LogMergePolicy) mp;
    lmp.setMergeFactor(3);
  }

  IndexWriter writer = new IndexWriter(dir, iwc);

  TestUtil.reduceOpenFiles(writer);
  return writer;
}
 
Example 2
Source Project: lucene-solr   Source File: TestUtil.java    License: Apache License 2.0 6 votes vote down vote up
/** just tries to configure things to keep the open file
 * count lowish */
public static void reduceOpenFiles(IndexWriter w) {
  // keep number of open files lowish
  MergePolicy mp = w.getConfig().getMergePolicy();
  mp.setNoCFSRatio(1.0);
  if (mp instanceof LogMergePolicy) {
    LogMergePolicy lmp = (LogMergePolicy) mp;
    lmp.setMergeFactor(Math.min(5, lmp.getMergeFactor()));
  } else if (mp instanceof TieredMergePolicy) {
    TieredMergePolicy tmp = (TieredMergePolicy) mp;
    tmp.setMaxMergeAtOnce(Math.min(5, tmp.getMaxMergeAtOnce()));
    tmp.setSegmentsPerTier(Math.min(5, tmp.getSegmentsPerTier()));
  }
  MergeScheduler ms = w.getConfig().getMergeScheduler();
  if (ms instanceof ConcurrentMergeScheduler) {
    // wtf... shouldnt it be even lower since it's 1 by default?!?!
    ((ConcurrentMergeScheduler) ms).setMaxMergesAndThreads(3, 2);
  }
}
 
Example 3
Source Project: lucene-solr   Source File: TestMergePolicyConfig.java    License: Apache License 2.0 6 votes vote down vote up
private void implTestLogMergePolicyConfig(String solrConfigFileName,
    Class<? extends LogMergePolicy> mpClass) throws Exception {

  initCore(solrConfigFileName, "schema-minimal.xml");
  IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore());

  // verify some props set to -1 get lucene internal defaults
  assertEquals(-1, solrConfig.indexConfig.maxBufferedDocs);
  assertEquals(IndexWriterConfig.DISABLE_AUTO_FLUSH, 
               iwc.getMaxBufferedDocs());
  assertEquals(-1, solrConfig.indexConfig.ramBufferSizeMB, 0.0D);
  assertEquals(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB, 
               iwc.getRAMBufferSizeMB(), 0.0D);


  LogMergePolicy logMP = assertAndCast(mpClass, iwc.getMergePolicy());

  assertEquals(11, logMP.getMergeFactor());
  assertEquals(456, logMP.getMaxMergeDocs());
}
 
Example 4
Source Project: everywhere   Source File: IndexUtil.java    License: Apache License 2.0 5 votes vote down vote up
public static IndexWriter getIndexWriter(String indexPath, boolean create) throws IOException {
    Directory dir = FSDirectory.open(Paths.get(indexPath));
    Analyzer analyzer = new SmartChineseAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    LogMergePolicy mergePolicy = new LogByteSizeMergePolicy();
    mergePolicy.setMergeFactor(50);
    mergePolicy.setMaxMergeDocs(5000);
    if (create){
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    } else {
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    }
    return new IndexWriter(dir, iwc);
}
 
Example 5
Source Project: lucene-solr   Source File: TestMergePolicyConfig.java    License: Apache License 2.0 5 votes vote down vote up
public void testLogMergePolicyFactoryConfig() throws Exception {

    final boolean byteSizeMP = random().nextBoolean();
    final Class<? extends LogMergePolicy> mpClass = byteSizeMP
        ? LogByteSizeMergePolicy.class : LogDocMergePolicy.class;
    final Class<? extends MergePolicyFactory> mpfClass = byteSizeMP
        ? LogByteSizeMergePolicyFactory.class : LogDocMergePolicyFactory.class;

    System.setProperty("solr.test.log.merge.policy.factory", mpfClass.getName());

    implTestLogMergePolicyConfig("solrconfig-logmergepolicyfactory.xml", mpClass);
  }
 
Example 6
Source Project: lucene-solr   Source File: TestPerfTasksLogic.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Test that IndexWriter settings stick.
 */
public void testIndexWriterSettings() throws Exception {
  // 1. alg definition (required in every "logic" test)
  String algLines[] = {
      "# ----- properties ",
      "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
      "docs.file=" + getReuters20LinesFile(),
      "content.source.log.step=3",
      "ram.flush.mb=-1",
      "max.buffered=2",
      "compound=cmpnd:true:false",
      "doc.term.vector=vector:false:true",
      "content.source.forever=false",
      "directory=ByteBuffersDirectory",
      "doc.stored=false",
      "merge.factor=3",
      "doc.tokenized=false",
      "debug.level=1",
      "# ----- alg ",
      "{ \"Rounds\"",
      "  ResetSystemErase",
      "  CreateIndex",
      "  { \"AddDocs\"  AddDoc > : * ",
      "  NewRound",
      "} : 2",
  };

  // 2. execute the algorithm  (required in every "logic" test)
  Benchmark benchmark = execBenchmark(algLines);
  final IndexWriter writer = benchmark.getRunData().getIndexWriter();
  assertEquals(2, writer.getConfig().getMaxBufferedDocs());
  assertEquals(IndexWriterConfig.DISABLE_AUTO_FLUSH, (int) writer.getConfig().getRAMBufferSizeMB());
  assertEquals(3, ((LogMergePolicy) writer.getConfig().getMergePolicy()).getMergeFactor());
  assertEquals(0.0d, writer.getConfig().getMergePolicy().getNoCFSRatio(), 0.0);
  writer.close();
  Directory dir = benchmark.getRunData().getDirectory();
  IndexReader reader = DirectoryReader.open(dir);
  Fields tfv = reader.getTermVectors(0);
  assertNotNull(tfv);
  assertTrue(tfv.size() > 0);
  reader.close();
}
 
Example 7
Source Project: lucene-solr   Source File: TestDirectoryTaxonomyReader.java    License: Apache License 2.0 4 votes vote down vote up
@Test
public void testOpenIfChangedManySegments() throws Exception {
  // test openIfChanged() when the taxonomy contains many segments
  Directory dir = newDirectory();
  
  DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir) {
    @Override
    protected IndexWriterConfig createIndexWriterConfig(OpenMode openMode) {
      IndexWriterConfig conf = super.createIndexWriterConfig(openMode);
      LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy();
      lmp.setMergeFactor(2);
      return conf;
    }
  };
  TaxonomyReader reader = new DirectoryTaxonomyReader(writer);
  
  int numRounds = random().nextInt(10) + 10;
  int numCategories = 1; // one for root
  for (int i = 0; i < numRounds; i++) {
    int numCats = random().nextInt(4) + 1;
    for (int j = 0; j < numCats; j++) {
      writer.addCategory(new FacetLabel(Integer.toString(i), Integer.toString(j)));
    }
    numCategories += numCats + 1 /* one for round-parent */;
    TaxonomyReader newtr = TaxonomyReader.openIfChanged(reader);
    assertNotNull(newtr);
    reader.close();
    reader = newtr;
    
    // assert categories
    assertEquals(numCategories, reader.getSize());
    int roundOrdinal = reader.getOrdinal(new FacetLabel(Integer.toString(i)));
    int[] parents = reader.getParallelTaxonomyArrays().parents();
    assertEquals(0, parents[roundOrdinal]); // round's parent is root
    for (int j = 0; j < numCats; j++) {
      int ord = reader.getOrdinal(new FacetLabel(Integer.toString(i), Integer.toString(j)));
      assertEquals(roundOrdinal, parents[ord]); // round's parent is root
    }
  }
  
  reader.close();
  writer.close();
  dir.close();
}
 
Example 8
Source Project: lucene-solr   Source File: TestMergeSchedulerExternal.java    License: Apache License 2.0 4 votes vote down vote up
public void testSubclassConcurrentMergeScheduler() throws IOException {
  MockDirectoryWrapper dir = newMockDirectory();
  dir.failOn(new FailOnlyOnMerge());

  Document doc = new Document();
  Field idField = newStringField("id", "", Field.Store.YES);
  doc.add(idField);

  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()))
    .setMergeScheduler(new MyMergeScheduler())
    .setMaxBufferedDocs(2).setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH)
    .setMergePolicy(newLogMergePolicy());

  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  infoStream = new PrintStreamInfoStream(new PrintStream(baos, true, IOUtils.UTF_8));
  iwc.setInfoStream(infoStream);

  IndexWriter writer = new IndexWriter(dir, iwc);
  LogMergePolicy logMP = (LogMergePolicy) writer.getConfig().getMergePolicy();
  logMP.setMergeFactor(10);
  for(int i=0;i<20;i++) {
    writer.addDocument(doc);
  }

  try {
    ((MyMergeScheduler) writer.getConfig().getMergeScheduler()).sync();
  } catch (IllegalStateException ise) {
    // OK
  }
  writer.rollback();

  try {
    assertTrue(mergeThreadCreated);
    assertTrue(mergeCalled);
    assertTrue(excCalled);
  } catch (AssertionError ae) {
    System.out.println("TEST FAILED; IW infoStream output:");
    System.out.println(baos.toString(IOUtils.UTF_8));
    throw ae;
  }
  dir.close();
}
 
Example 9
Source Project: lucene-solr   Source File: FileBasedSpellChecker.java    License: Apache License 2.0 4 votes vote down vote up
private void loadExternalFileDictionary(SolrCore core, SolrIndexSearcher searcher) {
  try {
    IndexSchema schema = null == searcher ? core.getLatestSchema() : searcher.getSchema();
    // Get the field's analyzer
    if (fieldTypeName != null && schema.getFieldTypeNoEx(fieldTypeName) != null) {
      FieldType fieldType = schema.getFieldTypes().get(fieldTypeName);
      // Do index-time analysis using the given fieldType's analyzer
      Directory ramDir = new ByteBuffersDirectory();

      LogMergePolicy mp = new LogByteSizeMergePolicy();
      mp.setMergeFactor(300);

      IndexWriter writer = new IndexWriter(
          ramDir,
          new IndexWriterConfig(fieldType.getIndexAnalyzer()).
              setMaxBufferedDocs(150).
              setMergePolicy(mp).
              setOpenMode(IndexWriterConfig.OpenMode.CREATE)
              // TODO: if we enable this, codec gets angry since field won't exist in the schema
              // .setCodec(core.getCodec())
      );

      List<String> lines = core.getResourceLoader().getLines(sourceLocation, characterEncoding);

      for (String s : lines) {
        Document d = new Document();
        d.add(new TextField(WORD_FIELD_NAME, s, Field.Store.NO));
        writer.addDocument(d);
      }
      writer.forceMerge(1);
      writer.close();

      dictionary = new HighFrequencyDictionary(DirectoryReader.open(ramDir),
              WORD_FIELD_NAME, 0.0f);
    } else {
      // check if character encoding is defined
      if (characterEncoding == null) {
        dictionary = new PlainTextDictionary(core.getResourceLoader().openResource(sourceLocation));
      } else {
        dictionary = new PlainTextDictionary(new InputStreamReader(core.getResourceLoader().openResource(sourceLocation), characterEncoding));
      }
    }


  } catch (IOException e) {
    log.error( "Unable to load spellings", e);
  }
}
 
Example 10
Source Project: examples   Source File: TreeMergeOutputFormat.java    License: Apache License 2.0 4 votes vote down vote up
@Override
    public void close(TaskAttemptContext context) throws IOException {
      LOG.debug("Task " + context.getTaskAttemptID() + " merging into dstDir: " + workDir + ", srcDirs: " + shards);
      writeShardNumberFile(context);      
      heartBeater.needHeartBeat();
      try {
        Directory mergedIndex = new HdfsDirectory(workDir, context.getConfiguration());
        
        // TODO: shouldn't we pull the Version from the solrconfig.xml?
        IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_CURRENT, null)
            .setOpenMode(OpenMode.CREATE).setUseCompoundFile(false)
            //.setMergePolicy(mergePolicy) // TODO: grab tuned MergePolicy from solrconfig.xml?
            //.setMergeScheduler(...) // TODO: grab tuned MergeScheduler from solrconfig.xml?
            ;
          
        if (LOG.isDebugEnabled()) {
          writerConfig.setInfoStream(System.out);
        }
//        writerConfig.setRAMBufferSizeMB(100); // improve performance
//        writerConfig.setMaxThreadStates(1);
        
        // disable compound file to improve performance
        // also see http://lucene.472066.n3.nabble.com/Questions-on-compound-file-format-td489105.html
        // also see defaults in SolrIndexConfig
        MergePolicy mergePolicy = writerConfig.getMergePolicy();
        LOG.debug("mergePolicy was: {}", mergePolicy);
        if (mergePolicy instanceof TieredMergePolicy) {
          ((TieredMergePolicy) mergePolicy).setNoCFSRatio(0.0);
//          ((TieredMergePolicy) mergePolicy).setMaxMergeAtOnceExplicit(10000);          
//          ((TieredMergePolicy) mergePolicy).setMaxMergeAtOnce(10000);       
//          ((TieredMergePolicy) mergePolicy).setSegmentsPerTier(10000);
        } else if (mergePolicy instanceof LogMergePolicy) {
          ((LogMergePolicy) mergePolicy).setNoCFSRatio(0.0);
        }
        LOG.info("Using mergePolicy: {}", mergePolicy);
        
        IndexWriter writer = new IndexWriter(mergedIndex, writerConfig);
        
        Directory[] indexes = new Directory[shards.size()];
        for (int i = 0; i < shards.size(); i++) {
          indexes[i] = new HdfsDirectory(shards.get(i), context.getConfiguration());
        }

        context.setStatus("Logically merging " + shards.size() + " shards into one shard");
        LOG.info("Logically merging " + shards.size() + " shards into one shard: " + workDir);
        long start = System.nanoTime();
        
        writer.addIndexes(indexes); 
        // TODO: avoid intermediate copying of files into dst directory; rename the files into the dir instead (cp -> rename) 
        // This can improve performance and turns this phase into a true "logical" merge, completing in constant time.
        // See https://issues.apache.org/jira/browse/LUCENE-4746
        
        if (LOG.isDebugEnabled()) {
          context.getCounter(SolrCounters.class.getName(), SolrCounters.LOGICAL_TREE_MERGE_TIME.toString()).increment(System.currentTimeMillis() - start);
        }
        float secs = (System.nanoTime() - start) / (float)(10^9);
        LOG.info("Logical merge took {} secs", secs);        
        int maxSegments = context.getConfiguration().getInt(TreeMergeMapper.MAX_SEGMENTS_ON_TREE_MERGE, Integer.MAX_VALUE);
        context.setStatus("Optimizing Solr: forcing mtree merge down to " + maxSegments + " segments");
        LOG.info("Optimizing Solr: forcing tree merge down to {} segments", maxSegments);
        start = System.nanoTime();
        if (maxSegments < Integer.MAX_VALUE) {
          writer.forceMerge(maxSegments); 
          // TODO: consider perf enhancement for no-deletes merges: bulk-copy the postings data 
          // see http://lucene.472066.n3.nabble.com/Experience-with-large-merge-factors-tp1637832p1647046.html
        }
        if (LOG.isDebugEnabled()) {
          context.getCounter(SolrCounters.class.getName(), SolrCounters.PHYSICAL_TREE_MERGE_TIME.toString()).increment(System.currentTimeMillis() - start);
        }
        secs = (System.nanoTime() - start) / (float)(10^9);
        LOG.info("Optimizing Solr: done forcing tree merge down to {} segments in {} secs", maxSegments, secs);
        
        start = System.nanoTime();
        LOG.info("Optimizing Solr: Closing index writer");
        writer.close();
        secs = (System.nanoTime() - start) / (float)(10^9);
        LOG.info("Optimizing Solr: Done closing index writer in {} secs", secs);
        context.setStatus("Done");
      } finally {
        heartBeater.cancelHeartBeat();
        heartBeater.close();
      }
    }