Java Code Examples for org.apache.lucene.index.TieredMergePolicy

The following examples show how to use org.apache.lucene.index.TieredMergePolicy. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: linden   Source File: SortingMergePolicyFactory.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a {@link TieredMergePolicy} wrapped in a {@code SortingMergePolicyDecorator}
 * whose sort order is taken from {@code params}.
 *
 * <p>Keys read: {@code SORT_FIELD} (field name), {@code SORT_FIELD_TYPE} (name of a
 * {@link SortField.Type} constant, matched case-insensitively) and {@code SORT_DESC}
 * (parsed as a boolean; defaults to {@code true} when the key is absent).
 *
 * @param params factory configuration
 * @return the sorting merge policy
 * @throws IOException if the sort type is missing or resolves to {@code DOC}
 * @throws IllegalArgumentException if {@code SORT_FIELD_TYPE} names no {@link SortField.Type} constant
 */
@Override
public MergePolicy getInstance(Map<String, String> params) throws IOException {
  String field = params.get(SORT_FIELD);
  SortField.Type sortFieldType = SortField.Type.DOC;
  if (params.containsKey(SORT_FIELD_TYPE)) {
    // Upper-case with Locale.ROOT so the enum lookup does not depend on the JVM default
    // locale (e.g. under a Turkish locale "int" would become "İNT" and fail to resolve).
    String typeName = params.get(SORT_FIELD_TYPE).toUpperCase(java.util.Locale.ROOT);
    sortFieldType = SortField.Type.valueOf(typeName);
  }

  if (sortFieldType == SortField.Type.DOC) {
    throw new IOException(
        "Relying on internal lucene DocIDs is not guaranteed to work, this is only an implementation detail.");
  }

  // Boolean.parseBoolean never throws (anything other than "true" yields false),
  // so no exception handling is needed around it.
  boolean desc = true;
  if (params.containsKey(SORT_DESC)) {
    desc = Boolean.parseBoolean(params.get(SORT_DESC));
  }
  SortField sortField = new SortField(field, sortFieldType, desc);
  Sort sort = new Sort(sortField);
  return new SortingMergePolicyDecorator(new TieredMergePolicy(), sort);
}
 
Example 2
Source Project: lucene-solr   Source File: SimplePrimaryNode.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Opens the {@link IndexWriter} for a node: obtains the directory from
 * {@code SimpleReplicaNode.getDirectory}, builds a randomized writer config, lowers the
 * merge thresholds to 3 so merges happen often, and finally passes the writer through
 * {@code TestUtil.reduceOpenFiles}.
 */
private static IndexWriter initWriter(int id, Random random, Path indexPath, boolean doCheckIndexOnClose) throws IOException {
  final Directory directory = SimpleReplicaNode.getDirectory(random, id, indexPath, doCheckIndexOnClose);

  final MockAnalyzer mockAnalyzer = new MockAnalyzer(random);
  final int maxTokenLength = TestUtil.nextInt(random, 1, IndexWriter.MAX_TERM_LENGTH);
  mockAnalyzer.setMaxTokenLength(maxTokenLength);
  final IndexWriterConfig config = LuceneTestCase.newIndexWriterConfig(random, mockAnalyzer);
  //config.setInfoStream(new PrintStreamInfoStream(System.out));

  // Merge aggressively so that merge warming gets exercised.
  final MergePolicy policy = config.getMergePolicy();
  if (policy instanceof TieredMergePolicy) {
    final TieredMergePolicy tiered = (TieredMergePolicy) policy;
    tiered.setSegmentsPerTier(3);
    tiered.setMaxMergeAtOnce(3);
  } else if (policy instanceof LogMergePolicy) {
    ((LogMergePolicy) policy).setMergeFactor(3);
  }

  final IndexWriter indexWriter = new IndexWriter(directory, config);
  TestUtil.reduceOpenFiles(indexWriter);
  return indexWriter;
}
 
Example 3
Source Project: lucene-solr   Source File: TestUtil.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Best-effort tuning of a writer's merge policy and merge scheduler so that
 * the number of simultaneously open files stays low.
 */
public static void reduceOpenFiles(IndexWriter w) {
  final MergePolicy policy = w.getConfig().getMergePolicy();
  policy.setNoCFSRatio(1.0);

  // Cap the merge fan-out at 5 for the two policy types handled here.
  if (policy instanceof TieredMergePolicy) {
    final TieredMergePolicy tiered = (TieredMergePolicy) policy;
    tiered.setMaxMergeAtOnce(Math.min(5, tiered.getMaxMergeAtOnce()));
    tiered.setSegmentsPerTier(Math.min(5, tiered.getSegmentsPerTier()));
  } else if (policy instanceof LogMergePolicy) {
    final LogMergePolicy log = (LogMergePolicy) policy;
    log.setMergeFactor(Math.min(5, log.getMergeFactor()));
  }

  final MergeScheduler scheduler = w.getConfig().getMergeScheduler();
  if (scheduler instanceof ConcurrentMergeScheduler) {
    // Open question carried over from the original author: the default is 1,
    // so should these limits be even lower?
    final ConcurrentMergeScheduler concurrent = (ConcurrentMergeScheduler) scheduler;
    concurrent.setMaxMergesAndThreads(3, 2);
  }
}
 
Example 4
Source Project: lucene-solr   Source File: SolrIndexConfigTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Loads the tiered-merge-policy solrconfig and checks that the resulting
 * {@link IndexWriterConfig} carries the expected merge policy and merge scheduler values.
 */
@Test
public void testTieredMPSolrIndexConfigCreation() throws Exception {
  final SolrConfig solrConfig = new SolrConfig(instanceDir, solrConfigFileNameTieredMergePolicyFactory);
  final SolrIndexConfig solrIndexConfig = new SolrIndexConfig(solrConfig, null, null);
  final IndexSchema indexSchema = IndexSchemaFactory.buildIndexSchema(schemaFileName, solrConfig);

  h.getCore().setLatestSchema(indexSchema);
  final IndexWriterConfig iwc = solrIndexConfig.toIndexWriterConfig(h.getCore());

  // Merge policy checks.
  final Object policy = iwc.getMergePolicy();
  assertNotNull("null mp", policy);
  assertTrue("mp is not TieredMergePolicy", policy instanceof TieredMergePolicy);
  final TieredMergePolicy tiered = (TieredMergePolicy) policy;
  assertEquals("mp.maxMergeAtOnceExplicit", 19, tiered.getMaxMergeAtOnceExplicit());
  assertEquals("mp.segmentsPerTier", 9, (int) tiered.getSegmentsPerTier());

  // Merge scheduler checks.
  final Object scheduler = iwc.getMergeScheduler();
  assertNotNull("null ms", scheduler);
  assertTrue("ms is not CMS", scheduler instanceof ConcurrentMergeScheduler);
  final ConcurrentMergeScheduler cms = (ConcurrentMergeScheduler) scheduler;
  assertEquals("ms.maxMergeCount", 987, cms.getMaxMergeCount());
  assertEquals("ms.maxThreadCount", 42, cms.getMaxThreadCount());
  assertEquals("ms.isAutoIOThrottle", true, cms.getAutoIOThrottle());
}
 
Example 5
Source Project: lucene-solr   Source File: SolrIndexConfigTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Loads the concurrent-merge-scheduler solrconfig and checks the merge policy type and
 * the scheduler settings (auto IO throttle is expected to be off here).
 */
@Test
public void testConcurrentMergeSchedularSolrIndexConfigCreation() throws Exception {
  final SolrConfig solrConfig = new SolrConfig(instanceDir, solrConfigFileNameConnMSPolicyFactory);
  final SolrIndexConfig solrIndexConfig = new SolrIndexConfig(solrConfig, null, null);
  final IndexSchema indexSchema = IndexSchemaFactory.buildIndexSchema(schemaFileName, solrConfig);

  h.getCore().setLatestSchema(indexSchema);
  final IndexWriterConfig iwc = solrIndexConfig.toIndexWriterConfig(h.getCore());

  final Object policy = iwc.getMergePolicy();
  assertNotNull("null mp", policy);
  assertTrue("mp is not TieredMergePolicy", policy instanceof TieredMergePolicy);

  final Object scheduler = iwc.getMergeScheduler();
  assertNotNull("null ms", scheduler);
  assertTrue("ms is not CMS", scheduler instanceof ConcurrentMergeScheduler);
  final ConcurrentMergeScheduler cms = (ConcurrentMergeScheduler) scheduler;
  assertEquals("ms.maxMergeCount", 987, cms.getMaxMergeCount());
  assertEquals("ms.maxThreadCount", 42, cms.getMaxThreadCount());
  assertEquals("ms.isAutoIOThrottle", false, cms.getAutoIOThrottle());
}
 
Example 6
Source Project: lucene-solr   Source File: WrapperMergePolicyFactoryTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that a wrapper factory configured with an explicit wrapped policy
 * ({@code test.class} = {@code TieredMergePolicyFactory}) instantiates that policy and
 * forwards the prefixed settings to it, without ever falling back to the default wrapped policy.
 */
public void testProperlyInitializesWrappedMergePolicy() {
  final TieredMergePolicy defaultTMP = new TieredMergePolicy();
  // Use values that differ from the defaults so a silently ignored setting is detected.
  final int testMaxMergeAtOnce = defaultTMP.getMaxMergeAtOnce() * 2;
  final double testMaxMergedSegmentMB = defaultTMP.getMaxMergedSegmentMB() * 10;

  final MergePolicyFactoryArgs args = new MergePolicyFactoryArgs();
  args.add(WrapperMergePolicyFactory.WRAPPED_PREFIX, "test");
  args.add("test.class", TieredMergePolicyFactory.class.getName());
  args.add("test.maxMergeAtOnce", testMaxMergeAtOnce);
  args.add("test.maxMergedSegmentMB", testMaxMergedSegmentMB);
  MergePolicyFactory mpf = new DefaultingWrapperMergePolicyFactory(resourceLoader, args, null) {
    @Override
    protected MergePolicy getDefaultWrappedMergePolicy() {
      throw new IllegalStateException("Should not have reached here!");
    }
  };
  final MergePolicy mp = mpf.getMergePolicy();
  // assertSame takes (expected, actual); keeping that order makes failure messages read correctly.
  assertSame(TieredMergePolicy.class, mp.getClass());
  final TieredMergePolicy tmp = (TieredMergePolicy)mp;
  assertEquals("maxMergeAtOnce", testMaxMergeAtOnce, tmp.getMaxMergeAtOnce());
  assertEquals("maxMergedSegmentMB", testMaxMergedSegmentMB, tmp.getMaxMergedSegmentMB(), 0.0d);
}
 
Example 7
/**
 * Creates a one-document index at {@code path} for shard {@code i}.
 *
 * <p>Also asserts that a {@code HashPartitioner} maps key {@code i} to partition {@code i}
 * for {@code totalShardCount} partitions.
 *
 * @param configuration Hadoop configuration used to open the directory
 * @param i shard number; also the key used to build the document
 * @param path location of the shard's index
 * @param totalShardCount number of partitions
 * @throws IOException if the index cannot be written
 */
private static void createShard(Configuration configuration, int i, Path path, int totalShardCount)
    throws IOException {
  HdfsDirectory hdfsDirectory = new HdfsDirectory(configuration, path);
  IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer());
  TieredMergePolicy mergePolicy = (TieredMergePolicy) conf.getMergePolicy();
  mergePolicy.setUseCompoundFile(false);
  IndexWriter indexWriter = new IndexWriter(hdfsDirectory, conf);
  try {
    Partitioner<IntWritable, IntWritable> partitioner = new HashPartitioner<IntWritable, IntWritable>();
    int partition = partitioner.getPartition(new IntWritable(i), null, totalShardCount);
    assertEquals(i, partition);

    Document doc = getDoc(i);
    indexWriter.addDocument(doc);
  } finally {
    // Always close the writer so a failed assertion or addDocument does not leak it.
    indexWriter.close();
  }
}
 
Example 8
Source Project: Elasticsearch   Source File: MergePolicyConfig.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads the {@code index.merge.policy.*} settings (falling back to the class defaults),
 * applies them to the tiered merge policy held by this config and logs the effective values.
 *
 * @param logger logger used for the warning and debug output
 * @param indexSettings index-level settings to read from
 */
public MergePolicyConfig(ESLogger logger, Settings indexSettings) {
    this.logger = logger;

    final String compoundFormat =
            indexSettings.get(INDEX_COMPOUND_FORMAT, Double.toString(TieredMergePolicy.DEFAULT_NO_CFS_RATIO));
    this.noCFSRatio = parseNoCFSRatio(compoundFormat);

    // expunge_deletes_allowed is a percentage
    final double expungeDeletesAllowed =
            indexSettings.getAsDouble("index.merge.policy.expunge_deletes_allowed", DEFAULT_EXPUNGE_DELETES_ALLOWED);
    final ByteSizeValue floor =
            indexSettings.getAsBytesSize("index.merge.policy.floor_segment", DEFAULT_FLOOR_SEGMENT);
    int mergeAtOnce =
            indexSettings.getAsInt("index.merge.policy.max_merge_at_once", DEFAULT_MAX_MERGE_AT_ONCE);
    final int mergeAtOnceExplicit =
            indexSettings.getAsInt("index.merge.policy.max_merge_at_once_explicit", DEFAULT_MAX_MERGE_AT_ONCE_EXPLICIT);
    // TODO is this really a good default number for max_merge_segment, what happens for large indices, won't they end up with many segments?
    final ByteSizeValue maxSegment =
            indexSettings.getAsBytesSize("index.merge.policy.max_merged_segment", DEFAULT_MAX_MERGED_SEGMENT);
    final double perTier =
            indexSettings.getAsDouble("index.merge.policy.segments_per_tier", DEFAULT_SEGMENTS_PER_TIER);
    final double reclaimWeight =
            indexSettings.getAsDouble("index.merge.policy.reclaim_deletes_weight", DEFAULT_RECLAIM_DELETES_WEIGHT);

    this.mergesEnabled = indexSettings.getAsBoolean(INDEX_MERGE_ENABLED, true);
    if (!mergesEnabled) {
        logger.warn("[{}] is set to false, this should only be used in tests and can cause serious problems in production environments", INDEX_MERGE_ENABLED);
    }

    mergeAtOnce = adjustMaxMergeAtOnceIfNeeded(mergeAtOnce, perTier);

    mergePolicy.setNoCFSRatio(noCFSRatio);
    mergePolicy.setForceMergeDeletesPctAllowed(expungeDeletesAllowed);
    mergePolicy.setFloorSegmentMB(floor.mbFrac());
    mergePolicy.setMaxMergeAtOnce(mergeAtOnce);
    mergePolicy.setMaxMergeAtOnceExplicit(mergeAtOnceExplicit);
    mergePolicy.setMaxMergedSegmentMB(maxSegment.mbFrac());
    mergePolicy.setSegmentsPerTier(perTier);
    mergePolicy.setReclaimDeletesWeight(reclaimWeight);

    logger.debug("using [tiered] merge mergePolicy with expunge_deletes_allowed[{}], floor_segment[{}], max_merge_at_once[{}], max_merge_at_once_explicit[{}], max_merged_segment[{}], segments_per_tier[{}], reclaim_deletes_weight[{}]",
            expungeDeletesAllowed, floor, mergeAtOnce, mergeAtOnceExplicit, maxSegment, perTier, reclaimWeight);
}
 
Example 9
Source Project: linden   Source File: TieredMergePolicyFactory.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@link TieredMergePolicy}, overriding segments-per-tier and
 * max-merge-at-once only for the keys that are present in {@code config}.
 *
 * @param config factory configuration
 * @return the configured merge policy
 */
@Override
public MergePolicy getInstance(Map<String, String> config) throws IOException {
  final TieredMergePolicy tiered = new TieredMergePolicy();

  if (config.containsKey(SEGMENTS_PER_TIER)) {
    final double segmentsPerTier = Double.parseDouble(config.get(SEGMENTS_PER_TIER));
    tiered.setSegmentsPerTier(segmentsPerTier);
  }

  if (config.containsKey(MAX_MERGE_AT_ONCE)) {
    final int maxMergeAtOnce = Integer.parseInt(config.get(MAX_MERGE_AT_ONCE));
    tiered.setMaxMergeAtOnce(maxMergeAtOnce);
  }

  return tiered;
}
 
Example 10
Source Project: lucene-solr   Source File: MonitorConfiguration.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns the default writer configuration: a {@code KeywordAnalyzer}, a tiered merge
 * policy with 4 segments per tier, and {@code CREATE_OR_APPEND} open mode.
 */
private static IndexWriterConfig defaultIndexWriterConfig() {
  final TieredMergePolicy tiered = new TieredMergePolicy();
  tiered.setSegmentsPerTier(4);

  final IndexWriterConfig config = new IndexWriterConfig(new KeywordAnalyzer());
  config.setMergePolicy(tiered);
  config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
  return config;
}
 
Example 11
Source Project: lucene-solr   Source File: TestMergePolicyConfig.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Sets a random value for the {@code testSetNoCFSMergePolicyConfig.useCompoundFile} system
 * property, loads the no-CFS config and checks that the writer config reflects that value
 * while the merge policy's noCFSRatio is 0.5. The property is removed again afterwards.
 */
public void testSetNoCFSMergePolicyConfig() throws Exception {
  final String propertyName = "testSetNoCFSMergePolicyConfig.useCompoundFile";
  final boolean useCompoundFile = random().nextBoolean();
  System.setProperty(propertyName, String.valueOf(useCompoundFile));
  try {
    initCore("solrconfig-mergepolicyfactory-nocfs.xml","schema-minimal.xml");
    final IndexWriterConfig writerConfig = solrConfig.indexConfig.toIndexWriterConfig(h.getCore());
    assertEquals(useCompoundFile, writerConfig.getUseCompoundFile());

    final TieredMergePolicy policy = assertAndCast(TieredMergePolicy.class, writerConfig.getMergePolicy());
    assertEquals(0.5D, policy.getNoCFSRatio(), 0.0D);
  } finally {
    // Do not let the property leak into other tests.
    System.getProperties().remove(propertyName);
  }
}
 
Example 12
Source Project: lucene-solr   Source File: TestMergePolicyConfig.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * With the defaults config: compound files are off, the merge policy is a
 * {@link TieredMergePolicy} with the default noCFSRatio, and newly committed
 * segments are not compound.
 */
public void testDefaultMergePolicyConfig() throws Exception {
  initCore("solrconfig-mergepolicy-defaults.xml","schema-minimal.xml");
  final IndexWriterConfig writerConfig = solrConfig.indexConfig.toIndexWriterConfig(h.getCore());
  assertEquals(false, writerConfig.getUseCompoundFile());

  final TieredMergePolicy policy = assertAndCast(TieredMergePolicy.class, writerConfig.getMergePolicy());
  final double expectedRatio = TieredMergePolicy.DEFAULT_NO_CFS_RATIO;
  assertEquals(expectedRatio, policy.getNoCFSRatio(), 0.0D);

  assertCommitSomeNewDocs();
  assertCompoundSegments(h.getCore(), false);
}
 
Example 13
Source Project: lucene-solr   Source File: TestMergePolicyConfig.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * With the legacy config: the compound-file flag follows the {@code useCompoundFile}
 * system property, and the tiered policy reports 10 for both maxMergeAtOnce and
 * segmentsPerTier.
 */
public void testLegacyMergePolicyConfig() throws Exception {
  final String cfsProperty = System.getProperty("useCompoundFile");
  final boolean expectCFS = Boolean.parseBoolean(cfsProperty);

  initCore("solrconfig-mergepolicy-legacy.xml","schema-minimal.xml");
  final IndexWriterConfig writerConfig = solrConfig.indexConfig.toIndexWriterConfig(h.getCore());
  assertEquals(expectCFS, writerConfig.getUseCompoundFile());

  final TieredMergePolicy policy = assertAndCast(TieredMergePolicy.class, writerConfig.getMergePolicy());
  assertEquals(10, policy.getMaxMergeAtOnce());
  assertEquals(10.0D, policy.getSegmentsPerTier(), 0.0D);

  assertCommitSomeNewDocs();
  assertCompoundSegments(h.getCore(), expectCFS);
}
 
Example 14
Source Project: lucene-solr   Source File: TestMergePolicyConfig.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Loads the tiered merge policy factory config and checks the policy settings it
 * produces, then commits documents twice and forces a merge to one segment, checking
 * the compound-file state of the segments after each step.
 */
public void testTieredMergePolicyConfig() throws Exception {
  // Expected compound-file state comes from the "useCompoundFile" system property.
  final boolean expectCFS 
    = Boolean.parseBoolean(System.getProperty("useCompoundFile"));

  initCore("solrconfig-tieredmergepolicyfactory.xml","schema-minimal.xml");
  IndexWriterConfig iwc = solrConfig.indexConfig.toIndexWriterConfig(h.getCore());
  assertEquals(expectCFS, iwc.getUseCompoundFile());


  TieredMergePolicy tieredMP = assertAndCast(TieredMergePolicy.class,
                                             iwc.getMergePolicy());

  // set by legacy <mergeFactor> setting
  assertEquals(7, tieredMP.getMaxMergeAtOnce());
  
  // mp-specific setters
  assertEquals(19, tieredMP.getMaxMergeAtOnceExplicit());
  assertEquals(0.1D, tieredMP.getNoCFSRatio(), 0.0D);
  // make sure we overrode segmentsPerTier 
  // (split from maxMergeAtOnce out of mergeFactor)
  assertEquals(9D, tieredMP.getSegmentsPerTier(), 0.001);
  
  assertCommitSomeNewDocs();
  // even though we have a single segment (which is 100% of the size of 
  // the index which is higher than our 0.6D threshold) the
  // compound ratio doesn't matter because the segment was never merged
  // NOTE(review): the noCFSRatio asserted above is 0.1D, not 0.6D -- the
  // figure in this comment looks stale; confirm against the config file.
  assertCompoundSegments(h.getCore(), expectCFS);

  // A second commit is expected to leave two segments.
  assertCommitSomeNewDocs();
  assertNumSegments(h.getCore(), 2);
  assertCompoundSegments(h.getCore(), expectCFS);

  assertU(optimize("maxSegments", "1"));
  assertNumSegments(h.getCore(), 1);
  // we've now forced a merge, and the MP ratio should be in play
  assertCompoundSegments(h.getCore(), false);
}
 
Example 15
Source Project: incubator-retired-blur   Source File: IndexImporterTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Prepares the fixtures for an import test: a two-shard table descriptor, the shard and
 * commit paths under {@code _base}, a commit writer and a main writer (both with compound
 * files disabled), and finally the {@code IndexImporter} under test.
 */
private void setupWriter(Configuration configuration) throws IOException {
  final TableDescriptor descriptor = new TableDescriptor();
  descriptor.setName("test-table");
  final String uuid = UUID.randomUUID().toString();

  final Path tablePath = new Path(_base, "table-table");
  descriptor.setTableUri(tablePath.toUri().toString());
  descriptor.setShardCount(2);

  final TableContext tableContext = TableContext.create(descriptor);
  final ShardContext shardContext = ShardContext.create(tableContext, "shard-00000000");

  // Paths derived from the shard directory and a unique index directory name.
  _shardPath = new Path(tablePath, "shard-00000000");
  final String indexDirName = "index_" + uuid;
  _path = new Path(_shardPath, indexDirName + ".commit");
  _fileSystem.mkdirs(_path);
  _badRowIdsPath = new Path(_shardPath, indexDirName + ".badrowids");
  _badIndexPath = new Path(_shardPath, indexDirName + ".badindex");
  _inUsePath = new Path(_shardPath, indexDirName + ".inuse");

  final Directory commitDirectory = new HdfsDirectory(configuration, _path);
  _mainDirectory = new HdfsDirectory(configuration, _shardPath);
  _fieldManager = tableContext.getFieldManager();

  final IndexWriterConfig conf = new IndexWriterConfig(LUCENE_VERSION, _fieldManager.getAnalyzerForIndex());
  // conf.setMergePolicy(NoMergePolicy.NO_COMPOUND_FILES);
  ((TieredMergePolicy) conf.getMergePolicy()).setUseCompoundFile(false);
  _commitWriter = new IndexWriter(commitDirectory, conf.clone());

  // Open and immediately close a writer so the main directory holds an empty index.
  new IndexWriter(_mainDirectory, conf.clone()).close();
  _mainWriter = new IndexWriter(_mainDirectory, conf.clone());
  BufferStore.initNewBuffer(128, 128 * 128);

  _indexImporter = new IndexImporter(_timer, getBlurIndex(shardContext, _mainDirectory), shardContext,
      TimeUnit.MINUTES, 10, 10, null, _mainDirectory);
}
 
Example 16
Source Project: lucene-solr   Source File: TestSearcherTaxonomyManager.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Exercises {@code SearcherTaxonomyManager} while indexing is in progress: an indexer
 * thread and a reopener thread run in the background while this thread repeatedly
 * acquires a searcher/taxonomy pair, runs a match-all facet query and releases the pair.
 * The loops end once the shared {@code stop} flag is set.
 */
public void testNRT() throws Exception {
  Directory dir = newDirectory();
  Directory taxoDir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
  // Don't allow tiny maxBufferedDocs; it can make this
  // test too slow:
  iwc.setMaxBufferedDocs(Math.max(500, iwc.getMaxBufferedDocs()));

  // MockRandom/AlcoholicMergePolicy are too slow:
  TieredMergePolicy tmp = new TieredMergePolicy();
  tmp.setFloorSegmentMB(.001);
  iwc.setMergePolicy(tmp);
  final IndexWriter w = new IndexWriter(dir, iwc);
  final DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(taxoDir);
  final FacetsConfig config = new FacetsConfig();
  config.setMultiValued("field", true);
  // Shared flag polled by the reopener loop and the search loop below.
  final AtomicBoolean stop = new AtomicBoolean();

  // How many unique facets to index before stopping:
  final int ordLimit = TEST_NIGHTLY ? 100000 : 6000;

  // presumably IndexerThread sets `stop` once ordLimit is reached -- its code is not shown here
  Thread indexer = new IndexerThread(w, config, tw, null, ordLimit, stop);

  final SearcherTaxonomyManager mgr = new SearcherTaxonomyManager(w, true, null, tw);

  // Background thread that calls maybeRefresh() after short random sleeps.
  Thread reopener = new Thread() {
      @Override
      public void run() {
        while(!stop.get()) {
          try {
            // Sleep for up to 20 msec:
            Thread.sleep(random().nextInt(20));

            if (VERBOSE) {
              System.out.println("TEST: reopen");
            }

            mgr.maybeRefresh();

            if (VERBOSE) {
              System.out.println("TEST: reopen done");
            }
          } catch (Exception ioe) {
            throw new RuntimeException(ioe);
          }
        }
      }
    };

  reopener.setName("reopener");
  reopener.start();

  indexer.setName("indexer");
  indexer.start();

  try {
    while (!stop.get()) {
      SearcherAndTaxonomy pair = mgr.acquire();
      try {
        //System.out.println("search maxOrd=" + pair.taxonomyReader.getSize());
        FacetsCollector sfc = new FacetsCollector();
        pair.searcher.search(new MatchAllDocsQuery(), sfc);
        Facets facets = getTaxonomyFacetCounts(pair.taxonomyReader, config, sfc);
        FacetResult result = facets.getTopChildren(10, "field");
        // Only check the facet result when the acquired reader actually has documents.
        if (pair.searcher.getIndexReader().numDocs() > 0) { 
          //System.out.println(pair.taxonomyReader.getSize());
          assertTrue(result.childCount > 0);
          assertTrue(result.labelValues.length > 0);
        }

        //if (VERBOSE) {
        //System.out.println("TEST: facets=" + FacetTestUtils.toString(results.get(0)));
        //}
      } finally {
        // Every acquire() is paired with a release(), even if an assertion fails.
        mgr.release(pair);
      }
    }
  } finally {
    // Wait for both background threads before closing anything they use.
    indexer.join();
    reopener.join();
  }

  if (VERBOSE) {
    System.out.println("TEST: now stop");
  }

  w.close();
  IOUtils.close(mgr, tw, taxoDir, dir);
}
 
Example 17
Source Project: lucene-solr   Source File: TieredMergePolicyFactory.java    License: Apache License 2.0 4 votes vote down vote up
/** Supplies a new {@link TieredMergePolicy} with default settings on every call. */
@Override
protected MergePolicy getMergePolicyInstance() {
  final TieredMergePolicy tiered = new TieredMergePolicy();
  return tiered;
}
 
Example 18
Source Project: lucene-solr   Source File: DefaultMergePolicyFactory.java    License: Apache License 2.0 4 votes vote down vote up
/** Returns a freshly constructed {@link TieredMergePolicy}; no settings are applied. */
@Override
public final MergePolicy getMergePolicy() {
  final TieredMergePolicy defaultPolicy = new TieredMergePolicy();
  return defaultPolicy;
}
 
Example 19
Source Project: examples   Source File: TreeMergeOutputFormat.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Merges all source shards into the index at {@code workDir}: the shard directories are
 * added with {@code addIndexes}, the index is optionally force-merged down to the
 * configured maximum number of segments, and the writer is closed. The heart beater is
 * always cancelled and closed at the end.
 *
 * @param context task context used for configuration, status messages and counters
 * @throws IOException if any index operation fails
 */
@Override
    public void close(TaskAttemptContext context) throws IOException {
      // Conversion factors for System.nanoTime() deltas. The original divided by
      // (10^9), which in Java is a bitwise XOR evaluating to 3, not one billion.
      final float nanosPerSecond = 1e9f;
      final long nanosPerMilli = 1000000L;

      LOG.debug("Task " + context.getTaskAttemptID() + " merging into dstDir: " + workDir + ", srcDirs: " + shards);
      writeShardNumberFile(context);      
      heartBeater.needHeartBeat();
      try {
        Directory mergedIndex = new HdfsDirectory(workDir, context.getConfiguration());
        
        // TODO: shouldn't we pull the Version from the solrconfig.xml?
        IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_CURRENT, null)
            .setOpenMode(OpenMode.CREATE).setUseCompoundFile(false)
            //.setMergePolicy(mergePolicy) // TODO: grab tuned MergePolicy from solrconfig.xml?
            //.setMergeScheduler(...) // TODO: grab tuned MergeScheduler from solrconfig.xml?
            ;
          
        if (LOG.isDebugEnabled()) {
          writerConfig.setInfoStream(System.out);
        }
//        writerConfig.setRAMBufferSizeMB(100); // improve performance
//        writerConfig.setMaxThreadStates(1);
        
        // disable compound file to improve performance
        // also see http://lucene.472066.n3.nabble.com/Questions-on-compound-file-format-td489105.html
        // also see defaults in SolrIndexConfig
        MergePolicy mergePolicy = writerConfig.getMergePolicy();
        LOG.debug("mergePolicy was: {}", mergePolicy);
        if (mergePolicy instanceof TieredMergePolicy) {
          ((TieredMergePolicy) mergePolicy).setNoCFSRatio(0.0);
//          ((TieredMergePolicy) mergePolicy).setMaxMergeAtOnceExplicit(10000);          
//          ((TieredMergePolicy) mergePolicy).setMaxMergeAtOnce(10000);       
//          ((TieredMergePolicy) mergePolicy).setSegmentsPerTier(10000);
        } else if (mergePolicy instanceof LogMergePolicy) {
          ((LogMergePolicy) mergePolicy).setNoCFSRatio(0.0);
        }
        LOG.info("Using mergePolicy: {}", mergePolicy);
        
        IndexWriter writer = new IndexWriter(mergedIndex, writerConfig);
        
        Directory[] indexes = new Directory[shards.size()];
        for (int i = 0; i < shards.size(); i++) {
          indexes[i] = new HdfsDirectory(shards.get(i), context.getConfiguration());
        }

        context.setStatus("Logically merging " + shards.size() + " shards into one shard");
        LOG.info("Logically merging " + shards.size() + " shards into one shard: " + workDir);
        long start = System.nanoTime();
        
        writer.addIndexes(indexes); 
        // TODO: avoid intermediate copying of files into dst directory; rename the files into the dir instead (cp -> rename) 
        // This can improve performance and turns this phase into a true "logical" merge, completing in constant time.
        // See https://issues.apache.org/jira/browse/LUCENE-4746
        
        long elapsedNanos = System.nanoTime() - start;
        if (LOG.isDebugEnabled()) {
          // The original subtracted a nanoTime() start from currentTimeMillis(), mixing two
          // unrelated clocks. Report elapsed milliseconds instead.
          // NOTE(review): milliseconds assumed because the old expression used currentTimeMillis() -- confirm the counter's unit.
          context.getCounter(SolrCounters.class.getName(), SolrCounters.LOGICAL_TREE_MERGE_TIME.toString()).increment(elapsedNanos / nanosPerMilli);
        }
        float secs = elapsedNanos / nanosPerSecond;
        LOG.info("Logical merge took {} secs", secs);        
        int maxSegments = context.getConfiguration().getInt(TreeMergeMapper.MAX_SEGMENTS_ON_TREE_MERGE, Integer.MAX_VALUE);
        context.setStatus("Optimizing Solr: forcing mtree merge down to " + maxSegments + " segments");
        LOG.info("Optimizing Solr: forcing tree merge down to {} segments", maxSegments);
        start = System.nanoTime();
        if (maxSegments < Integer.MAX_VALUE) {
          writer.forceMerge(maxSegments); 
          // TODO: consider perf enhancement for no-deletes merges: bulk-copy the postings data 
          // see http://lucene.472066.n3.nabble.com/Experience-with-large-merge-factors-tp1637832p1647046.html
        }
        elapsedNanos = System.nanoTime() - start;
        if (LOG.isDebugEnabled()) {
          context.getCounter(SolrCounters.class.getName(), SolrCounters.PHYSICAL_TREE_MERGE_TIME.toString()).increment(elapsedNanos / nanosPerMilli);
        }
        secs = elapsedNanos / nanosPerSecond;
        LOG.info("Optimizing Solr: done forcing tree merge down to {} segments in {} secs", maxSegments, secs);
        
        start = System.nanoTime();
        LOG.info("Optimizing Solr: Closing index writer");
        writer.close();
        secs = (System.nanoTime() - start) / nanosPerSecond;
        LOG.info("Optimizing Solr: Done closing index writer in {} secs", secs);
        context.setStatus("Done");
      } finally {
        heartBeater.cancelHeartBeat();
        heartBeater.close();
      }
    }
 
Example 20
/**
 * Repeatedly reopens a {@code FastHdfsKeyValueDirectory} on the same path and, on each
 * pass, performs a random number of commits. After every commit the set of files in the
 * single remaining commit point and the contents read by {@code readSegmentsGen} are
 * recorded; at the start of the next pass they are compared with what the freshly
 * opened directory reports, together with the document count.
 */
@Test
public void testMulipleCommitsAndReopens() throws IOException {
  IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer());
  conf.setMergeScheduler(new SerialMergeScheduler());
  TieredMergePolicy mergePolicy = (TieredMergePolicy) conf.getMergePolicy();
  mergePolicy.setUseCompoundFile(false);

  // State carried from one pass to the next.
  Set<String> fileSet = new TreeSet<String>();
  // The seed is printed so that a failing run can be reproduced.
  long seed = new Random().nextLong();
  System.out.println("Seed:" + seed);
  Random random = new Random(seed);
  int docCount = 0;
  int passes = 10;
  byte[] segmentsGenContents = null;
  for (int run = 0; run < passes; run++) {
    final FastHdfsKeyValueDirectory directory = new FastHdfsKeyValueDirectory(false, _timer, _configuration,
        new Path(_path, "test_multiple_commits_reopens"));
    // From the second pass on, the reopened directory must return the same
    // segments-gen bytes that were recorded after the last commit.
    if (segmentsGenContents != null) {
      byte[] segmentsGenContentsCurrent = readSegmentsGen(directory);
      assertTrue(Arrays.equals(segmentsGenContents, segmentsGenContentsCurrent));
    }
    assertFiles(fileSet, run, -1, directory);
    assertEquals(docCount, getDocumentCount(directory));
    // NOTE(review): the writer and directory are not closed in this visible code -- confirm that is intentional.
    IndexWriter writer = new IndexWriter(directory, conf.clone());
    int numberOfCommits = random.nextInt(100);
    for (int i = 0; i < numberOfCommits; i++) {
      assertFiles(fileSet, run, i, directory);
      addDocuments(writer, random.nextInt(100));
      // Before Commit
      writer.commit();
      // After Commit

      // Set files after commit
      {
        fileSet.clear();
        List<IndexCommit> listCommits = DirectoryReader.listCommits(directory);
        assertEquals(1, listCommits.size());
        IndexCommit indexCommit = listCommits.get(0);
        fileSet.addAll(indexCommit.getFileNames());
      }
      segmentsGenContents = readSegmentsGen(directory);
    }
    docCount = getDocumentCount(directory);
  }
}
 
Example 21
Source Project: crate   Source File: EsTieredMergePolicy.java    License: Apache License 2.0 4 votes vote down vote up
EsTieredMergePolicy() {
    super(new TieredMergePolicy());
    regularMergePolicy = (TieredMergePolicy) in;
    forcedMergePolicy = new TieredMergePolicy();
    forcedMergePolicy.setMaxMergedSegmentMB(Double.POSITIVE_INFINITY); // unlimited
}
 
Example 22
Source Project: lucene-solr   Source File: TestConfig.java    License: Apache License 2.0 2 votes vote down vote up
/**
 * Checks the default values of {@code SolrIndexConfig} loaded from
 * {@code solrconfig-defaults.xml}, and that the resulting {@link IndexWriterConfig}
 * uses a {@link TieredMergePolicy}, a {@code ConcurrentMergeScheduler} and no merged
 * segment warmer.
 *
 * <p>Each default that is checked increments {@code numDefaultsTested}; defaults expected
 * to be null also increment {@code numNullDefaults}. The last assertion compares
 * {@code numDefaultsTested} with the size of {@code sic.toMap(...)} plus
 * {@code numNullDefaults}.
 */
@Test
public void testDefaults() throws Exception {

  int numDefaultsTested = 0;
  int numNullDefaults = 0;

  SolrConfig sc = new SolrConfig(TEST_PATH().resolve("collection1"), "solrconfig-defaults.xml");
  SolrIndexConfig sic = sc.indexConfig;

  ++numDefaultsTested; assertEquals("default useCompoundFile", false, sic.useCompoundFile);

  ++numDefaultsTested; assertEquals("default maxBufferedDocs", -1, sic.maxBufferedDocs);

  ++numDefaultsTested; assertEquals("default ramBufferSizeMB", 100.0D, sic.ramBufferSizeMB, 0.0D);
  ++numDefaultsTested; assertEquals("default ramPerThreadHardLimitMB", -1, sic.ramPerThreadHardLimitMB);
  ++numDefaultsTested; assertEquals("default writeLockTimeout", -1, sic.writeLockTimeout);
  ++numDefaultsTested; assertEquals("default LockType", DirectoryFactory.LOCK_TYPE_NATIVE, sic.lockType);

  ++numDefaultsTested; assertEquals("default infoStream", InfoStream.NO_OUTPUT, sic.infoStream);

  ++numDefaultsTested; assertNotNull("default metrics", sic.metricsInfo);

  // The following defaults are expected to be null, so they count towards both counters.
  ++numDefaultsTested; ++numNullDefaults;
  assertNull("default mergePolicyFactoryInfo", sic.mergePolicyFactoryInfo);

  ++numDefaultsTested; ++numNullDefaults; assertNull("default mergeSchedulerInfo", sic.mergeSchedulerInfo);
  ++numDefaultsTested; ++numNullDefaults; assertNull("default mergedSegmentWarmerInfo", sic.mergedSegmentWarmerInfo);

  // NOTE(review): indexSchema is not referenced again in this method, and it is built
  // from `solrConfig` rather than the local `sc` -- confirm both are intentional.
  IndexSchema indexSchema = IndexSchemaFactory.buildIndexSchema("schema.xml", solrConfig);
  IndexWriterConfig iwc = sic.toIndexWriterConfig(h.getCore());

  assertNotNull("null mp", iwc.getMergePolicy());
  assertTrue("mp is not TieredMergePolicy", iwc.getMergePolicy() instanceof TieredMergePolicy);

  assertNotNull("null ms", iwc.getMergeScheduler());
  assertTrue("ms is not CMS", iwc.getMergeScheduler() instanceof ConcurrentMergeScheduler);

  assertNull("non-null mergedSegmentWarmer", iwc.getMergedSegmentWarmer());

  // Guards against a new default being added to toMap() without a matching check above.
  final int numDefaultsMapped = sic.toMap(new LinkedHashMap<>()).size();
  assertEquals("numDefaultsTested vs. numDefaultsMapped+numNullDefaults ="+sic.toMap(new LinkedHashMap<>()).keySet(), numDefaultsTested, numDefaultsMapped+numNullDefaults);
}