Java Code Examples for org.apache.lucene.index.IndexWriterConfig#getMergePolicy()

The following examples show how to use org.apache.lucene.index.IndexWriterConfig#getMergePolicy(). They are taken from open source projects; the source file, project, and license are noted above each example.
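Most of the examples below follow the same pattern: create an IndexWriterConfig, call getMergePolicy() to obtain the default merge policy, check its concrete type, and tune it before constructing the IndexWriter. The snippet below is a minimal, self-contained sketch of that pattern, assuming a Lucene 5.x-or-later style API; the class name, index path, and tuning values are illustrative only and do not come from any of the projects listed here.

import java.io.IOException;
import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LogMergePolicy;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class MergePolicyTuningSketch {

  public static IndexWriter openTunedWriter(String indexPath) throws IOException {
    Directory dir = FSDirectory.open(Paths.get(indexPath));
    IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());

    // Inspect the merge policy that IndexWriterConfig picked by default
    // and tune it according to its concrete type.
    MergePolicy mp = iwc.getMergePolicy();
    if (mp instanceof TieredMergePolicy) {
      // Fewer segments per tier means more frequent merging.
      ((TieredMergePolicy) mp).setSegmentsPerTier(5);
    } else if (mp instanceof LogMergePolicy) {
      ((LogMergePolicy) mp).setMergeFactor(5);
    }

    return new IndexWriter(dir, iwc);
  }
}
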
Example 1
Source File: SimplePrimaryNode.java    From lucene-solr with Apache License 2.0
private static IndexWriter initWriter(int id, Random random, Path indexPath, boolean doCheckIndexOnClose) throws IOException {
  Directory dir = SimpleReplicaNode.getDirectory(random, id, indexPath, doCheckIndexOnClose);

  MockAnalyzer analyzer = new MockAnalyzer(random);
  analyzer.setMaxTokenLength(TestUtil.nextInt(random, 1, IndexWriter.MAX_TERM_LENGTH));
  IndexWriterConfig iwc = LuceneTestCase.newIndexWriterConfig(random, analyzer);

  MergePolicy mp = iwc.getMergePolicy();
  //iwc.setInfoStream(new PrintStreamInfoStream(System.out));

  // Force more frequent merging so we stress merge warming:
  if (mp instanceof TieredMergePolicy) {
    TieredMergePolicy tmp = (TieredMergePolicy) mp;
    tmp.setSegmentsPerTier(3);
    tmp.setMaxMergeAtOnce(3);
  } else if (mp instanceof LogMergePolicy) {
    LogMergePolicy lmp = (LogMergePolicy) mp;
    lmp.setMergeFactor(3);
  }

  IndexWriter writer = new IndexWriter(dir, iwc);

  TestUtil.reduceOpenFiles(writer);
  return writer;
}
 
Example 2
Source File: SolrIndexConfigTest.java    From lucene-solr with Apache License 2.0
@Test
public void testTieredMPSolrIndexConfigCreation() throws Exception {
  String solrConfigFileName = solrConfigFileNameTieredMergePolicyFactory;
  SolrConfig solrConfig = new SolrConfig(instanceDir, solrConfigFileName);
  SolrIndexConfig solrIndexConfig = new SolrIndexConfig(solrConfig, null, null);
  IndexSchema indexSchema = IndexSchemaFactory.buildIndexSchema(schemaFileName, solrConfig);
  
  h.getCore().setLatestSchema(indexSchema);
  IndexWriterConfig iwc = solrIndexConfig.toIndexWriterConfig(h.getCore());

  assertNotNull("null mp", iwc.getMergePolicy());
  assertTrue("mp is not TieredMergePolicy", iwc.getMergePolicy() instanceof TieredMergePolicy);
  TieredMergePolicy mp = (TieredMergePolicy) iwc.getMergePolicy();
  assertEquals("mp.maxMergeAtOnceExplicit", 19, mp.getMaxMergeAtOnceExplicit());
  assertEquals("mp.segmentsPerTier",9,(int)mp.getSegmentsPerTier());

  assertNotNull("null ms", iwc.getMergeScheduler());
  assertTrue("ms is not CMS", iwc.getMergeScheduler() instanceof ConcurrentMergeScheduler);
  ConcurrentMergeScheduler ms = (ConcurrentMergeScheduler) iwc.getMergeScheduler();
  assertEquals("ms.maxMergeCount", 987, ms.getMaxMergeCount());
  assertEquals("ms.maxThreadCount", 42, ms.getMaxThreadCount());
  assertEquals("ms.isAutoIOThrottle", true, ms.getAutoIOThrottle());

}
 
Example 3
Source File: SolrIndexConfigTest.java    From lucene-solr with Apache License 2.0
public void testSortingMPSolrIndexConfigCreation() throws Exception {
  final String expectedFieldName = "timestamp_i_dvo";
  final SortField.Type expectedFieldType = SortField.Type.INT;
  final boolean expectedFieldSortDescending = true;

  SolrConfig solrConfig = new SolrConfig(instanceDir, solrConfigFileNameSortingMergePolicyFactory);
  SolrIndexConfig solrIndexConfig = new SolrIndexConfig(solrConfig, null, null);
  assertNotNull(solrIndexConfig);
  IndexSchema indexSchema = IndexSchemaFactory.buildIndexSchema(schemaFileName, solrConfig);

  h.getCore().setLatestSchema(indexSchema);
  IndexWriterConfig iwc = solrIndexConfig.toIndexWriterConfig(h.getCore());

  final MergePolicy mergePolicy = iwc.getMergePolicy();
  assertNotNull("null mergePolicy", mergePolicy);
  assertTrue("mergePolicy ("+mergePolicy+") is not a SortingMergePolicy", mergePolicy instanceof SortingMergePolicy);
  final SortingMergePolicy sortingMergePolicy = (SortingMergePolicy) mergePolicy;
  final Sort expected = new Sort(new SortField(expectedFieldName, expectedFieldType, expectedFieldSortDescending));
  final Sort actual = sortingMergePolicy.getSort();
  assertEquals("SortingMergePolicy.getSort", expected, actual);
}
 
Example 4
Source File: TableShardCountCollapserTest.java    From incubator-retired-blur with Apache License 2.0
private static void createShard(Configuration configuration, int i, Path path, int totalShardCount)
    throws IOException {
  HdfsDirectory hdfsDirectory = new HdfsDirectory(configuration, path);
  IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer());
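  // The default merge policy of this IndexWriterConfig is a TieredMergePolicy,
  // which is why the cast below succeeds.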
  TieredMergePolicy mergePolicy = (TieredMergePolicy) conf.getMergePolicy();
  mergePolicy.setUseCompoundFile(false);
  IndexWriter indexWriter = new IndexWriter(hdfsDirectory, conf);

  Partitioner<IntWritable, IntWritable> partitioner = new HashPartitioner<IntWritable, IntWritable>();
  int partition = partitioner.getPartition(new IntWritable(i), null, totalShardCount);
  assertEquals(i, partition);

  Document doc = getDoc(i);
  indexWriter.addDocument(doc);
  indexWriter.close();
}
 
Example 5
Source File: IndexImporterTest.java    From incubator-retired-blur with Apache License 2.0
private void setupWriter(Configuration configuration) throws IOException {
  TableDescriptor tableDescriptor = new TableDescriptor();
  tableDescriptor.setName("test-table");
  String uuid = UUID.randomUUID().toString();

  tableDescriptor.setTableUri(new Path(_base, "table-table").toUri().toString());
  tableDescriptor.setShardCount(2);

  TableContext tableContext = TableContext.create(tableDescriptor);
  ShardContext shardContext = ShardContext.create(tableContext, "shard-00000000");
  Path tablePath = new Path(_base, "table-table");
  _shardPath = new Path(tablePath, "shard-00000000");
  String indexDirName = "index_" + uuid;
  _path = new Path(_shardPath, indexDirName + ".commit");
  _fileSystem.mkdirs(_path);
  _badRowIdsPath = new Path(_shardPath, indexDirName + ".badrowids");
  _badIndexPath = new Path(_shardPath, indexDirName + ".badindex");
  _inUsePath = new Path(_shardPath, indexDirName + ".inuse");
  Directory commitDirectory = new HdfsDirectory(configuration, _path);
  _mainDirectory = new HdfsDirectory(configuration, _shardPath);
  _fieldManager = tableContext.getFieldManager();
  Analyzer analyzerForIndex = _fieldManager.getAnalyzerForIndex();
  IndexWriterConfig conf = new IndexWriterConfig(LUCENE_VERSION, analyzerForIndex);
  // conf.setMergePolicy(NoMergePolicy.NO_COMPOUND_FILES);
  TieredMergePolicy mergePolicy = (TieredMergePolicy) conf.getMergePolicy();
  mergePolicy.setUseCompoundFile(false);
  _commitWriter = new IndexWriter(commitDirectory, conf.clone());

  // Make sure there's an empty index...
  new IndexWriter(_mainDirectory, conf.clone()).close();
  _mainWriter = new IndexWriter(_mainDirectory, conf.clone());
  BufferStore.initNewBuffer(128, 128 * 128);

  _indexImporter = new IndexImporter(_timer, getBlurIndex(shardContext, _mainDirectory), shardContext,
      TimeUnit.MINUTES, 10, 10, null, _mainDirectory);
}
 
Example 6
Source File: TestDirectoryTaxonomyReader.java    From lucene-solr with Apache License 2.0
@Test
public void testOpenIfChangedManySegments() throws Exception {
  // test openIfChanged() when the taxonomy contains many segments
  Directory dir = newDirectory();
  
  DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir) {
    @Override
    protected IndexWriterConfig createIndexWriterConfig(OpenMode openMode) {
      IndexWriterConfig conf = super.createIndexWriterConfig(openMode);
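      // DirectoryTaxonomyWriter's default config installs a LogByteSizeMergePolicy,
      // so the cast to LogMergePolicy holds; a merge factor of 2 triggers merges
      // very frequently as categories are added.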
      LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy();
      lmp.setMergeFactor(2);
      return conf;
    }
  };
  TaxonomyReader reader = new DirectoryTaxonomyReader(writer);
  
  int numRounds = random().nextInt(10) + 10;
  int numCategories = 1; // one for root
  for (int i = 0; i < numRounds; i++) {
    int numCats = random().nextInt(4) + 1;
    for (int j = 0; j < numCats; j++) {
      writer.addCategory(new FacetLabel(Integer.toString(i), Integer.toString(j)));
    }
    numCategories += numCats + 1 /* one for round-parent */;
    TaxonomyReader newtr = TaxonomyReader.openIfChanged(reader);
    assertNotNull(newtr);
    reader.close();
    reader = newtr;
    
    // assert categories
    assertEquals(numCategories, reader.getSize());
    int roundOrdinal = reader.getOrdinal(new FacetLabel(Integer.toString(i)));
    int[] parents = reader.getParallelTaxonomyArrays().parents();
    assertEquals(0, parents[roundOrdinal]); // round's parent is root
    for (int j = 0; j < numCats; j++) {
      int ord = reader.getOrdinal(new FacetLabel(Integer.toString(i), Integer.toString(j)));
      assertEquals(roundOrdinal, parents[ord]); // round's parent is root
    }
  }
  
  reader.close();
  writer.close();
  dir.close();
}
 
Example 7
Source File: TestSearchForDuplicates.java    From lucene-solr with Apache License 2.0
private void doTest(Random random, PrintWriter out, boolean useCompoundFiles, int MAX_DOCS) throws Exception {
    Directory directory = newDirectory();
    Analyzer analyzer = new MockAnalyzer(random);
    IndexWriterConfig conf = newIndexWriterConfig(analyzer);
    final MergePolicy mp = conf.getMergePolicy();
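    // A noCFSRatio of 1.0 writes every merged segment as a compound file;
    // 0.0 never uses compound files.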
    mp.setNoCFSRatio(useCompoundFiles ? 1.0 : 0.0);
    IndexWriter writer = new IndexWriter(directory, conf);
    if (VERBOSE) {
      System.out.println("TEST: now build index MAX_DOCS=" + MAX_DOCS);
    }

    for (int j = 0; j < MAX_DOCS; j++) {
      Document d = new Document();
      d.add(newTextField(PRIORITY_FIELD, HIGH_PRIORITY, Field.Store.YES));
      d.add(new StoredField(ID_FIELD, j));
      d.add(new NumericDocValuesField(ID_FIELD, j));
      writer.addDocument(d);
    }
    writer.close();

    // try a search without OR
    IndexReader reader = DirectoryReader.open(directory);
    IndexSearcher searcher = newSearcher(reader);

    Query query = new TermQuery(new Term(PRIORITY_FIELD, HIGH_PRIORITY));
    out.println("Query: " + query.toString(PRIORITY_FIELD));
    if (VERBOSE) {
      System.out.println("TEST: search query=" + query);
    }

    final Sort sort = new Sort(SortField.FIELD_SCORE,
                               new SortField(ID_FIELD, SortField.Type.INT));

    ScoreDoc[] hits = searcher.search(query, MAX_DOCS, sort).scoreDocs;
    printHits(out, hits, searcher);
    checkHits(hits, MAX_DOCS, searcher);

    // try a new search with OR
    searcher = newSearcher(reader);
    hits = null;

    BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder();
    booleanQuery.add(new TermQuery(new Term(PRIORITY_FIELD, HIGH_PRIORITY)), BooleanClause.Occur.SHOULD);
    booleanQuery.add(new TermQuery(new Term(PRIORITY_FIELD, MED_PRIORITY)), BooleanClause.Occur.SHOULD);
    out.println("Query: " + booleanQuery.build().toString(PRIORITY_FIELD));

    hits = searcher.search(booleanQuery.build(), MAX_DOCS, sort).scoreDocs;
    printHits(out, hits, searcher);
    checkHits(hits, MAX_DOCS, searcher);

    reader.close();
    directory.close();
}
 
Example 8
Source File: TestTopFieldCollectorEarlyTermination.java    From lucene-solr with Apache License 2.0
private void createRandomIndex(boolean singleSortedSegment) throws IOException {
  dir = newDirectory();
  numDocs = atLeast(150);
  final int numTerms = TestUtil.nextInt(random(), 1, numDocs / 5);
  Set<String> randomTerms = new HashSet<>();
  while (randomTerms.size() < numTerms) {
    randomTerms.add(TestUtil.randomSimpleString(random()));
  }
  terms = new ArrayList<>(randomTerms);
  final long seed = random().nextLong();
  final IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(new Random(seed)));
  if (iwc.getMergePolicy() instanceof MockRandomMergePolicy) {
    // MockRandomMP randomly wraps the leaf readers which makes merging angry
    iwc.setMergePolicy(newTieredMergePolicy());
  }
  iwc.setMergeScheduler(new SerialMergeScheduler()); // for reproducible tests
  iwc.setIndexSort(sort);
  iw = new RandomIndexWriter(new Random(seed), dir, iwc);
  iw.setDoRandomForceMerge(false); // don't do this, it may happen anyway with MockRandomMP
  for (int i = 0; i < numDocs; ++i) {
    final Document doc = randomDocument();
    iw.addDocument(doc);
    if (i == numDocs / 2 || (i != numDocs - 1 && random().nextInt(8) == 0)) {
      iw.commit();
    }
    if (random().nextInt(15) == 0) {
      final String term = RandomPicks.randomFrom(random(), terms);
      iw.deleteDocuments(new Term("s", term));
    }
  }
  if (singleSortedSegment) {
    iw.forceMerge(1);
  }
  else if (random().nextBoolean()) {
    iw.forceMerge(FORCE_MERGE_MAX_SEGMENT_COUNT);
  }
  reader = iw.getReader();
  if (reader.numDocs() == 0) {
    iw.addDocument(new Document());
    reader.close();
    reader = iw.getReader();
  }
}
 
Example 9
Source File: TreeMergeOutputFormat.java    From examples with Apache License 2.0
@Override
    public void close(TaskAttemptContext context) throws IOException {
      LOG.debug("Task " + context.getTaskAttemptID() + " merging into dstDir: " + workDir + ", srcDirs: " + shards);
      writeShardNumberFile(context);      
      heartBeater.needHeartBeat();
      try {
        Directory mergedIndex = new HdfsDirectory(workDir, context.getConfiguration());
        
        // TODO: shouldn't we pull the Version from the solrconfig.xml?
        IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_CURRENT, null)
            .setOpenMode(OpenMode.CREATE).setUseCompoundFile(false)
            //.setMergePolicy(mergePolicy) // TODO: grab tuned MergePolicy from solrconfig.xml?
            //.setMergeScheduler(...) // TODO: grab tuned MergeScheduler from solrconfig.xml?
            ;
          
        if (LOG.isDebugEnabled()) {
          writerConfig.setInfoStream(System.out);
        }
//        writerConfig.setRAMBufferSizeMB(100); // improve performance
//        writerConfig.setMaxThreadStates(1);
        
        // disable compound file to improve performance
        // also see http://lucene.472066.n3.nabble.com/Questions-on-compound-file-format-td489105.html
        // also see defaults in SolrIndexConfig
        MergePolicy mergePolicy = writerConfig.getMergePolicy();
        LOG.debug("mergePolicy was: {}", mergePolicy);
        if (mergePolicy instanceof TieredMergePolicy) {
          ((TieredMergePolicy) mergePolicy).setNoCFSRatio(0.0);
//          ((TieredMergePolicy) mergePolicy).setMaxMergeAtOnceExplicit(10000);          
//          ((TieredMergePolicy) mergePolicy).setMaxMergeAtOnce(10000);       
//          ((TieredMergePolicy) mergePolicy).setSegmentsPerTier(10000);
        } else if (mergePolicy instanceof LogMergePolicy) {
          ((LogMergePolicy) mergePolicy).setNoCFSRatio(0.0);
        }
        LOG.info("Using mergePolicy: {}", mergePolicy);
        
        IndexWriter writer = new IndexWriter(mergedIndex, writerConfig);
        
        Directory[] indexes = new Directory[shards.size()];
        for (int i = 0; i < shards.size(); i++) {
          indexes[i] = new HdfsDirectory(shards.get(i), context.getConfiguration());
        }

        context.setStatus("Logically merging " + shards.size() + " shards into one shard");
        LOG.info("Logically merging " + shards.size() + " shards into one shard: " + workDir);
        long start = System.nanoTime();
        
        writer.addIndexes(indexes); 
        // TODO: avoid intermediate copying of files into dst directory; rename the files into the dir instead (cp -> rename) 
        // This can improve performance and turns this phase into a true "logical" merge, completing in constant time.
        // See https://issues.apache.org/jira/browse/LUCENE-4746
        
        if (LOG.isDebugEnabled()) {
          // counter expects milliseconds; start was taken with nanoTime()
          context.getCounter(SolrCounters.class.getName(), SolrCounters.LOGICAL_TREE_MERGE_TIME.toString()).increment((System.nanoTime() - start) / 1000000);
        }
        float secs = (System.nanoTime() - start) / 1.0e9f; // nanos to seconds
        LOG.info("Logical merge took {} secs", secs);        
        int maxSegments = context.getConfiguration().getInt(TreeMergeMapper.MAX_SEGMENTS_ON_TREE_MERGE, Integer.MAX_VALUE);
        context.setStatus("Optimizing Solr: forcing mtree merge down to " + maxSegments + " segments");
        LOG.info("Optimizing Solr: forcing tree merge down to {} segments", maxSegments);
        start = System.nanoTime();
        if (maxSegments < Integer.MAX_VALUE) {
          writer.forceMerge(maxSegments); 
          // TODO: consider perf enhancement for no-deletes merges: bulk-copy the postings data 
          // see http://lucene.472066.n3.nabble.com/Experience-with-large-merge-factors-tp1637832p1647046.html
        }
        if (LOG.isDebugEnabled()) {
          // counter expects milliseconds; start was taken with nanoTime()
          context.getCounter(SolrCounters.class.getName(), SolrCounters.PHYSICAL_TREE_MERGE_TIME.toString()).increment((System.nanoTime() - start) / 1000000);
        }
        secs = (System.nanoTime() - start) / 1.0e9f;
        LOG.info("Optimizing Solr: done forcing tree merge down to {} segments in {} secs", maxSegments, secs);
        
        start = System.nanoTime();
        LOG.info("Optimizing Solr: Closing index writer");
        writer.close();
        secs = (System.nanoTime() - start) / 1.0e9f;
        LOG.info("Optimizing Solr: Done closing index writer in {} secs", secs);
        context.setStatus("Done");
      } finally {
        heartBeater.cancelHeartBeat();
        heartBeater.close();
      }
    }
 
Example 10
Source File: FastHdfsKeyValueDirectoryTest.java    From incubator-retired-blur with Apache License 2.0
@Test
public void testMulipleCommitsAndReopens() throws IOException {
  IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer());
  conf.setMergeScheduler(new SerialMergeScheduler());
  TieredMergePolicy mergePolicy = (TieredMergePolicy) conf.getMergePolicy();
  mergePolicy.setUseCompoundFile(false);

  Set<String> fileSet = new TreeSet<String>();
  long seed = new Random().nextLong();
  System.out.println("Seed:" + seed);
  Random random = new Random(seed);
  int docCount = 0;
  int passes = 10;
  byte[] segmentsGenContents = null;
  for (int run = 0; run < passes; run++) {
    final FastHdfsKeyValueDirectory directory = new FastHdfsKeyValueDirectory(false, _timer, _configuration,
        new Path(_path, "test_multiple_commits_reopens"));
    if (segmentsGenContents != null) {
      byte[] segmentsGenContentsCurrent = readSegmentsGen(directory);
      assertTrue(Arrays.equals(segmentsGenContents, segmentsGenContentsCurrent));
    }
    assertFiles(fileSet, run, -1, directory);
    assertEquals(docCount, getDocumentCount(directory));
    IndexWriter writer = new IndexWriter(directory, conf.clone());
    int numberOfCommits = random.nextInt(100);
    for (int i = 0; i < numberOfCommits; i++) {
      assertFiles(fileSet, run, i, directory);
      addDocuments(writer, random.nextInt(100));
      // Before Commit
      writer.commit();
      // After Commit

      // Set files after commit
      {
        fileSet.clear();
        List<IndexCommit> listCommits = DirectoryReader.listCommits(directory);
        assertEquals(1, listCommits.size());
        IndexCommit indexCommit = listCommits.get(0);
        fileSet.addAll(indexCommit.getFileNames());
      }
      segmentsGenContents = readSegmentsGen(directory);
    }
    docCount = getDocumentCount(directory);
  }
}