org.apache.lucene.index.LogByteSizeMergePolicy Java Examples

The following examples show how to use org.apache.lucene.index.LogByteSizeMergePolicy, a LogMergePolicy subclass that measures segment size in bytes when choosing segments to merge. Each snippet comes from an open-source project; the source file and license are noted above each example.
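Before the project examples, here is a minimal, self-contained sketch of the usual pattern: construct the policy, tune it, and attach it to an IndexWriterConfig. The index path and tuning values are illustrative assumptions only (they happen to match the Lucene defaults), not settings taken from any of the projects below.

import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LogByteSizeMergePolicy;
import org.apache.lucene.store.FSDirectory;

public class LogByteSizeMergePolicySketch {
  public static void main(String[] args) throws Exception {
    LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
    mp.setMergeFactor(10);     // how many same-level segments are merged at once
    mp.setMinMergeMB(1.6);     // smaller segments are treated as this size when levelling
    mp.setMaxMergeMB(2048.0);  // segments above this size are excluded from normal merges
    IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer())
        .setMergePolicy(mp);
    try (IndexWriter writer = new IndexWriter(
        FSDirectory.open(Paths.get("/tmp/lbsmp-demo")), iwc)) {
      // index documents here; background merges follow the policy above
    }
  }
}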
Example #1
Source File: Index.java    From dacapobench with Apache License 2.0
/**
 * Index all text files under a directory.
 */
public void main(final File INDEX_DIR, final String[] args) throws IOException {
  IndexWriterConfig iwConfig = new IndexWriterConfig();
  iwConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
  iwConfig.setMergePolicy(new LogByteSizeMergePolicy());
  IndexWriter writer = new IndexWriter(FSDirectory.open(Paths.get(INDEX_DIR.getCanonicalPath())), iwConfig);
  for (int arg = 0; arg < args.length; arg++) {
    final File docDir = new File(args[arg]);
    if (!docDir.exists() || !docDir.canRead()) {
      System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path");
      throw new IOException("Cannot read from document directory");
    }

    indexDocs(writer, docDir);
    System.out.println("Optimizing...");
    writer.forceMerge(1);
  }
  writer.close();
}
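A note on the example above: forceMerge(1) is called after each input directory is indexed, collapsing the whole index into a single segment every iteration. This favors search speed but slows bulk indexing, since each later iteration re-merges an ever-larger segment.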
 
Example #2
Source File: IndexUtil.java    From everywhere with Apache License 2.0
public static IndexWriter getIndexWriter(String indexPath, boolean create) throws IOException {
    Directory dir = FSDirectory.open(Paths.get(indexPath));
    Analyzer analyzer = new SmartChineseAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    LogMergePolicy mergePolicy = new LogByteSizeMergePolicy();
    mergePolicy.setMergeFactor(50);     // merge once 50 segments accumulate at a level
    mergePolicy.setMaxMergeDocs(5000);  // segments above 5000 docs are never merged further
    iwc.setMergePolicy(mergePolicy);    // without this call the policy would never take effect
    if (create){
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    } else {
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    }
    return new IndexWriter(dir, iwc);
}
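A note on the tuning above: a merge factor of 50 lets many small segments accumulate before they are merged, which speeds up bulk indexing at the cost of slower searches until merges catch up, while setMaxMergeDocs(5000) excludes segments that already hold more than 5,000 documents from further merging.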
 
Example #3
Source File: TestMergePolicyConfig.java    From lucene-solr with Apache License 2.0
public void testLogMergePolicyFactoryConfig() throws Exception {
  final boolean byteSizeMP = random().nextBoolean();
  final Class<? extends LogMergePolicy> mpClass = byteSizeMP
      ? LogByteSizeMergePolicy.class : LogDocMergePolicy.class;
  final Class<? extends MergePolicyFactory> mpfClass = byteSizeMP
      ? LogByteSizeMergePolicyFactory.class : LogDocMergePolicyFactory.class;

  System.setProperty("solr.test.log.merge.policy.factory", mpfClass.getName());

  implTestLogMergePolicyConfig("solrconfig-logmergepolicyfactory.xml", mpClass);
}
 
Example #4
Source File: TestDirectoryTaxonomyReader.java    From lucene-solr with Apache License 2.0
@Test
public void testOpenIfChangedMergedSegment() throws Exception {
  // test openIfChanged() when all index segments were merged - used to be
  // a bug in ParentArray, caught by testOpenIfChangedManySegments - only
  // this test is not random
  Directory dir = newDirectory();
  
  // hold onto IW to forceMerge
  // note how we don't close it, since DTW will close it.
  final IndexWriter iw = new IndexWriter(dir,
      new IndexWriterConfig(new MockAnalyzer(random()))
          .setMergePolicy(new LogByteSizeMergePolicy()));
  DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir) {
    @Override
    protected IndexWriter openIndexWriter(Directory directory,
        IndexWriterConfig config) throws IOException {
      return iw;
    }
  };
  
  TaxonomyReader reader = new DirectoryTaxonomyReader(writer);
  assertEquals(1, reader.getSize());
  assertEquals(1, reader.getParallelTaxonomyArrays().parents().length);

  // add category and call forceMerge -- this should flush IW and merge segments down to 1
  // in ParentArray.initFromReader, this used to fail assuming there are no parents.
  writer.addCategory(new FacetLabel("1"));
  iw.forceMerge(1);
  
  // now calling openIfChanged should trip on the bug
  TaxonomyReader newtr = TaxonomyReader.openIfChanged(reader);
  assertNotNull(newtr);
  reader.close();
  reader = newtr;
  assertEquals(2, reader.getSize());
  assertEquals(2, reader.getParallelTaxonomyArrays().parents().length);
  
  reader.close();
  writer.close();
  dir.close();
}
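Both this test and the one that follows rely on the same trick: openIndexWriter is overridden so the DirectoryTaxonomyWriter reuses a pre-built IndexWriter, letting the test call forceMerge(1) on it directly while the taxonomy writer remains responsible for closing it. LogByteSizeMergePolicy is a safe choice here because, as a LogMergePolicy subclass, it only merges adjacent segments, which the taxonomy index requires.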
 
Example #5
Source File: TestDirectoryTaxonomyReader.java    From lucene-solr with Apache License 2.0
@Test
public void testOpenIfChangedNoChangesButSegmentMerges() throws Exception {
  // test openIfChanged() when the taxonomy hasn't really changed, but segments
  // were merged. The NRT reader will be reopened, and ParentArray used to assert
  // that the new reader contains more ordinals than the old TaxonomyReader
  // version had
  Directory dir = newDirectory();
  
  // hold onto IW to forceMerge
  // note how we don't close it, since DTW will close it.
  final IndexWriter iw = new IndexWriter(dir,
      new IndexWriterConfig(new MockAnalyzer(random()))
          .setMergePolicy(new LogByteSizeMergePolicy()));
  DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir) {
    @Override
    protected IndexWriter openIndexWriter(Directory directory,
        IndexWriterConfig config) throws IOException {
      return iw;
    }
  };
  
  // add a category so that the following DTR open will cause a flush and 
  // a new segment will be created
  writer.addCategory(new FacetLabel("a"));
  
  TaxonomyReader reader = new DirectoryTaxonomyReader(writer);
  assertEquals(2, reader.getSize());
  assertEquals(2, reader.getParallelTaxonomyArrays().parents().length);

  // merge all the segments so that NRT reader thinks there's a change 
  iw.forceMerge(1);
  
  // now calling openIfChanged should trip on the wrong assert in ParentArray's ctor
  TaxonomyReader newtr = TaxonomyReader.openIfChanged(reader);
  assertNotNull(newtr);
  reader.close();
  reader = newtr;
  assertEquals(2, reader.getSize());
  assertEquals(2, reader.getParallelTaxonomyArrays().parents().length);
  
  reader.close();
  writer.close();
  dir.close();
}
 
Example #6
Source File: LogByteSizeMergePolicyFactory.java    From lucene-solr with Apache License 2.0
@Override
protected MergePolicy getMergePolicyInstance() {
  return new LogByteSizeMergePolicy();
}
 
Example #7
Source File: FileBasedSpellChecker.java    From lucene-solr with Apache License 2.0
private void loadExternalFileDictionary(SolrCore core, SolrIndexSearcher searcher) {
  try {
    IndexSchema schema = null == searcher ? core.getLatestSchema() : searcher.getSchema();
    // Get the field's analyzer
    if (fieldTypeName != null && schema.getFieldTypeNoEx(fieldTypeName) != null) {
      FieldType fieldType = schema.getFieldTypes().get(fieldTypeName);
      // Do index-time analysis using the given fieldType's analyzer
      Directory ramDir = new ByteBuffersDirectory();

      LogMergePolicy mp = new LogByteSizeMergePolicy();
      mp.setMergeFactor(300); // defer merging; the index is written once and force-merged below

      IndexWriter writer = new IndexWriter(
          ramDir,
          new IndexWriterConfig(fieldType.getIndexAnalyzer()).
              setMaxBufferedDocs(150).
              setMergePolicy(mp).
              setOpenMode(IndexWriterConfig.OpenMode.CREATE)
              // TODO: if we enable this, codec gets angry since field won't exist in the schema
              // .setCodec(core.getCodec())
      );

      List<String> lines = core.getResourceLoader().getLines(sourceLocation, characterEncoding);

      for (String s : lines) {
        Document d = new Document();
        d.add(new TextField(WORD_FIELD_NAME, s, Field.Store.NO));
        writer.addDocument(d);
      }
      writer.forceMerge(1);
      writer.close();

      dictionary = new HighFrequencyDictionary(DirectoryReader.open(ramDir),
              WORD_FIELD_NAME, 0.0f);
    } else {
      // check if character encoding is defined
      if (characterEncoding == null) {
        dictionary = new PlainTextDictionary(core.getResourceLoader().openResource(sourceLocation));
      } else {
        dictionary = new PlainTextDictionary(new InputStreamReader(core.getResourceLoader().openResource(sourceLocation), characterEncoding));
      }
    }
  } catch (IOException e) {
    log.error("Unable to load spellings", e);
  }
}
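A note on the design here: the spell-check dictionary index lives in an in-memory ByteBuffersDirectory and is written exactly once, so the very high merge factor (300) effectively postpones all merging until the final forceMerge(1) collapses the index into a single segment for fast dictionary reads.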
 
Example #8
Source File: DirectoryTaxonomyWriter.java    From lucene-solr with Apache License 2.0
/**
 * Create the {@link IndexWriterConfig} that would be used for opening the internal index writer.
 * <br>Extensions can configure the {@link IndexWriter} as they see fit,
 * including setting a {@link org.apache.lucene.index.MergeScheduler merge-scheduler}, an
 * {@link org.apache.lucene.index.IndexDeletionPolicy deletion-policy}, a different RAM buffer
 * size, etc.<br>
 * <br><b>NOTE:</b> the internal doc IDs of the configured index must not be altered.
 * To that end, categories are never deleted from the taxonomy index.
 * In addition, the merge policy in effect must not merge non-adjacent segments.
 * 
 * @see #openIndexWriter(Directory, IndexWriterConfig)
 * 
 * @param openMode see {@link OpenMode}
 */
protected IndexWriterConfig createIndexWriterConfig(OpenMode openMode) {
  // TODO: should we use a more optimized Codec?
  // The taxonomy has a unique structure, where each term is associated with one document

  // Make sure we use a MergePolicy which always merges adjacent segments and thus
  // keeps the doc IDs ordered as well (this is crucial for the taxonomy index).
  return new IndexWriterConfig(null).setOpenMode(openMode).setMergePolicy(
      new LogByteSizeMergePolicy());
}