Java Code Examples for org.apache.lucene.index.IndexWriterConfig.OpenMode#CREATE

The following examples show how to use org.apache.lucene.index.IndexWriterConfig.OpenMode#CREATE. You can vote up the examples you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: IndexFiles.java    From Java-Data-Science-Cookbook with MIT License 6 votes vote down vote up
/**
 * Indexes a single file: stores its path, records its last-modified time,
 * and tokenizes its contents. Adds the document when the writer was opened
 * in CREATE mode, otherwise replaces any prior version keyed by path.
 */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
	try (InputStream in = Files.newInputStream(file)) {
		// Build the document: stored path, point-indexed timestamp, tokenized body.
		Document document = new Document();
		document.add(new StringField("path", file.toString(), Field.Store.YES));
		document.add(new LongPoint("modified", lastModified));
		document.add(new TextField("contents",
				new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))));

		boolean freshIndex = writer.getConfig().getOpenMode() == OpenMode.CREATE;
		if (freshIndex) {
			// Brand-new index: no earlier copy of this document can exist.
			System.out.println("adding " + file);
			writer.addDocument(document);
		} else {
			// Possibly re-indexing: replace any document with the same path.
			System.out.println("updating " + file);
			writer.updateDocument(new Term("path", file.toString()), document);
		}
	}
}
 
Example 2
Source File: PersistentSnapshotDeletionPolicy.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * {@link PersistentSnapshotDeletionPolicy} wraps another
 * {@link IndexDeletionPolicy} to enable flexible snapshotting.
 * 
 * @param primary
 *          the {@link IndexDeletionPolicy} that is used on non-snapshotted
 *          commits. Snapshotted commits, by definition, are not deleted until
 *          explicitly released via {@link #release}.
 * @param dir
 *          the {@link Directory} which will be used to persist the snapshots
 *          information.
 * @param mode
 *          specifies whether a new index should be created, deleting all
 *          existing snapshots information (immediately), or open an existing
 *          index, initializing the class with the snapshots information.
 */
public PersistentSnapshotDeletionPolicy(IndexDeletionPolicy primary,
    Directory dir, OpenMode mode) throws IOException {
  super(primary);

  this.dir = dir;

  // CREATE mode wipes any previously persisted snapshot state before loading.
  if (mode == OpenMode.CREATE) {
    clearPriorSnapshots();
  }

  // Initializes in-memory snapshot state (including nextWriteGen) from dir.
  loadPriorSnapshots();

  // APPEND requires pre-existing snapshot data; a write generation of 0
  // means nothing was loaded, so fail fast rather than silently start fresh.
  if (mode == OpenMode.APPEND && nextWriteGen == 0) {
    throw new IllegalStateException("no snapshots stored in this directory");
  }
}
 
Example 3
Source File: TestMultipleIndexFields.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
@Test
public void testDefault() throws Exception {
  Directory mainDir = newDirectory();
  Directory taxonomyDir = newDirectory();

  // One writer for the main index, one for the taxonomy side index.
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), mainDir,
      newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)));
  TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxonomyDir, OpenMode.CREATE);
  FacetsConfig config = getConfig();

  seedIndex(taxoWriter, indexWriter, config);

  IndexReader reader = indexWriter.getReader();
  taxoWriter.commit();

  // Open the taxonomy for reading and search the seeded index.
  TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxonomyDir);
  IndexSearcher searcher = newSearcher(reader);
  FacetsCollector collector = performSearch(taxoReader, reader, searcher);

  // With no custom field mappings, all counts land in the default field.
  assertCorrectResults(getTaxonomyFacetCounts(taxoReader, config, collector));
  assertOrdinalsExist("$facets", reader);

  indexWriter.close();
  IOUtils.close(taxoReader, reader, taxoWriter, mainDir, taxonomyDir);
}
 
Example 4
Source File: TestMultipleIndexFields.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
@Test
public void testCustom() throws Exception {
  Directory mainDir = newDirectory();
  Directory taxonomyDir = newDirectory();

  // One writer for the main index, one for the taxonomy side index.
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), mainDir,
      newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)));
  TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxonomyDir, OpenMode.CREATE);

  // Route "Author" facets into a dedicated index field.
  FacetsConfig config = getConfig();
  config.setIndexFieldName("Author", "$author");
  seedIndex(taxoWriter, indexWriter, config);

  IndexReader reader = indexWriter.getReader();
  taxoWriter.commit();

  // Open the taxonomy for reading and search the seeded index.
  TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxonomyDir);
  IndexSearcher searcher = newSearcher(reader);
  FacetsCollector collector = performSearch(taxoReader, reader, searcher);

  // "Author" counts come from "$author"; everything else from the default field.
  Map<String,Facets> perDimension = new HashMap<>();
  perDimension.put("Author", getTaxonomyFacetCounts(taxoReader, config, collector, "$author"));
  Facets facets = new MultiFacets(perDimension, getTaxonomyFacetCounts(taxoReader, config, collector));

  assertCorrectResults(facets);

  assertOrdinalsExist("$facets", reader);
  assertOrdinalsExist("$author", reader);

  indexWriter.close();
  IOUtils.close(taxoReader, reader, taxoWriter, mainDir, taxonomyDir);
}
 
Example 5
Source File: TestDirectoryTaxonomyWriter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
@Test
public void testRecreateAndRefresh() throws Exception {
  // DirTaxoWriter lost the INDEX_EPOCH property if it was opened in
  // CREATE_OR_APPEND (or commit(userData) called twice), which could lead to
  // DirTaxoReader succeeding to refresh().
  try (Directory dir = newDirectory()) {

    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE_OR_APPEND, NO_OP_CACHE);
    touchTaxo(taxoWriter, new FacetLabel("a"));

    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(dir);

    // a second commit while the reader is open, so refresh has something to see
    touchTaxo(taxoWriter, new FacetLabel("b"));

    // refresh the reader and verify the epoch survived the CREATE_OR_APPEND open
    TaxonomyReader newtr = TaxonomyReader.openIfChanged(taxoReader);
    taxoReader.close();
    taxoReader = newtr;
    assertEquals(1, Integer.parseInt(taxoReader.getCommitUserData().get(DirectoryTaxonomyWriter.INDEX_EPOCH)));

    // now recreate the taxonomy, and check that the epoch is preserved after opening DirTW again.
    taxoWriter.close();

    // OpenMode.CREATE recreates the taxonomy, which should bump the epoch
    taxoWriter = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE, NO_OP_CACHE);
    touchTaxo(taxoWriter, new FacetLabel("c"));
    taxoWriter.close();

    // reopen in CREATE_OR_APPEND: the bumped epoch must not be lost
    taxoWriter = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE_OR_APPEND, NO_OP_CACHE);
    touchTaxo(taxoWriter, new FacetLabel("d"));
    taxoWriter.close();

    // refresh the old reader; the epoch should now read 2 (one recreate happened)
    newtr = TaxonomyReader.openIfChanged(taxoReader);
    taxoReader.close();
    taxoReader = newtr;
    assertEquals(2, Integer.parseInt(taxoReader.getCommitUserData().get(DirectoryTaxonomyWriter.INDEX_EPOCH)));
    taxoReader.close();
  }
}
 
Example 6
Source File: TestDirectoryTaxonomyWriter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
@Test
public void testReaderFreshness() throws Exception {
  // The writer's internal index reader must always be fresh: adding the same
  // category twice has to return the same ordinal, even when the cache has
  // just evicted the category being added (the scenario that used to fail).
  Directory dir = newDirectory();
  DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE, NO_OP_CACHE);
  int firstOrdinal = taxoWriter.addCategory(new FacetLabel("a"));
  int secondOrdinal = taxoWriter.addCategory(new FacetLabel("a"));
  assertTrue("ordinal for same category that is added twice should be the same !", firstOrdinal == secondOrdinal);
  taxoWriter.close();
  dir.close();
}
 
Example 7
Source File: TestDirectoryTaxonomyReader.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
@Test
public void testOpenIfChangedReuseAfterRecreate() throws Exception {
  // tests that if the taxonomy is recreated, no data is reused from the previous taxonomy
  Directory dir = newDirectory();
  DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
  FacetLabel labelA = new FacetLabel("a");
  writer.addCategory(labelA);
  writer.close();

  // warm the first reader's ordinal and path caches with "a"
  DirectoryTaxonomyReader oldReader = new DirectoryTaxonomyReader(dir);
  assertEquals(1, oldReader.getOrdinal(labelA));
  assertEquals(labelA, oldReader.getPath(1));

  // recreate the taxonomy from scratch with a different category
  writer = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE);
  FacetLabel labelB = new FacetLabel("b");
  writer.addCategory(labelB);
  writer.close();

  DirectoryTaxonomyReader newReader = TaxonomyReader.openIfChanged(oldReader);
  assertNotNull(newReader);

  // warm the new reader's caches with "b"
  assertEquals(1, newReader.getOrdinal(labelB));
  assertEquals(labelB, newReader.getPath(1));

  // the old reader must still see only the pre-recreate taxonomy ("a", not "b")
  assertEquals(TaxonomyReader.INVALID_ORDINAL, oldReader.getOrdinal(labelB));
  assertEquals(labelA, oldReader.getPath(1));

  // the new reader must not have inherited any data from the old one
  assertEquals(TaxonomyReader.INVALID_ORDINAL, newReader.getOrdinal(labelA));
  assertEquals(labelB, newReader.getPath(1));

  newReader.close();
  oldReader.close();
  dir.close();
}
 
Example 8
Source File: TestPersistentSnapshotDeletionPolicy.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
@Test
public void testNoSnapshotInfos() throws Exception {
  // Opening in CREATE mode on an empty directory must succeed even though
  // there is no persisted snapshot data to load.
  Directory dir = newDirectory();
  new PersistentSnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy(), dir, OpenMode.CREATE);
  dir.close();
}
 
Example 9
Source File: SolrSnapshotMetaDataManager.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * A constructor.
 *
 * @param dir The directory where the snapshot meta-data is stored.
 * @param mode CREATE If previous meta-data should be erased.
 *             APPEND If previous meta-data should be read and updated.
 *             CREATE_OR_APPEND Creates a new meta-data structure if one does not exist
 *                              Updates the existing structure if one exists.
 * @throws IOException in case of errors.
 */
public SolrSnapshotMetaDataManager(SolrCore solrCore, Directory dir, OpenMode mode) throws IOException {
  this.solrCore = solrCore;
  this.dir = dir;

  // CREATE mode erases any previously persisted snapshot metadata first.
  if (mode == OpenMode.CREATE) {
    deleteSnapshotMetadataFiles();
  }

  // Initializes in-memory snapshot state (including nextWriteGen) from dir.
  loadFromSnapshotMetadataFile();

  // APPEND requires pre-existing metadata; a write generation of 0 means
  // nothing was loaded, so fail fast rather than silently start fresh.
  if (mode == OpenMode.APPEND && nextWriteGen == 0) {
    throw new IllegalStateException("no snapshots stored in this directory");
  }
}
 
Example 10
Source File: IndexFiles.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Indexes a single document.
 *
 * <p>Adds the file's path (stored, not tokenized), its last-modified time
 * (as a {@code LongPoint}), and its tokenized contents. In CREATE mode the
 * document is simply added; otherwise it replaces any existing document
 * with the same path.
 *
 * @param writer the index writer to add/update the document through
 * @param file the file to index
 * @param lastModified the file's last-modified time, in milliseconds
 * @throws IOException if the file cannot be read
 */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
  try (InputStream stream = Files.newInputStream(file)) {
    // make a new, empty document
    Document doc = new Document();
    
    // Add the path of the file as a field named "path".  Use a
    // field that is indexed (i.e. searchable), but don't tokenize 
    // the field into separate words and don't index term frequency
    // or positional information:
    Field pathField = new StringField("path", file.toString(), Field.Store.YES);
    doc.add(pathField);
    
    // Add the last modified date of the file a field named "modified".
    // Use a LongPoint that is indexed (i.e. efficiently filterable with
    // PointRangeQuery).  This indexes to milli-second resolution, which
    // is often too fine.  You could instead create a number based on
    // year/month/day/hour/minutes/seconds, down the resolution you require.
    // For example the long value 2011021714 would mean
    // February 17, 2011, 2-3 PM.
    doc.add(new LongPoint("modified", lastModified));
    
    // Add the contents of the file to a field named "contents".  Specify a Reader,
    // so that the text of the file is tokenized and indexed, but not stored.
    // Note that FileReader expects the file to be in UTF-8 encoding.
    // If that's not the case searching for special characters will fail.
    doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));
    
    if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
      // New index, so we just add the document (no old document can be there):
      System.out.println("adding " + file);
      writer.addDocument(doc);
    } else {
      // Existing index (an old copy of this document may have been indexed) so 
      // we use updateDocument instead to replace the old one matching the exact 
      // path, if present:
      System.out.println("updating " + file);
      writer.updateDocument(new Term("path", file.toString()), doc);
    }
  }
}
 
Example 11
Source File: TestMultipleIndexFields.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies that two dimensions ("Band" and "Composer") can share a single
 * custom facet index field ("$music") and still produce correct counts.
 */
@Test
public void testTwoCustomsSameField() throws Exception {
  Directory indexDir = newDirectory();
  Directory taxoDir = newDirectory();
  
  // create and open an index writer
  RandomIndexWriter iw = new RandomIndexWriter(random(), indexDir, newIndexWriterConfig(
      new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)));
  // create and open a taxonomy writer
  TaxonomyWriter tw = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);

  // Both dimensions are routed into the same "$music" index field.
  FacetsConfig config = getConfig();
  config.setIndexFieldName("Band", "$music");
  config.setIndexFieldName("Composer", "$music");
  seedIndex(tw, iw, config);

  IndexReader ir = iw.getReader();
  tw.commit();

  // prepare index reader and taxonomy.
  TaxonomyReader tr = new DirectoryTaxonomyReader(taxoDir);

  // prepare searcher to search against
  IndexSearcher searcher = newSearcher(ir);

  FacetsCollector sfc = performSearch(tr, ir, searcher);

  // Both dimensions read their counts from the shared "$music" Facets instance.
  Map<String,Facets> facetsMap = new HashMap<>();
  Facets facets2 = getTaxonomyFacetCounts(tr, config, sfc, "$music");
  facetsMap.put("Band", facets2);
  facetsMap.put("Composer", facets2);
  Facets facets = new MultiFacets(facetsMap, getTaxonomyFacetCounts(tr, config, sfc));

  // Obtain facets results and hand-test them
  assertCorrectResults(facets);

  assertOrdinalsExist("$facets", ir);
  // FIX: this assertion was accidentally duplicated; one check is sufficient.
  assertOrdinalsExist("$music", ir);

  iw.close();
  IOUtils.close(tr, ir, tw, indexDir, taxoDir);
}
 
Example 12
Source File: TestMultipleIndexFields.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
@Test
public void testDifferentFieldsAndText() throws Exception {
  Directory mainDir = newDirectory();
  Directory taxonomyDir = newDirectory();

  // One writer for the main index, one for the taxonomy side index.
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), mainDir,
      newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)));
  TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxonomyDir, OpenMode.CREATE);

  // Route each dimension into its own dedicated index field.
  FacetsConfig config = getConfig();
  config.setIndexFieldName("Band", "$bands");
  config.setIndexFieldName("Composer", "$composers");
  seedIndex(taxoWriter, indexWriter, config);

  IndexReader reader = indexWriter.getReader();
  taxoWriter.commit();

  // Open the taxonomy for reading and search the seeded index.
  TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxonomyDir);
  IndexSearcher searcher = newSearcher(reader);
  FacetsCollector collector = performSearch(taxoReader, reader, searcher);

  // Each dimension reads its counts from its own field; the rest fall back
  // to the default field.
  Map<String,Facets> perDimension = new HashMap<>();
  perDimension.put("Band", getTaxonomyFacetCounts(taxoReader, config, collector, "$bands"));
  perDimension.put("Composer", getTaxonomyFacetCounts(taxoReader, config, collector, "$composers"));
  Facets facets = new MultiFacets(perDimension, getTaxonomyFacetCounts(taxoReader, config, collector));

  assertCorrectResults(facets);
  assertOrdinalsExist("$facets", reader);
  assertOrdinalsExist("$bands", reader);
  assertOrdinalsExist("$composers", reader);

  indexWriter.close();
  IOUtils.close(taxoReader, reader, taxoWriter, mainDir, taxonomyDir);
}
 
Example 13
Source File: TestMultipleIndexFields.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies a mixed mapping: "Band" and "Composer" share the "$music" field
 * while "Author" has its own "$literature" field.
 */
@Test
public void testSomeSameSomeDifferent() throws Exception {
  Directory indexDir = newDirectory();
  Directory taxoDir = newDirectory();
  
  // create and open an index writer
  RandomIndexWriter iw = new RandomIndexWriter(random(), indexDir, newIndexWriterConfig(
      new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)));
  // create and open a taxonomy writer
  TaxonomyWriter tw = new DirectoryTaxonomyWriter(taxoDir, OpenMode.CREATE);

  // Two dimensions share "$music"; "Author" gets its own field.
  FacetsConfig config = getConfig();
  config.setIndexFieldName("Band", "$music");
  config.setIndexFieldName("Composer", "$music");
  config.setIndexFieldName("Author", "$literature");
  seedIndex(tw, iw, config);

  IndexReader ir = iw.getReader();
  tw.commit();

  // prepare index reader and taxonomy.
  TaxonomyReader tr = new DirectoryTaxonomyReader(taxoDir);

  // prepare searcher to search against
  IndexSearcher searcher = newSearcher(ir);

  FacetsCollector sfc = performSearch(tr, ir, searcher);

  Map<String,Facets> facetsMap = new HashMap<>();
  Facets facets2 = getTaxonomyFacetCounts(tr, config, sfc, "$music");
  facetsMap.put("Band", facets2);
  facetsMap.put("Composer", facets2);
  facetsMap.put("Author", getTaxonomyFacetCounts(tr, config, sfc, "$literature"));
  Facets facets = new MultiFacets(facetsMap, getTaxonomyFacetCounts(tr, config, sfc));

  // Obtain facets results and hand-test them
  assertCorrectResults(facets);
  assertOrdinalsExist("$music", ir);
  assertOrdinalsExist("$literature", ir);

  iw.close();
  // FIX: iw was also passed to IOUtils.close() here, closing it a second time;
  // the sibling tests close iw once and then IOUtils.close the rest.
  IOUtils.close(tr, ir, tw, indexDir, taxoDir);
}
 
Example 14
Source File: TestDirectoryTaxonomyReader.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
// Repeatedly recreates the taxonomy (OpenMode.CREATE) with a random number of
// categories and checks that a reader — either freshly opened or refreshed via
// openIfChanged, per closeReader — sees exactly the recreated taxonomy's size.
private void doTestReadRecreatedTaxonomy(Random random, boolean closeReader) throws Exception {
  Directory dir = null;
  TaxonomyWriter tw = null;
  TaxonomyReader tr = null;
  
  // prepare a few categories
  int  n = 10;
  FacetLabel[] cp = new FacetLabel[n];
  for (int i=0; i<n; i++) {
    cp[i] = new FacetLabel("a", Integer.toString(i));
  }
  
  try {
    dir = newDirectory();
    
    // seed an initial taxonomy so the reader has a baseline size to compare against
    tw = new DirectoryTaxonomyWriter(dir);
    tw.addCategory(new FacetLabel("a"));
    tw.close();
    
    tr = new DirectoryTaxonomyReader(dir);
    int baseNumCategories = tr.getSize();
    
    for (int i=0; i<n; i++) {
      int k = random.nextInt(n);
      // OpenMode.CREATE discards the previous taxonomy each iteration
      tw = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE);
      for (int j = 0; j <= k; j++) {
        tw.addCategory(cp[j]);
      }
      tw.close();
      if (closeReader) {
        // reopen from scratch
        tr.close();
        tr = new DirectoryTaxonomyReader(dir);
      } else {
        // refresh in place; must not be null since the taxonomy changed
        TaxonomyReader newtr = TaxonomyReader.openIfChanged(tr);
        assertNotNull(newtr);
        tr.close();
        tr = newtr;
      }
      // k+1 categories were added on top of the baseline (root + "a")
      assertEquals("Wrong #categories in taxonomy (i="+i+", k="+k+")", baseNumCategories + 1 + k, tr.getSize());
    }
  } finally {
    IOUtils.close(tr, tw, dir);
  }
}
 
Example 15
Source File: TestPersistentSnapshotDeletionPolicy.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
// Builds a fresh persistent snapshot policy over dir; CREATE mode discards
// any snapshot state a previous run may have left behind.
private SnapshotDeletionPolicy getDeletionPolicy(Directory dir) throws IOException {
  KeepOnlyLastCommitDeletionPolicy primary = new KeepOnlyLastCommitDeletionPolicy();
  return new PersistentSnapshotDeletionPolicy(primary, dir, OpenMode.CREATE);
}
 
Example 16
Source File: BuildIndex.java    From fnlp with GNU Lesser General Public License v3.0 4 votes vote down vote up
/**
 * Builds a small Lucene index over a few hard-coded Chinese sentences,
 * segmenting them with the FNLP analyzer, and prints the elapsed time.
 *
 * @param args unused
 * @throws IOException if the index directory cannot be opened or written
 * @throws LoadModelException if the FNLP segmentation models cannot be loaded
 */
public static void main(String[] args) throws IOException, LoadModelException {
	String indexPath = "../tmp/lucene";
	System.out.println("Indexing to directory '" + indexPath  + "'...");
	Date start = new Date();
	// CNFactory must be initialized before the FNLP analyzer is used.
	CNFactory factory = CNFactory.getInstance("../models",Models.SEG_TAG);
	Analyzer analyzer = new FNLPAnalyzer(Version.LUCENE_47);
	IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_47, analyzer);
	iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);

	String[] strs = new String[]{
			"终端的保修期为一年。",
			"凡在保修期内非人为损坏,均可免费保修。",
			"人为损坏的终端将视情况收取维修费用。",
			"中国"
	};

	// FIX: the Directory was previously never closed (a dir.close() call was
	// left commented out with a note asking whether it was missing).
	// try-with-resources now releases both the writer and the directory.
	try (Directory dir = FSDirectory.open(new File(indexPath));
			IndexWriter writer = new IndexWriter(dir, iwc)) {
		for (int i = 0; i < strs.length; i++) {
			Document doc = new Document();
			Field field = new TextField("content", strs[i] , Field.Store.YES);
			doc.add(field);
			// CREATE means a fresh index (plain add); otherwise replace any
			// existing document with the same content term.
			if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
				writer.addDocument(doc);
			} else {
				writer.updateDocument(new Term("content",strs[i]), doc);
			}
		}
	}

	Date end = new Date();
	System.out.println(end.getTime() - start.getTime() + " total milliseconds");

}