org.apache.lucene.search.spell.LuceneDictionary Java Examples

The following examples show how to use org.apache.lucene.search.spell.LuceneDictionary. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: VocabularyNeo4jImpl.java    From SciGraph with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the vocabulary and, when a graph location is given, builds the spell-checker index.
 *
 * <p>With a non-null {@code neo4jLocation} the terms of the
 * {@code NodeProperties.LABEL + LuceneUtils.EXACT_SUFFIX} field of the node auto index
 * ({@code index/lucene/node/node_auto_index}) are indexed into a spell checker stored under
 * {@code index/lucene/spellchecker}. Without a location no spell checker is created.
 *
 * @param graph the graph database this vocabulary reads from
 * @param neo4jLocation root directory of the Neo4j store, or {@code null} to disable spell checking
 * @param curieUtil stored for later use by this instance
 * @param transformer stored for later use by this instance
 * @throws IOException if one of the Lucene directories cannot be opened or indexed
 */
@Inject
public VocabularyNeo4jImpl(GraphDatabaseService graph,
    @Nullable @IndicatesNeo4jGraphLocation String neo4jLocation, CurieUtil curieUtil,
    NodeTransformer transformer) throws IOException {
  this.graph = graph;
  this.curieUtil = curieUtil;
  this.transformer = transformer;
  if (null != neo4jLocation) {
    File graphLocation = new File(neo4jLocation);
    // The source index directory is only needed while the dictionary is being built, so it is
    // closed again here; previously it was opened and never released.
    try (Directory indexDirectory =
        FSDirectory.open(new File(graphLocation, "index/lucene/node/node_auto_index").toPath())) {
      // The spell directory stays open on purpose: the spell checker keeps using it.
      Directory spellDirectory =
          FSDirectory.open(new File(graphLocation, "index/lucene/spellchecker").toPath());
      spellChecker = new SpellChecker(spellDirectory);
      try (IndexReader reader = DirectoryReader.open(indexDirectory)) {
        IndexWriterConfig config = new IndexWriterConfig(new KeywordAnalyzer());
        spellChecker.indexDictionary(new LuceneDictionary(reader, NodeProperties.LABEL
            + LuceneUtils.EXACT_SUFFIX), config, true);
      }
    }
  } else {
    spellChecker = null;
  }
}
 
Example #2
Source File: IndexHelper.java    From document-management-system with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Rebuilds the spell-checker index in {@code lucene_index/spellcheck} from the terms of
 * {@code NodeDocument.TEXT_FIELD}.
 *
 * <p>The terms are read through a reader opened on the first directory provider registered for
 * {@code NodeDocumentVersion}. Any failure is logged and not propagated to the caller.
 *
 * @param searchFactory supplies the reader provider and the directory providers of the source index
 */
protected void buildSpellCheckerIndex(SearchFactory searchFactory) {
	IndexReader reader = null;
	Directory dir = null;
	SpellChecker spell = null;
	long _entr = System.currentTimeMillis();
	File spellCheckIndexDir = new File("lucene_index/spellcheck");
	// NOTE(review): the messages below use {0}/{1} placeholders; if "log" is an SLF4J logger
	// these are not substituted ("{}" would be needed) - confirm against the logger in use.
	log.info("Building SpellChecker index in {0}", spellCheckIndexDir.getAbsolutePath());
	ReaderProvider readerProvider = searchFactory.getReaderProvider();

	try {
		reader = readerProvider.openReader(searchFactory.getDirectoryProviders(NodeDocumentVersion.class)[0]);
		dir = FSDirectory.open(spellCheckIndexDir);
		spell = new SpellChecker(dir);
		spell.clearIndex();
		spell.indexDictionary(new LuceneDictionary(reader, NodeDocument.TEXT_FIELD));

		// Null each reference before closing it so the finally block never closes it twice.
		SpellChecker builtSpell = spell;
		spell = null;
		builtSpell.close();
		Directory builtDir = dir;
		dir = null;
		builtDir.close();

		long _exit = System.currentTimeMillis();
		log.info("Took {1} (ms) to build SpellChecker index in {0}",
				spellCheckIndexDir.getAbsolutePath(), String.valueOf((_exit - _entr)));
	} catch (Exception exc) {
		log.error("Failed to build spell checker index!", exc);
	} finally {
		// Reached with non-null references only when building failed half-way: release what is
		// still open so no file handles stay behind.
		if (spell != null) {
			try {
				spell.close();
			} catch (Exception closeExc) {
				log.warn("Failed to close spell checker", closeExc);
			}
		}
		if (dir != null) {
			try {
				dir.close();
			} catch (Exception closeExc) {
				log.warn("Failed to close spell checker index directory", closeExc);
			}
		}
		if (reader != null) {
			readerProvider.closeReader(reader);
		}
	}
}
 
Example #3
Source File: AutoCompleter.java    From webdsl with Apache License 2.0 5 votes vote down vote up
/**
 * Rebuilds the autocomplete index from the terms of one field of the given reader.
 * <p>
 * The current autocomplete index is cleared, then one document is added per word of the
 * dictionary, created from the word and its document frequency in {@code reader}. If anything
 * fails before the writer has been closed, the pending changes are rolled back so that the index
 * write lock is released.
 *
 * @param reader Source index reader, from which autocomplete words are obtained for the defined field
 * @param field the field of the source index reader to index for autocompletion
 * @param mergeFactor mergeFactor to use when indexing (currently not used by this method)
 * @param ramMB the max amount of memory in MB to use
 * @param optimize whether or not the autocomplete index should be optimized
 * @throws AlreadyClosedException if the Autocompleter is already closed
 * @throws IOException if reading the source index or writing the autocomplete index fails
 */
 public final void indexDictionary(IndexReader reader, String field, int mergeFactor, int ramMB, boolean optimize) throws IOException {
   synchronized (modifyCurrentIndexLock) {
     ensureOpen();
     final Directory dir = this.autoCompleteIndex;
     final Dictionary dict = new LuceneDictionary(reader, field);
     final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, new WhitespaceAnalyzer(Version.LUCENE_CURRENT)).setRAMBufferSizeMB(ramMB));
     boolean writerClosed = false;
     try {
       // Hold a reference to the current searcher while the index is rewritten.
       // (The list of sub-readers the previous version gathered here was never read, so it is gone.)
       final IndexSearcher indexSearcher = obtainSearcher();
       try {
         // clear the index
         writer.deleteAll();

         final Iterator<String> iter = dict.getWordsIterator();
         while (iter.hasNext()) {
           final String word = iter.next();

           // ok index the word
           final Document doc = createDocument(word, reader.docFreq(new Term(field, word)));
           writer.addDocument(doc);
         }
       } finally {
         releaseSearcher(indexSearcher);
       }
       if (optimize) {
         writer.optimize();
       }
       // close writer
       writer.close();
       writerClosed = true;
     } finally {
       if (!writerClosed) {
         try {
           // Discards the uncommitted changes and releases the write lock.
           writer.rollback();
         } catch (IOException ignored) {
           // Deliberately ignored: the exception that brought us here is the one to report.
         }
       }
     }
     // also re-open the autocomplete index to see our own changes when the next suggestion
     // is fetched:
     swapSearcher(dir);
   }
 }
 
Example #4
Source File: SearchSpellChecker.java    From olat with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a new spell-check index based on the search index.
 * <p>
 * Does nothing unless spell checking is enabled. Otherwise one partial spell index is built for
 * each of the content, title, description and author fields of the search index at
 * {@code indexPath}. The partial indexes are merged into a single index at
 * {@code spellDictionaryPath}, and {@code spellChecker} is replaced by a checker on that merged
 * index with accuracy 0.7. I/O failures are logged as warnings and not propagated.
 */
public void createSpellIndex() {
    if (isSpellCheckEnabled) {
        Directory indexDir = null;
        IndexReader indexReader = null;
        try {
            log.info("Start generating Spell-Index...");
            long startSpellIndexTime = 0;
            if (log.isDebugEnabled()) {
                startSpellIndexTime = System.currentTimeMillis();
            }
            indexDir = FSDirectory.open(new File(indexPath));
            indexReader = IndexReader.open(indexDir);
            // NOTE(review): the four partial spell checkers and their directories below are never
            // closed. Closing them after the merge looks desirable but depends on the Lucene
            // version in use providing SpellChecker.close() - confirm before adding.
            // 1. Create content spellIndex
            final File spellDictionaryFile = new File(spellDictionaryPath);
            final Directory contentSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + CONTENT_PATH));
            final SpellChecker contentSpellChecker = new SpellChecker(contentSpellIndexDirectory);
            final Dictionary contentDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.CONTENT_FIELD_NAME);
            contentSpellChecker.indexDictionary(contentDictionary);
            // 2. Create title spellIndex
            final Directory titleSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + TITLE_PATH));
            final SpellChecker titleSpellChecker = new SpellChecker(titleSpellIndexDirectory);
            final Dictionary titleDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.TITLE_FIELD_NAME);
            titleSpellChecker.indexDictionary(titleDictionary);
            // 3. Create description spellIndex
            final Directory descriptionSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + DESCRIPTION_PATH));
            final SpellChecker descriptionSpellChecker = new SpellChecker(descriptionSpellIndexDirectory);
            final Dictionary descriptionDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.DESCRIPTION_FIELD_NAME);
            descriptionSpellChecker.indexDictionary(descriptionDictionary);
            // 4. Create author spellIndex
            final Directory authorSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + AUTHOR_PATH));
            final SpellChecker authorSpellChecker = new SpellChecker(authorSpellIndexDirectory);
            final Dictionary authorDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.AUTHOR_FIELD_NAME);
            authorSpellChecker.indexDictionary(authorDictionary);

            // Merge all part spell indexes (content,title etc.) to one common spell index
            final Directory spellIndexDirectory = FSDirectory.open(spellDictionaryFile);
            final Directory[] directories = { contentSpellIndexDirectory, titleSpellIndexDirectory, descriptionSpellIndexDirectory, authorSpellIndexDirectory };
            final IndexWriter merger = new IndexWriter(spellIndexDirectory, new StandardAnalyzer(Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.UNLIMITED);
            try {
                merger.addIndexesNoOptimize(directories);
                merger.optimize();
            } finally {
                // Always close the writer so a failed merge does not leave the index write-locked.
                merger.close();
            }
            // spellIndexDirectory stays open: the new spell checker keeps using it.
            spellChecker = new SpellChecker(spellIndexDirectory);
            spellChecker.setAccuracy(0.7f);
            if (log.isDebugEnabled()) {
                log.debug("SpellIndex created in " + (System.currentTimeMillis() - startSpellIndexTime) + "ms");
            }
            log.info("New generated Spell-Index ready to use.");
        } catch (final IOException ioEx) {
            log.warn("Can not create SpellIndex", ioEx);
        } finally {
            if (indexReader != null) {
                try {
                    indexReader.close();
                } catch (final IOException e) {
                    log.warn("Can not close indexReader properly", e);
                }
            }
            // Close the search-index directory after its reader; it was never released before.
            if (indexDir != null) {
                try {
                    indexDir.close();
                } catch (final IOException e) {
                    log.warn("Can not close index directory properly", e);
                }
            }
        }
    }
}
 
Example #5
Source File: SearchSpellChecker.java    From olat with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a new spell-check index based on the temporary search index.
 * <p>
 * One partial spell index is built for each of the content, title, description and author fields
 * of the {@code main} index below the temporary search index path. The partial indexes are merged
 * and optimized into a single index at the temporary spell-check index path and deleted
 * afterwards. I/O failures are logged as warnings and not propagated. Every Lucene resource
 * opened here is closed again, including when building fails half-way.
 *
 * @param searchModule supplies the temporary search index path and the temporary spell-check
 *        index path
 */
public static void createSpellIndex(final SearchModule searchModule) {
    final String tempSearchIndexPath = searchModule.getTempSearchIndexPath();
    final String tempSpellCheckIndexPath = searchModule.getTempSpellCheckerIndexPath();

    // The four partial indexes are built identically; only label, source field and location differ.
    final String[] partLabels = { "content", "title", "description", "author" };
    final String[] partFieldNames = { AbstractOlatDocument.CONTENT_FIELD_NAME, AbstractOlatDocument.TITLE_FIELD_NAME,
            AbstractOlatDocument.DESCRIPTION_FIELD_NAME, AbstractOlatDocument.AUTHOR_FIELD_NAME };
    final File[] partIndexPaths = { new File(tempSpellCheckIndexPath + CONTENT_PATH), new File(tempSpellCheckIndexPath + TITLE_PATH),
            new File(tempSpellCheckIndexPath + DESCRIPTION_PATH), new File(tempSpellCheckIndexPath + AUTHOR_PATH) };
    final Directory[] partDirectories = new Directory[partIndexPaths.length];
    final SpellChecker[] partSpellCheckers = new SpellChecker[partIndexPaths.length];

    Directory indexDir = null;
    IndexReader indexReader = null;
    Directory tempSpellIndexDirectory = null;
    try {
        log.info("Start generating spell check index ...");

        long startSpellIndexTime = 0;
        if (log.isDebugEnabled()) {
            startSpellIndexTime = System.currentTimeMillis();
        }
        indexDir = FSDirectory.open(new File(tempSearchIndexPath, "main"));
        indexReader = IndexReader.open(indexDir);

        // Build the partial spell indexes, starting each from an empty directory.
        for (int i = 0; i < partIndexPaths.length; i++) {
            log.info("Generating '" + partLabels[i] + "' spell check index ...");
            FileUtils.deleteDirsAndFiles(partIndexPaths[i], true, true);
            partDirectories[i] = FSDirectory.open(partIndexPaths[i]);
            partSpellCheckers[i] = new SpellChecker(partDirectories[i]);
            partSpellCheckers[i].indexDictionary(new LuceneDictionary(indexReader, partFieldNames[i]));
        }

        log.info("Merging spell check indices ...");
        // Merge all part spell indexes (content,title etc.) to one common spell index
        final File tempSpellCheckIndexDir = new File(tempSpellCheckIndexPath);
        FileUtils.deleteDirsAndFiles(tempSpellCheckIndexDir, true, true);
        tempSpellIndexDirectory = FSDirectory.open(tempSpellCheckIndexDir);
        final IndexWriter merger = new IndexWriter(tempSpellIndexDirectory, new StandardAnalyzer(Version.LUCENE_30), true, IndexWriter.MaxFieldLength.UNLIMITED);
        try {
            merger.addIndexesNoOptimize(partDirectories);

            log.info("Optimizing spell check index ...");
            merger.optimize();
        } finally {
            // Always close the writer so a failed merge does not leave the index write-locked.
            merger.close();
        }

        // Close everything before deleting the partial indexes. Each reference is nulled before
        // its close() call so that the finally block never closes it a second time.
        final Directory mergedDirectory = tempSpellIndexDirectory;
        tempSpellIndexDirectory = null;
        mergedDirectory.close();
        for (int i = 0; i < partIndexPaths.length; i++) {
            final SpellChecker partSpellChecker = partSpellCheckers[i];
            partSpellCheckers[i] = null;
            partSpellChecker.close();

            final Directory partDirectory = partDirectories[i];
            partDirectories[i] = null;
            partDirectory.close();
        }

        for (final File partIndexPath : partIndexPaths) {
            FileUtils.deleteDirsAndFiles(partIndexPath, true, true);
        }

        if (log.isDebugEnabled()) {
            log.debug("Spell check index created in " + (System.currentTimeMillis() - startSpellIndexTime) + " ms.");
        }
    } catch (final IOException ioEx) {
        log.warn("Can not create spell check index.", ioEx);
    } finally {
        // Anything still referenced here was left open by a failure above.
        for (int i = 0; i < partIndexPaths.length; i++) {
            if (partSpellCheckers[i] != null) {
                try {
                    partSpellCheckers[i].close();
                } catch (final IOException e) {
                    log.warn("Can not close spell checker properly", e);
                }
            }
            if (partDirectories[i] != null) {
                try {
                    partDirectories[i].close();
                } catch (final IOException e) {
                    log.warn("Can not close spell index directory properly", e);
                }
            }
        }
        if (tempSpellIndexDirectory != null) {
            try {
                tempSpellIndexDirectory.close();
            } catch (final IOException e) {
                log.warn("Can not close spell index directory properly", e);
            }
        }
        if (indexReader != null) {
            try {
                indexReader.close();
            } catch (final IOException e) {
                log.warn("Can not close indexReader properly", e);
            }
        }
        // Close the search-index directory after its reader; it was never released before.
        if (indexDir != null) {
            try {
                indexDir.close();
            } catch (final IOException e) {
                log.warn("Can not close index directory properly", e);
            }
        }
    }
}