org.apache.lucene.search.spell.SpellChecker Java Examples
The following examples show how to use
org.apache.lucene.search.spell.SpellChecker.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: VocabularyNeo4jImpl.java From SciGraph with Apache License 2.0 | 6 votes |
@Inject public VocabularyNeo4jImpl(GraphDatabaseService graph, @Nullable @IndicatesNeo4jGraphLocation String neo4jLocation, CurieUtil curieUtil, NodeTransformer transformer) throws IOException { this.graph = graph; this.curieUtil = curieUtil; this.transformer = transformer; if (null != neo4jLocation) { Directory indexDirectory = FSDirectory.open((new File(new File(neo4jLocation), "index/lucene/node/node_auto_index")) .toPath()); Directory spellDirectory = FSDirectory.open((new File(new File(neo4jLocation), "index/lucene/spellchecker")) .toPath()); spellChecker = new SpellChecker(spellDirectory); try (IndexReader reader = DirectoryReader.open(indexDirectory)) { IndexWriterConfig config = new IndexWriterConfig(new KeywordAnalyzer()); spellChecker.indexDictionary(new LuceneDictionary(reader, NodeProperties.LABEL + LuceneUtils.EXACT_SUFFIX), config, true); } } else { spellChecker = null; } }
Example #2
Source File: IndexBasedSpellCheckerTest.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Configures the spell checker with {@link JaroWinklerDistance} as string distance and verifies
 * that the underlying Lucene {@link SpellChecker} reports that distance after a build.
 */
@Test
@SuppressWarnings({"unchecked"})
public void testAlternateDistance() throws Exception {
  TestSpellChecker checker = new TestSpellChecker();

  @SuppressWarnings({"rawtypes"})
  NamedList spellchecker = new NamedList();
  spellchecker.add("classname", IndexBasedSpellChecker.class.getName());

  File indexDir = createTempDir().toFile();
  spellchecker.add(AbstractLuceneSpellChecker.INDEX_DIR, indexDir.getAbsolutePath());
  spellchecker.add(AbstractLuceneSpellChecker.FIELD, "title");
  spellchecker.add(AbstractLuceneSpellChecker.SPELLCHECKER_ARG_NAME, spellchecker);
  spellchecker.add(AbstractLuceneSpellChecker.STRING_DISTANCE, JaroWinklerDistance.class.getName());

  SolrCore core = h.getCore();
  String dictName = checker.init(spellchecker, core);
  String expectedName = SolrSpellChecker.DEFAULT_DICTIONARY_NAME;
  assertTrue(dictName + " is not equal to " + expectedName, dictName.equals(expectedName));

  h.getCore().withSearcher(indexSearcher -> {
    checker.build(core, indexSearcher);

    SpellChecker luceneChecker = checker.getSpellChecker();
    assertTrue("sc is null and it shouldn't be", luceneChecker != null);

    StringDistance distance = luceneChecker.getStringDistance();
    assertTrue("sd is null and it shouldn't be", distance != null);
    assertTrue("sd is not an instance of " + JaroWinklerDistance.class.getName(),
        distance instanceof JaroWinklerDistance);
    return null;
  });
}
Example #3
Source File: TreatmentCurator.java From hmftools with GNU General Public License v3.0 | 5 votes |
/**
 * Builds an in-memory spell checker from the terms stored in {@code DRUG_TERMS_FIELD} of the
 * given index.
 *
 * @param index directory holding the index whose terms feed the spell checker
 * @return a spell checker backed by a {@link RAMDirectory}, with accuracy set to
 *     {@code SPELLCHECK_ACCURACY}
 * @throws IOException if the index cannot be read or the dictionary cannot be indexed
 */
@NotNull
private static SpellChecker createIndexSpellchecker(@NotNull Directory index) throws IOException {
    // The reader is only needed while the dictionary is copied into the spell-check index,
    // so close it before returning (previously it was left open).
    try (IndexReader indexReader = DirectoryReader.open(index)) {
        IndexWriterConfig config = new IndexWriterConfig(new SimpleAnalyzer());
        Dictionary dictionary = new HighFrequencyDictionary(indexReader, DRUG_TERMS_FIELD, 0.0f);

        SpellChecker spellChecker = new SpellChecker(new RAMDirectory());
        spellChecker.indexDictionary(dictionary, config, false);
        spellChecker.setAccuracy(SPELLCHECK_ACCURACY);
        return spellChecker;
    }
}
Example #4
Source File: TreatmentCurator.java From hmftools with GNU General Public License v3.0 | 5 votes |
/**
 * Creates an analyzer whose token stream is: whitespace tokenizer, the default token filter,
 * the spell-checker filter, and finally a filter that concatenates tokens with a space.
 *
 * @param spellChecker spell checker handed to the {@link SpellCheckerTokenFilter}
 * @return the assembled analyzer
 */
@NotNull
private static Analyzer spellcheckAnalyzer(@NotNull SpellChecker spellChecker) {
    return new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(@NotNull String field) {
            Tokenizer whitespaceSource = new WhitespaceTokenizer();
            // NOTE(review): the tokenizer is seeded with the field name itself — confirm intent.
            whitespaceSource.setReader(new StringReader(field));

            SpellCheckerTokenFilter corrected =
                    new SpellCheckerTokenFilter(defaultTokenFilter(whitespaceSource), spellChecker);
            return new TokenStreamComponents(whitespaceSource, new ConcatenatingFilter(corrected, ' '));
        }
    };
}
Example #5
Source File: IndexHelper.java From document-management-system with GNU General Public License v2.0 | 5 votes |
/**
 * Rebuilds the spell-checker index in {@code lucene_index/spellcheck} from the
 * {@code NodeDocument.TEXT_FIELD} terms of the {@code NodeDocumentVersion} search index.
 * Failures are logged and not propagated.
 *
 * @param searchFactory source of the reader provider and directory providers
 */
protected void buildSpellCheckerIndex(SearchFactory searchFactory) {
	IndexReader reader = null;
	Directory dir = null;
	SpellChecker spell = null;
	long _entr = System.currentTimeMillis();
	File spellCheckIndexDir = new File("lucene_index/spellcheck");
	log.info("Building SpellChecker index in {0}", spellCheckIndexDir.getAbsolutePath());
	ReaderProvider readerProvider = searchFactory.getReaderProvider();

	try {
		reader = readerProvider.openReader(searchFactory.getDirectoryProviders(NodeDocumentVersion.class)[0]);
		dir = FSDirectory.open(spellCheckIndexDir);
		spell = new SpellChecker(dir);
		spell.clearIndex();
		spell.indexDictionary(new LuceneDictionary(reader, NodeDocument.TEXT_FIELD));
		spell.close();
		spell = null;
		dir.close();
		dir = null;
		long _exit = System.currentTimeMillis();
		log.info("Took {1} (ms) to build SpellChecker index in {0}", spellCheckIndexDir.getAbsolutePath(),
				String.valueOf((_exit - _entr)));
	} catch (Exception exc) {
		log.error("Failed to build spell checker index!", exc);
	} finally {
		// Still set only when the build failed part-way: release the spell checker so it does
		// not stay open after an indexing error.
		if (spell != null) {
			try {
				spell.close();
			} catch (Exception ignored) {
				// best effort: the primary failure has already been logged above
			}
		}

		if (dir != null) {
			try {
				dir.close();
			} catch (Exception ignored) {
				// best effort: the primary failure has already been logged above
			}
		}

		if (reader != null) {
			readerProvider.closeReader(reader);
		}
	}
}
Example #6
Source File: SearchSuggester.java From webdsl with Apache License 2.0 | 5 votes |
/**
 * Closes the spell checker registered for the given index path, if there is one, and removes
 * the path from {@code spellCheckMap}. A failing close is logged and the entry is still removed.
 *
 * @param indexPath key of the spell checker inside {@code spellCheckMap}
 */
public static synchronized void forceSpellCheckerRenewal(String indexPath) {
    SpellChecker registered = spellCheckMap.get(indexPath);
    if (registered != null) {
        try {
            registered.close();
        } catch (IOException closeFailure) {
            org.webdsl.logging.Logger.error("EXCEPTION", closeFailure);
        }
    }
    spellCheckMap.remove(indexPath);
}
Example #7
Source File: Index.java From olat with Apache License 2.0 | 5 votes |
private void createSpellCheckSearcher(boolean indexNewlyBuilt) { try { log.info("Create spell checker on new index ..."); synchronized (createSpellCheckSearcherLock) {// o_clusterOK by:pb if service is only configured on one vm, which is recommended way closeSpellCheckSearcher(); if (indexNewlyBuilt) { replaceSpellCheckFiles(); } final File spellDictionaryFile = new File(searchModule.getSpellCheckerIndexPath()); final Directory spellIndexDirectory = FSDirectory.open(spellDictionaryFile); if (!IndexReader.indexExists(spellIndexDirectory)) { log.error("SpellChecker index does not exist [" + spellDictionaryFile.getAbsolutePath() + "]"); return; } spellChecker = new SpellChecker(spellIndexDirectory); spellChecker.setAccuracy(0.7f); } if (indexNewlyBuilt) { log.info("Cleanup old spell checker index files ..."); cleanupSpellCheckFiles(); } } catch (IOException ex) { log.error("SpellChecker couldn't be created.", ex); } }
Example #8
Source File: AbstractLuceneSpellChecker.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Returns the value of the {@code spellChecker} field.
 *
 * @return the current spell checker reference
 */
public SpellChecker getSpellChecker() {
  return this.spellChecker;
}
Example #9
Source File: IndexBasedSpellCheckerTest.java From lucene-solr with Apache License 2.0 | 4 votes |
/**
 * Returns the value of the {@code spellChecker} field.
 *
 * @return the current spell checker reference
 */
@Override
public SpellChecker getSpellChecker() {
  return this.spellChecker;
}
Example #10
Source File: SpellCheckerTokenFilter.java From hmftools with GNU General Public License v3.0 | 4 votes |
/**
 * Wraps the given token stream and remembers the spell checker for this filter.
 *
 * @param tokenStream upstream token stream passed to the superclass
 * @param spellChecker spell checker stored in this filter
 */
SpellCheckerTokenFilter(
        @NotNull final TokenStream tokenStream,
        @NotNull final SpellChecker spellChecker) {
    super(tokenStream);
    this.spellChecker = spellChecker;
}
Example #11
Source File: IndexHelper.java From document-management-system with GNU General Public License v2.0 | 4 votes |
public void updateSpellCheckerIndex(NodeDocumentVersion nDocVer) { log.info("Observed Wine added/updated event for {1} from Thread {0}", Thread.currentThread().getName(), String.valueOf(nDocVer)); String text = (nDocVer != null) ? nDocVer.getText() : null; if (text != null) { Dictionary dictionary = null; try { FullTextEntityManager ftEm = (FullTextEntityManager) entityManager; SearchFactory searchFactory = ftEm.getSearchFactory(); dictionary = new SetDictionary(text, searchFactory.getAnalyzer("wine_en")); } catch (IOException ioExc) { log.error("Failed to analyze dictionary text {0} from Wine {1} to update spell checker due to: {2}" + text + nDocVer.getUuid() + ioExc.toString()); } if (dictionary != null) { Directory dir = null; // only allow one thread to update the index at a time ... // the Dictionary is pre-computed, so it should happen quickly // ... // this synchronized approach only works because this component // is application-scoped synchronized (this) { try { dir = FSDirectory.open(new File("lucene_index/spellcheck")); SpellChecker spell = new SpellChecker(dir); spell.indexDictionary(dictionary); spell.close(); log.info("Successfully updated the spell checker index after Document added/updated."); } catch (Exception exc) { log.error("Failed to update the spell checker index!", exc); } finally { if (dir != null) { try { dir.close(); } catch (Exception zzz) { } } } } } } }
Example #12
Source File: SearchSuggester.java From webdsl with Apache License 2.0 | 4 votes |
@SuppressWarnings("deprecation") public static ArrayList<String> findSpellSuggestionsForField(Class<?> entityClass, String baseDir, String suggestedField, int maxSuggestionCount, float accuracy, boolean morePopular, Analyzer analyzer, String toSuggestOn) { if (toSuggestOn == null || toSuggestOn.isEmpty()) return new ArrayList<String>(); SpellChecker spellChecker = null; IndexReader fieldIR = null; boolean hasSuggestions = false; String indexPath = baseDir+suggestedField; try { spellChecker = getSpellChecker(indexPath); spellChecker.setAccuracy(accuracy); TokenStream tokenStream = analyzer.tokenStream(suggestedField, new StringReader( toSuggestOn)); CharTermAttributeImpl ta = (CharTermAttributeImpl) tokenStream .addAttribute(CharTermAttribute.class); ArrayList<String[]> allSuggestions = new ArrayList<String[]>(); String word; String[] suggestions; while (tokenStream.incrementToken()) { word = ta.term(); suggestions = null; if (!morePopular) { suggestions = spellChecker.suggestSimilar(word, maxSuggestionCount); } else { if (fieldIR == null) fieldIR = getIndexReader(entityClass); suggestions = spellChecker.suggestSimilar(word, maxSuggestionCount, fieldIR, suggestedField, true); } if (suggestions == null || suggestions.length == 0) suggestions = new String[] { word }; else hasSuggestions = true; allSuggestions.add(suggestions); } if (!hasSuggestions) // if no suggestions were found, return empty list return new ArrayList<String>(); else return formSuggestions(maxSuggestionCount, allSuggestions); } catch (Exception e) { org.webdsl.logging.Logger.error("EXCEPTION",e); //if something goes wrong, close and remove current SpellChecker instance, so it gets renewed try { spellChecker.close(); } catch (IOException e2) { org.webdsl.logging.Logger.error("EXCEPTION",e2); } spellCheckMap.remove(indexPath); } finally { searchfactory.getReaderProvider().closeReader(fieldIR); } return new ArrayList<String>(); }
Example #13
Source File: SearchSpellChecker.java From olat with Apache License 2.0 | 4 votes |
/** * Creates a new spell-check index based on search-index */ public void createSpellIndex() { if (isSpellCheckEnabled) { IndexReader indexReader = null; try { log.info("Start generating Spell-Index..."); long startSpellIndexTime = 0; if (log.isDebugEnabled()) { startSpellIndexTime = System.currentTimeMillis(); } final Directory indexDir = FSDirectory.open(new File(indexPath)); indexReader = IndexReader.open(indexDir); // 1. Create content spellIndex final File spellDictionaryFile = new File(spellDictionaryPath); final Directory contentSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + CONTENT_PATH));// true final SpellChecker contentSpellChecker = new SpellChecker(contentSpellIndexDirectory); final Dictionary contentDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.CONTENT_FIELD_NAME); contentSpellChecker.indexDictionary(contentDictionary); // 2. Create title spellIndex final Directory titleSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + TITLE_PATH));// true final SpellChecker titleSpellChecker = new SpellChecker(titleSpellIndexDirectory); final Dictionary titleDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.TITLE_FIELD_NAME); titleSpellChecker.indexDictionary(titleDictionary); // 3. Create description spellIndex final Directory descriptionSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + DESCRIPTION_PATH));// true final SpellChecker descriptionSpellChecker = new SpellChecker(descriptionSpellIndexDirectory); final Dictionary descriptionDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.DESCRIPTION_FIELD_NAME); descriptionSpellChecker.indexDictionary(descriptionDictionary); // 4. 
Create author spellIndex final Directory authorSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + AUTHOR_PATH));// true final SpellChecker authorSpellChecker = new SpellChecker(authorSpellIndexDirectory); final Dictionary authorDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.AUTHOR_FIELD_NAME); authorSpellChecker.indexDictionary(authorDictionary); // Merge all part spell indexes (content,title etc.) to one common spell index final Directory spellIndexDirectory = FSDirectory.open(spellDictionaryFile);// true final IndexWriter merger = new IndexWriter(spellIndexDirectory, new StandardAnalyzer(Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.UNLIMITED); final Directory[] directories = { contentSpellIndexDirectory, titleSpellIndexDirectory, descriptionSpellIndexDirectory, authorSpellIndexDirectory }; merger.addIndexesNoOptimize(directories); merger.optimize(); merger.close(); spellChecker = new SpellChecker(spellIndexDirectory); spellChecker.setAccuracy(0.7f); if (log.isDebugEnabled()) { log.debug("SpellIndex created in " + (System.currentTimeMillis() - startSpellIndexTime) + "ms"); } log.info("New generated Spell-Index ready to use."); } catch (final IOException ioEx) { log.warn("Can not create SpellIndex", ioEx); } finally { if (indexReader != null) { try { indexReader.close(); } catch (final IOException e) { log.warn("Can not close indexReader properly", e); } } } } }
Example #14
Source File: SearchSpellChecker.java From olat with Apache License 2.0 | 4 votes |
/** * Creates a new spell-check index based on search-index */ public static void createSpellIndex(final SearchModule searchModule) { final String tempSearchIndexPath = searchModule.getTempSearchIndexPath(); final String tempSpellCheckIndexPath = searchModule.getTempSpellCheckerIndexPath(); IndexReader indexReader = null; try { log.info("Start generating spell check index ..."); long startSpellIndexTime = 0; if (log.isDebugEnabled()) { startSpellIndexTime = System.currentTimeMillis(); } final Directory indexDir = FSDirectory.open(new File(tempSearchIndexPath, "main")); indexReader = IndexReader.open(indexDir); // 1. Create content spellIndex log.info("Generating 'content' spell check index ..."); final File contentSpellIndexPath = new File(tempSpellCheckIndexPath + CONTENT_PATH); FileUtils.deleteDirsAndFiles(contentSpellIndexPath, true, true); final Directory contentSpellIndexDirectory = FSDirectory.open(contentSpellIndexPath); final SpellChecker contentSpellChecker = new SpellChecker(contentSpellIndexDirectory); final Dictionary contentDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.CONTENT_FIELD_NAME); contentSpellChecker.indexDictionary(contentDictionary); // 2. Create title spellIndex log.info("Generating 'title' spell check index ..."); final File titleSpellIndexPath = new File(tempSpellCheckIndexPath + TITLE_PATH); FileUtils.deleteDirsAndFiles(titleSpellIndexPath, true, true); final Directory titleSpellIndexDirectory = FSDirectory.open(titleSpellIndexPath); final SpellChecker titleSpellChecker = new SpellChecker(titleSpellIndexDirectory); final Dictionary titleDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.TITLE_FIELD_NAME); titleSpellChecker.indexDictionary(titleDictionary); // 3. 
Create description spellIndex log.info("Generating 'description' spell check index ..."); final File descriptionSpellIndexPath = new File(tempSpellCheckIndexPath + DESCRIPTION_PATH); FileUtils.deleteDirsAndFiles(descriptionSpellIndexPath, true, true); final Directory descriptionSpellIndexDirectory = FSDirectory.open(descriptionSpellIndexPath); final SpellChecker descriptionSpellChecker = new SpellChecker(descriptionSpellIndexDirectory); final Dictionary descriptionDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.DESCRIPTION_FIELD_NAME); descriptionSpellChecker.indexDictionary(descriptionDictionary); // 4. Create author spellIndex log.info("Generating 'author' spell check index ..."); final File authorSpellIndexPath = new File(tempSpellCheckIndexPath + AUTHOR_PATH); FileUtils.deleteDirsAndFiles(authorSpellIndexPath, true, true); final Directory authorSpellIndexDirectory = FSDirectory.open(authorSpellIndexPath); final SpellChecker authorSpellChecker = new SpellChecker(authorSpellIndexDirectory); final Dictionary authorDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.AUTHOR_FIELD_NAME); authorSpellChecker.indexDictionary(authorDictionary); log.info("Merging spell check indices ..."); // Merge all part spell indexes (content,title etc.) 
to one common spell index final File tempSpellCheckIndexDir = new File(tempSpellCheckIndexPath); FileUtils.deleteDirsAndFiles(tempSpellCheckIndexDir, true, true); final Directory tempSpellIndexDirectory = FSDirectory.open(tempSpellCheckIndexDir); final IndexWriter merger = new IndexWriter(tempSpellIndexDirectory, new StandardAnalyzer(Version.LUCENE_30), true, IndexWriter.MaxFieldLength.UNLIMITED); final Directory[] directories = { contentSpellIndexDirectory, titleSpellIndexDirectory, descriptionSpellIndexDirectory, authorSpellIndexDirectory }; merger.addIndexesNoOptimize(directories); log.info("Optimizing spell check index ..."); merger.optimize(); merger.close(); tempSpellIndexDirectory.close(); contentSpellChecker.close(); contentSpellIndexDirectory.close(); titleSpellChecker.close(); titleSpellIndexDirectory.close(); descriptionSpellChecker.close(); descriptionSpellIndexDirectory.close(); authorSpellChecker.close(); authorSpellIndexDirectory.close(); FileUtils.deleteDirsAndFiles(contentSpellIndexPath, true, true); FileUtils.deleteDirsAndFiles(titleSpellIndexPath, true, true); FileUtils.deleteDirsAndFiles(descriptionSpellIndexPath, true, true); FileUtils.deleteDirsAndFiles(authorSpellIndexPath, true, true); if (log.isDebugEnabled()) { log.debug("Spell check index created in " + (System.currentTimeMillis() - startSpellIndexTime) + " ms."); } } catch (final IOException ioEx) { log.warn("Can not create spell check index.", ioEx); } finally { if (indexReader != null) { try { indexReader.close(); } catch (final IOException e) { log.warn("Can not close indexReader properly", e); } } } }