org.apache.lucene.search.spell.SpellChecker Java Examples

The following examples show how to use org.apache.lucene.search.spell.SpellChecker. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: VocabularyNeo4jImpl.java    From SciGraph with Apache License 2.0 6 votes vote down vote up
@Inject
public VocabularyNeo4jImpl(GraphDatabaseService graph,
    @Nullable @IndicatesNeo4jGraphLocation String neo4jLocation, CurieUtil curieUtil,
    NodeTransformer transformer) throws IOException {
  this.graph = graph;
  this.curieUtil = curieUtil;
  this.transformer = transformer;
  if (null != neo4jLocation) {
    Directory indexDirectory =
        FSDirectory.open((new File(new File(neo4jLocation), "index/lucene/node/node_auto_index"))
            .toPath());
    Directory spellDirectory =
        FSDirectory.open((new File(new File(neo4jLocation), "index/lucene/spellchecker"))
            .toPath());
    spellChecker = new SpellChecker(spellDirectory);
    try (IndexReader reader = DirectoryReader.open(indexDirectory)) {
      IndexWriterConfig config = new IndexWriterConfig(new KeywordAnalyzer());
      spellChecker.indexDictionary(new LuceneDictionary(reader, NodeProperties.LABEL
          + LuceneUtils.EXACT_SUFFIX), config, true);
    }
  } else {
    spellChecker = null;
  }
}
 
Example #2
Source File: IndexBasedSpellCheckerTest.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
@Test
@SuppressWarnings({"unchecked"})
public void testAlternateDistance() throws Exception {
  TestSpellChecker checker = new TestSpellChecker();
  @SuppressWarnings({"rawtypes"})
  NamedList spellchecker = new NamedList();
  spellchecker.add("classname", IndexBasedSpellChecker.class.getName());

  File indexDir = createTempDir().toFile();
  spellchecker.add(AbstractLuceneSpellChecker.INDEX_DIR, indexDir.getAbsolutePath());
  spellchecker.add(AbstractLuceneSpellChecker.FIELD, "title");
  spellchecker.add(AbstractLuceneSpellChecker.SPELLCHECKER_ARG_NAME, spellchecker);
  spellchecker.add(AbstractLuceneSpellChecker.STRING_DISTANCE, JaroWinklerDistance.class.getName());
  SolrCore core = h.getCore();
  String dictName = checker.init(spellchecker, core);
  assertTrue(dictName + " is not equal to " + SolrSpellChecker.DEFAULT_DICTIONARY_NAME,
          dictName.equals(SolrSpellChecker.DEFAULT_DICTIONARY_NAME) == true);
  h.getCore().withSearcher(searcher -> {
    checker.build(core, searcher);
    SpellChecker sc = checker.getSpellChecker();
    assertTrue("sc is null and it shouldn't be", sc != null);
    StringDistance sd = sc.getStringDistance();
    assertTrue("sd is null and it shouldn't be", sd != null);
    assertTrue("sd is not an instance of " + JaroWinklerDistance.class.getName(), sd instanceof JaroWinklerDistance);
    return null;
  });

}
 
Example #3
Source File: TreatmentCurator.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
@NotNull
private static SpellChecker createIndexSpellchecker(@NotNull Directory index) throws IOException {
    Directory spellCheckerDirectory = new RAMDirectory();
    IndexReader indexReader = DirectoryReader.open(index);
    Analyzer analyzer = new SimpleAnalyzer();
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    Dictionary dictionary = new HighFrequencyDictionary(indexReader, DRUG_TERMS_FIELD, 0.0f);
    SpellChecker spellChecker = new SpellChecker(spellCheckerDirectory);

    spellChecker.indexDictionary(dictionary, config, false);
    spellChecker.setAccuracy(SPELLCHECK_ACCURACY);
    return spellChecker;
}
 
Example #4
Source File: TreatmentCurator.java    From hmftools with GNU General Public License v3.0 5 votes vote down vote up
@NotNull
private static Analyzer spellcheckAnalyzer(@NotNull SpellChecker spellChecker) {
    return new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(@NotNull String field) {
            Tokenizer source = new WhitespaceTokenizer();
            source.setReader(new StringReader(field));
            SpellCheckerTokenFilter spellCheckFilter = new SpellCheckerTokenFilter(defaultTokenFilter(source), spellChecker);
            TokenFilter concatenatingFilter = new ConcatenatingFilter(spellCheckFilter, ' ');
            return new TokenStreamComponents(source, concatenatingFilter);
        }
    };
}
 
Example #5
Source File: IndexHelper.java    From document-management-system with GNU General Public License v2.0 5 votes vote down vote up
protected void buildSpellCheckerIndex(SearchFactory searchFactory) {
	IndexReader reader = null;
	Directory dir = null;
	long _entr = System.currentTimeMillis();
	File spellCheckIndexDir = new File("lucene_index/spellcheck");
	log.info("Building SpellChecker index in {0}", spellCheckIndexDir.getAbsolutePath());
	ReaderProvider readerProvider = searchFactory.getReaderProvider();

	try {
		reader = readerProvider.openReader(searchFactory.getDirectoryProviders(NodeDocumentVersion.class)[0]);
		dir = FSDirectory.open(spellCheckIndexDir);
		SpellChecker spell = new SpellChecker(dir);
		spell.clearIndex();
		spell.indexDictionary(new LuceneDictionary(reader, NodeDocument.TEXT_FIELD));
		spell.close();
		dir.close();
		dir = null;
		long _exit = System.currentTimeMillis();
		log.info("Took {1} (ms) to build SpellChecker index in {0}",
				spellCheckIndexDir.getAbsolutePath(), String.valueOf((_exit - _entr)));
	} catch (Exception exc) {
		log.error("Failed to build spell checker index!", exc);
	} finally {
		if (dir != null) {
			try {
				dir.close();
			} catch (Exception zzz) {
			}
		}
		if (reader != null) {
			readerProvider.closeReader(reader);
		}
	}
}
 
Example #6
Source File: SearchSuggester.java    From webdsl with Apache License 2.0 5 votes vote down vote up
public static synchronized void forceSpellCheckerRenewal(String indexPath){
    SpellChecker sp = spellCheckMap.get(indexPath);
    if(sp!=null) {
        try {
            sp.close();
        } catch (IOException e) {
            org.webdsl.logging.Logger.error("EXCEPTION",e);
        }
    }
    spellCheckMap.remove(indexPath);
}
 
Example #7
Source File: Index.java    From olat with Apache License 2.0 5 votes vote down vote up
private void createSpellCheckSearcher(boolean indexNewlyBuilt) {
    try {
        log.info("Create spell checker on new index ...");
        synchronized (createSpellCheckSearcherLock) {// o_clusterOK by:pb if service is only configured on one vm, which is recommended way
            closeSpellCheckSearcher();
            if (indexNewlyBuilt) {
                replaceSpellCheckFiles();
            }

            final File spellDictionaryFile = new File(searchModule.getSpellCheckerIndexPath());
            final Directory spellIndexDirectory = FSDirectory.open(spellDictionaryFile);
            if (!IndexReader.indexExists(spellIndexDirectory)) {
                log.error("SpellChecker index does not exist [" + spellDictionaryFile.getAbsolutePath() + "]");
                return;
            }
            spellChecker = new SpellChecker(spellIndexDirectory);
            spellChecker.setAccuracy(0.7f);
        }

        if (indexNewlyBuilt) {
            log.info("Cleanup old spell checker index files ...");
            cleanupSpellCheckFiles();
        }
    } catch (IOException ex) {
        log.error("SpellChecker couldn't be created.", ex);
    }

}
 
Example #8
Source File: AbstractLuceneSpellChecker.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
public SpellChecker getSpellChecker() {
  return spellChecker;
}
 
Example #9
Source File: IndexBasedSpellCheckerTest.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
@Override
public SpellChecker getSpellChecker(){
  return spellChecker;
}
 
Example #10
Source File: SpellCheckerTokenFilter.java    From hmftools with GNU General Public License v3.0 4 votes vote down vote up
SpellCheckerTokenFilter(@NotNull final TokenStream tokenStream, @NotNull final SpellChecker spellChecker) {
    super(tokenStream);
    this.spellChecker = spellChecker;
}
 
Example #11
Source File: IndexHelper.java    From document-management-system with GNU General Public License v2.0 4 votes vote down vote up
public void updateSpellCheckerIndex(NodeDocumentVersion nDocVer) {
	log.info("Observed Wine added/updated event for {1} from Thread {0}",
			Thread.currentThread().getName(), String.valueOf(nDocVer));
	String text = (nDocVer != null) ? nDocVer.getText() : null;

	if (text != null) {
		Dictionary dictionary = null;

		try {
			FullTextEntityManager ftEm = (FullTextEntityManager) entityManager;
			SearchFactory searchFactory = ftEm.getSearchFactory();
			dictionary = new SetDictionary(text, searchFactory.getAnalyzer("wine_en"));
		} catch (IOException ioExc) {
			log.error("Failed to analyze dictionary text {0} from Wine {1} to update spell checker due to: {2}" +
					text + nDocVer.getUuid() + ioExc.toString());
		}

		if (dictionary != null) {
			Directory dir = null;
			// only allow one thread to update the index at a time ...
			// the Dictionary is pre-computed, so it should happen quickly
			// ...
			// this synchronized approach only works because this component
			// is application-scoped
			synchronized (this) {
				try {
					dir = FSDirectory.open(new File("lucene_index/spellcheck"));
					SpellChecker spell = new SpellChecker(dir);
					spell.indexDictionary(dictionary);
					spell.close();
					log.info("Successfully updated the spell checker index after Document added/updated.");
				} catch (Exception exc) {
					log.error("Failed to update the spell checker index!", exc);
				} finally {
					if (dir != null) {
						try {
							dir.close();
						} catch (Exception zzz) {
						}
					}
				}
			}
		}
	}
}
 
Example #12
Source File: SearchSuggester.java    From webdsl with Apache License 2.0 4 votes vote down vote up
@SuppressWarnings("deprecation")
public static ArrayList<String> findSpellSuggestionsForField(Class<?> entityClass, String baseDir,
        String suggestedField, int maxSuggestionCount, float accuracy, boolean morePopular,
        Analyzer analyzer, String toSuggestOn) {

    if (toSuggestOn == null || toSuggestOn.isEmpty())
        return new ArrayList<String>();

    SpellChecker spellChecker = null;
    IndexReader fieldIR = null;
    boolean hasSuggestions = false;

    String indexPath = baseDir+suggestedField;
    try {
        spellChecker = getSpellChecker(indexPath);

        spellChecker.setAccuracy(accuracy);

        TokenStream tokenStream = analyzer.tokenStream(suggestedField, new StringReader(
                toSuggestOn));
        CharTermAttributeImpl ta = (CharTermAttributeImpl) tokenStream
                .addAttribute(CharTermAttribute.class);

        ArrayList<String[]> allSuggestions = new ArrayList<String[]>();
        String word;
        String[] suggestions;
        while (tokenStream.incrementToken()) {
            word = ta.term();
            suggestions = null;
            if (!morePopular) {
                suggestions = spellChecker.suggestSimilar(word, maxSuggestionCount);
            } else {
                if (fieldIR == null)
                    fieldIR = getIndexReader(entityClass);
                suggestions = spellChecker.suggestSimilar(word, maxSuggestionCount, fieldIR,
                        suggestedField, true);
            }

            if (suggestions == null || suggestions.length == 0)
                suggestions = new String[] { word };
            else
                hasSuggestions = true;

            allSuggestions.add(suggestions);
        }

        if (!hasSuggestions)
            // if no suggestions were found, return empty list
            return new ArrayList<String>();
        else
            return formSuggestions(maxSuggestionCount, allSuggestions);

    } catch (Exception e) {
        org.webdsl.logging.Logger.error("EXCEPTION",e);
        //if something goes wrong, close and remove current SpellChecker instance, so it gets renewed
        try {
            spellChecker.close();
        } catch (IOException e2) {
            org.webdsl.logging.Logger.error("EXCEPTION",e2);
        }
        spellCheckMap.remove(indexPath);
    }
    finally {
        searchfactory.getReaderProvider().closeReader(fieldIR);
    }
    return new ArrayList<String>();
}
 
Example #13
Source File: SearchSpellChecker.java    From olat with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a new spell-check index based on search-index
 */
public void createSpellIndex() {
    if (isSpellCheckEnabled) {
        IndexReader indexReader = null;
        try {
            log.info("Start generating Spell-Index...");
            long startSpellIndexTime = 0;
            if (log.isDebugEnabled()) {
                startSpellIndexTime = System.currentTimeMillis();
            }
            final Directory indexDir = FSDirectory.open(new File(indexPath));
            indexReader = IndexReader.open(indexDir);
            // 1. Create content spellIndex
            final File spellDictionaryFile = new File(spellDictionaryPath);
            final Directory contentSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + CONTENT_PATH));// true
            final SpellChecker contentSpellChecker = new SpellChecker(contentSpellIndexDirectory);
            final Dictionary contentDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.CONTENT_FIELD_NAME);
            contentSpellChecker.indexDictionary(contentDictionary);
            // 2. Create title spellIndex
            final Directory titleSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + TITLE_PATH));// true
            final SpellChecker titleSpellChecker = new SpellChecker(titleSpellIndexDirectory);
            final Dictionary titleDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.TITLE_FIELD_NAME);
            titleSpellChecker.indexDictionary(titleDictionary);
            // 3. Create description spellIndex
            final Directory descriptionSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + DESCRIPTION_PATH));// true
            final SpellChecker descriptionSpellChecker = new SpellChecker(descriptionSpellIndexDirectory);
            final Dictionary descriptionDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.DESCRIPTION_FIELD_NAME);
            descriptionSpellChecker.indexDictionary(descriptionDictionary);
            // 4. Create author spellIndex
            final Directory authorSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + AUTHOR_PATH));// true
            final SpellChecker authorSpellChecker = new SpellChecker(authorSpellIndexDirectory);
            final Dictionary authorDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.AUTHOR_FIELD_NAME);
            authorSpellChecker.indexDictionary(authorDictionary);

            // Merge all part spell indexes (content,title etc.) to one common spell index
            final Directory spellIndexDirectory = FSDirectory.open(spellDictionaryFile);// true
            final IndexWriter merger = new IndexWriter(spellIndexDirectory, new StandardAnalyzer(Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.UNLIMITED);
            final Directory[] directories = { contentSpellIndexDirectory, titleSpellIndexDirectory, descriptionSpellIndexDirectory, authorSpellIndexDirectory };
            merger.addIndexesNoOptimize(directories);
            merger.optimize();
            merger.close();
            spellChecker = new SpellChecker(spellIndexDirectory);
            spellChecker.setAccuracy(0.7f);
            if (log.isDebugEnabled()) {
                log.debug("SpellIndex created in " + (System.currentTimeMillis() - startSpellIndexTime) + "ms");
            }
            log.info("New generated Spell-Index ready to use.");
        } catch (final IOException ioEx) {
            log.warn("Can not create SpellIndex", ioEx);
        } finally {
            if (indexReader != null) {
                try {
                    indexReader.close();
                } catch (final IOException e) {
                    log.warn("Can not close indexReader properly", e);
                }
            }
        }
    }
}
 
Example #14
Source File: SearchSpellChecker.java    From olat with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a new spell-check index based on search-index
 */
public static void createSpellIndex(final SearchModule searchModule) {
    final String tempSearchIndexPath = searchModule.getTempSearchIndexPath();
    final String tempSpellCheckIndexPath = searchModule.getTempSpellCheckerIndexPath();

    IndexReader indexReader = null;
    try {
        log.info("Start generating spell check index ...");

        long startSpellIndexTime = 0;
        if (log.isDebugEnabled()) {
            startSpellIndexTime = System.currentTimeMillis();
        }
        final Directory indexDir = FSDirectory.open(new File(tempSearchIndexPath, "main"));
        indexReader = IndexReader.open(indexDir);

        // 1. Create content spellIndex
        log.info("Generating 'content' spell check index ...");
        final File contentSpellIndexPath = new File(tempSpellCheckIndexPath + CONTENT_PATH);
        FileUtils.deleteDirsAndFiles(contentSpellIndexPath, true, true);
        final Directory contentSpellIndexDirectory = FSDirectory.open(contentSpellIndexPath);
        final SpellChecker contentSpellChecker = new SpellChecker(contentSpellIndexDirectory);
        final Dictionary contentDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.CONTENT_FIELD_NAME);
        contentSpellChecker.indexDictionary(contentDictionary);

        // 2. Create title spellIndex
        log.info("Generating 'title' spell check index ...");
        final File titleSpellIndexPath = new File(tempSpellCheckIndexPath + TITLE_PATH);
        FileUtils.deleteDirsAndFiles(titleSpellIndexPath, true, true);
        final Directory titleSpellIndexDirectory = FSDirectory.open(titleSpellIndexPath);
        final SpellChecker titleSpellChecker = new SpellChecker(titleSpellIndexDirectory);
        final Dictionary titleDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.TITLE_FIELD_NAME);
        titleSpellChecker.indexDictionary(titleDictionary);

        // 3. Create description spellIndex
        log.info("Generating 'description' spell check index ...");
        final File descriptionSpellIndexPath = new File(tempSpellCheckIndexPath + DESCRIPTION_PATH);
        FileUtils.deleteDirsAndFiles(descriptionSpellIndexPath, true, true);
        final Directory descriptionSpellIndexDirectory = FSDirectory.open(descriptionSpellIndexPath);
        final SpellChecker descriptionSpellChecker = new SpellChecker(descriptionSpellIndexDirectory);
        final Dictionary descriptionDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.DESCRIPTION_FIELD_NAME);
        descriptionSpellChecker.indexDictionary(descriptionDictionary);

        // 4. Create author spellIndex
        log.info("Generating 'author' spell check index ...");
        final File authorSpellIndexPath = new File(tempSpellCheckIndexPath + AUTHOR_PATH);
        FileUtils.deleteDirsAndFiles(authorSpellIndexPath, true, true);
        final Directory authorSpellIndexDirectory = FSDirectory.open(authorSpellIndexPath);
        final SpellChecker authorSpellChecker = new SpellChecker(authorSpellIndexDirectory);
        final Dictionary authorDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.AUTHOR_FIELD_NAME);
        authorSpellChecker.indexDictionary(authorDictionary);

        log.info("Merging spell check indices ...");
        // Merge all part spell indexes (content,title etc.) to one common spell index
        final File tempSpellCheckIndexDir = new File(tempSpellCheckIndexPath);
        FileUtils.deleteDirsAndFiles(tempSpellCheckIndexDir, true, true);
        final Directory tempSpellIndexDirectory = FSDirectory.open(tempSpellCheckIndexDir);
        final IndexWriter merger = new IndexWriter(tempSpellIndexDirectory, new StandardAnalyzer(Version.LUCENE_30), true, IndexWriter.MaxFieldLength.UNLIMITED);
        final Directory[] directories = { contentSpellIndexDirectory, titleSpellIndexDirectory, descriptionSpellIndexDirectory, authorSpellIndexDirectory };
        merger.addIndexesNoOptimize(directories);

        log.info("Optimizing spell check index ...");
        merger.optimize();
        merger.close();

        tempSpellIndexDirectory.close();

        contentSpellChecker.close();
        contentSpellIndexDirectory.close();

        titleSpellChecker.close();
        titleSpellIndexDirectory.close();

        descriptionSpellChecker.close();
        descriptionSpellIndexDirectory.close();

        authorSpellChecker.close();
        authorSpellIndexDirectory.close();

        FileUtils.deleteDirsAndFiles(contentSpellIndexPath, true, true);
        FileUtils.deleteDirsAndFiles(titleSpellIndexPath, true, true);
        FileUtils.deleteDirsAndFiles(descriptionSpellIndexPath, true, true);
        FileUtils.deleteDirsAndFiles(authorSpellIndexPath, true, true);

        if (log.isDebugEnabled()) {
            log.debug("Spell check index created in " + (System.currentTimeMillis() - startSpellIndexTime) + " ms.");
        }
    } catch (final IOException ioEx) {
        log.warn("Can not create spell check index.", ioEx);
    } finally {
        if (indexReader != null) {
            try {
                indexReader.close();
            } catch (final IOException e) {
                log.warn("Can not close indexReader properly", e);
            }
        }
    }
}