org.apache.lucene.search.spell.SpellChecker Java Examples

Source File: VocabularyNeo4jImpl.java From SciGraph with Apache License 2.0

6 votes

@Inject
public VocabularyNeo4jImpl(GraphDatabaseService graph,
    @Nullable @IndicatesNeo4jGraphLocation String neo4jLocation, CurieUtil curieUtil,
    NodeTransformer transformer) throws IOException {
  this.graph = graph;
  this.curieUtil = curieUtil;
  this.transformer = transformer;
  if (null != neo4jLocation) {
    Directory indexDirectory =
        FSDirectory.open((new File(new File(neo4jLocation), "index/lucene/node/node_auto_index"))
            .toPath());
    Directory spellDirectory =
        FSDirectory.open((new File(new File(neo4jLocation), "index/lucene/spellchecker"))
            .toPath());
    spellChecker = new SpellChecker(spellDirectory);
    try (IndexReader reader = DirectoryReader.open(indexDirectory)) {
      IndexWriterConfig config = new IndexWriterConfig(new KeywordAnalyzer());
      spellChecker.indexDictionary(new LuceneDictionary(reader, NodeProperties.LABEL
          + LuceneUtils.EXACT_SUFFIX), config, true);
    }
  } else {
    spellChecker = null;
  }
}

Source File: IndexBasedSpellCheckerTest.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Configures an index-based spell checker with {@link JaroWinklerDistance} as its string
 * distance and verifies that the built Lucene spell checker actually uses it.
 */
@Test
@SuppressWarnings({"unchecked"})
public void testAlternateDistance() throws Exception {
  final TestSpellChecker testChecker = new TestSpellChecker();
  @SuppressWarnings({"rawtypes"})
  NamedList config = new NamedList();
  config.add("classname", IndexBasedSpellChecker.class.getName());

  final String indexPath = createTempDir().toFile().getAbsolutePath();
  config.add(AbstractLuceneSpellChecker.INDEX_DIR, indexPath);
  config.add(AbstractLuceneSpellChecker.FIELD, "title");
  config.add(AbstractLuceneSpellChecker.SPELLCHECKER_ARG_NAME, config);
  config.add(AbstractLuceneSpellChecker.STRING_DISTANCE, JaroWinklerDistance.class.getName());

  final SolrCore solrCore = h.getCore();
  final String dictionaryName = testChecker.init(config, solrCore);
  assertTrue(dictionaryName + " is not equal to " + SolrSpellChecker.DEFAULT_DICTIONARY_NAME,
          dictionaryName.equals(SolrSpellChecker.DEFAULT_DICTIONARY_NAME));

  h.getCore().withSearcher(searcher -> {
    testChecker.build(solrCore, searcher);

    SpellChecker luceneChecker = testChecker.getSpellChecker();
    assertTrue("sc is null and it shouldn't be", luceneChecker != null);

    StringDistance distance = luceneChecker.getStringDistance();
    assertTrue("sd is null and it shouldn't be", distance != null);
    assertTrue("sd is not an instance of " + JaroWinklerDistance.class.getName(),
        distance instanceof JaroWinklerDistance);
    return null;
  });
}

Source File: TreatmentCurator.java From hmftools with GNU General Public License v3.0

5 votes

/**
 * Builds an in-memory spell checker from the terms stored in {@code DRUG_TERMS_FIELD} of the
 * given index.
 *
 * <p>The reader on {@code index} is only needed while the dictionary is copied into the
 * spell-checker directory, so it is closed before this method returns (the original never
 * closed it). The {@code index} directory itself is left open for the caller.
 *
 * @param index directory holding the source index
 * @return a spell checker backed by its own {@link RAMDirectory}, with accuracy set to
 *     {@code SPELLCHECK_ACCURACY}
 * @throws IOException if the index cannot be read or the dictionary cannot be indexed
 */
@NotNull
private static SpellChecker createIndexSpellchecker(@NotNull Directory index) throws IOException {
    Directory spellCheckerDirectory = new RAMDirectory();
    try (IndexReader indexReader = DirectoryReader.open(index)) {
        Analyzer analyzer = new SimpleAnalyzer();
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        Dictionary dictionary = new HighFrequencyDictionary(indexReader, DRUG_TERMS_FIELD, 0.0f);
        SpellChecker spellChecker = new SpellChecker(spellCheckerDirectory);

        spellChecker.indexDictionary(dictionary, config, false);
        spellChecker.setAccuracy(SPELLCHECK_ACCURACY);
        return spellChecker;
    }
}

Source File: TreatmentCurator.java From hmftools with GNU General Public License v3.0

5 votes

/**
 * Returns an analyzer whose token chain is: whitespace tokenizer, the default token filter,
 * spell-check correction with the given spell checker, then concatenation with a single space.
 *
 * @param spellChecker spell checker handed to the {@link SpellCheckerTokenFilter}
 * @return a new anonymous {@link Analyzer}
 */
@NotNull
private static Analyzer spellcheckAnalyzer(@NotNull SpellChecker spellChecker) {
    return new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(@NotNull String field) {
            Tokenizer whitespaceTokenizer = new WhitespaceTokenizer();
            // NOTE(review): the reader is seeded with the field *name*; presumably the analyzer
            // replaces it with the real input before tokenizing - confirm.
            whitespaceTokenizer.setReader(new StringReader(field));
            SpellCheckerTokenFilter corrected =
                    new SpellCheckerTokenFilter(defaultTokenFilter(whitespaceTokenizer), spellChecker);
            return new TokenStreamComponents(whitespaceTokenizer, new ConcatenatingFilter(corrected, ' '));
        }
    };
}

Source File: IndexHelper.java From document-management-system with GNU General Public License v2.0

5 votes

/**
 * Rebuilds the spell-checker index under {@code lucene_index/spellcheck} from the
 * {@code NodeDocument.TEXT_FIELD} terms of the {@code NodeDocumentVersion} index.
 *
 * <p>Failures are logged and not propagated. The spell checker, its directory and the index
 * reader are released in {@code finally}, so they are also freed when clearing or indexing
 * throws (the original leaked the spell checker in that case).
 *
 * @param searchFactory provides the reader provider and the directory providers
 */
protected void buildSpellCheckerIndex(SearchFactory searchFactory) {
	IndexReader reader = null;
	Directory dir = null;
	SpellChecker spell = null;
	long _entr = System.currentTimeMillis();
	File spellCheckIndexDir = new File("lucene_index/spellcheck");
	log.info("Building SpellChecker index in {0}", spellCheckIndexDir.getAbsolutePath());
	ReaderProvider readerProvider = searchFactory.getReaderProvider();

	try {
		reader = readerProvider.openReader(searchFactory.getDirectoryProviders(NodeDocumentVersion.class)[0]);
		dir = FSDirectory.open(spellCheckIndexDir);
		spell = new SpellChecker(dir);
		spell.clearIndex();
		spell.indexDictionary(new LuceneDictionary(reader, NodeDocument.TEXT_FIELD));
		// Regular shutdown: a failure while closing is reported by the catch block below.
		spell.close();
		spell = null;
		dir.close();
		dir = null;
		long _exit = System.currentTimeMillis();
		log.info("Took {1} (ms) to build SpellChecker index in {0}",
				spellCheckIndexDir.getAbsolutePath(), String.valueOf((_exit - _entr)));
	} catch (Exception exc) {
		log.error("Failed to build spell checker index!", exc);
	} finally {
		// Only non-null after a failure; the primary error has already been logged above.
		if (spell != null) {
			try {
				spell.close();
			} catch (Exception ignored) {
				// best-effort cleanup
			}
		}
		if (dir != null) {
			try {
				dir.close();
			} catch (Exception ignored) {
				// best-effort cleanup
			}
		}
		if (reader != null) {
			readerProvider.closeReader(reader);
		}
	}
}

Source File: SearchSuggester.java From webdsl with Apache License 2.0

5 votes

/**
 * Closes the spell checker registered for {@code indexPath}, if any, and removes its entry from
 * {@code spellCheckMap}. An {@link IOException} raised while closing is logged and the entry is
 * still removed.
 *
 * @param indexPath key of the spell checker in {@code spellCheckMap}
 */
public static synchronized void forceSpellCheckerRenewal(String indexPath){
    final SpellChecker registered = spellCheckMap.get(indexPath);
    try {
        if (registered != null) {
            registered.close();
        }
    } catch (IOException closeFailure) {
        org.webdsl.logging.Logger.error("EXCEPTION",closeFailure);
    }
    spellCheckMap.remove(indexPath);
}

Source File: Index.java From olat with Apache License 2.0

5 votes

/**
 * Closes the current spell checker and opens a new one on the spell-checker index path taken
 * from {@code searchModule}, with accuracy 0.7.
 *
 * <p>When {@code indexNewlyBuilt} is set, the spell-check files are replaced before opening and
 * the old files are cleaned up afterwards. If no index exists at the configured path, an error
 * is logged, the just-opened directory is closed again (the original leaked it) and the method
 * returns without creating a spell checker. I/O failures are logged and not propagated.
 *
 * @param indexNewlyBuilt whether a freshly built spell-check index should be swapped in
 */
private void createSpellCheckSearcher(boolean indexNewlyBuilt) {
    try {
        log.info("Create spell checker on new index ...");
        synchronized (createSpellCheckSearcherLock) {// o_clusterOK by:pb if service is only configured on one vm, which is recommended way
            closeSpellCheckSearcher();
            if (indexNewlyBuilt) {
                replaceSpellCheckFiles();
            }

            final File spellDictionaryFile = new File(searchModule.getSpellCheckerIndexPath());
            final Directory spellIndexDirectory = FSDirectory.open(spellDictionaryFile);
            if (!IndexReader.indexExists(spellIndexDirectory)) {
                log.error("SpellChecker index does not exist [" + spellDictionaryFile.getAbsolutePath() + "]");
                // Nothing will use this directory, so release it before bailing out.
                spellIndexDirectory.close();
                return;
            }
            spellChecker = new SpellChecker(spellIndexDirectory);
            spellChecker.setAccuracy(0.7f);
        }

        if (indexNewlyBuilt) {
            log.info("Cleanup old spell checker index files ...");
            cleanupSpellCheckFiles();
        }
    } catch (IOException ex) {
        log.error("SpellChecker couldn't be created.", ex);
    }

}

Source File: AbstractLuceneSpellChecker.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Returns the value of the {@code spellChecker} field.
 *
 * @return the current {@link SpellChecker} reference held by this instance
 */
public SpellChecker getSpellChecker() {
  return this.spellChecker;
}

Source File: IndexBasedSpellCheckerTest.java From lucene-solr with Apache License 2.0

4 votes

/**
 * Returns the value of the {@code spellChecker} field.
 *
 * @return the current {@link SpellChecker} reference held by this instance
 */
@Override
public SpellChecker getSpellChecker() {
  return this.spellChecker;
}

Source File: SpellCheckerTokenFilter.java From hmftools with GNU General Public License v3.0

4 votes

/**
 * Creates a filter over the given token stream and stores the spell checker on this instance.
 *
 * @param tokenStream upstream token stream, passed to the superclass constructor
 * @param spellChecker spell checker assigned to the {@code spellChecker} field
 */
SpellCheckerTokenFilter(@NotNull final TokenStream tokenStream, @NotNull final SpellChecker spellChecker) {
    super(tokenStream);
    this.spellChecker = spellChecker;
}

Source File: IndexHelper.java From document-management-system with GNU General Public License v2.0

4 votes

/**
 * Adds the words of the given document version's text to the spell-checker index under
 * {@code lucene_index/spellcheck}.
 *
 * <p>Does nothing beyond logging when {@code nDocVer} is {@code null} or has no text. Analysis
 * and indexing failures are logged and not propagated. The index update runs under
 * {@code synchronized (this)}, and the spell checker and its directory are always released.
 *
 * @param nDocVer the added or updated document version; may be {@code null}
 */
public void updateSpellCheckerIndex(NodeDocumentVersion nDocVer) {
	log.info("Observed Wine added/updated event for {1} from Thread {0}",
			Thread.currentThread().getName(), String.valueOf(nDocVer));
	String text = (nDocVer != null) ? nDocVer.getText() : null;

	if (text != null) {
		Dictionary dictionary = null;

		try {
			FullTextEntityManager ftEm = (FullTextEntityManager) entityManager;
			SearchFactory searchFactory = ftEm.getSearchFactory();
			dictionary = new SetDictionary(text, searchFactory.getAnalyzer("wine_en"));
		} catch (IOException ioExc) {
			// Pass the values as arguments so they fill the {0}/{1}/{2} placeholders; the
			// original appended them to the format string with '+'.
			log.error("Failed to analyze dictionary text {0} from Wine {1} to update spell checker due to: {2}",
					text, nDocVer.getUuid(), ioExc.toString());
		}

		if (dictionary != null) {
			Directory dir = null;
			SpellChecker spell = null;
			// only allow one thread to update the index at a time ...
			// the Dictionary is pre-computed, so it should happen quickly
			// ...
			// this synchronized approach only works because this component
			// is application-scoped
			synchronized (this) {
				try {
					dir = FSDirectory.open(new File("lucene_index/spellcheck"));
					spell = new SpellChecker(dir);
					spell.indexDictionary(dictionary);
					// Regular shutdown: a failure while closing is reported by the catch below.
					spell.close();
					spell = null;
					log.info("Successfully updated the spell checker index after Document added/updated.");
				} catch (Exception exc) {
					log.error("Failed to update the spell checker index!", exc);
				} finally {
					// Only non-null after a failure that has already been logged above.
					if (spell != null) {
						try {
							spell.close();
						} catch (Exception ignored) {
							// best-effort cleanup
						}
					}
					if (dir != null) {
						try {
							dir.close();
						} catch (Exception ignored) {
							// best-effort cleanup
						}
					}
				}
			}
		}
	}
}

Source File: SearchSuggester.java From webdsl with Apache License 2.0

4 votes

/**
 * Tokenizes {@code toSuggestOn} with the given analyzer, asks the spell checker registered for
 * {@code baseDir + suggestedField} for alternatives to each token, and combines them through
 * {@code formSuggestions}.
 *
 * <p>A token without alternatives is kept as its own single "suggestion". If no token has any
 * alternative, or if anything goes wrong, an empty list is returned. On failure the spell
 * checker is closed and dropped from {@code spellCheckMap} so that it gets renewed.
 *
 * @param entityClass entity whose index reader is used when {@code morePopular} is set
 * @param baseDir prefix of the spell-check index path
 * @param suggestedField field name; also the suffix of the spell-check index path
 * @param maxSuggestionCount maximum number of alternatives requested per token
 * @param accuracy accuracy set on the spell checker before suggesting
 * @param morePopular whether to use the reader-based {@code suggestSimilar} overload
 * @param analyzer analyzer used to tokenize {@code toSuggestOn}
 * @param toSuggestOn text to find suggestions for; {@code null} or empty yields an empty list
 * @return the formed suggestions, or an empty list
 */
@SuppressWarnings("deprecation")
public static ArrayList<String> findSpellSuggestionsForField(Class<?> entityClass, String baseDir,
        String suggestedField, int maxSuggestionCount, float accuracy, boolean morePopular,
        Analyzer analyzer, String toSuggestOn) {

    if (toSuggestOn == null || toSuggestOn.isEmpty()) {
        return new ArrayList<String>();
    }

    SpellChecker spellChecker = null;
    IndexReader fieldIR = null;
    boolean hasSuggestions = false;

    String indexPath = baseDir+suggestedField;
    try {
        spellChecker = getSpellChecker(indexPath);

        spellChecker.setAccuracy(accuracy);

        TokenStream tokenStream = analyzer.tokenStream(suggestedField, new StringReader(
                toSuggestOn));
        CharTermAttributeImpl ta = (CharTermAttributeImpl) tokenStream
                .addAttribute(CharTermAttribute.class);

        ArrayList<String[]> allSuggestions = new ArrayList<String[]>();
        String word;
        String[] suggestions;
        while (tokenStream.incrementToken()) {
            word = ta.term();
            suggestions = null;
            if (!morePopular) {
                suggestions = spellChecker.suggestSimilar(word, maxSuggestionCount);
            } else {
                // The field reader is opened lazily, only once it is actually needed.
                if (fieldIR == null) {
                    fieldIR = getIndexReader(entityClass);
                }
                suggestions = spellChecker.suggestSimilar(word, maxSuggestionCount, fieldIR,
                        suggestedField, true);
            }

            if (suggestions == null || suggestions.length == 0) {
                suggestions = new String[] { word };
            } else {
                hasSuggestions = true;
            }

            allSuggestions.add(suggestions);
        }

        if (!hasSuggestions) {
            // if no suggestions were found, return empty list
            return new ArrayList<String>();
        } else {
            return formSuggestions(maxSuggestionCount, allSuggestions);
        }

    } catch (Exception e) {
        org.webdsl.logging.Logger.error("EXCEPTION",e);
        //if something goes wrong, close and remove current SpellChecker instance, so it gets renewed
        // spellChecker is still null when getSpellChecker itself failed; closing it
        // unconditionally would throw a NullPointerException out of this handler.
        if (spellChecker != null) {
            try {
                spellChecker.close();
            } catch (IOException e2) {
                org.webdsl.logging.Logger.error("EXCEPTION",e2);
            }
        }
        spellCheckMap.remove(indexPath);
    }
    finally {
        // The reader is only opened on the morePopular path; do not hand null to the provider.
        if (fieldIR != null) {
            searchfactory.getReaderProvider().closeReader(fieldIR);
        }
    }
    return new ArrayList<String>();
}

Source File: SearchSpellChecker.java From olat with Apache License 2.0

4 votes

/**
 * Creates a new spell-check index based on the search index.
 *
 * <p>When spell checking is enabled, one partial spell index each is built for the content,
 * title, description and author fields. The four are merged into the common spell index at
 * {@code spellDictionaryPath}, and {@code spellChecker} is pointed at the merged index with
 * accuracy 0.7. The partial spell checkers and their directories are closed once merged (the
 * original left them open). The merged directory stays open because {@code spellChecker} uses
 * it. I/O failures are logged as warnings and not propagated.
 */
public void createSpellIndex() {
    if (isSpellCheckEnabled) {
        IndexReader indexReader = null;
        try {
            log.info("Start generating Spell-Index...");
            long startSpellIndexTime = 0;
            if (log.isDebugEnabled()) {
                startSpellIndexTime = System.currentTimeMillis();
            }
            final Directory indexDir = FSDirectory.open(new File(indexPath));
            indexReader = IndexReader.open(indexDir);
            // 1. Create content spellIndex
            final File spellDictionaryFile = new File(spellDictionaryPath);
            final Directory contentSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + CONTENT_PATH));// true
            final SpellChecker contentSpellChecker = new SpellChecker(contentSpellIndexDirectory);
            final Dictionary contentDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.CONTENT_FIELD_NAME);
            contentSpellChecker.indexDictionary(contentDictionary);
            // 2. Create title spellIndex
            final Directory titleSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + TITLE_PATH));// true
            final SpellChecker titleSpellChecker = new SpellChecker(titleSpellIndexDirectory);
            final Dictionary titleDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.TITLE_FIELD_NAME);
            titleSpellChecker.indexDictionary(titleDictionary);
            // 3. Create description spellIndex
            final Directory descriptionSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + DESCRIPTION_PATH));// true
            final SpellChecker descriptionSpellChecker = new SpellChecker(descriptionSpellIndexDirectory);
            final Dictionary descriptionDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.DESCRIPTION_FIELD_NAME);
            descriptionSpellChecker.indexDictionary(descriptionDictionary);
            // 4. Create author spellIndex
            final Directory authorSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + AUTHOR_PATH));// true
            final SpellChecker authorSpellChecker = new SpellChecker(authorSpellIndexDirectory);
            final Dictionary authorDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.AUTHOR_FIELD_NAME);
            authorSpellChecker.indexDictionary(authorDictionary);

            // Merge all part spell indexes (content,title etc.) to one common spell index
            final Directory spellIndexDirectory = FSDirectory.open(spellDictionaryFile);// true
            final IndexWriter merger = new IndexWriter(spellIndexDirectory, new StandardAnalyzer(Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.UNLIMITED);
            final Directory[] directories = { contentSpellIndexDirectory, titleSpellIndexDirectory, descriptionSpellIndexDirectory, authorSpellIndexDirectory };
            merger.addIndexesNoOptimize(directories);
            merger.optimize();
            merger.close();

            // The partial indexes are no longer needed once merged; release them.
            contentSpellChecker.close();
            contentSpellIndexDirectory.close();

            titleSpellChecker.close();
            titleSpellIndexDirectory.close();

            descriptionSpellChecker.close();
            descriptionSpellIndexDirectory.close();

            authorSpellChecker.close();
            authorSpellIndexDirectory.close();

            spellChecker = new SpellChecker(spellIndexDirectory);
            spellChecker.setAccuracy(0.7f);
            if (log.isDebugEnabled()) {
                log.debug("SpellIndex created in " + (System.currentTimeMillis() - startSpellIndexTime) + "ms");
            }
            log.info("New generated Spell-Index ready to use.");
        } catch (final IOException ioEx) {
            log.warn("Can not create SpellIndex", ioEx);
        } finally {
            if (indexReader != null) {
                try {
                    indexReader.close();
                } catch (final IOException e) {
                    log.warn("Can not close indexReader properly", e);
                }
            }
        }
    }
}

Source File: SearchSpellChecker.java From olat with Apache License 2.0

4 votes

/**
 * Creates a new spell-check index based on the temporary search index.
 *
 * <p>Reads the {@code main} index below the temporary search index path, builds one partial
 * spell index each for the content, title, description and author fields, merges them into the
 * temporary spell-check index directory, and finally closes and deletes the partial indexes.
 * I/O failures are logged as warnings and not propagated.
 *
 * <p>NOTE(review): the partial spell checkers, their directories and the merger are only closed
 * on the success path; if an {@link IOException} is thrown midway they appear to stay open and
 * the partial index folders are not deleted - confirm whether that is acceptable.
 *
 * @param searchModule supplies the temporary search-index and spell-check-index paths
 */
public static void createSpellIndex(final SearchModule searchModule) {
    final String tempSearchIndexPath = searchModule.getTempSearchIndexPath();
    final String tempSpellCheckIndexPath = searchModule.getTempSpellCheckerIndexPath();

    IndexReader indexReader = null;
    try {
        log.info("Start generating spell check index ...");

        // The start time is only captured when debug logging is on; it is used solely for the
        // debug message at the end.
        long startSpellIndexTime = 0;
        if (log.isDebugEnabled()) {
            startSpellIndexTime = System.currentTimeMillis();
        }
        final Directory indexDir = FSDirectory.open(new File(tempSearchIndexPath, "main"));
        indexReader = IndexReader.open(indexDir);

        // 1. Create content spellIndex
        log.info("Generating 'content' spell check index ...");
        final File contentSpellIndexPath = new File(tempSpellCheckIndexPath + CONTENT_PATH);
        // Remove whatever is left at the partial index location before rebuilding it.
        FileUtils.deleteDirsAndFiles(contentSpellIndexPath, true, true);
        final Directory contentSpellIndexDirectory = FSDirectory.open(contentSpellIndexPath);
        final SpellChecker contentSpellChecker = new SpellChecker(contentSpellIndexDirectory);
        final Dictionary contentDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.CONTENT_FIELD_NAME);
        contentSpellChecker.indexDictionary(contentDictionary);

        // 2. Create title spellIndex
        log.info("Generating 'title' spell check index ...");
        final File titleSpellIndexPath = new File(tempSpellCheckIndexPath + TITLE_PATH);
        FileUtils.deleteDirsAndFiles(titleSpellIndexPath, true, true);
        final Directory titleSpellIndexDirectory = FSDirectory.open(titleSpellIndexPath);
        final SpellChecker titleSpellChecker = new SpellChecker(titleSpellIndexDirectory);
        final Dictionary titleDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.TITLE_FIELD_NAME);
        titleSpellChecker.indexDictionary(titleDictionary);

        // 3. Create description spellIndex
        log.info("Generating 'description' spell check index ...");
        final File descriptionSpellIndexPath = new File(tempSpellCheckIndexPath + DESCRIPTION_PATH);
        FileUtils.deleteDirsAndFiles(descriptionSpellIndexPath, true, true);
        final Directory descriptionSpellIndexDirectory = FSDirectory.open(descriptionSpellIndexPath);
        final SpellChecker descriptionSpellChecker = new SpellChecker(descriptionSpellIndexDirectory);
        final Dictionary descriptionDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.DESCRIPTION_FIELD_NAME);
        descriptionSpellChecker.indexDictionary(descriptionDictionary);

        // 4. Create author spellIndex
        log.info("Generating 'author' spell check index ...");
        final File authorSpellIndexPath = new File(tempSpellCheckIndexPath + AUTHOR_PATH);
        FileUtils.deleteDirsAndFiles(authorSpellIndexPath, true, true);
        final Directory authorSpellIndexDirectory = FSDirectory.open(authorSpellIndexPath);
        final SpellChecker authorSpellChecker = new SpellChecker(authorSpellIndexDirectory);
        final Dictionary authorDictionary = new LuceneDictionary(indexReader, AbstractOlatDocument.AUTHOR_FIELD_NAME);
        authorSpellChecker.indexDictionary(authorDictionary);

        log.info("Merging spell check indices ...");
        // Merge all part spell indexes (content,title etc.) to one common spell index
        final File tempSpellCheckIndexDir = new File(tempSpellCheckIndexPath);
        // NOTE(review): this deletes the merge target before merging; it presumably does not
        // contain the partial index folders built above - verify against the *_PATH constants.
        FileUtils.deleteDirsAndFiles(tempSpellCheckIndexDir, true, true);
        final Directory tempSpellIndexDirectory = FSDirectory.open(tempSpellCheckIndexDir);
        final IndexWriter merger = new IndexWriter(tempSpellIndexDirectory, new StandardAnalyzer(Version.LUCENE_30), true, IndexWriter.MaxFieldLength.UNLIMITED);
        final Directory[] directories = { contentSpellIndexDirectory, titleSpellIndexDirectory, descriptionSpellIndexDirectory, authorSpellIndexDirectory };
        merger.addIndexesNoOptimize(directories);

        log.info("Optimizing spell check index ...");
        merger.optimize();
        merger.close();

        // Release the merged index and every partial index before their folders are deleted.
        tempSpellIndexDirectory.close();

        contentSpellChecker.close();
        contentSpellIndexDirectory.close();

        titleSpellChecker.close();
        titleSpellIndexDirectory.close();

        descriptionSpellChecker.close();
        descriptionSpellIndexDirectory.close();

        authorSpellChecker.close();
        authorSpellIndexDirectory.close();

        // The partial indexes have been merged; remove their folders.
        FileUtils.deleteDirsAndFiles(contentSpellIndexPath, true, true);
        FileUtils.deleteDirsAndFiles(titleSpellIndexPath, true, true);
        FileUtils.deleteDirsAndFiles(descriptionSpellIndexPath, true, true);
        FileUtils.deleteDirsAndFiles(authorSpellIndexPath, true, true);

        if (log.isDebugEnabled()) {
            log.debug("Spell check index created in " + (System.currentTimeMillis() - startSpellIndexTime) + " ms.");
        }
    } catch (final IOException ioEx) {
        log.warn("Can not create spell check index.", ioEx);
    } finally {
        // The reader on the main search index is closed on both the success and failure paths.
        if (indexReader != null) {
            try {
                indexReader.close();
            } catch (final IOException e) {
                log.warn("Can not close indexReader properly", e);
            }
        }
    }
}

org.apache.lucene.search.spell.SpellChecker Java Examples