Java Code Examples for org.apache.lucene.search.spell.DirectSpellChecker

The following examples show how to use org.apache.lucene.search.spell.DirectSpellChecker. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: Elasticsearch   Source File: TermSuggester.java    License: Apache License 2.0 6 votes vote down vote up
@Override
public TermSuggestion innerExecute(String name, TermSuggestionContext suggestion, IndexSearcher searcher, CharsRefBuilder spare) throws IOException {
    DirectSpellChecker directSpellChecker = SuggestUtils.getDirectSpellChecker(suggestion.getDirectSpellCheckerSettings());
    final IndexReader indexReader = searcher.getIndexReader();
    TermSuggestion response = new TermSuggestion(
            name, suggestion.getSize(), suggestion.getDirectSpellCheckerSettings().sort()
    );
    List<Token> tokens = queryTerms(suggestion, spare);
    for (Token token : tokens) {
        // TODO: Extend DirectSpellChecker in 4.1, to get the raw suggested words as BytesRef
        SuggestWord[] suggestedWords = directSpellChecker.suggestSimilar(
                token.term, suggestion.getShardSize(), indexReader, suggestion.getDirectSpellCheckerSettings().suggestMode()
        );
        Text key = new Text(new BytesArray(token.term.bytes()));
        TermSuggestion.Entry resultEntry = new TermSuggestion.Entry(key, token.startOffset, token.endOffset - token.startOffset);
        for (SuggestWord suggestWord : suggestedWords) {
            Text word = new Text(suggestWord.string);
            resultEntry.addOption(new TermSuggestion.Entry.Option(word, suggestWord.freq, suggestWord.score));
        }
        response.addTerm(resultEntry);
    }
    return response;
}
 
Example 2
Source Project: Elasticsearch   Source File: DirectCandidateGenerator.java    License: Apache License 2.0 6 votes vote down vote up
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood,  int numCandidates, Analyzer preFilter, Analyzer postFilter, Terms terms) throws IOException {
    if (terms == null) {
        throw new IllegalArgumentException("generator field [" + field + "] doesn't exist");
    }
    this.spellchecker = spellchecker;
    this.field = field;
    this.numCandidates = numCandidates;
    this.suggestMode = suggestMode;
    this.reader = reader;
    final long dictSize = terms.getSumTotalTermFreq();
    this.useTotalTermFrequency = dictSize != -1;
    this.dictSize =  dictSize == -1 ? reader.maxDoc() : dictSize;
    this.preFilter = preFilter;
    this.postFilter = postFilter;
    this.nonErrorLikelihood = nonErrorLikelihood;
    float thresholdFrequency = spellchecker.getThresholdFrequency();
    this.frequencyPlateau = thresholdFrequency >= 1.0f ? (int) thresholdFrequency: (int)(dictSize * thresholdFrequency);
    termsEnum = terms.iterator();
}
 
Example 3
@Override
public void indexingDone() {
	try {
		spellChecker = new DirectSpellChecker();
		spellChecker.setMaxEdits(2);
		spellChecker.setAccuracy(0.1f);
		spellChecker.setMinPrefix(0);
		reader = DirectoryReader.open(writer);

		fuzzySuggester = new FuzzySuggester(directory, "", writer.getAnalyzer());
		Dictionary dict = new DocumentValueSourceDictionary(reader, WORD_FIELD, new LongValuesSource() {
			
			@Override
			public boolean needsScores() {
				return false;
			}
			
			@Override
			public LongValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException {
				return null;
			}
		});
		fuzzySuggester.build(dict);
		
		writer.close();
		searcher = new IndexSearcher(DirectoryReader.open(directory));
	} catch (IOException e) {
		throw new RuntimeException(e);
	}
}
 
Example 4
Source Project: Elasticsearch   Source File: DirectCandidateGenerator.java    License: Apache License 2.0 4 votes vote down vote up
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood, int numCandidates) throws IOException {
    this(spellchecker, field, suggestMode, reader,  nonErrorLikelihood, numCandidates, null, null, MultiFields.getTerms(reader, field));
}
 
Example 5
Source Project: lucene-solr   Source File: DirectSolrSpellChecker.java    License: Apache License 2.0 4 votes vote down vote up
@Override
@SuppressWarnings({"unchecked"})
public String init(@SuppressWarnings({"rawtypes"})NamedList config, SolrCore core) {

  SolrParams params = config.toSolrParams();

  log.info("init: {}", config);
  String name = super.init(config, core);
  
  Comparator<SuggestWord> comp = SuggestWordQueue.DEFAULT_COMPARATOR;
  String compClass = (String) config.get(COMPARATOR_CLASS);
  if (compClass != null) {
    if (compClass.equalsIgnoreCase(SCORE_COMP))
      comp = SuggestWordQueue.DEFAULT_COMPARATOR;
    else if (compClass.equalsIgnoreCase(FREQ_COMP))
      comp = new SuggestWordFrequencyComparator();
    else //must be a FQCN
      comp = (Comparator<SuggestWord>) core.getResourceLoader().newInstance(compClass, Comparator.class);
  }
  
  StringDistance sd = DirectSpellChecker.INTERNAL_LEVENSHTEIN;
  String distClass = (String) config.get(STRING_DISTANCE);
  if (distClass != null && !distClass.equalsIgnoreCase(INTERNAL_DISTANCE))
    sd = core.getResourceLoader().newInstance(distClass, StringDistance.class);

  float minAccuracy = DEFAULT_ACCURACY;
  Float accuracy = params.getFloat(ACCURACY);
  if (accuracy != null)
    minAccuracy = accuracy;
  
  int maxEdits = DEFAULT_MAXEDITS;
  Integer edits = params.getInt(MAXEDITS);
  if (edits != null)
    maxEdits = edits;
  
  int minPrefix = DEFAULT_MINPREFIX;
  Integer prefix = params.getInt(MINPREFIX);
  if (prefix != null)
    minPrefix = prefix;
  
  int maxInspections = DEFAULT_MAXINSPECTIONS;
  Integer inspections = params.getInt(MAXINSPECTIONS);
  if (inspections != null)
    maxInspections = inspections;
  
  float minThreshold = DEFAULT_THRESHOLD_TOKEN_FREQUENCY;
  Float threshold = params.getFloat(THRESHOLD_TOKEN_FREQUENCY);
  if (threshold != null)
    minThreshold = threshold;
  
  int minQueryLength = DEFAULT_MINQUERYLENGTH;
  Integer queryLength = params.getInt(MINQUERYLENGTH);
  if (queryLength != null)
    minQueryLength = queryLength;

  int maxQueryLength = DEFAULT_MAXQUERYLENGTH;
  Integer overriddenMaxQueryLength = params.getInt(MAXQUERYLENGTH);
  if (overriddenMaxQueryLength != null)
    maxQueryLength = overriddenMaxQueryLength;
  
  float maxQueryFrequency = DEFAULT_MAXQUERYFREQUENCY;
  Float queryFreq = params.getFloat(MAXQUERYFREQUENCY);
  if (queryFreq != null)
    maxQueryFrequency = queryFreq;
  
  checker.setComparator(comp);
  checker.setDistance(sd);
  checker.setMaxEdits(maxEdits);
  checker.setMinPrefix(minPrefix);
  checker.setAccuracy(minAccuracy);
  checker.setThresholdFrequency(minThreshold);
  checker.setMaxInspections(maxInspections);
  checker.setMinQueryLength(minQueryLength);
  checker.setMaxQueryLength(maxQueryLength);
  checker.setMaxQueryFrequency(maxQueryFrequency);
  checker.setLowerCaseTerms(false);
  
  return name;
}