org.apache.lucene.search.spell.SuggestMode Java Examples

The following examples show how to use org.apache.lucene.search.spell.SuggestMode. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: DirectCandidateGenerator.java    From Elasticsearch with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a candidate generator that draws its candidates from the given spell checker.
 *
 * <p>The dictionary size is taken from {@code terms.getSumTotalTermFreq()}; when that value is
 * {@code -1} the generator falls back to {@code reader.maxDoc()} and records that total term
 * frequencies are not in use.
 *
 * @param spellchecker       spell checker used to look up candidates; its threshold frequency
 *                           is read here to derive the frequency plateau
 * @param field              name of the field the candidates are generated for
 * @param suggestMode        suggest mode stored for later lookups
 * @param reader             index reader stored for later lookups and used for the
 *                           {@code maxDoc()} fallback
 * @param nonErrorLikelihood value stored as this generator's non-error likelihood
 * @param numCandidates      value stored as the number of candidates to request
 * @param preFilter          analyzer stored as the pre-filter
 * @param postFilter         analyzer stored as the post-filter
 * @param terms              terms of {@code field}; must not be {@code null}
 * @throws IllegalArgumentException if {@code terms} is {@code null}
 * @throws IOException              declared for the calls made on {@code terms}
 */
public DirectCandidateGenerator(DirectSpellChecker spellchecker, String field, SuggestMode suggestMode, IndexReader reader, double nonErrorLikelihood,  int numCandidates, Analyzer preFilter, Analyzer postFilter, Terms terms) throws IOException {
    if (terms == null) {
        throw new IllegalArgumentException("generator field [" + field + "] doesn't exist");
    }
    this.spellchecker = spellchecker;
    this.field = field;
    this.numCandidates = numCandidates;
    this.suggestMode = suggestMode;
    this.reader = reader;
    final long sumTotalTermFreq = terms.getSumTotalTermFreq();
    this.useTotalTermFrequency = sumTotalTermFreq != -1;
    this.dictSize = sumTotalTermFreq == -1 ? reader.maxDoc() : sumTotalTermFreq;
    this.preFilter = preFilter;
    this.postFilter = postFilter;
    this.nonErrorLikelihood = nonErrorLikelihood;
    float thresholdFrequency = spellchecker.getThresholdFrequency();
    // Scale by the resolved dictionary size, not the raw statistic: the raw value may be the
    // -1 sentinel, which would turn a fractional threshold into a meaningless plateau.
    this.frequencyPlateau = thresholdFrequency >= 1.0f ? (int) thresholdFrequency : (int) (this.dictSize * thresholdFrequency);
    termsEnum = terms.iterator();
}
 
Example #2
Source File: DirectCandidateGenerator.java    From Elasticsearch with Apache License 2.0 6 votes vote down vote up
/**
 * Looks up terms similar to the set's original term and adds them to the set.
 *
 * <p>The original term is run through the pre-filter first. Before the lookup, the spell
 * checker's threshold frequency is set to {@code 0} for {@code SUGGEST_ALWAYS}, and otherwise to
 * the value computed from the original term's frequency and the dictionary size. Every
 * suggestion is wrapped in a {@link Candidate} and passed through the post-filter, which
 * receives the list that is finally added to the set.
 *
 * @param set candidate set whose original term drives the lookup
 * @return the same {@code set} instance, after the candidates were added
 * @throws IOException declared for the filter and spell checker calls
 */
@Override
public CandidateSet drawCandidates(CandidateSet set) throws IOException {
    final Candidate source = set.originalTerm;
    final BytesRef filteredTerm = preFilter(source.term, spare, byteSpare);
    final long sourceFrequency = source.frequency;

    if (this.suggestMode == SuggestMode.SUGGEST_ALWAYS) {
        spellchecker.setThresholdFrequency(0);
    } else {
        spellchecker.setThresholdFrequency(thresholdFrequency(sourceFrequency, dictSize));
    }

    final SuggestWord[] similarWords =
            spellchecker.suggestSimilar(new Term(field, filteredTerm), numCandidates, reader, this.suggestMode);
    final List<Candidate> drawn = new ArrayList<>(similarWords.length);
    for (SuggestWord word : similarWords) {
        final BytesRef wordBytes = new BytesRef(word.string);
        final Candidate unfiltered = new Candidate(
                wordBytes,
                internalFrequency(wordBytes),
                word.score,
                score(word.freq, word.score, dictSize),
                false);
        postFilter(unfiltered, spare, byteSpare, drawn);
    }
    set.addCandidates(drawn);
    return set;
}
 
Example #3
Source File: WordBreakCompoundRewriter.java    From querqy with Apache License 2.0 6 votes vote down vote up
/**
 * Asks the word-break spell checker for ways to split {@code term} and returns the usable ones.
 *
 * <p>Only suggestions that are non-null and consist of more than one word are kept, and at most
 * {@code maxDecompoundExpansions} of them are returned. When {@code verifyDecompoundCollation}
 * is set, each suggestion is additionally counted via {@code countCollatedMatches}; suggestions
 * with a count of zero are dropped and the rest are sorted by {@code MaxSortable}'s natural
 * order before the limit is applied.
 *
 * @param term the term to break up
 * @return the retained suggestions; an empty list if the spell checker returned none
 * @throws IOException declared for the spell checker call
 */
protected List<SuggestWord[]> suggestWordbreaks(final Term term) throws IOException {
    final SuggestWord[][] candidates = wordBreakSpellChecker.suggestWordBreaks(
            toLuceneTerm(term),
            decompoundsToQuery,
            indexReader,
            SuggestMode.SUGGEST_ALWAYS,
            WordBreakSpellChecker.BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);

    if (candidates.length == 0) {
        return Collections.emptyList();
    }

    if (verifyDecompoundCollation) {
        // The searcher is only needed (and only created) when matches have to be counted.
        final IndexSearcher collationSearcher = new IndexSearcher(indexReader);
        return Arrays.stream(candidates)
                .filter(parts -> parts != null && parts.length > 1)
                .map(parts -> new MaxSortable<>(parts, countCollatedMatches(parts, collationSearcher)))
                .filter(scored -> scored.count > 0)
                .sorted()
                .limit(maxDecompoundExpansions) // TODO: use PriorityQueue
                .map(scored -> scored.obj)
                .collect(Collectors.toList());
    }

    return Arrays.stream(candidates)
            .filter(parts -> parts != null && parts.length > 1)
            .limit(maxDecompoundExpansions)
            .collect(Collectors.toList());
}
 
Example #4
Source File: LuceneWordSearch.java    From preDict with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Returns the strings of up to two spell checker suggestions for the given query.
 *
 * <p>The lookup is made against {@code WORD_FIELD} with {@code SuggestMode.SUGGEST_ALWAYS}.
 *
 * @param searchQuery text to find similar words for
 * @return the suggested words, in the order the spell checker returned them
 * @throws IOException declared for the spell checker call
 */
private List<String> getUsingSpellcheck(String searchQuery) throws IOException {
	final Term queryTerm = new Term(WORD_FIELD, searchQuery);
	final SuggestWord[] similarWords =
			spellChecker.suggestSimilar(queryTerm, 2, reader, SuggestMode.SUGGEST_ALWAYS);
	final List<String> words = new ArrayList<>();
	for (int i = 0; i < similarWords.length; i++) {
		words.add(similarWords[i].string);
	}
	return words;
}
 
Example #5
Source File: SpellingOptions.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Creates spelling options from the given values, storing each argument in the field of the
 * same name.
 *
 * @param tokens          tokens to get suggestions for
 * @param reader          index reader to use
 * @param count           value stored as {@code count}
 * @param suggestMode     suggest mode to use
 * @param extendedResults value stored as {@code extendedResults}
 * @param accuracy        value stored as {@code accuracy}
 * @param customParams    value stored as {@code customParams}
 */
public SpellingOptions(Collection<Token> tokens, IndexReader reader,
    int count, SuggestMode suggestMode, boolean extendedResults,
    float accuracy, SolrParams customParams) {
  // What to check and where.
  this.tokens = tokens;
  this.reader = reader;
  this.customParams = customParams;
  // How to check it.
  this.suggestMode = suggestMode;
  this.count = count;
  this.accuracy = accuracy;
  this.extendedResults = extendedResults;
}
 
Example #6
Source File: SpellingOptions.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Creates spelling options from the given values, including an alternative term count, storing
 * each argument in the field of the same name.
 *
 * @param tokens               tokens to get suggestions for
 * @param reader               index reader to use
 * @param count                value stored as {@code count}
 * @param alternativeTermCount value stored as {@code alternativeTermCount}
 * @param suggestMode          suggest mode to use
 * @param extendedResults      value stored as {@code extendedResults}
 * @param accuracy             value stored as {@code accuracy}
 * @param customParams         value stored as {@code customParams}
 */
public SpellingOptions(Collection<Token> tokens, IndexReader reader,
    int count, int alternativeTermCount, SuggestMode suggestMode,
    boolean extendedResults, float accuracy, SolrParams customParams) {
  // What to check and where.
  this.tokens = tokens;
  this.reader = reader;
  this.customParams = customParams;
  // How to check it.
  this.suggestMode = suggestMode;
  this.count = count;
  this.alternativeTermCount = alternativeTermCount;
  this.accuracy = accuracy;
  this.extendedResults = extendedResults;
}
 
Example #7
Source File: Suggester.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Looks up suggestions for every token in {@code options} and collects them into one result.
 *
 * <p>If no lookup has been built yet, {@code EMPTY_RESULT} is returned. For each token, the
 * lookup is queried with the token's character buffer; tokens for which the lookup returns
 * {@code null} are skipped. Unless the suggest mode is {@code SUGGEST_MORE_POPULAR}, the
 * returned list is sorted with {@code Collections.sort} before it is added to the result.
 *
 * @param options tokens, suggest mode and count to use
 * @return the collected suggestions, or {@code EMPTY_RESULT} when {@code lookup} is {@code null}
 * @throws IOException declared for the lookup call
 */
@Override
public SpellingResult getSuggestions(SpellingOptions options) throws IOException {
  log.debug("getSuggestions: {}", options.tokens);
  if (lookup == null) {
    log.info("Lookup is null - invoke spellchecker.build first");
    return EMPTY_RESULT;
  }

  final SpellingResult collected = new SpellingResult();
  // One CharsRef is re-pointed at each token's buffer instead of allocating per token.
  final CharsRef key = new CharsRef();
  for (Token token : options.tokens) {
    key.chars = token.buffer();
    key.offset = 0;
    key.length = token.length();

    final boolean morePopularRequested = options.suggestMode == SuggestMode.SUGGEST_MORE_POPULAR;
    // The flag is never passed as true to these two lookup implementations.
    final boolean excludedLookupType =
        lookup instanceof WFSTCompletionLookup || lookup instanceof AnalyzingSuggester;
    final boolean onlyMorePopular = morePopularRequested && !excludedLookupType;

    final List<LookupResult> found = lookup.lookup(key, onlyMorePopular, options.count);
    if (found != null) {
      if (!morePopularRequested) {
        Collections.sort(found);
      }
      for (LookupResult hit : found) {
        collected.add(token, hit.key.toString(), (int) hit.value);
      }
    }
  }
  return collected;
}
 
Example #8
Source File: DirectSpellcheckerSettings.java    From Elasticsearch with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the suggest mode held by these settings.
 *
 * @return the current value of the {@code suggestMode} field
 */
public SuggestMode suggestMode() {
    return this.suggestMode;
}
 
Example #9
Source File: DirectCandidateGenerator.java    From Elasticsearch with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a candidate generator without pre- or post-filter.
 *
 * <p>Delegates to the full constructor, passing {@code null} for both filters and the terms
 * obtained from {@code MultiFields.getTerms(reader, field)}.
 *
 * @param spellchecker       spell checker used to look up candidates
 * @param field              name of the field the candidates are generated for
 * @param suggestMode        suggest mode to use for lookups
 * @param reader             index reader the terms of {@code field} are read from
 * @param nonErrorLikelihood value passed through as the non-error likelihood
 * @param numCandidates      value passed through as the number of candidates
 * @throws IOException declared for the terms lookup and the delegated constructor
 */
public DirectCandidateGenerator(
        DirectSpellChecker spellchecker,
        String field,
        SuggestMode suggestMode,
        IndexReader reader,
        double nonErrorLikelihood,
        int numCandidates) throws IOException {
    this(
            spellchecker,
            field,
            suggestMode,
            reader,
            nonErrorLikelihood,
            numCandidates,
            null, // preFilter
            null, // postFilter
            MultiFields.getTerms(reader, field));
}
 
Example #10
Source File: SpellCheckComponent.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Runs spell checking for the current request and adds a {@code "spellcheck"} section to the
 * response.
 *
 * <p>Nothing happens when the component is not enabled in the request parameters or when no
 * spell checkers are registered. The text to check is the {@code SPELLCHECK_Q} parameter if
 * present (tokenized with the selected spell checker's query analyzer); otherwise the
 * request's query string, falling back to the {@code q} parameter (tokenized by the query
 * converter). If no tokens result, nothing is added to the response.
 *
 * <p>Suggestions are only requested when {@code maxResultsForSuggest} is unset or the hit count
 * does not exceed it; otherwise an empty {@link SpellingResult} is reported.
 *
 * @param rb response builder providing the request, the response and the hit count
 * @throws IOException   declared for tokenizing and for the spell checker call
 * @throws SolrException with {@code NOT_FOUND} when no spell checker could be resolved for
 *                       the requested dictionaries and there is text to check
 */
@Override
@SuppressWarnings("unchecked")
public void process(ResponseBuilder rb) throws IOException {
  SolrParams params = rb.req.getParams();
  if (!params.getBool(COMPONENT_NAME, false) || spellCheckers.isEmpty()) {
    return;
  }
  boolean shardRequest = "true".equals(params.get(ShardParams.IS_SHARD));
  String q = params.get(SPELLCHECK_Q);
  SolrSpellChecker spellChecker = getSpellChecker(params);
  Collection<Token> tokens = null;

  if (q != null) {
    if (spellChecker == null) {
      // The analyzer lookup below needs a spell checker. Report the missing dictionary the
      // same way the later branch does instead of failing with a NullPointerException.
      throw new SolrException(SolrException.ErrorCode.NOT_FOUND,
          "Specified dictionaries do not exist: " + getDictionaryNameAsSingleString(getDictionaryNames(params)));
    }
    //we have a spell check param, tokenize it with the query analyzer applicable for this spellchecker
    tokens = getTokens(q, spellChecker.getQueryAnalyzer());
  } else {
    q = rb.getQueryString();
    if (q == null) {
      q = params.get(CommonParams.Q);
    }
    tokens = queryConverter.convert(q);
  }
  if (tokens != null && !tokens.isEmpty()) {
    if (spellChecker != null) {
      int count = params.getInt(SPELLCHECK_COUNT, 1);
      boolean onlyMorePopular = params.getBool(SPELLCHECK_ONLY_MORE_POPULAR, DEFAULT_ONLY_MORE_POPULAR);
      boolean extendedResults = params.getBool(SPELLCHECK_EXTENDED_RESULTS, false);
      boolean collate = params.getBool(SPELLCHECK_COLLATE, false);
      float accuracy = params.getFloat(SPELLCHECK_ACCURACY, Float.MIN_VALUE);
      int alternativeTermCount = params.getInt(SpellingParams.SPELLCHECK_ALTERNATIVE_TERM_COUNT, 0);
      //If specified, this can be a discrete # of results, or a percentage of fq results.
      Integer maxResultsForSuggest = maxResultsForSuggest(rb);

      // Merge the custom parameters of every requested dictionary into one parameter set.
      ModifiableSolrParams customParams = new ModifiableSolrParams();
      for (String checkerName : getDictionaryNames(params)) {
        customParams.add(getCustomParams(checkerName, params));
      }

      // Prefer the hit count already recorded for logging; otherwise ask the response builder.
      Number hitsLong = (Number) rb.rsp.getToLog().get("hits");
      long hits = 0;
      if (hitsLong == null) {
        hits = rb.getNumberDocumentsFound();
      } else {
        hits = hitsLong.longValue();
      }

      SpellingResult spellingResult = null;
      if (maxResultsForSuggest == null || hits <= maxResultsForSuggest) {
        // onlyMorePopular takes precedence over a positive alternative term count.
        SuggestMode suggestMode = SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX;
        if (onlyMorePopular) {
          suggestMode = SuggestMode.SUGGEST_MORE_POPULAR;
        } else if (alternativeTermCount > 0) {
          suggestMode = SuggestMode.SUGGEST_ALWAYS;
        }

        IndexReader reader = rb.req.getSearcher().getIndexReader();
        SpellingOptions options = new SpellingOptions(tokens, reader, count,
            alternativeTermCount, suggestMode, extendedResults, accuracy,
            customParams);
        spellingResult = spellChecker.getSuggestions(options);
      } else {
        // Too many hits to bother suggesting: report an empty result.
        spellingResult = new SpellingResult();
      }
      boolean isCorrectlySpelled = hits > (maxResultsForSuggest==null ? 0 : maxResultsForSuggest);

      @SuppressWarnings({"rawtypes"})
      NamedList response = new SimpleOrderedMap();
      @SuppressWarnings({"rawtypes"})
      NamedList suggestions = toNamedList(shardRequest, spellingResult, q, extendedResults);
      response.add("suggestions", suggestions);

      if (extendedResults) {
        response.add("correctlySpelled", isCorrectlySpelled);
      }
      if (collate) {
        addCollationsToResponse(params, spellingResult, rb, q, response, spellChecker.isSuggestionsMayOverlap());
      }
      if (shardRequest) {
        addOriginalTermsToResponse(response, tokens);
      }

      rb.rsp.add("spellcheck", response);

    } else {
      throw new SolrException(SolrException.ErrorCode.NOT_FOUND,
          "Specified dictionaries do not exist: " + getDictionaryNameAsSingleString(getDictionaryNames(params)));
    }
  }
}
 
Example #11
Source File: IndexBasedSpellCheckerTest.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Checks {@link IndexBasedSpellChecker} suggestions when extended results are requested, using
 * the "title" field of the test core: a misspelling yields exactly one suggestion with its
 * value, a word with no similar term yields an empty suggestion map, and a word the test
 * expects to be spelled correctly yields no entry at all.
 */
@Test
@SuppressWarnings({"unchecked"})
public void testExtendedResults() throws Exception {
  IndexBasedSpellChecker checker = new IndexBasedSpellChecker();
  @SuppressWarnings({"rawtypes"})
  NamedList config = new NamedList();
  config.add("classname", IndexBasedSpellChecker.class.getName());

  File indexDir = createTempDir().toFile();
  indexDir.mkdirs();
  config.add(AbstractLuceneSpellChecker.INDEX_DIR, indexDir.getAbsolutePath());
  config.add(AbstractLuceneSpellChecker.FIELD, "title");
  config.add(AbstractLuceneSpellChecker.SPELLCHECKER_ARG_NAME, config);
  SolrCore core = h.getCore();
  String dictName = checker.init(config, core);
  assertTrue(dictName + " is not equal to " + SolrSpellChecker.DEFAULT_DICTIONARY_NAME,
          dictName.equals(SolrSpellChecker.DEFAULT_DICTIONARY_NAME));
  h.getCore().withSearcher(searcher -> {
    checker.build(core, searcher);

    // A misspelled word: exactly one suggestion is expected.
    IndexReader reader = searcher.getIndexReader();
    Collection<Token> misspelled = queryConverter.convert("documemt");
    SpellingOptions spellOpts =
        new SpellingOptions(misspelled, reader, 1, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, true, 0.5f, null);
    SpellingResult result = checker.getSuggestions(spellOpts);
    assertTrue("result is null and it shouldn't be", result != null);
    //should be lowercased, b/c we are using a lowercasing analyzer
    Token firstToken = spellOpts.tokens.iterator().next();
    Map<String, Integer> suggestions = result.get(firstToken);
    assertTrue("documemt is null and it shouldn't be", suggestions != null);
    assertTrue("documemt Size: " + suggestions.size() + " is not: " + 1, suggestions.size() == 1);
    Map.Entry<String, Integer> only = suggestions.entrySet().iterator().next();
    assertTrue(only.getKey() + " is not equal to " + "document", only.getKey().equals("document"));
    assertTrue(only.getValue() + " does not equal: " + 2, only.getValue() == 2);

    //test something not in the spell checker
    spellOpts.tokens = queryConverter.convert("super");
    result = checker.getSuggestions(spellOpts);
    assertTrue("result is null and it shouldn't be", result != null);
    firstToken = spellOpts.tokens.iterator().next();
    suggestions = result.get(firstToken);
    assertTrue("suggestions size should be 0", suggestions.size()==0);

    // A word the test expects to need no suggestion: no entry at all.
    spellOpts.tokens = queryConverter.convert("document");
    result = checker.getSuggestions(spellOpts);
    assertTrue("result is null and it shouldn't be", result != null);
    firstToken = spellOpts.tokens.iterator().next();
    suggestions = result.get(firstToken);
    assertTrue("suggestions is not null and it should be", suggestions == null);
    return null;
  });
}
 
Example #12
Source File: IndexBasedSpellCheckerTest.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that {@link IndexBasedSpellChecker} can build its dictionary from a standalone index
 * given via {@code LOCATION} instead of the core's own index.
 *
 * <p>A small index of titles is written to a temporary directory first; the spell checker is
 * then initialized against it and queried for a misspelling, a word with no similar term, and
 * a word that occurs in the alternate documents.
 */
@Test
@SuppressWarnings({"unchecked"})
public void testAlternateLocation() throws Exception {
  String[] ALT_DOCS = new String[]{
          "jumpin jack flash",
          "Sargent Peppers Lonely Hearts Club Band",
          "Born to Run",
          "Thunder Road",
          "Londons Burning",
          "A Horse with No Name",
          "Sweet Caroline"
  };

  IndexBasedSpellChecker checker = new IndexBasedSpellChecker();
  @SuppressWarnings({"rawtypes"})
  NamedList spellchecker = new NamedList();
  spellchecker.add("classname", IndexBasedSpellChecker.class.getName());

  File tmpDir = createTempDir().toFile();
  File indexDir = new File(tmpDir, "spellingIdx");
  //create a standalone index
  File altIndexDir = new File(tmpDir, "alternateIdx" + System.currentTimeMillis());
  // try-with-resources closes the writer first and then the directory, as before, but also
  // releases both when indexing fails part-way through.
  try (Directory dir = newFSDirectory(altIndexDir.toPath());
       IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(new WhitespaceAnalyzer()))) {
    for (String title : ALT_DOCS) {
      Document doc = new Document();
      doc.add(new TextField("title", title, Field.Store.YES));
      iw.addDocument(doc);
    }
    iw.forceMerge(1);
  }
  indexDir.mkdirs();
  spellchecker.add(AbstractLuceneSpellChecker.INDEX_DIR, indexDir.getAbsolutePath());
  spellchecker.add(AbstractLuceneSpellChecker.LOCATION, altIndexDir.getAbsolutePath());
  spellchecker.add(AbstractLuceneSpellChecker.FIELD, "title");
  spellchecker.add(AbstractLuceneSpellChecker.SPELLCHECKER_ARG_NAME, spellchecker);
  SolrCore core = h.getCore();
  String dictName = checker.init(spellchecker, core);
  assertTrue(dictName + " is not equal to " + SolrSpellChecker.DEFAULT_DICTIONARY_NAME,
          dictName.equals(SolrSpellChecker.DEFAULT_DICTIONARY_NAME));
  h.getCore().withSearcher(searcher -> {
    checker.build(core, searcher);

    // A misspelling of a word from the alternate index: exactly one suggestion is expected.
    IndexReader reader = searcher.getIndexReader();
    Collection<Token> tokens = queryConverter.convert("flesh");
    SpellingOptions spellOpts = new SpellingOptions(tokens, reader, 1, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, true, 0.5f, null);
    SpellingResult result = checker.getSuggestions(spellOpts);
    assertTrue("result is null and it shouldn't be", result != null);
    //should be lowercased, b/c we are using a lowercasing analyzer
    Map<String, Integer> suggestions = result.get(spellOpts.tokens.iterator().next());
    assertTrue("flesh is null and it shouldn't be", suggestions != null);
    assertTrue("flesh Size: " + suggestions.size() + " is not: " + 1, suggestions.size() == 1);
    Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next();
    assertTrue(entry.getKey() + " is not equal to " + "flash", entry.getKey().equals("flash"));
    assertTrue(entry.getValue() + " does not equal: " + 1, entry.getValue() == 1);

    //test something not in the spell checker
    spellOpts.tokens = queryConverter.convert("super");
    result = checker.getSuggestions(spellOpts);
    assertTrue("result is null and it shouldn't be", result != null);
    suggestions = result.get(spellOpts.tokens.iterator().next());
    assertTrue("suggestions size should be 0", suggestions.size()==0);

    // A word that occurs in the alternate documents: no entry at all is expected.
    spellOpts.tokens = queryConverter.convert("Caroline");
    result = checker.getSuggestions(spellOpts);
    assertTrue("result is null and it shouldn't be", result != null);
    suggestions = result.get(spellOpts.tokens.iterator().next());
    assertTrue("suggestions is not null and it should be", suggestions == null);
    return null;
  });
}
 
Example #13
Source File: WordBreakCompoundRewriter.java    From querqy with Apache License 2.0 3 votes vote down vote up
/**
 * Asks the word-break spell checker for ways to combine the given terms into compounds.
 *
 * <p>All remaining terms of the iterator are converted to Lucene terms and passed on in
 * iteration order; up to 10 suggestions are requested with {@code SuggestMode.SUGGEST_ALWAYS}.
 *
 * @param terms iterator over the terms to combine; it is consumed by this call
 * @return the combine suggestions returned by the spell checker
 * @throws IOException declared for the spell checker call
 */
protected CombineSuggestion[] suggestCombination(final Iterator<Term> terms) throws IOException {
    final List<org.apache.lucene.index.Term> window = new ArrayList<>(COMPOUND_WINDOW);
    while (terms.hasNext()) {
        window.add(toLuceneTerm(terms.next()));
    }

    final org.apache.lucene.index.Term[] windowTerms =
            window.toArray(new org.apache.lucene.index.Term[0]);
    return wordBreakSpellChecker.suggestWordCombinations(
            windowTerms, 10, indexReader, SuggestMode.SUGGEST_ALWAYS);
}