Java Code Examples for org.apache.lucene.search.spell.SuggestWord

The following examples show how to use org.apache.lucene.search.spell.SuggestWord. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: Elasticsearch   Source File: TermSuggester.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a term suggestion containing one entry per token returned by {@code queryTerms}.
 *
 * <p>Every token is handed to a {@link DirectSpellChecker}; each {@link SuggestWord} that comes
 * back is attached to the token's entry as an option carrying its text, frequency and score.
 *
 * @param name       name passed on to the resulting {@link TermSuggestion}
 * @param suggestion context supplying size, shard size and the spell checker settings
 * @param searcher   source of the {@link IndexReader} the spell checker runs against
 * @param spare      scratch builder forwarded to {@code queryTerms}
 * @return the populated suggestion
 * @throws IOException declared for the lookups performed while building the suggestion
 */
@Override
public TermSuggestion innerExecute(String name, TermSuggestionContext suggestion, IndexSearcher searcher, CharsRefBuilder spare) throws IOException {
    final DirectSpellChecker checker = SuggestUtils.getDirectSpellChecker(suggestion.getDirectSpellCheckerSettings());
    final IndexReader reader = searcher.getIndexReader();
    final TermSuggestion result =
            new TermSuggestion(name, suggestion.getSize(), suggestion.getDirectSpellCheckerSettings().sort());

    for (Token current : queryTerms(suggestion, spare)) {
        // TODO: Extend DirectSpellChecker in 4.1, to get the raw suggested words as BytesRef
        final SuggestWord[] similar = checker.suggestSimilar(
                current.term,
                suggestion.getShardSize(),
                reader,
                suggestion.getDirectSpellCheckerSettings().suggestMode());

        // The entry is keyed by the token's bytes and records its offset and length.
        final int length = current.endOffset - current.startOffset;
        final TermSuggestion.Entry entry = new TermSuggestion.Entry(
                new Text(new BytesArray(current.term.bytes())), current.startOffset, length);

        for (SuggestWord candidate : similar) {
            entry.addOption(new TermSuggestion.Entry.Option(new Text(candidate.string), candidate.freq, candidate.score));
        }
        result.addTerm(entry);
    }
    return result;
}
 
Example 2
Source Project: Elasticsearch   Source File: DirectCandidateGenerator.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Asks the spell checker for terms similar to the set's original term and adds the
 * post-filtered results to the set.
 *
 * @param set candidate set whose {@code originalTerm} drives the lookup
 * @return the same {@code set} instance, after {@code addCandidates} has been called on it
 * @throws IOException declared for the spell checker lookup
 */
@Override
public CandidateSet drawCandidates(CandidateSet set) throws IOException {
    final Candidate source = set.originalTerm;
    final BytesRef filteredTerm = preFilter(source.term, spare, byteSpare);
    final long sourceFrequency = source.frequency;

    // With SUGGEST_ALWAYS the threshold is zero; otherwise it is derived from the term frequency.
    final boolean suggestAlways = this.suggestMode == SuggestMode.SUGGEST_ALWAYS;
    spellchecker.setThresholdFrequency(suggestAlways ? 0 : thresholdFrequency(sourceFrequency, dictSize));

    final SuggestWord[] similar =
            spellchecker.suggestSimilar(new Term(field, filteredTerm), numCandidates, reader, this.suggestMode);

    final List<Candidate> accepted = new ArrayList<>(similar.length);
    for (SuggestWord word : similar) {
        final BytesRef wordBytes = new BytesRef(word.string);
        final Candidate generated = new Candidate(
                wordBytes,
                internalFrequency(wordBytes),
                word.score,
                score(word.freq, word.score, dictSize),
                false);
        postFilter(generated, spare, byteSpare, accepted);
    }

    set.addCandidates(accepted);
    return set;
}
 
Example 3
Source Project: lucene-solr   Source File: IndexBasedSpellCheckerTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks the comparator type exposed by the spell checkers registered as "freq" and "fqcn"
 * on the "spellcheck" search component.
 */
@Test
public void testComparator() throws Exception {
  final SpellCheckComponent spellcheck =
      (SpellCheckComponent) h.getCore().getSearchComponent("spellcheck");
  assertNotNull(spellcheck);

  // "freq" must resolve to the frequency comparator.
  final AbstractLuceneSpellChecker freqChecker =
      (AbstractLuceneSpellChecker) spellcheck.getSpellChecker("freq");
  assertNotNull(freqChecker);
  final Comparator<SuggestWord> freqComparator = freqChecker.getSpellChecker().getComparator();
  assertNotNull(freqComparator);
  assertTrue(freqComparator instanceof SuggestWordFrequencyComparator);

  // "fqcn" must resolve to the SampleComparator class.
  final AbstractLuceneSpellChecker fqcnChecker =
      (AbstractLuceneSpellChecker) spellcheck.getSpellChecker("fqcn");
  assertNotNull(fqcnChecker);
  final Comparator<SuggestWord> fqcnComparator = fqcnChecker.getSpellChecker().getComparator();
  assertNotNull(fqcnComparator);
  assertTrue(fqcnComparator instanceof SampleComparator);
}
 
Example 4
Source Project: querqy   Source File: WordBreakCompoundRewriter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Adds to {@code nodesToAdd} one boolean query per non-empty word-break suggestion for
 * {@code term}. Each suggested word is wrapped in its own MUST dismax clause.
 *
 * @param term the term to decompound
 * @throws RuntimeException wrapping any {@link IOException} raised while fetching suggestions
 */
protected void decompound(final Term term) {
    try {
        for (final SuggestWord[] parts : suggestWordbreaks(term)) {
            // Nothing to add for missing or empty suggestions.
            if (parts == null || parts.length == 0) {
                continue;
            }

            final BooleanQuery alternative = new BooleanQuery(term.getParent(), Clause.Occur.SHOULD, true);
            for (final SuggestWord part : parts) {
                final DisjunctionMaxQuery wrapper = new DisjunctionMaxQuery(alternative, Clause.Occur.MUST, true);
                alternative.addClause(wrapper);
                wrapper.addClause(new Term(wrapper, term.getField(), part.string, true));
            }
            nodesToAdd.add(alternative);
        }
    } catch (final IOException e) {
        // IO is broken, this looks serious -> throw as RTE
        throw new RuntimeException("Error decompounding " + term, e);
    }
}
 
Example 5
Source Project: querqy   Source File: WordBreakCompoundRewriter.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Fetches word-break suggestions for {@code term} and keeps at most
 * {@code maxDecompoundExpansions} of those that contain more than one word.
 *
 * <p>When {@code verifyDecompoundCollation} is set, each suggestion is additionally counted via
 * {@code countCollatedMatches}; suggestions with a zero count are dropped and the rest are
 * sorted by {@code MaxSortable}'s natural order before the limit is applied.
 *
 * @param term the term to break up
 * @return the retained suggestions; an empty list when the spell checker returns none
 * @throws IOException declared for the spell checker lookup
 */
protected List<SuggestWord[]> suggestWordbreaks(final Term term) throws IOException {
    final SuggestWord[][] breaks = wordBreakSpellChecker.suggestWordBreaks(
            toLuceneTerm(term),
            decompoundsToQuery,
            indexReader,
            SuggestMode.SUGGEST_ALWAYS,
            WordBreakSpellChecker.BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);

    if (breaks.length == 0) {
        return Collections.emptyList();
    }

    if (verifyDecompoundCollation) {
        final IndexSearcher searcher = new IndexSearcher(indexReader);
        return Arrays.stream(breaks)
                .filter(candidate -> !(candidate == null || candidate.length <= 1))
                .map(candidate -> new MaxSortable<>(candidate, countCollatedMatches(candidate, searcher)))
                .filter(counted -> counted.count > 0)
                .sorted()
                .limit(maxDecompoundExpansions) // TODO: use PriorityQueue
                .map(counted -> counted.obj)
                .collect(Collectors.toList());
    }

    // No verification requested: keep the spell checker's order and just apply the limit.
    return Arrays.stream(breaks)
            .filter(candidate -> !(candidate == null || candidate.length <= 1))
            .limit(maxDecompoundExpansions)
            .collect(Collectors.toList());
}
 
Example 6
Source Project: querqy   Source File: WordBreakCompoundRewriterTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * With only an empty word-break suggestion stubbed, the single input term must be the only
 * content of the rewritten user query.
 */
@Test
public void testNoDecompoundForSingleToken() throws IOException {
    // The spell checker offers exactly one suggestion, and it is empty.
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(new SuggestWord[][] {new SuggestWord[] {}});

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, false, new TrieMap<>(), 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "w1w2", false);

    final ExpandedQuery result = compoundRewriter.rewrite(new ExpandedQuery(userQuery));

    assertThat((Query) result.getUserQuery(),
            bq(
                    dmq(term("w1w2", false))
            )
    );
}
 
Example 7
Source Project: querqy   Source File: WordBreakCompoundRewriterTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * With the fourth constructor argument set to {@code false} (the lower-case-input flag, judging
 * by the test name), the word-break lookup must receive the term in its original casing.
 */
@Test
public void testThatDecompoundRespectsLowerCaseInputFalse() throws IOException {
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(new SuggestWord[][] { });

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, false, new TrieMap<>(), 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "W1w2", false);

    compoundRewriter.rewrite(new ExpandedQuery(userQuery));

    // Casing of the looked-up term is untouched.
    final Term expectedLookup = new Term("field1", "W1w2");
    verify(wordBreakSpellChecker).suggestWordBreaks(eq(expectedLookup), anyInt(), any(), any(), any());
}
 
Example 8
Source Project: querqy   Source File: WordBreakCompoundRewriterTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * With the fourth constructor argument set to {@code true} (the lower-case-input flag, judging
 * by the test name), the word-break lookup must receive the lower-cased term.
 */
@Test
public void testThatDecompoundRespectsLowerCaseInputTrue() throws IOException {
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(new SuggestWord[][] { });

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", true, false, new TrieMap<>(), 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "W1w2", false);

    compoundRewriter.rewrite(new ExpandedQuery(userQuery));

    // "W1w2" went in, "w1w2" must be looked up.
    final Term expectedLookup = new Term("field1", "w1w2");
    verify(wordBreakSpellChecker).suggestWordBreaks(eq(expectedLookup), anyInt(), any(), any(), any());
}
 
Example 9
Source Project: querqy   Source File: WordBreakCompoundRewriterTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * With the fourth constructor argument set to {@code true} (the lower-case-input flag, judging
 * by the test name), the word-combination lookup must receive lower-cased terms.
 */
@Test
public void testThatCompoundRespectsLowerCaseInputTrue() throws IOException {
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(new SuggestWord[][] { });

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", true, false, new TrieMap<>(), 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "W1", false);
    addTerm(userQuery, "W2", false);

    compoundRewriter.rewrite(new ExpandedQuery(userQuery));

    // Both upper-case inputs must arrive lower-cased.
    final Term[] expectedTerms = {new Term("field1", "w1"), new Term("field1", "w2")};
    verify(wordBreakSpellChecker).suggestWordCombinations(eq(expectedTerms), anyInt(), any(), any());
}
 
Example 10
Source Project: querqy   Source File: WordBreakCompoundRewriterTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * With the fourth constructor argument set to {@code false} (the lower-case-input flag, judging
 * by the test name), the word-combination lookup must receive the terms in original casing.
 */
@Test
public void testThatCompoundRespectsLowerCaseInputFalse() throws IOException {
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(new SuggestWord[][] { });

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, false, new TrieMap<>(), 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "W1", false);
    addTerm(userQuery, "W2", false);

    compoundRewriter.rewrite(new ExpandedQuery(userQuery));

    // Casing of both inputs is untouched.
    final Term[] expectedTerms = {new Term("field1", "W1"), new Term("field1", "W2")};
    verify(wordBreakSpellChecker).suggestWordCombinations(eq(expectedTerms), anyInt(), any(), any());
}
 
Example 11
/**
 * Looks up spelling suggestions for {@code searchQuery} in {@code WORD_FIELD}, requesting at
 * most two and using {@code SUGGEST_ALWAYS}.
 *
 * @param searchQuery the text to find similar words for
 * @return the suggested words, in the order the spell checker returned them
 * @throws IOException declared for the spell checker lookup
 */
private List<String> getUsingSpellcheck(String searchQuery) throws IOException {
	final Term lookup = new Term(WORD_FIELD, searchQuery);
	final SuggestWord[] similar = spellChecker.suggestSimilar(lookup, 2, reader, SuggestMode.SUGGEST_ALWAYS);

	final List<String> words = new ArrayList<>();
	for (int i = 0; i < similar.length; i++) {
		words.add(similar[i].string);
	}
	return words;
}
 
Example 12
Source Project: querqy   Source File: WordBreakCompoundRewriter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Counts the matches of a boolean query that has one FILTER clause per suggested word, each a
 * term query on {@code dictionaryField}.
 *
 * @param suggestion the words that must all match
 * @param searcher   searcher used to run the count
 * @return the count reported by {@code searcher}
 * @throws RuntimeException wrapping any {@link IOException} raised by the count
 */
protected int countCollatedMatches(final SuggestWord[] suggestion, final IndexSearcher searcher) {
    final org.apache.lucene.search.BooleanQuery.Builder allWords =
            new org.apache.lucene.search.BooleanQuery.Builder();

    for (int i = 0; i < suggestion.length; i++) {
        final org.apache.lucene.index.Term wordTerm =
                new org.apache.lucene.index.Term(dictionaryField, suggestion[i].string);
        allWords.add(new org.apache.lucene.search.BooleanClause(
                new TermQuery(wordTerm),
                org.apache.lucene.search.BooleanClause.Occur.FILTER));
    }

    try {
        return searcher.count(allWords.build());
    } catch (final IOException e) {
        throw new RuntimeException(e);
    }
}
 
Example 13
Source Project: querqy   Source File: WordBreakCompoundRewriterTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * One two-word suggestion ("w1", "w2") for "w1w2" must show up as a nested boolean query next
 * to the original term, with both words as MUST clauses.
 */
@Test
public void testDecompoundSingleTokenIntoOneTwoTokenAlternative() throws IOException {
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(new SuggestWord[][] { decompoundSuggestion("w1", "w2") });

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, false, new TrieMap<>(), 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "w1w2", false);

    final ExpandedQuery result = compoundRewriter.rewrite(new ExpandedQuery(userQuery));

    assertThat((Query) result.getUserQuery(),
            bq(
                    dmq(
                            term("w1w2", false),
                            bq(
                                    dmq(must(), term("w1", true)),
                                    dmq(must(), term("w2", true))
                            )
                    )
            )
    );
}
 
Example 14
Source Project: querqy   Source File: WordBreakCompoundRewriterTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Two input terms, the second added with its flag set to {@code true} (a generated term,
 * judging by the test name): the rewritten query must contain both terms unchanged.
 */
@Test
public void testThatGeneratedSecondTermIsNotCompounded() throws IOException {
    // don't de-compound
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(new SuggestWord[][] {new SuggestWord[] {}});

    // compound of terms at idx 0+1
//    when(wordBreakSpellChecker.suggestWordCombinations(any(), anyInt(), any(), any()))
//            .thenReturn(new  CombineSuggestion[] { combineSuggestion("w1w2", 0, 1) });

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, false, new TrieMap<>(), 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "w1", false);
    addTerm(userQuery, "w2", true);

    final ExpandedQuery result = compoundRewriter.rewrite(new ExpandedQuery(userQuery));

    assertThat((Query) result.getUserQuery(),
            bq(
                    dmq(term("w1", false)),
                    dmq(term("w2", true))
            )
    );
}
 
Example 15
Source Project: querqy   Source File: WordBreakCompoundRewriterTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Two input terms, the first added with its flag set to {@code true} (a generated term,
 * judging by the test name): the rewritten query must contain both terms unchanged.
 */
@Test
public void testThatGeneratedFirstTermIsNotCompounded() throws IOException {
    // don't de-compound
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(new SuggestWord[][] {new SuggestWord[] {}});

    // compound of terms at idx 0+1
//    when(wordBreakSpellChecker.suggestWordCombinations(any(), anyInt(), any(), any()))
//            .thenReturn(new  CombineSuggestion[] { combineSuggestion("w1w2", 0, 1) });

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, false, new TrieMap<>(), 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "w1", true);
    addTerm(userQuery, "w2", false);

    final ExpandedQuery result = compoundRewriter.rewrite(new ExpandedQuery(userQuery));

    assertThat((Query) result.getUserQuery(),
            bq(
                    dmq(term("w1", true)),
                    dmq(term("w2", false))
            )
    );
}
 
Example 16
Source Project: querqy   Source File: WordBreakCompoundRewriterTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Two two-word suggestions for "w1w2" must both appear, in the stubbed order, as nested boolean
 * queries next to the original term.
 */
@Test
public void testDecompoundSingleTokenIntoTwoTwoTokenAlternatives() throws IOException {
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(new SuggestWord[][] { decompoundSuggestion("w1", "w2"), decompoundSuggestion("w", "1w2") });

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, false, new TrieMap<>(), 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "w1w2", false);

    final ExpandedQuery result = compoundRewriter.rewrite(new ExpandedQuery(userQuery));

    assertThat((Query) result.getUserQuery(),
            bq(
                    dmq(
                            term("w1w2", false),
                            // first alternative: w1 + w2
                            bq(
                                    dmq(must(), term("w1", true)),
                                    dmq(must(), term("w2", true))
                            ),
                            // second alternative: w + 1w2
                            bq(
                                    dmq(must(), term("w", true)),
                                    dmq(must(), term("1w2", true))
                            )
                    )
            )
    );
}
 
Example 17
Source Project: querqy   Source File: WordBreakCompoundRewriterTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Three suggestions are stubbed but the rewriter is built with 2 as its seventh argument
 * (presumably the expansion limit, given the test name): only the first two suggestions may
 * appear in the rewritten query.
 */
@Test
public void testThatOnlyMaxExpansionsAreApplied() throws IOException {
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(new SuggestWord[][] {
                    decompoundSuggestion("w3", "w4"),
                    decompoundSuggestion("w", "3w4"),
                    decompoundSuggestion("w3w", "4") });

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, false, new TrieMap<>(), 2, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "w3w4", false);

    final ExpandedQuery result = compoundRewriter.rewrite(new ExpandedQuery(userQuery));

    // The third suggestion ("w3w" + "4") is expected to be absent.
    assertThat((Query) result.getUserQuery(),
            bq(
                    dmq(
                            term("w3w4", false),
                            bq(
                                    dmq(must(), term("w3", true)),
                                    dmq(must(), term("w4", true))
                            ),
                            bq(
                                    dmq(must(), term("w", true)),
                                    dmq(must(), term("3w4", true))
                            )
                    )
            )
    );
}
 
Example 18
Source Project: querqy   Source File: WordBreakCompoundRewriterTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * With a combination "w1w2" stubbed for the terms at indexes 0 and 1, the compound must be
 * added to the dismax query of each of the two input terms.
 */
@Test
public void testCompoundTwoInputTokensOnly() throws IOException {
    // don't de-compound
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(new SuggestWord[][] {new SuggestWord[] {}});

    // compound of terms at idx 0+1
    when(wordBreakSpellChecker.suggestWordCombinations(any(), anyInt(), any(), any()))
            .thenReturn(new  CombineSuggestion[] { combineSuggestion("w1w2", 0, 1) });

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, false, new TrieMap<>(), 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "w1", false);
    addTerm(userQuery, "w2", false);

    final ExpandedQuery result = compoundRewriter.rewrite(new ExpandedQuery(userQuery));

    assertThat((Query) result.getUserQuery(),
            bq(
                    dmq(term("w1", false), term("w1w2", true)),
                    dmq(term("w2", false), term("w1w2", true))
            )
    );
}
 
Example 19
Source Project: querqy   Source File: WordBreakCompoundRewriterTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * With no combinations stubbed for the two input terms, the rewritten query must contain just
 * the two original terms.
 */
@Test
public void testNoCompoundForTwoInputTokensOnly() throws IOException {
    // don't de-compound
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(new SuggestWord[][] {new SuggestWord[] {}});

    // no combinations either
    when(wordBreakSpellChecker.suggestWordCombinations(any(), anyInt(), any(), any()))
            .thenReturn(new  CombineSuggestion[] { });

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, false, new TrieMap<>(), 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "w1", false);
    addTerm(userQuery, "w2", false);

    final ExpandedQuery result = compoundRewriter.rewrite(new ExpandedQuery(userQuery));

    assertThat((Query) result.getUserQuery(),
            bq(
                    dmq(term("w1", false)),
                    dmq(term("w2", false))
            )
    );
}
 
Example 20
Source Project: querqy   Source File: WordBreakCompoundRewriterTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * With the sixth constructor argument set to {@code true} (presumably "always add reverse
 * compounds", given the test name), both "w1w2" and the reversed "w2w1" must be added to each
 * input term's dismax query.
 */
@Test
public void testAlwaysAddReverseCompoundsForTwoWordInput() throws IOException {
    // don't de-compound
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(new SuggestWord[][] {new SuggestWord[] {}});

    // combinations for both word orders
    final Map<List<String>, CombineSuggestion[]> combinationsByInput = new HashMap<>();
    combinationsByInput.put(Arrays.asList("w1", "w2"), new  CombineSuggestion[] { combineSuggestion("w1w2", 0, 1) });
    combinationsByInput.put(Arrays.asList("w2", "w1"), new  CombineSuggestion[] { combineSuggestion("w2w1", 0, 1) });
    setupWordBreakMockWithCombinations(combinationsByInput);

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, true, new TrieMap<>(), 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "w1", false);
    addTerm(userQuery, "w2", false);

    final ExpandedQuery result = compoundRewriter.rewrite(new ExpandedQuery(userQuery));

    assertThat((Query) result.getUserQuery(),
            bq(
                    dmq(term("w1", false), term("w1w2", true), term("w2w1", true)),
                    dmq(term("w2", false), term("w1w2", true), term("w2w1", true))
            )
    );
}
 
Example 21
Source Project: querqy   Source File: WordBreakCompoundRewriterTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Input "w1 trigger w3" with "trigger" registered as a trigger word: the rewritten query must
 * contain only w1 and w3, each with the reverse compound "w3w1" added.
 */
@Test
public void testSingleReverseCompoundTriggerWord() throws IOException {
    final TrieMap<Boolean> reverseTriggers = new TrieMap<>();
    reverseTriggers.put("trigger", true);

    // don't de-compound
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(new SuggestWord[][] {new SuggestWord[] {}});

    // only the reversed word order has a combination
    final Map<List<String>, CombineSuggestion[]> combinationsByInput = new HashMap<>();
    combinationsByInput.put(Arrays.asList("w3", "w1"), new  CombineSuggestion[] { combineSuggestion("w3w1", 0, 1) });
    setupWordBreakMockWithCombinations(combinationsByInput);

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, false, reverseTriggers, 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "w1", false);
    addTerm(userQuery, "trigger", false);
    addTerm(userQuery, "w3", false);

    final ExpandedQuery result = compoundRewriter.rewrite(new ExpandedQuery(userQuery));

    assertThat((Query) result.getUserQuery(),
            bq(
                    dmq(term("w1", false), term("w3w1", true)),
                    dmq(term("w3", false), term("w3w1", true))
            )
    );
}
 
Example 22
Source Project: lucene-solr   Source File: DirectSolrSpellChecker.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Initialises this spell checker from {@code config} and pushes the resulting settings onto
 * {@code checker}.
 *
 * <p>Each setting starts from its {@code DEFAULT_*} constant and is replaced when the
 * configuration supplies a value. The comparator class may be the score or frequency keyword,
 * or a class name loaded through the core's resource loader; the string distance is loaded the
 * same way unless it is absent or names the internal distance.
 *
 * @param config configuration entries for this spell checker
 * @param core   core whose resource loader instantiates custom classes
 * @return the name returned by {@code super.init}
 */
@Override
@SuppressWarnings({"unchecked"})
public String init(@SuppressWarnings({"rawtypes"})NamedList config, SolrCore core) {

  final SolrParams params = config.toSolrParams();

  log.info("init: {}", config);
  final String name = super.init(config, core);

  // Comparator: score (default), frequency, or a custom class given by name.
  final String compClass = (String) config.get(COMPARATOR_CLASS);
  final Comparator<SuggestWord> comp;
  if (compClass == null || compClass.equalsIgnoreCase(SCORE_COMP)) {
    comp = SuggestWordQueue.DEFAULT_COMPARATOR;
  } else if (compClass.equalsIgnoreCase(FREQ_COMP)) {
    comp = new SuggestWordFrequencyComparator();
  } else { // must be a FQCN
    comp = (Comparator<SuggestWord>) core.getResourceLoader().newInstance(compClass, Comparator.class);
  }

  // String distance: internal Levenshtein unless a different class is configured.
  final String distClass = (String) config.get(STRING_DISTANCE);
  final StringDistance sd;
  if (distClass == null || distClass.equalsIgnoreCase(INTERNAL_DISTANCE)) {
    sd = DirectSpellChecker.INTERNAL_LEVENSHTEIN;
  } else {
    sd = core.getResourceLoader().newInstance(distClass, StringDistance.class);
  }

  // Numeric settings: configured value when present, default otherwise.
  final Float accuracy = params.getFloat(ACCURACY);
  final float minAccuracy = (accuracy == null) ? DEFAULT_ACCURACY : accuracy;

  final Integer edits = params.getInt(MAXEDITS);
  final int maxEdits = (edits == null) ? DEFAULT_MAXEDITS : edits;

  final Integer prefix = params.getInt(MINPREFIX);
  final int minPrefix = (prefix == null) ? DEFAULT_MINPREFIX : prefix;

  final Integer inspections = params.getInt(MAXINSPECTIONS);
  final int maxInspections = (inspections == null) ? DEFAULT_MAXINSPECTIONS : inspections;

  final Float threshold = params.getFloat(THRESHOLD_TOKEN_FREQUENCY);
  final float minThreshold = (threshold == null) ? DEFAULT_THRESHOLD_TOKEN_FREQUENCY : threshold;

  final Integer queryLength = params.getInt(MINQUERYLENGTH);
  final int minQueryLength = (queryLength == null) ? DEFAULT_MINQUERYLENGTH : queryLength;

  final Integer overriddenMaxQueryLength = params.getInt(MAXQUERYLENGTH);
  final int maxQueryLength =
      (overriddenMaxQueryLength == null) ? DEFAULT_MAXQUERYLENGTH : overriddenMaxQueryLength;

  final Float queryFreq = params.getFloat(MAXQUERYFREQUENCY);
  final float maxQueryFrequency = (queryFreq == null) ? DEFAULT_MAXQUERYFREQUENCY : queryFreq;

  // Apply everything only after all values were read successfully.
  checker.setComparator(comp);
  checker.setDistance(sd);
  checker.setMaxEdits(maxEdits);
  checker.setMinPrefix(minPrefix);
  checker.setAccuracy(minAccuracy);
  checker.setThresholdFrequency(minThreshold);
  checker.setMaxInspections(maxInspections);
  checker.setMinQueryLength(minQueryLength);
  checker.setMaxQueryLength(maxQueryLength);
  checker.setMaxQueryFrequency(maxQueryFrequency);
  checker.setLowerCaseTerms(false);

  return name;
}
 
Example 23
Source Project: lucene-solr   Source File: DirectSolrSpellChecker.java    License: Apache License 2.0 4 votes vote down vote up
@Override
public SpellingResult getSuggestions(SpellingOptions options)
    throws IOException {
  log.debug("getSuggestions: {}", options.tokens);
      
  SpellingResult result = new SpellingResult();
  float accuracy = (options.accuracy == Float.MIN_VALUE) ? checker.getAccuracy() : options.accuracy;
  
  for (Token token : options.tokens) {
    String tokenText = token.toString();
    Term term = new Term(field, tokenText);
    int freq = options.reader.docFreq(term);
    int count = (options.alternativeTermCount > 0 && freq > 0) ? options.alternativeTermCount: options.count;
    SuggestWord[] suggestions = checker.suggestSimilar(term, count,options.reader, options.suggestMode, accuracy);
    result.addFrequency(token, freq);
          
    // If considering alternatives to "correctly-spelled" terms, then add the
    // original as a viable suggestion.
    if (options.alternativeTermCount > 0 && freq > 0) {
      boolean foundOriginal = false;
      SuggestWord[] suggestionsWithOrig = new SuggestWord[suggestions.length + 1];
      for (int i = 0; i < suggestions.length; i++) {
        if (suggestions[i].string.equals(tokenText)) {
          foundOriginal = true;
          break;
        }
        suggestionsWithOrig[i + 1] = suggestions[i];
      }
      if (!foundOriginal) {
        SuggestWord orig = new SuggestWord();
        orig.freq = freq;
        orig.string = tokenText;
        suggestionsWithOrig[0] = orig;
        suggestions = suggestionsWithOrig;
      }
    }      
    if(suggestions.length==0 && freq==0) {
      List<String> empty = Collections.emptyList();
      result.add(token, empty);
    } else {        
      for (SuggestWord suggestion : suggestions) {
        result.add(token, suggestion.string, suggestion.freq);
      }
    }
  }
  return result;
}
 
Example 24
Source Project: lucene-solr   Source File: SampleComparator.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Orders two suggestions by {@link String#compareTo(String)} on their {@code string} fields.
 *
 * @param suggestWord  left-hand suggestion
 * @param suggestWord1 right-hand suggestion
 * @return negative, zero or positive as the left text sorts before, equal to or after the right
 */
@Override
public int compare(SuggestWord suggestWord, SuggestWord suggestWord1) {
  final String left = suggestWord.string;
  final String right = suggestWord1.string;
  return left.compareTo(right);
}
 
Example 25
Source Project: querqy   Source File: WordBreakCompoundRewriterTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Input "w1 w2g w2" where "w2g" is added with its flag set to {@code true} (a generated term,
 * judging by the test name): the compound "w1w2" must be added to w1 and w2, while "w2g" stays
 * on its own.
 */
@Test
public void testThatCompoundingIfGeneratedIsMixedIn() throws IOException {
    // don't de-compound
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(new SuggestWord[][] {new SuggestWord[] {}});

    // compound of terms at idx 0+1
    when(wordBreakSpellChecker.suggestWordCombinations(any(), anyInt(), any(), any()))
            .thenReturn(new  CombineSuggestion[] { combineSuggestion("w1w2", 0, 1) });

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, false, new TrieMap<>(), 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "w1", false);
    addTerm(userQuery, "w2g", true);
    addTerm(userQuery, "w2", false);

    final ExpandedQuery result = compoundRewriter.rewrite(new ExpandedQuery(userQuery));

    assertThat((Query) result.getUserQuery(),
            bq(
                    dmq(term("w1", false), term("w1w2", true)),
                    dmq(term("w2g", true)),
                    dmq(term("w2", false), term("w1w2", true))
            )
    );
}
 
Example 26
Source Project: querqy   Source File: WordBreakCompoundRewriterTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Registers the trigger words {@code Trigger_Upper} and {@code trigger_lower} and builds the
 * rewriter with {@code false} as its fourth constructor argument (presumably the
 * lower-case-input switch named in the test title -- confirm against the rewriter).
 *
 * <p>Three queries of the form {@code w1 <middle> w3} are rewritten:
 * <ul>
 *   <li>{@code Trigger_Upper}: the middle term is absent from the result and {@code w3w1}
 *       is added to both remaining terms;</li>
 *   <li>{@code trigger_upper}: the query comes back with its three original terms only;</li>
 *   <li>{@code Trigger_Lower}: the query comes back with its three original terms only.</li>
 * </ul>
 */
@Test
public void testCompoundTriggerWordWithLowerCaseInputSetToFalse() throws IOException {
    final TrieMap<Boolean> triggers = new TrieMap<>();
    triggers.put("Trigger_Upper", true);
    triggers.put("trigger_lower", true);

    // don't de-compound
    final SuggestWord[][] noWordBreaks = new SuggestWord[][] {new SuggestWord[] {}};
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(noWordBreaks);

    // the only combination offered to the mock setup is for the pair (w3, w1)
    final Map<List<String>, CombineSuggestion[]> combinationsByTerms = new HashMap<>();
    combinationsByTerms.put(
            Arrays.asList("w3", "w1"),
            new CombineSuggestion[] {combineSuggestion("w3w1", 0, 1)});
    setupWordBreakMockWithCombinations(combinationsByTerms);

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, false, triggers, 5, false);

    // Scenario 1: middle term spelled exactly like the registered "Trigger_Upper".
    final Query exactCaseQuery = new Query();
    addTerm(exactCaseQuery, "w1", false);
    addTerm(exactCaseQuery, "Trigger_Upper", false);
    addTerm(exactCaseQuery, "w3", false);

    final ExpandedQuery exactCaseResult = compoundRewriter.rewrite(new ExpandedQuery(exactCaseQuery));

    assertThat((Query) exactCaseResult.getUserQuery(),
            bq(
                    dmq(term("w1", false), term("w3w1", true)),
                    dmq(term("w3", false), term("w3w1", true))
            )
    );

    // Scenario 2: lower-cased spelling of the upper-case trigger.
    final Query loweredQuery = new Query();
    addTerm(loweredQuery, "w1", false);
    addTerm(loweredQuery, "trigger_upper", false);
    addTerm(loweredQuery, "w3", false);

    final ExpandedQuery loweredResult = compoundRewriter.rewrite(new ExpandedQuery(loweredQuery));

    assertThat((Query) loweredResult.getUserQuery(),
            bq(
                    dmq(term("w1", false)),
                    dmq(term("trigger_upper", false)),
                    dmq(term("w3", false))
            )
    );

    // Scenario 3: capitalised spelling of the lower-case trigger.
    final Query capitalisedQuery = new Query();
    addTerm(capitalisedQuery, "w1", false);
    addTerm(capitalisedQuery, "Trigger_Lower", false);
    addTerm(capitalisedQuery, "w3", false);

    final ExpandedQuery capitalisedResult = compoundRewriter.rewrite(new ExpandedQuery(capitalisedQuery));

    assertThat((Query) capitalisedResult.getUserQuery(),
            bq(
                    dmq(term("w1", false)),
                    dmq(term("Trigger_Lower", false)),
                    dmq(term("w3", false))
            )
    );
}
 
Example 27
Source Project: querqy   Source File: WordBreakCompoundRewriterTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Registers the single trigger word {@code trigger_lower} and builds the rewriter with
 * {@code true} as its fourth constructor argument (presumably the lower-case-input switch
 * named in the test title -- confirm against the rewriter).
 *
 * <p>Both {@code w1 trigger_lower w3} and {@code w1 Trigger_Lower w3} are expected to be
 * rewritten the same way: the middle term is absent from the result and {@code w3w1} is
 * added to both remaining terms.
 */
@Test
public void testCompoundTriggerWordWithLowerCaseInputSetToTrue() throws IOException {
    final TrieMap<Boolean> triggers = new TrieMap<>();
    triggers.put("trigger_lower", true);

    // don't de-compound
    final SuggestWord[][] noWordBreaks = new SuggestWord[][] {new SuggestWord[] {}};
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(noWordBreaks);

    // the only combination offered to the mock setup is for the pair (w3, w1)
    final Map<List<String>, CombineSuggestion[]> combinationsByTerms = new HashMap<>();
    combinationsByTerms.put(
            Arrays.asList("w3", "w1"),
            new CombineSuggestion[] {combineSuggestion("w3w1", 0, 1)});
    setupWordBreakMockWithCombinations(combinationsByTerms);

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", true, false, triggers, 5, false);

    // Scenario 1: middle term spelled exactly like the registered trigger.
    final Query lowerCaseQuery = new Query();
    addTerm(lowerCaseQuery, "w1", false);
    addTerm(lowerCaseQuery, "trigger_lower", false);
    addTerm(lowerCaseQuery, "w3", false);

    final ExpandedQuery lowerCaseResult = compoundRewriter.rewrite(new ExpandedQuery(lowerCaseQuery));

    assertThat((Query) lowerCaseResult.getUserQuery(),
            bq(
                    dmq(term("w1", false), term("w3w1", true)),
                    dmq(term("w3", false), term("w3w1", true))
            )
    );

    // Scenario 2: same trigger with different capitalisation; same expectation.
    final Query mixedCaseQuery = new Query();
    addTerm(mixedCaseQuery, "w1", false);
    addTerm(mixedCaseQuery, "Trigger_Lower", false);
    addTerm(mixedCaseQuery, "w3", false);

    final ExpandedQuery mixedCaseResult = compoundRewriter.rewrite(new ExpandedQuery(mixedCaseQuery));

    assertThat((Query) mixedCaseResult.getUserQuery(),
            bq(
                    dmq(term("w1", false), term("w3w1", true)),
                    dmq(term("w3", false), term("w3w1", true))
            )
    );
}
 
Example 28
Source Project: querqy   Source File: WordBreakCompoundRewriterTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Rewrites {@code w0 w1 trigger w3 w4} with {@code trigger} registered as a trigger word and
 * combinations configured for the pairs (w0, w1), (w3, w1) and (w3, w4).
 *
 * <p>The expected result has no {@code trigger} term; {@code w0w1} is attached to
 * {@code w0} and {@code w1}, {@code w3w1} to {@code w1} and {@code w3}, and {@code w3w4}
 * to {@code w3} and {@code w4}.
 */
@Test
public void testCompoundTriggerAffectsOnlySurroundingCompound() throws IOException {
    final TrieMap<Boolean> triggers = new TrieMap<>();
    triggers.put("trigger", true);

    // don't de-compound
    final SuggestWord[][] noWordBreaks = new SuggestWord[][] {new SuggestWord[] {}};
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(noWordBreaks);

    final Map<List<String>, CombineSuggestion[]> combinationsByTerms = new HashMap<>();
    combinationsByTerms.put(
            Arrays.asList("w0", "w1"),
            new CombineSuggestion[] {combineSuggestion("w0w1", 0, 1)});
    combinationsByTerms.put(
            Arrays.asList("w3", "w1"),
            new CombineSuggestion[] {combineSuggestion("w3w1", 0, 1)});
    combinationsByTerms.put(
            Arrays.asList("w3", "w4"),
            new CombineSuggestion[] {combineSuggestion("w3w4", 0, 1)});
    setupWordBreakMockWithCombinations(combinationsByTerms);

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, false, triggers, 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "w0", false);
    addTerm(userQuery, "w1", false);
    addTerm(userQuery, "trigger", false);
    addTerm(userQuery, "w3", false);
    addTerm(userQuery, "w4", false);

    final ExpandedQuery result = compoundRewriter.rewrite(new ExpandedQuery(userQuery));

    assertThat((Query) result.getUserQuery(),
            bq(
                    dmq(term("w0", false), term("w0w1", true)),
                    dmq(term("w1", false), term("w0w1", true), term("w3w1", true)),
                    dmq(term("w3", false), term("w3w1", true), term("w3w4", true)),
                    dmq(term("w4", false), term("w3w4", true))
            )
    );
}
 
Example 29
Source Project: querqy   Source File: WordBreakCompoundRewriterTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Converts each given part into a {@link SuggestWord} via
 * {@code WordBreakCompoundRewriterTest.suggestWord} and returns them in the same order.
 *
 * @param parts the strings to convert, one suggestion per element
 * @return an array holding one {@code SuggestWord} per part
 */
private static SuggestWord[] decompoundSuggestion(String... parts) {
    final SuggestWord[] words = new SuggestWord[parts.length];
    for (int i = 0; i < parts.length; i++) {
        words[i] = WordBreakCompoundRewriterTest.suggestWord(parts[i]);
    }
    return words;
}