org.apache.lucene.search.spell.SuggestWord Java Examples

The following examples show how to use org.apache.lucene.search.spell.SuggestWord. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: DirectCandidateGenerator.java    From Elasticsearch with Apache License 2.0 6 votes vote down vote up
/**
 * Asks the spell checker for terms similar to the set's original term and adds every
 * suggestion that passes {@code postFilter} to the given set.
 *
 * @param set the candidate set whose original term is looked up
 * @return the same {@code set} instance after the candidates were added
 * @throws IOException propagated from the filtering and spell-checker calls
 */
@Override
public CandidateSet drawCandidates(CandidateSet set) throws IOException {
    final Candidate source = set.originalTerm;
    final BytesRef filteredTerm = preFilter(source.term, spare, byteSpare);
    final long sourceFrequency = source.frequency;

    // With SUGGEST_ALWAYS no frequency threshold is applied; otherwise it is derived
    // from the original term's frequency and the dictionary size.
    if (this.suggestMode == SuggestMode.SUGGEST_ALWAYS) {
        spellchecker.setThresholdFrequency(0);
    } else {
        spellchecker.setThresholdFrequency(thresholdFrequency(sourceFrequency, dictSize));
    }

    final SuggestWord[] similarWords =
            spellchecker.suggestSimilar(new Term(field, filteredTerm), numCandidates, reader, this.suggestMode);
    final List<Candidate> drawn = new ArrayList<>(similarWords.length);
    for (final SuggestWord similar : similarWords) {
        final BytesRef candidateTerm = new BytesRef(similar.string);
        final Candidate generated = new Candidate(
                candidateTerm,
                internalFrequency(candidateTerm),
                similar.score,
                score(similar.freq, similar.score, dictSize),
                false);
        postFilter(generated, spare, byteSpare, drawn);
    }
    set.addCandidates(drawn);
    return set;
}
 
Example #2
Source File: WordBreakCompoundRewriterTest.java    From querqy with Apache License 2.0 6 votes vote down vote up
/**
 * Rewrites a single mixed-case term with a rewriter whose first boolean argument is
 * {@code false} (presumably the lower-case-input switch, going by the test name) and
 * verifies that the spell checker is asked for word breaks of the term in its original casing.
 */
@Test
public void testThatDecompoundRespectsLowerCaseInputFalse() throws IOException {
    final SuggestWord[][] noBreaks = new SuggestWord[][] { };
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any())).thenReturn(noBreaks);

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, false, new TrieMap<>(), 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "W1w2", false);

    compoundRewriter.rewrite(new ExpandedQuery(userQuery));

    // The term must reach the spell checker exactly as it was typed.
    verify(wordBreakSpellChecker)
            .suggestWordBreaks(eq(new Term("field1", "W1w2")), anyInt(), any(), any(), any());
}
 
Example #3
Source File: TermSuggester.java    From Elasticsearch with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@link TermSuggestion} by asking a {@link DirectSpellChecker} for similar words
 * for each token returned by {@code queryTerms}.
 *
 * <p>Every token yields one entry (keyed by the token's term bytes, with the token's start
 * offset and the difference between its end and start offsets); every suggested word
 * becomes an option of that entry carrying the word's frequency and score.
 *
 * @param name       name given to the resulting suggestion
 * @param suggestion context supplying size, shard size and spell-checker settings
 * @param searcher   searcher whose index reader is used for the lookups
 * @param spare      scratch buffer handed to {@code queryTerms}
 * @return the populated suggestion
 * @throws IOException propagated from tokenizing or from the spell checker
 */
@Override
public TermSuggestion innerExecute(String name, TermSuggestionContext suggestion, IndexSearcher searcher, CharsRefBuilder spare) throws IOException {
    final DirectSpellChecker checker =
            SuggestUtils.getDirectSpellChecker(suggestion.getDirectSpellCheckerSettings());
    final IndexReader reader = searcher.getIndexReader();
    final TermSuggestion result =
            new TermSuggestion(name, suggestion.getSize(), suggestion.getDirectSpellCheckerSettings().sort());

    for (final Token token : queryTerms(suggestion, spare)) {
        // TODO: Extend DirectSpellChecker in 4.1, to get the raw suggested words as BytesRef
        final SuggestWord[] similarWords = checker.suggestSimilar(
                token.term,
                suggestion.getShardSize(),
                reader,
                suggestion.getDirectSpellCheckerSettings().suggestMode());

        final Text entryText = new Text(new BytesArray(token.term.bytes()));
        final int length = token.endOffset - token.startOffset;
        final TermSuggestion.Entry entry = new TermSuggestion.Entry(entryText, token.startOffset, length);

        for (final SuggestWord similar : similarWords) {
            final Text optionText = new Text(similar.string);
            entry.addOption(new TermSuggestion.Entry.Option(optionText, similar.freq, similar.score));
        }
        result.addTerm(entry);
    }
    return result;
}
 
Example #4
Source File: WordBreakCompoundRewriterTest.java    From querqy with Apache License 2.0 6 votes vote down vote up
/**
 * Rewrites two upper-case terms with a rewriter whose first boolean argument is
 * {@code false} (presumably the lower-case-input switch, going by the test name) and
 * verifies that {@code suggestWordCombinations} receives both terms in their original casing.
 */
@Test
public void testThatCompoundRespectsLowerCaseInputFalse() throws IOException {
    final SuggestWord[][] noBreaks = new SuggestWord[][] { };
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any())).thenReturn(noBreaks);

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, false, new TrieMap<>(), 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "W1", false);
    addTerm(userQuery, "W2", false);

    compoundRewriter.rewrite(new ExpandedQuery(userQuery));

    // Both terms are expected unchanged, in input order.
    final Term[] expectedTerms = new Term[] {new Term("field1", "W1"), new Term("field1", "W2")};
    verify(wordBreakSpellChecker).suggestWordCombinations(eq(expectedTerms), anyInt(), any(), any());
}
 
Example #5
Source File: WordBreakCompoundRewriterTest.java    From querqy with Apache License 2.0 6 votes vote down vote up
/**
 * Rewrites two upper-case terms with a rewriter whose first boolean argument is
 * {@code true} (presumably the lower-case-input switch, going by the test name) and
 * verifies that {@code suggestWordCombinations} receives the lower-cased terms.
 */
@Test
public void testThatCompoundRespectsLowerCaseInputTrue() throws IOException {
    final SuggestWord[][] noBreaks = new SuggestWord[][] { };
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any())).thenReturn(noBreaks);

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", true, false, new TrieMap<>(), 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "W1", false);
    addTerm(userQuery, "W2", false);

    compoundRewriter.rewrite(new ExpandedQuery(userQuery));

    // The input was "W1"/"W2"; the spell checker must see "w1"/"w2".
    final Term[] expectedTerms = new Term[] {new Term("field1", "w1"), new Term("field1", "w2")};
    verify(wordBreakSpellChecker).suggestWordCombinations(eq(expectedTerms), anyInt(), any(), any());
}
 
Example #6
Source File: IndexBasedSpellCheckerTest.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Checks the comparator type exposed by two spell checkers of the "spellcheck" search
 * component: the one registered as "freq" must use a {@link SuggestWordFrequencyComparator},
 * the one registered as "fqcn" must use a {@link SampleComparator}.
 */
@Test
public void testComparator() throws Exception {
  final SpellCheckComponent spellcheck =
      (SpellCheckComponent) h.getCore().getSearchComponent("spellcheck");
  assertNotNull(spellcheck);

  // Spell checker registered under "freq".
  final AbstractLuceneSpellChecker freqChecker =
      (AbstractLuceneSpellChecker) spellcheck.getSpellChecker("freq");
  assertNotNull(freqChecker);
  final Comparator<SuggestWord> freqComparator = freqChecker.getSpellChecker().getComparator();
  assertNotNull(freqComparator);
  assertTrue(freqComparator instanceof SuggestWordFrequencyComparator);

  // Spell checker registered under "fqcn".
  final AbstractLuceneSpellChecker fqcnChecker =
      (AbstractLuceneSpellChecker) spellcheck.getSpellChecker("fqcn");
  assertNotNull(fqcnChecker);
  final Comparator<SuggestWord> fqcnComparator = fqcnChecker.getSpellChecker().getComparator();
  assertNotNull(fqcnComparator);
  assertTrue(fqcnComparator instanceof SampleComparator);
}
 
Example #7
Source File: WordBreakCompoundRewriter.java    From querqy with Apache License 2.0 6 votes vote down vote up
/**
 * Adds one boolean query to {@code nodesToAdd} for every non-empty word-break suggestion
 * of the given term. Each suggested word becomes a generated term wrapped in its own
 * {@code MUST} disjunction inside that boolean query.
 *
 * @param term the query term to split
 * @throws RuntimeException wrapping any {@link IOException} raised while fetching suggestions
 */
protected void decompound(final Term term) {
    try {
        for (final SuggestWord[] parts : suggestWordbreaks(term)) {
            // Nothing to add for a missing or empty suggestion.
            if (parts == null || parts.length == 0) {
                continue;
            }

            final BooleanQuery alternative = new BooleanQuery(term.getParent(), Clause.Occur.SHOULD, true);
            for (final SuggestWord part : parts) {
                final DisjunctionMaxQuery partQuery =
                        new DisjunctionMaxQuery(alternative, Clause.Occur.MUST, true);
                alternative.addClause(partQuery);
                partQuery.addClause(new Term(partQuery, term.getField(), part.string, true));
            }
            nodesToAdd.add(alternative);
        }
    } catch (final IOException e) {
        // IO is broken, this looks serious -> throw as RTE
        throw new RuntimeException("Error decompounding " + term, e);
    }
}
 
Example #8
Source File: WordBreakCompoundRewriter.java    From querqy with Apache License 2.0 6 votes vote down vote up
/**
 * Fetches word-break suggestions for a term and reduces them to at most
 * {@code maxDecompoundExpansions} suggestions that consist of more than one word.
 *
 * <p>When {@code verifyDecompoundCollation} is set, each suggestion is additionally
 * counted via {@code countCollatedMatches}; suggestions with a count of zero are dropped
 * and the rest are sorted by the natural order of {@code MaxSortable} before limiting.
 *
 * @param term the query term to split
 * @return the selected suggestions, or an empty list if the spell checker returned none
 * @throws IOException propagated from the spell checker
 */
protected List<SuggestWord[]> suggestWordbreaks(final Term term) throws IOException {
    final SuggestWord[][] suggestions = wordBreakSpellChecker.suggestWordBreaks(
            toLuceneTerm(term),
            decompoundsToQuery,
            indexReader,
            SuggestMode.SUGGEST_ALWAYS,
            WordBreakSpellChecker.BreakSuggestionSortMethod.NUM_CHANGES_THEN_MAX_FREQUENCY);

    if (suggestions.length == 0) {
        return Collections.emptyList();
    }

    if (verifyDecompoundCollation) {
        // Keep only suggestions whose words actually match documents together.
        final IndexSearcher collationSearcher = new IndexSearcher(indexReader);
        return Arrays.stream(suggestions)
                .filter(parts -> parts != null && parts.length > 1)
                .map(parts -> new MaxSortable<>(parts, countCollatedMatches(parts, collationSearcher)))
                .filter(counted -> counted.count > 0)
                .sorted()
                .limit(maxDecompoundExpansions) // TODO: use PriorityQueue
                .map(counted -> counted.obj)
                .collect(Collectors.toList());
    }

    // No verification requested: take the first multi-word suggestions as returned.
    return Arrays.stream(suggestions)
            .filter(parts -> parts != null && parts.length > 1)
            .limit(maxDecompoundExpansions)
            .collect(Collectors.toList());
}
 
Example #9
Source File: WordBreakCompoundRewriterTest.java    From querqy with Apache License 2.0 6 votes vote down vote up
/**
 * When the spell checker returns only an empty word-break suggestion, the rewritten
 * query must contain nothing but the original term.
 */
@Test
public void testNoDecompoundForSingleToken() throws IOException {
    final SuggestWord[][] onlyEmptySuggestion = new SuggestWord[][] {new SuggestWord[] {}};
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(onlyEmptySuggestion);

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, false, new TrieMap<>(), 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "w1w2", false);

    final ExpandedQuery result = compoundRewriter.rewrite(new ExpandedQuery(userQuery));

    assertThat((Query) result.getUserQuery(),
            bq(
                    dmq(term("w1w2", false))
            )
    );
}
 
Example #10
Source File: WordBreakCompoundRewriterTest.java    From querqy with Apache License 2.0 6 votes vote down vote up
/**
 * Rewrites a single mixed-case term with a rewriter whose first boolean argument is
 * {@code true} (presumably the lower-case-input switch, going by the test name) and
 * verifies that the spell checker is asked for word breaks of the lower-cased term.
 */
@Test
public void testThatDecompoundRespectsLowerCaseInputTrue() throws IOException {
    final SuggestWord[][] noBreaks = new SuggestWord[][] { };
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any())).thenReturn(noBreaks);

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", true, false, new TrieMap<>(), 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "W1w2", false);

    compoundRewriter.rewrite(new ExpandedQuery(userQuery));

    // The input was "W1w2"; the spell checker must see "w1w2".
    verify(wordBreakSpellChecker)
            .suggestWordBreaks(eq(new Term("field1", "w1w2")), anyInt(), any(), any(), any());
}
 
Example #11
Source File: WordBreakCompoundRewriterTest.java    From querqy with Apache License 2.0 5 votes vote down vote up
/**
 * A single two-word break suggestion ("w1" + "w2") must be added next to the original
 * term as one boolean query holding a {@code must} disjunction per suggested word.
 */
@Test
public void testDecompoundSingleTokenIntoOneTwoTokenAlternative() throws IOException {
    final SuggestWord[][] oneSplit = new SuggestWord[][] { decompoundSuggestion("w1", "w2") };
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any())).thenReturn(oneSplit);

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, false, new TrieMap<>(), 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "w1w2", false);

    final ExpandedQuery result = compoundRewriter.rewrite(new ExpandedQuery(userQuery));

    assertThat((Query) result.getUserQuery(),
            bq(
                    dmq(
                            term("w1w2", false),
                            bq(
                                    dmq(must(), term("w1", true)),
                                    dmq(must(), term("w2", true))
                            )
                    )
            )
    );
}
 
Example #12
Source File: WordBreakCompoundRewriterTest.java    From querqy with Apache License 2.0 5 votes vote down vote up
/**
 * With "trigger" registered as a trigger word, the query "w1 trigger w3" must be rewritten
 * so that the trigger term is gone and the reversed compound "w3w1" is added as a
 * generated term to both remaining terms.
 */
@Test
public void testSingleReverseCompoundTriggerWord() throws IOException {
    final TrieMap<Boolean> reverseTriggers = new TrieMap<>();
    reverseTriggers.put("trigger", true);

    // don't de-compound
    final SuggestWord[][] onlyEmptySuggestion = new SuggestWord[][] {new SuggestWord[] {}};
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(onlyEmptySuggestion);

    // The combination mock only knows the reversed word order "w3", "w1".
    final Map<List<String>, CombineSuggestion[]> combinations = new HashMap<>();
    combinations.put(Arrays.asList("w3", "w1"), new CombineSuggestion[] { combineSuggestion("w3w1", 0, 1) });
    setupWordBreakMockWithCombinations(combinations);

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, false, reverseTriggers, 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "w1", false);
    addTerm(userQuery, "trigger", false);
    addTerm(userQuery, "w3", false);

    final ExpandedQuery result = compoundRewriter.rewrite(new ExpandedQuery(userQuery));

    assertThat((Query) result.getUserQuery(),
            bq(
                    dmq(term("w1", false), term("w3w1", true)),
                    dmq(term("w3", false), term("w3w1", true))
            )
    );
}
 
Example #13
Source File: WordBreakCompoundRewriterTest.java    From querqy with Apache License 2.0 5 votes vote down vote up
/**
 * With the rewriter's second boolean argument set to {@code true} (presumably the
 * always-add-reverse-compounds switch, going by the test name), a two-word query must
 * receive both the forward compound "w1w2" and the reversed compound "w2w1" on each term.
 */
@Test
public void testAlwaysAddReverseCompoundsForTwoWordInput() throws IOException {
    // don't de-compound
    final SuggestWord[][] onlyEmptySuggestion = new SuggestWord[][] {new SuggestWord[] {}};
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(onlyEmptySuggestion);

    // The combination mock knows both word orders.
    final Map<List<String>, CombineSuggestion[]> combinations = new HashMap<>();
    combinations.put(Arrays.asList("w1", "w2"), new CombineSuggestion[] { combineSuggestion("w1w2", 0, 1) });
    combinations.put(Arrays.asList("w2", "w1"), new CombineSuggestion[] { combineSuggestion("w2w1", 0, 1) });
    setupWordBreakMockWithCombinations(combinations);

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, true, new TrieMap<>(), 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "w1", false);
    addTerm(userQuery, "w2", false);

    final ExpandedQuery result = compoundRewriter.rewrite(new ExpandedQuery(userQuery));

    assertThat((Query) result.getUserQuery(),
            bq(
                    dmq(term("w1", false), term("w1w2", true), term("w2w1", true)),
                    dmq(term("w2", false), term("w1w2", true), term("w2w1", true))
            )
    );
}
 
Example #14
Source File: WordBreakCompoundRewriterTest.java    From querqy with Apache License 2.0 5 votes vote down vote up
/**
 * When the spell checker offers neither word breaks nor word combinations, a two-term
 * query must come back with just its two original terms.
 */
@Test
public void testNoCompoundForTwoInputTokensOnly() throws IOException {
    // don't de-compound
    final SuggestWord[][] onlyEmptySuggestion = new SuggestWord[][] {new SuggestWord[] {}};
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(onlyEmptySuggestion);

    // don't compound either
    final CombineSuggestion[] noCombinations = new CombineSuggestion[] { };
    when(wordBreakSpellChecker.suggestWordCombinations(any(), anyInt(), any(), any()))
            .thenReturn(noCombinations);

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, false, new TrieMap<>(), 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "w1", false);
    addTerm(userQuery, "w2", false);

    final ExpandedQuery result = compoundRewriter.rewrite(new ExpandedQuery(userQuery));

    assertThat((Query) result.getUserQuery(),
            bq(
                    dmq(term("w1", false)),
                    dmq(term("w2", false))
            )
    );
}
 
Example #15
Source File: WordBreakCompoundRewriterTest.java    From querqy with Apache License 2.0 5 votes vote down vote up
/**
 * When the spell checker combines the two input terms into "w1w2", that compound must be
 * added as a generated term to each of the two original terms.
 */
@Test
public void testCompoundTwoInputTokensOnly() throws IOException {
    // don't de-compound
    final SuggestWord[][] onlyEmptySuggestion = new SuggestWord[][] {new SuggestWord[] {}};
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(onlyEmptySuggestion);

    // compound of terms at idx 0+1
    final CombineSuggestion[] oneCombination = new CombineSuggestion[] { combineSuggestion("w1w2", 0, 1) };
    when(wordBreakSpellChecker.suggestWordCombinations(any(), anyInt(), any(), any()))
            .thenReturn(oneCombination);

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, false, new TrieMap<>(), 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "w1", false);
    addTerm(userQuery, "w2", false);

    final ExpandedQuery result = compoundRewriter.rewrite(new ExpandedQuery(userQuery));

    assertThat((Query) result.getUserQuery(),
            bq(
                    dmq(term("w1", false), term("w1w2", true)),
                    dmq(term("w2", false), term("w1w2", true))
            )
    );
}
 
Example #16
Source File: WordBreakCompoundRewriterTest.java    From querqy with Apache License 2.0 5 votes vote down vote up
/**
 * The spell checker offers three splits, but the rewriter is built with {@code 2} as its
 * numeric argument (presumably the maximum number of expansions, going by the test name);
 * only the first two splits may appear in the rewritten query.
 */
@Test
public void testThatOnlyMaxExpansionsAreApplied() throws IOException {
    final SuggestWord[][] threeSplits = new SuggestWord[][] {
            decompoundSuggestion("w3", "w4"),
            decompoundSuggestion("w", "3w4"),
            decompoundSuggestion("w3w", "4")
    };
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any())).thenReturn(threeSplits);

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, false, new TrieMap<>(), 2, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "w3w4", false);

    final ExpandedQuery result = compoundRewriter.rewrite(new ExpandedQuery(userQuery));

    // The third split ("w3w" + "4") must be missing.
    assertThat((Query) result.getUserQuery(),
            bq(
                    dmq(
                            term("w3w4", false),
                            bq(
                                    dmq(must(), term("w3", true)),
                                    dmq(must(), term("w4", true))
                            ),
                            bq(
                                    dmq(must(), term("w", true)),
                                    dmq(must(), term("3w4", true))
                            )
                    )
            )
    );
}
 
Example #17
Source File: WordBreakCompoundRewriterTest.java    From querqy with Apache License 2.0 5 votes vote down vote up
/**
 * Two two-word break suggestions for the same term must both be added next to the
 * original term, each as its own boolean query of {@code must} disjunctions.
 */
@Test
public void testDecompoundSingleTokenIntoTwoTwoTokenAlternatives() throws IOException {
    final SuggestWord[][] twoSplits = new SuggestWord[][] {
            decompoundSuggestion("w1", "w2"),
            decompoundSuggestion("w", "1w2")
    };
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any())).thenReturn(twoSplits);

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, false, new TrieMap<>(), 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "w1w2", false);

    final ExpandedQuery result = compoundRewriter.rewrite(new ExpandedQuery(userQuery));

    assertThat((Query) result.getUserQuery(),
            bq(
                    dmq(
                            term("w1w2", false),
                            bq(
                                    dmq(must(), term("w1", true)),
                                    dmq(must(), term("w2", true))
                            ),
                            bq(
                                    dmq(must(), term("w", true)),
                                    dmq(must(), term("1w2", true))
                            )
                    )
            )
    );
}
 
Example #18
Source File: WordBreakCompoundRewriterTest.java    From querqy with Apache License 2.0 5 votes vote down vote up
/**
 * A query whose first term is added with {@code true} as the last {@code addTerm}
 * argument (presumably marking it as generated, going by the test name) must come back
 * unchanged: no compound term is added to either term.
 */
@Test
public void testThatGeneratedFirstTermIsNotCompounded() throws IOException {
    // don't de-compound
    final SuggestWord[][] onlyEmptySuggestion = new SuggestWord[][] {new SuggestWord[] {}};
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(onlyEmptySuggestion);

    // suggestWordCombinations is left unstubbed in this test; a stub that returned the
    // combination "w1w2" for the terms at idx 0+1 is disabled.

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, false, new TrieMap<>(), 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "w1", true);
    addTerm(userQuery, "w2", false);

    final ExpandedQuery result = compoundRewriter.rewrite(new ExpandedQuery(userQuery));

    assertThat((Query) result.getUserQuery(),
            bq(
                    dmq(term("w1", true)),
                    dmq(term("w2", false))
            )
    );
}
 
Example #19
Source File: WordBreakCompoundRewriterTest.java    From querqy with Apache License 2.0 5 votes vote down vote up
/**
 * A query whose second term is added with {@code true} as the last {@code addTerm}
 * argument (presumably marking it as generated, going by the test name) must come back
 * unchanged: no compound term is added to either term.
 */
@Test
public void testThatGeneratedSecondTermIsNotCompounded() throws IOException {
    // don't de-compound
    final SuggestWord[][] onlyEmptySuggestion = new SuggestWord[][] {new SuggestWord[] {}};
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(onlyEmptySuggestion);

    // suggestWordCombinations is left unstubbed in this test; a stub that returned the
    // combination "w1w2" for the terms at idx 0+1 is disabled.

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, false, new TrieMap<>(), 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "w1", false);
    addTerm(userQuery, "w2", true);

    final ExpandedQuery result = compoundRewriter.rewrite(new ExpandedQuery(userQuery));

    assertThat((Query) result.getUserQuery(),
            bq(
                    dmq(term("w1", false)),
                    dmq(term("w2", true))
            )
    );
}
 
Example #20
Source File: WordBreakCompoundRewriter.java    From querqy with Apache License 2.0 5 votes vote down vote up
/**
 * Counts the documents that contain every word of a suggestion in {@code dictionaryField}.
 * Each word is added as a {@code FILTER} clause of one Lucene boolean query.
 *
 * @param suggestion the suggested words that must all match
 * @param searcher   searcher used to run the count
 * @return the count reported by the searcher for the combined query
 * @throws RuntimeException wrapping any {@link IOException} raised by the searcher
 */
protected int countCollatedMatches(final SuggestWord[] suggestion, final IndexSearcher searcher) {
    final org.apache.lucene.search.BooleanQuery.Builder allWords =
            new org.apache.lucene.search.BooleanQuery.Builder();
    for (int i = 0; i < suggestion.length; i++) {
        final org.apache.lucene.index.Term wordTerm =
                new org.apache.lucene.index.Term(dictionaryField, suggestion[i].string);
        allWords.add(new org.apache.lucene.search.BooleanClause(
                new TermQuery(wordTerm), org.apache.lucene.search.BooleanClause.Occur.FILTER));
    }
    final org.apache.lucene.search.BooleanQuery collation = allWords.build();

    try {
        return searcher.count(collation);
    } catch (final IOException e) {
        throw new RuntimeException(e);
    }
}
 
Example #21
Source File: LuceneWordSearch.java    From preDict with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Returns the strings of up to two spell-checker suggestions for the given query text,
 * looked up in {@code WORD_FIELD} with {@code SUGGEST_ALWAYS}.
 *
 * @param searchQuery the text to find similar words for
 * @return the suggested words in the order the spell checker returned them
 * @throws IOException propagated from the spell checker
 */
private List<String> getUsingSpellcheck(String searchQuery) throws IOException {
	final Term queryTerm = new Term(WORD_FIELD, searchQuery);
	final SuggestWord[] similarWords =
			spellChecker.suggestSimilar(queryTerm, 2, reader, SuggestMode.SUGGEST_ALWAYS);

	final List<String> words = new ArrayList<>();
	for (int i = 0; i < similarWords.length; i++) {
		words.add(similarWords[i].string);
	}
	return words;
}
 
Example #22
Source File: WordBreakCompoundRewriterTest.java    From querqy with Apache License 2.0 4 votes vote down vote up
/**
 * Builds one word-break suggestion by passing each given part to {@code suggestWord},
 * keeping the order of the parts.
 *
 * @param parts the words the suggestion consists of
 * @return an array with one {@link SuggestWord} per part
 */
private static SuggestWord[] decompoundSuggestion(String... parts) {
    final SuggestWord[] words = new SuggestWord[parts.length];
    for (int i = 0; i < parts.length; i++) {
        words[i] = suggestWord(parts[i]);
    }
    return words;
}
 
Example #23
Source File: WordBreakCompoundRewriterTest.java    From querqy with Apache License 2.0 4 votes vote down vote up
/**
 * In "w0 w1 trigger w3 w4" the trigger word must only produce the reversed compound of
 * its direct neighbours ("w3w1"); the other pairs receive their forward compounds
 * ("w0w1", "w3w4") and the trigger term itself is absent from the result.
 */
@Test
public void testCompoundTriggerAffectsOnlySurroundingCompound() throws IOException {
    final TrieMap<Boolean> reverseTriggers = new TrieMap<>();
    reverseTriggers.put("trigger", true);

    // don't de-compound
    final SuggestWord[][] onlyEmptySuggestion = new SuggestWord[][] {new SuggestWord[] {}};
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(onlyEmptySuggestion);

    final Map<List<String>, CombineSuggestion[]> combinations = new HashMap<>();
    combinations.put(Arrays.asList("w0", "w1"), new CombineSuggestion[] { combineSuggestion("w0w1", 0, 1) });
    combinations.put(Arrays.asList("w3", "w1"), new CombineSuggestion[] { combineSuggestion("w3w1", 0, 1) });
    combinations.put(Arrays.asList("w3", "w4"), new CombineSuggestion[] { combineSuggestion("w3w4", 0, 1) });
    setupWordBreakMockWithCombinations(combinations);

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, false, reverseTriggers, 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "w0", false);
    addTerm(userQuery, "w1", false);
    addTerm(userQuery, "trigger", false);
    addTerm(userQuery, "w3", false);
    addTerm(userQuery, "w4", false);

    final ExpandedQuery result = compoundRewriter.rewrite(new ExpandedQuery(userQuery));

    assertThat((Query) result.getUserQuery(),
            bq(
                    dmq(term("w0", false), term("w0w1", true)),
                    dmq(term("w1", false), term("w0w1", true), term("w3w1", true)),
                    dmq(term("w3", false), term("w3w1", true), term("w3w4", true)),
                    dmq(term("w4", false), term("w3w4", true))
            )
    );
}
 
Example #24
Source File: WordBreakCompoundRewriterTest.java    From querqy with Apache License 2.0 4 votes vote down vote up
/**
 * With the rewriter's first boolean argument set to {@code true} (presumably the
 * lower-case-input switch, going by the test name), the trigger word "trigger_lower" must
 * be recognised both when typed in lower case and when typed as "Trigger_Lower": in both
 * cases the trigger term disappears and "w3w1" is added to the remaining terms.
 */
@Test
public void testCompoundTriggerWordWithLowerCaseInputSetToTrue() throws IOException {
    final TrieMap<Boolean> reverseTriggers = new TrieMap<>();
    reverseTriggers.put("trigger_lower", true);

    // don't de-compound
    final SuggestWord[][] onlyEmptySuggestion = new SuggestWord[][] {new SuggestWord[] {}};
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(onlyEmptySuggestion);

    final Map<List<String>, CombineSuggestion[]> combinations = new HashMap<>();
    combinations.put(Arrays.asList("w3", "w1"), new CombineSuggestion[] { combineSuggestion("w3w1", 0, 1) });
    setupWordBreakMockWithCombinations(combinations);

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", true, false, reverseTriggers, 5, false);

    // Trigger typed exactly as registered.
    final Query lowerCaseQuery = new Query();
    addTerm(lowerCaseQuery, "w1", false);
    addTerm(lowerCaseQuery, "trigger_lower", false);
    addTerm(lowerCaseQuery, "w3", false);

    final ExpandedQuery lowerCaseResult = compoundRewriter.rewrite(new ExpandedQuery(lowerCaseQuery));

    assertThat((Query) lowerCaseResult.getUserQuery(),
            bq(
                    dmq(term("w1", false), term("w3w1", true)),
                    dmq(term("w3", false), term("w3w1", true))
            )
    );

    // Trigger typed with upper-case letters.
    final Query mixedCaseQuery = new Query();
    addTerm(mixedCaseQuery, "w1", false);
    addTerm(mixedCaseQuery, "Trigger_Lower", false);
    addTerm(mixedCaseQuery, "w3", false);

    final ExpandedQuery mixedCaseResult = compoundRewriter.rewrite(new ExpandedQuery(mixedCaseQuery));

    assertThat((Query) mixedCaseResult.getUserQuery(),
            bq(
                    dmq(term("w1", false), term("w3w1", true)),
                    dmq(term("w3", false), term("w3w1", true))
            )
    );
}
 
Example #25
Source File: WordBreakCompoundRewriterTest.java    From querqy with Apache License 2.0 4 votes vote down vote up
/**
 * With the rewriter's first boolean argument set to {@code false} (presumably the
 * lower-case-input switch, going by the test name), trigger words must match only in the
 * exact casing in which they were registered:
 * <ul>
 *   <li>"Trigger_Upper" (as registered) triggers the reversed compound "w3w1";</li>
 *   <li>"trigger_upper" is kept as an ordinary term and nothing is compounded;</li>
 *   <li>"Trigger_Lower" is kept as an ordinary term and nothing is compounded.</li>
 * </ul>
 */
@Test
public void testCompoundTriggerWordWithLowerCaseInputSetToFalse() throws IOException {
    final TrieMap<Boolean> reverseTriggers = new TrieMap<>();
    reverseTriggers.put("Trigger_Upper", true);
    reverseTriggers.put("trigger_lower", true);

    // don't de-compound
    final SuggestWord[][] onlyEmptySuggestion = new SuggestWord[][] {new SuggestWord[] {}};
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(onlyEmptySuggestion);

    final Map<List<String>, CombineSuggestion[]> combinations = new HashMap<>();
    combinations.put(Arrays.asList("w3", "w1"), new CombineSuggestion[] { combineSuggestion("w3w1", 0, 1) });
    setupWordBreakMockWithCombinations(combinations);

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, false, reverseTriggers, 5, false);

    // Case 1: trigger typed exactly as registered.
    final Query exactCaseQuery = new Query();
    addTerm(exactCaseQuery, "w1", false);
    addTerm(exactCaseQuery, "Trigger_Upper", false);
    addTerm(exactCaseQuery, "w3", false);

    final ExpandedQuery exactCaseResult = compoundRewriter.rewrite(new ExpandedQuery(exactCaseQuery));

    assertThat((Query) exactCaseResult.getUserQuery(),
            bq(
                    dmq(term("w1", false), term("w3w1", true)),
                    dmq(term("w3", false), term("w3w1", true))
            )
    );

    // Case 2: lower-cased variant of the upper-case trigger.
    final Query loweredQuery = new Query();
    addTerm(loweredQuery, "w1", false);
    addTerm(loweredQuery, "trigger_upper", false);
    addTerm(loweredQuery, "w3", false);

    final ExpandedQuery loweredResult = compoundRewriter.rewrite(new ExpandedQuery(loweredQuery));

    assertThat((Query) loweredResult.getUserQuery(),
            bq(
                    dmq(term("w1", false)),
                    dmq(term("trigger_upper", false)),
                    dmq(term("w3", false))
            )
    );

    // Case 3: capitalised variant of the lower-case trigger.
    final Query capitalisedQuery = new Query();
    addTerm(capitalisedQuery, "w1", false);
    addTerm(capitalisedQuery, "Trigger_Lower", false);
    addTerm(capitalisedQuery, "w3", false);

    final ExpandedQuery capitalisedResult = compoundRewriter.rewrite(new ExpandedQuery(capitalisedQuery));

    assertThat((Query) capitalisedResult.getUserQuery(),
            bq(
                    dmq(term("w1", false)),
                    dmq(term("Trigger_Lower", false)),
                    dmq(term("w3", false))
            )
    );
}
 
Example #26
Source File: WordBreakCompoundRewriterTest.java    From querqy with Apache License 2.0 4 votes vote down vote up
/**
 * A term added with {@code true} as the last {@code addTerm} argument (presumably marking
 * it as generated, going by the test name) sits between "w1" and "w2". The compound
 * "w1w2" must still be added to both of those terms, while the middle term stays as it is.
 */
@Test
public void testThatCompoundingIfGeneratedIsMixedIn() throws IOException {
    // don't de-compound
    final SuggestWord[][] onlyEmptySuggestion = new SuggestWord[][] {new SuggestWord[] {}};
    when(wordBreakSpellChecker.suggestWordBreaks(any(), anyInt(), any(), any(), any()))
            .thenReturn(onlyEmptySuggestion);

    // compound of terms at idx 0+1
    final CombineSuggestion[] oneCombination = new CombineSuggestion[] { combineSuggestion("w1w2", 0, 1) };
    when(wordBreakSpellChecker.suggestWordCombinations(any(), anyInt(), any(), any()))
            .thenReturn(oneCombination);

    final WordBreakCompoundRewriter compoundRewriter = new WordBreakCompoundRewriter(
            wordBreakSpellChecker, indexReader, "field1", false, false, new TrieMap<>(), 5, false);

    final Query userQuery = new Query();
    addTerm(userQuery, "w1", false);
    addTerm(userQuery, "w2g", true);
    addTerm(userQuery, "w2", false);

    final ExpandedQuery result = compoundRewriter.rewrite(new ExpandedQuery(userQuery));

    assertThat((Query) result.getUserQuery(),
            bq(
                    dmq(term("w1", false), term("w1w2", true)),
                    dmq(term("w2g", true)),
                    dmq(term("w2", false), term("w1w2", true))
            )
    );
}
 
Example #27
Source File: SampleComparator.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Orders two suggestions by their {@code string} fields using
 * {@link String#compareTo(String)}.
 *
 * @param suggestWord  the first suggestion
 * @param suggestWord1 the second suggestion
 * @return the result of comparing the first suggestion's string to the second's
 */
@Override
public int compare(SuggestWord suggestWord, SuggestWord suggestWord1) {
  final String left = suggestWord.string;
  final String right = suggestWord1.string;
  return left.compareTo(right);
}
 
Example #28
Source File: DirectSolrSpellChecker.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Collects suggestions for every token in {@code options.tokens}.
 *
 * <p>For each token the document frequency of the corresponding term is looked up
 * and recorded on the result, and {@code checker.suggestSimilar} is asked for
 * candidates. When {@code options.alternativeTermCount} is positive and the token
 * itself occurs in the index, the token is prepended as a suggestion unless the
 * checker already returned it.
 *
 * @param options tokens, reader, counts, suggest mode and accuracy for this request
 * @return the result holding token frequencies and suggestions
 * @throws IOException declared for the index reader and spell checker calls
 */
@Override
public SpellingResult getSuggestions(SpellingOptions options)
    throws IOException {
  log.debug("getSuggestions: {}", options.tokens);

  final SpellingResult result = new SpellingResult();

  // When options.accuracy equals Float.MIN_VALUE, use the checker's own accuracy.
  final float accuracy;
  if (options.accuracy == Float.MIN_VALUE) {
    accuracy = checker.getAccuracy();
  } else {
    accuracy = options.accuracy;
  }

  for (Token token : options.tokens) {
    final String tokenText = token.toString();
    final Term term = new Term(field, tokenText);
    final int freq = options.reader.docFreq(term);

    // True when alternatives are requested for a token that exists in the index.
    final boolean offerOriginal = options.alternativeTermCount > 0 && freq > 0;
    final int count = offerOriginal ? options.alternativeTermCount : options.count;

    SuggestWord[] suggestions = checker.suggestSimilar(term, count, options.reader, options.suggestMode, accuracy);
    result.addFrequency(token, freq);

    if (offerOriginal) {
      // Look for the token among the returned suggestions first.
      boolean originalPresent = false;
      for (SuggestWord candidate : suggestions) {
        if (candidate.string.equals(tokenText)) {
          originalPresent = true;
          break;
        }
      }

      if (!originalPresent) {
        // Slot 0 holds the original token; the checker's suggestions follow in order.
        final SuggestWord original = new SuggestWord();
        original.freq = freq;
        original.string = tokenText;

        final SuggestWord[] withOriginal = new SuggestWord[suggestions.length + 1];
        withOriginal[0] = original;
        System.arraycopy(suggestions, 0, withOriginal, 1, suggestions.length);
        suggestions = withOriginal;
      }
    }

    if (suggestions.length > 0 || freq != 0) {
      for (SuggestWord suggestion : suggestions) {
        result.add(token, suggestion.string, suggestion.freq);
      }
    } else {
      // No suggestions and the token is absent from the index: record an empty list.
      final List<String> none = Collections.emptyList();
      result.add(token, none);
    }
  }
  return result;
}
 
Example #29
Source File: DirectSolrSpellChecker.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Configures {@code checker} from the supplied configuration.
 *
 * <p>Reads the comparator class, string distance, accuracy, max edits, min prefix,
 * max inspections, token-frequency threshold, min/max query length and max query
 * frequency from {@code config}. Any option that is absent falls back to its
 * {@code DEFAULT_*} constant (or the default comparator / internal Levenshtein
 * distance). Lower-casing of terms is always switched off on the checker.
 *
 * @param config the configuration for this spell checker
 * @param core   the core whose resource loader instantiates custom comparator or
 *               distance classes named in {@code config}
 * @return the name returned by {@code super.init(config, core)}
 */
@Override
@SuppressWarnings({"unchecked"})
public String init(@SuppressWarnings({"rawtypes"})NamedList config, SolrCore core) {

  final SolrParams params = config.toSolrParams();

  log.info("init: {}", config);
  final String name = super.init(config, core);

  // Comparator: score (the default), frequency, or a custom class given by name.
  final String comparatorName = (String) config.get(COMPARATOR_CLASS);
  final Comparator<SuggestWord> comparator;
  if (comparatorName == null || comparatorName.equalsIgnoreCase(SCORE_COMP)) {
    comparator = SuggestWordQueue.DEFAULT_COMPARATOR;
  } else if (comparatorName.equalsIgnoreCase(FREQ_COMP)) {
    comparator = new SuggestWordFrequencyComparator();
  } else {
    // Anything else must be a fully qualified class name.
    comparator = (Comparator<SuggestWord>) core.getResourceLoader().newInstance(comparatorName, Comparator.class);
  }

  // String distance: internal Levenshtein unless another class is named.
  final String distanceName = (String) config.get(STRING_DISTANCE);
  final StringDistance distance;
  if (distanceName == null || distanceName.equalsIgnoreCase(INTERNAL_DISTANCE)) {
    distance = DirectSpellChecker.INTERNAL_LEVENSHTEIN;
  } else {
    distance = core.getResourceLoader().newInstance(distanceName, StringDistance.class);
  }

  // Numeric options: take the configured value when present, else the default.
  final Float accuracy = params.getFloat(ACCURACY);
  final float minAccuracy = (accuracy != null) ? accuracy : DEFAULT_ACCURACY;

  final Integer edits = params.getInt(MAXEDITS);
  final int maxEdits = (edits != null) ? edits : DEFAULT_MAXEDITS;

  final Integer prefix = params.getInt(MINPREFIX);
  final int minPrefix = (prefix != null) ? prefix : DEFAULT_MINPREFIX;

  final Integer inspections = params.getInt(MAXINSPECTIONS);
  final int maxInspections = (inspections != null) ? inspections : DEFAULT_MAXINSPECTIONS;

  final Float threshold = params.getFloat(THRESHOLD_TOKEN_FREQUENCY);
  final float minThreshold = (threshold != null) ? threshold : DEFAULT_THRESHOLD_TOKEN_FREQUENCY;

  final Integer queryLength = params.getInt(MINQUERYLENGTH);
  final int minQueryLength = (queryLength != null) ? queryLength : DEFAULT_MINQUERYLENGTH;

  final Integer overriddenMaxQueryLength = params.getInt(MAXQUERYLENGTH);
  final int maxQueryLength = (overriddenMaxQueryLength != null) ? overriddenMaxQueryLength : DEFAULT_MAXQUERYLENGTH;

  final Float queryFreq = params.getFloat(MAXQUERYFREQUENCY);
  final float maxQueryFrequency = (queryFreq != null) ? queryFreq : DEFAULT_MAXQUERYFREQUENCY;

  // Apply everything to the checker only after all options have been read.
  checker.setComparator(comparator);
  checker.setDistance(distance);
  checker.setMaxEdits(maxEdits);
  checker.setMinPrefix(minPrefix);
  checker.setAccuracy(minAccuracy);
  checker.setThresholdFrequency(minThreshold);
  checker.setMaxInspections(maxInspections);
  checker.setMinQueryLength(minQueryLength);
  checker.setMaxQueryLength(maxQueryLength);
  checker.setMaxQueryFrequency(maxQueryFrequency);
  checker.setLowerCaseTerms(false);

  return name;
}