Java Code Examples for org.apache.lucene.analysis.Analyzer
The following examples show how to use
org.apache.lucene.analysis.Analyzer.
These examples are extracted from open source projects.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source Project: elasticsearch-plugin-bundle Author: jprante File: UnstemmedGermanNormalizationTests.java License: GNU Affero General Public License v3.0 | 6 votes |
/**
 * Analyzes a sentence with the "default" index analyzer built from {@code unstemmed.json}
 * and checks that compound words are emitted in both hyphenated and joined form.
 */
public void testTwo() throws Exception {
  final String settingsResource = "unstemmed.json";
  final Settings settings =
      Settings.builder()
          .loadFromStream(settingsResource, getClass().getResourceAsStream(settingsResource), true)
          .build();

  final ESTestCase.TestAnalysis testAnalysis =
      ESTestCase.createTestAnalysis(
          new Index("test", "_na_"),
          settings,
          new BundlePlugin(Settings.EMPTY),
          new CommonAnalysisPlugin());
  final Analyzer defaultAnalyzer = testAnalysis.indexAnalyzers.get("default");

  final String input = "So wird's was: das Elasticsearch-Buch erscheint beim O'Reilly-Verlag.";
  final String[] expectedTokens = {
    "wird's",
    "elasticsearch-buch",
    "elasticsearchbuch",
    "erscheint",
    "o'reilly-verlag",
    "o'reillyverlag"
  };
  assertTokenStreamContents(
      defaultAnalyzer.tokenStream(null, new StringReader(input)), expectedTokens);
}
Example #2
Source Project: lucene-solr Author: apache File: TestPhraseQuery.java License: Apache License 2.0 | 6 votes |
public void testPhraseQueryWithStopAnalyzer() throws Exception { Directory directory = newDirectory(); Analyzer stopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET); RandomIndexWriter writer = new RandomIndexWriter(random(), directory, newIndexWriterConfig(stopAnalyzer)); Document doc = new Document(); doc.add(newTextField("field", "the stop words are here", Field.Store.YES)); writer.addDocument(doc); IndexReader reader = writer.getReader(); writer.close(); IndexSearcher searcher = newSearcher(reader); // valid exact phrase query PhraseQuery query = new PhraseQuery("field", "stop", "words"); ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs; assertEquals(1, hits.length); QueryUtils.check(random(), query,searcher); reader.close(); directory.close(); }
Example #3
Source Project: SearchServices Author: Alfresco File: LanguagePrefixedTokenStream.java License: GNU Lesser General Public License v3.0 | 6 votes |
/**
 * Resolves the {@link Analyzer} to use for the given language.
 * Field types are looked up in the order below and the first one that is not null is used:
 *
 * <ul>
 *  <li>highlighted_text_ + locale (e.g. highlighted_text_en)</li>
 *  <li>text_ + locale (e.g. text_en)</li>
 *  <li>text___ (text general field)</li>
 * </ul>
 *
 * The query analyzer of that field type is returned when {@code mode} is {@code QUERY};
 * in every other case its index analyzer is returned.
 *
 * @param language the language code.
 * @return the {@link Analyzer} associated with the given language.
 */
Analyzer analyzer(String language) {
  // Each lookup runs only if the previous one produced nothing.
  FieldType targetFieldType =
      indexSchema.getFieldTypeByName(highlightingFieldTypeName(language));
  if (targetFieldType == null) {
    targetFieldType = indexSchema.getFieldTypeByName(localisedFieldTypeName(language));
  }
  if (targetFieldType == null) {
    targetFieldType = indexSchema.getFieldTypeByName(FALLBACK_TEXT_FIELD_TYPE_NAME);
  }

  switch (mode) {
    case QUERY:
      return targetFieldType.getQueryAnalyzer();
    case INDEX:
    default:
      return targetFieldType.getIndexAnalyzer();
  }
}
Example #4
Source Project: lucene-solr Author: apache File: TestDirectSpellChecker.java License: Apache License 2.0 | 6 votes |
/**
 * Indexes the English names of 0..19 and checks that the misspelling "seevntene"
 * yields the single suggestion "seventeen".
 */
public void testTransposition2() throws Exception {
  DirectSpellChecker checker = new DirectSpellChecker();
  Directory directory = newDirectory();
  Analyzer mockAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, mockAnalyzer);

  for (int number = 0; number < 20; number++) {
    Document document = new Document();
    document.add(newTextField("numbers", English.intToEnglish(number), Field.Store.NO));
    indexWriter.addDocument(document);
  }

  IndexReader indexReader = indexWriter.getReader();

  Term misspelled = new Term("numbers", "seevntene");
  SuggestWord[] suggestions =
      checker.suggestSimilar(misspelled, 2, indexReader, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
  assertEquals(1, suggestions.length);
  assertEquals("seventeen", suggestions[0].string);

  IOUtils.close(indexReader, indexWriter, directory, mockAnalyzer);
}
Example #5
Source Project: lucene-solr Author: apache File: TestQPHelper.java License: Apache License 2.0 | 6 votes |
public void testBoost() throws Exception { CharacterRunAutomaton stopSet = new CharacterRunAutomaton(Automata.makeString("on")); Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet); StandardQueryParser qp = new StandardQueryParser(); qp.setAnalyzer(oneStopAnalyzer); Query q = qp.parse("on^1.0", "field"); assertNotNull(q); q = qp.parse("\"hello\"^2.0", "field"); assertNotNull(q); assertEquals(((BoostQuery) q).getBoost(), (float) 2.0, (float) 0.5); q = qp.parse("hello^2.0", "field"); assertNotNull(q); assertEquals(((BoostQuery) q).getBoost(), (float) 2.0, (float) 0.5); q = qp.parse("\"on\"^1.0", "field"); assertNotNull(q); StandardQueryParser qp2 = new StandardQueryParser(); qp2.setAnalyzer(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET)); q = qp2.parse("the^3", "field"); // "the" is a stop word so the result is an empty query: assertNotNull(q); assertMatchNoDocsQuery(q); assertFalse(q instanceof BoostQuery); }
Example #6
Source Project: lucene-solr Author: apache File: TestDocValuesIndexing.java License: Apache License 2.0 | 6 votes |
public void testAddSortedTwice() throws IOException { Analyzer analyzer = new MockAnalyzer(random()); Directory directory = newDirectory(); // we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1 IndexWriterConfig iwc = newIndexWriterConfig(analyzer); iwc.setMergePolicy(newLogMergePolicy()); IndexWriter iwriter = new IndexWriter(directory, iwc); Document doc = new Document(); doc.add(new SortedDocValuesField("dv", new BytesRef("foo!"))); iwriter.addDocument(doc); doc.add(new SortedDocValuesField("dv", new BytesRef("bar!"))); expectThrows(IllegalArgumentException.class, () -> { iwriter.addDocument(doc); }); IndexReader ir = iwriter.getReader(); assertEquals(1, ir.numDocs()); ir.close(); iwriter.close(); directory.close(); }
Example #7
Source Project: bioasq Author: oaqa File: LuceneDocumentRetrievalExecutor.java License: Apache License 2.0 | 6 votes |
@Override public void initialize(UimaContext context) throws ResourceInitializationException { super.initialize(context); hits = UimaContextHelper.getConfigParameterIntValue(context, "hits", 100); // query constructor constructor = UimaContextHelper.createObjectFromConfigParameter(context, "query-string-constructor", "query-string-constructor-params", BooleanBagOfPhraseQueryStringConstructor.class, QueryStringConstructor.class); // lucene Analyzer analyzer = UimaContextHelper.createObjectFromConfigParameter(context, "query-analyzer", "query-analyzer-params", StandardAnalyzer.class, Analyzer.class); String[] fields = UimaContextHelper.getConfigParameterStringArrayValue(context, "fields"); parser = new MultiFieldQueryParser(fields, analyzer); String index = UimaContextHelper.getConfigParameterStringValue(context, "index"); try { reader = DirectoryReader.open(FSDirectory.open(Paths.get(index))); } catch (IOException e) { throw new ResourceInitializationException(e); } searcher = new IndexSearcher(reader); idFieldName = UimaContextHelper.getConfigParameterStringValue(context, "id-field", null); titleFieldName = UimaContextHelper.getConfigParameterStringValue(context, "title-field", null); textFieldName = UimaContextHelper.getConfigParameterStringValue(context, "text-field", null); uriPrefix = UimaContextHelper.getConfigParameterStringValue(context, "uri-prefix", null); }
Example #8
Source Project: gumtree-spoon-ast-diff Author: SpoonLabs File: right_IndexWriter_1.42.java License: Apache License 2.0 | 6 votes |
/**
 * Creates a writer on the given directory: obtains the write lock, then, while holding the
 * commit lock, either writes fresh segment infos ({@code create == true}) or reads the
 * existing ones.
 *
 * <p>If initialising the segment infos fails, the write lock obtained here is released again
 * before the exception propagates, so a failed construction does not leave the index locked.
 *
 * @param d the directory holding the index
 * @param a the analyzer to use
 * @param create {@code true} to write new segment infos, {@code false} to read existing ones
 * @param closeDir stored in the {@code closeDir} field
 * @throws IOException if the write lock cannot be obtained within {@code WRITE_LOCK_TIMEOUT},
 *     or if reading/writing the segment infos fails
 */
private IndexWriter(Directory d, Analyzer a, final boolean create, boolean closeDir)
    throws IOException {
  this.closeDir = closeDir;
  directory = d;
  analyzer = a;

  Lock writeLock = directory.makeLock(IndexWriter.WRITE_LOCK_NAME);
  if (!writeLock.obtain(WRITE_LOCK_TIMEOUT)) // obtain write lock
    throw new IOException("Index locked for write: " + writeLock);

  boolean initialized = false;
  try {
    synchronized (directory) { // in- & inter-process sync
      new Lock.With(directory.makeLock(IndexWriter.COMMIT_LOCK_NAME), COMMIT_LOCK_TIMEOUT) {
        public Object doBody() throws IOException {
          if (create)
            segmentInfos.write(directory);
          else
            segmentInfos.read(directory);
          return null;
        }
      }.run();
    }
    initialized = true;
  } finally {
    if (!initialized) {
      // Construction failed: give the lock back instead of leaking it.
      writeLock.release();
    }
  }

  // Save the lock only once construction has succeeded, so the field never refers to a
  // lock that has already been released.
  this.writeLock = writeLock;
}
Example #9
Source Project: elasticsearch-plugin-bundle Author: jprante File: IcuAnalysisTests.java License: GNU Affero General Public License v3.0 | 6 votes |
/**
 * Checks that, with empty settings, the bundle plugin registers the ICU char filter,
 * tokenizer, token filters and the "icu_collation" analyzer under their expected names
 * and with the expected implementation classes.
 */
public void testDefaultsIcuAnalysis() throws IOException {
  TestAnalysis testAnalysis =
      createTestAnalysis(
          new Index("test", "_na_"), Settings.EMPTY, new BundlePlugin(Settings.EMPTY));

  CharFilterFactory normalizerCharFilter = testAnalysis.charFilter.get("icu_normalizer");
  assertThat(normalizerCharFilter, instanceOf(IcuNormalizerCharFilterFactory.class));

  TokenizerFactory tokenizerFactory = testAnalysis.tokenizer.get("icu_tokenizer");
  assertThat(tokenizerFactory, instanceOf(IcuTokenizerFactory.class));

  TokenFilterFactory normalizerFilter = testAnalysis.tokenFilter.get("icu_normalizer");
  assertThat(normalizerFilter, instanceOf(IcuNormalizerTokenFilterFactory.class));

  TokenFilterFactory foldingFilter = testAnalysis.tokenFilter.get("icu_folding");
  assertThat(foldingFilter, instanceOf(IcuFoldingTokenFilterFactory.class));

  TokenFilterFactory transformFilter = testAnalysis.tokenFilter.get("icu_transform");
  assertThat(transformFilter, instanceOf(IcuTransformTokenFilterFactory.class));

  Analyzer collationAnalyzer = testAnalysis.indexAnalyzers.get("icu_collation");
  assertThat(collationAnalyzer, instanceOf(NamedAnalyzer.class));
}
Example #10
Source Project: minhash Author: codelibs File: MinHashTest.java License: Apache License 2.0 | 6 votes |
/**
 * Builds a MinHash analyzer with 1 hash bit, seed 0 and 256 functions, hashes the texts from
 * {@code createTexts()} and compares the first hash against each of the first ten hashes.
 */
public void test_calculate_1bit_256funcs_seed0() throws IOException {
  final int hashBit = 1;
  final int seed = 0;
  final int num = 256;
  final Analyzer minhashAnalyzer = MinHash.createAnalyzer(hashBit, seed, num);

  final StringBuilder[] texts = createTexts();
  final byte[][] hashes = createMinHashes(minhashAnalyzer, texts);

  // expectedSimilarity[i] is the expected result of comparing hashes[0] with hashes[i].
  final float[] expectedSimilarity = {
    1.0f,
    0.90625f,
    0.82421875f,
    0.76953125f,
    0.703125f,
    0.625f,
    0.6015625f,
    0.55078125f,
    0.53125f,
    0.51171875f
  };
  for (int i = 0; i < expectedSimilarity.length; i++) {
    assertEquals(expectedSimilarity[i], MinHash.compare(hashes[0], hashes[i]));
  }
}
Example #11
Source Project: lucene-solr Author: apache File: PayloadUtils.java License: Apache License 2.0 | 6 votes |
public static String getPayloadEncoder(FieldType fieldType) { // TODO: support custom payload encoding fields too somehow - maybe someone has a custom component that encodes payloads as floats String encoder = null; Analyzer a = fieldType.getIndexAnalyzer(); if (a instanceof TokenizerChain) { // examine the indexing analysis chain for DelimitedPayloadTokenFilterFactory or NumericPayloadTokenFilterFactory TokenizerChain tc = (TokenizerChain)a; TokenFilterFactory[] factories = tc.getTokenFilterFactories(); for (TokenFilterFactory factory : factories) { if (factory instanceof DelimitedPayloadTokenFilterFactory) { encoder = factory.getOriginalArgs().get(DelimitedPayloadTokenFilterFactory.ENCODER_ATTR); break; } if (factory instanceof NumericPayloadTokenFilterFactory) { // encodes using `PayloadHelper.encodeFloat(payload)` encoder = "float"; break; } } } return encoder; }
Example #12
Source Project: crate Author: crate File: SynonymTokenFilterFactory.java License: Apache License 2.0 | 5 votes |
/**
 * Builds the analyzer named "synonyms" from the given tokenizer and char filters, with each
 * token filter replaced by the result of its {@code getSynonymFilter()} method.
 *
 * @param tokenizer the tokenizer factory of the chain
 * @param charFilters the char filter factories of the chain
 * @param tokenFilters the token filter factories to map through {@code getSynonymFilter()}
 * @return a new {@link CustomAnalyzer} assembled from the arguments
 */
protected Analyzer buildSynonymAnalyzer(TokenizerFactory tokenizer, List<CharFilterFactory> charFilters,
                                        List<TokenFilterFactory> tokenFilters) {
  CharFilterFactory[] charFilterArray = charFilters.toArray(new CharFilterFactory[0]);
  TokenFilterFactory[] synonymAwareFilters = tokenFilters.stream()
      .map(filter -> filter.getSynonymFilter())
      .toArray(size -> new TokenFilterFactory[size]);
  return new CustomAnalyzer("synonyms", tokenizer, charFilterArray, synonymAwareFilters);
}
Example #13
Source Project: lucene-solr Author: apache File: AnalyzingSuggesterTest.java License: Apache License 2.0 | 5 votes |
/**
 * Builds an {@link AnalyzingSuggester} over four inputs that share the prefix "a" and runs a
 * lookup for "a"; the test only requires that the lookup completes without throwing.
 */
public void testQueueExhaustion() throws Exception {
  Analyzer analyzer = new MockAnalyzer(random());
  Directory tempDir = getDirectory();
  AnalyzingSuggester suggester =
      new AnalyzingSuggester(
          tempDir, "suggest", analyzer, analyzer, AnalyzingSuggester.EXACT_FIRST, 256, -1, true);

  Input[] inputs = {
    new Input("a", 2),
    new Input("a b c", 3),
    new Input("a c a", 1),
    new Input("a c b", 1),
  };
  suggester.build(new InputArrayIterator(inputs));

  suggester.lookup("a", false, 4);

  IOUtils.close(analyzer, tempDir);
}
Example #14
Source Project: lucene-solr Author: apache File: QueryParserTestBase.java License: Apache License 2.0 | 5 votes |
public void testQueryStringEscaping() throws Exception { Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false); assertEscapedQueryEquals("a-b:c", a, "a\\-b\\:c"); assertEscapedQueryEquals("a+b:c", a, "a\\+b\\:c"); assertEscapedQueryEquals("a:b:c", a, "a\\:b\\:c"); assertEscapedQueryEquals("a\\b:c", a, "a\\\\b\\:c"); assertEscapedQueryEquals("a:b-c", a, "a\\:b\\-c"); assertEscapedQueryEquals("a:b+c", a, "a\\:b\\+c"); assertEscapedQueryEquals("a:b:c", a, "a\\:b\\:c"); assertEscapedQueryEquals("a:b\\c", a, "a\\:b\\\\c"); assertEscapedQueryEquals("a:b-c*", a, "a\\:b\\-c\\*"); assertEscapedQueryEquals("a:b+c*", a, "a\\:b\\+c\\*"); assertEscapedQueryEquals("a:b:c*", a, "a\\:b\\:c\\*"); assertEscapedQueryEquals("a:b\\\\c*", a, "a\\:b\\\\\\\\c\\*"); assertEscapedQueryEquals("a:b-?c", a, "a\\:b\\-\\?c"); assertEscapedQueryEquals("a:b+?c", a, "a\\:b\\+\\?c"); assertEscapedQueryEquals("a:b:?c", a, "a\\:b\\:\\?c"); assertEscapedQueryEquals("a:b?c", a, "a\\:b\\?c"); assertEscapedQueryEquals("a:b-c~", a, "a\\:b\\-c\\~"); assertEscapedQueryEquals("a:b+c~", a, "a\\:b\\+c\\~"); assertEscapedQueryEquals("a:b:c~", a, "a\\:b\\:c\\~"); assertEscapedQueryEquals("a:b\\c~", a, "a\\:b\\\\c\\~"); assertEscapedQueryEquals("[ a - TO a+ ]", null, "\\[ a \\- TO a\\+ \\]"); assertEscapedQueryEquals("[ a : TO a~ ]", null, "\\[ a \\: TO a\\~ \\]"); assertEscapedQueryEquals("[ a\\ TO a* ]", null, "\\[ a\\\\ TO a\\* \\]"); // LUCENE-881 assertEscapedQueryEquals("|| abc ||", a, "\\|\\| abc \\|\\|"); assertEscapedQueryEquals("&& abc &&", a, "\\&\\& abc \\&\\&"); }
Example #15
Source Project: lucene-solr Author: apache File: TestArabicStemFilter.java License: Apache License 2.0 | 5 votes |
/**
 * Checks that {@link ArabicStemFilter} maps an empty keyword token to an empty term.
 */
public void testEmptyTerm() throws IOException {
  Analyzer stemAnalyzer =
      new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
          // Keyword tokenizer feeding straight into the stem filter.
          Tokenizer source = new KeywordTokenizer();
          TokenStream stemmed = new ArabicStemFilter(source);
          return new TokenStreamComponents(source, stemmed);
        }
      };
  checkOneTerm(stemAnalyzer, "", "");
  stemAnalyzer.close();
}
Example #16
Source Project: lucene-solr Author: apache File: TestWordDelimiterGraphFilter.java License: Apache License 2.0 | 5 votes |
/**
 * Creates an analyzer that runs a {@link KeywordTokenizer} followed by a
 * {@link WordDelimiterGraphFilter} configured with the given flags and protected words.
 *
 * @param flags passed to the {@link WordDelimiterGraphFilter} constructor
 * @param protectedWords passed to the {@link WordDelimiterGraphFilter} constructor
 * @return a new analyzer; the caller is responsible for closing it
 */
private Analyzer getAnalyzer(int flags, CharArraySet protectedWords) {
  Analyzer delimiterAnalyzer =
      new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
          Tokenizer source = new KeywordTokenizer();
          TokenStream delimited = new WordDelimiterGraphFilter(source, flags, protectedWords);
          return new TokenStreamComponents(source, delimited);
        }
      };
  return delimiterAnalyzer;
}
Example #17
Source Project: elasticsearch-plugin-bundle Author: jprante File: SegmentationIcuTokenizerTests.java License: GNU Affero General Public License v3.0 | 5 votes |
/**
 * Checks the token sequence that the analyzer from {@code createAnalyzer()} produces for an
 * Amharic sentence.
 */
public void testAmharic() throws Exception {
  Analyzer analyzer = createAnalyzer();

  String input = "ዊኪፔድያ የባለ ብዙ ቋንቋ የተሟላ ትክክለኛና ነጻ መዝገበ ዕውቀት (ኢንሳይክሎፒዲያ) ነው። ማንኛውም";
  String[] expectedTokens = {
    "ዊኪፔድያ",
    "የባለ",
    "ብዙ",
    "ቋንቋ",
    "የተሟላ",
    "ትክክለኛና",
    "ነጻ",
    "መዝገበ",
    "ዕውቀት",
    "ኢንሳይክሎፒዲያ",
    "ነው",
    "ማንኛውም"
  };
  assertAnalyzesTo(analyzer, input, expectedTokens);

  destroyAnalzyer(analyzer);
}
Example #18
Source Project: lucene-solr Author: apache File: TestMemoryIndexAgainstDirectory.java License: Apache License 2.0 | 5 votes |
/**
 * Run all queries against both the Directory and MemoryIndex, ensuring they are the same.
 *
 * <p>Each entry of {@code queries} is parsed with a {@link QueryParser} on default field "foo"
 * and executed against both searchers; the total hit counts must match.
 *
 * @param memory the in-memory index to compare
 * @param directory the directory-backed index to compare against
 * @param analyzer the analyzer used to parse the queries
 * @throws Exception if opening the reader, parsing or searching fails
 */
public void assertAllQueries(MemoryIndex memory, Directory directory, Analyzer analyzer) throws Exception {
  // try-with-resources: the reader is closed even when a query fails or an assertion trips.
  try (IndexReader reader = DirectoryReader.open(directory)) {
    IndexSearcher ram = newSearcher(reader);
    IndexSearcher mem = memory.createSearcher();
    QueryParser qp = new QueryParser("foo", analyzer);
    for (String query : queries) {
      TopDocs ramDocs = ram.search(qp.parse(query), 1);
      TopDocs memDocs = mem.search(qp.parse(query), 1);
      assertEquals(query, ramDocs.totalHits.value, memDocs.totalHits.value);
    }
  }
}
Example #19
Source Project: uyuni Author: uyuni-project File: IndexManager.java License: GNU General Public License v2.0 | 5 votes |
/**
 * Builds the analyzer for hardware-device documents: an {@link NGramAnalyzer} (sized by
 * {@code min_ngram}/{@code max_ngram}) by default, with a {@link KeywordAnalyzer} for the
 * "id", "serverId" and "pciType" fields.
 *
 * @return the per-field analyzer wrapper
 */
private Analyzer getHardwareDeviceAnalyzer() {
  Analyzer defaultAnalyzer = new NGramAnalyzer(min_ngram, max_ngram);
  PerFieldAnalyzerWrapper perField = new PerFieldAnalyzerWrapper(defaultAnalyzer);

  // Fields registered here use a KeywordAnalyzer instead of the n-gram default.
  String[] keywordFields = {"id", "serverId", "pciType"};
  for (String field : keywordFields) {
    perField.addAnalyzer(field, new KeywordAnalyzer());
  }
  return perField;
}
Example #20
Source Project: lucene-solr Author: apache File: TestSynonymGraphFilter.java License: Apache License 2.0 | 5 votes |
/**
 * Checks the analysis of "c d e a b" when the rule "a b" -> "x" is registered through the
 * {@code add} helper (last argument {@code true}), i.e. when the match sits at the end of
 * the input.
 */
public void testSynAtEnd() throws Exception {
  SynonymMap.Builder builder = new SynonymMap.Builder();
  add(builder, "a b", "x", true);

  Analyzer analyzer = getAnalyzer(builder, true);

  // One column per expected token, in output order.
  String[] terms = {"c", "d", "e", "x", "a", "b"};
  int[] startOffsets = {0, 2, 4, 6, 6, 8};
  int[] endOffsets = {1, 3, 5, 9, 7, 9};
  String[] types = {"word", "word", "word", "SYNONYM", "word", "word"};
  int[] posIncrements = {1, 1, 1, 1, 0, 1};
  int[] posLengths = {1, 1, 1, 2, 1, 1};

  assertAnalyzesTo(
      analyzer,
      "c d e a b",
      terms,
      startOffsets,
      endOffsets,
      types,
      posIncrements,
      posLengths);

  analyzer.close();
}
Example #21
Source Project: cxf Author: apache File: TikaLuceneContentExtractorTest.java License: Apache License 2.0 | 5 votes |
/**
 * Creates a fresh memory-mapped Lucene index in a temporary directory, commits the empty
 * index, and instantiates the FIQL parser and the Tika-based content extractor under test.
 */
@Before
public void setUp() throws Exception {
  final Analyzer standardAnalyzer = new StandardAnalyzer();

  tempDirectory = Files.createTempDirectory("lucene");
  directory = new MMapDirectory(tempDirectory);

  final IndexWriterConfig writerConfig = new IndexWriterConfig(standardAnalyzer);
  writer = new IndexWriter(directory, writerConfig);
  writer.commit();

  parser = new FiqlParser<>(SearchBean.class);
  extractor = new TikaLuceneContentExtractor(new PDFParser());
}
Example #22
Source Project: lucene-solr Author: apache File: TestSynonymGraphFilter.java License: Apache License 2.0 | 5 votes |
public void testVanishingTermsWithFlatten() throws Exception { String testFile = "aaa => aaaa1 aaaa2 aaaa3\n" + "bbb => bbbb1 bbbb2\n"; Analyzer analyzer = solrSynsToAnalyzer(testFile); assertAnalyzesTo(analyzer, "xyzzy bbb pot of gold", new String[] { "xyzzy", "bbbb1", "bbbb2", "pot", "of", "gold" }); // xyzzy aaa pot of gold -> xyzzy aaaa1 aaaa2 aaaa3 gold assertAnalyzesTo(analyzer, "xyzzy aaa pot of gold", new String[] { "xyzzy", "aaaa1", "aaaa2", "aaaa3", "pot", "of", "gold" }); analyzer.close(); }
Example #23
Source Project: lucene-solr Author: apache File: TestNorwegianMinimalStemFilter.java License: Apache License 2.0 | 5 votes |
/**
 * Checks that {@link NorwegianMinimalStemFilter} maps an empty keyword token to an empty term.
 */
public void testEmptyTerm() throws IOException {
  Analyzer stemAnalyzer =
      new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
          // Keyword tokenizer feeding straight into the stem filter.
          Tokenizer source = new KeywordTokenizer();
          TokenStream stemmed = new NorwegianMinimalStemFilter(source);
          return new TokenStreamComponents(source, stemmed);
        }
      };
  checkOneTerm(stemAnalyzer, "", "");
  stemAnalyzer.close();
}
Example #24
Source Project: lucene-solr Author: apache File: HTMLStripCharFilterTest.java License: Apache License 2.0 | 5 votes |
/**
 * Generates a random HTML-like string of up to 10000 elements and runs the analysis
 * consistency check on it with the test analyzer.
 */
public void testRandomBrokenHTML() throws Exception {
  final int maxNumElements = 10000;
  final String htmlish = TestUtil.randomHtmlishString(random(), maxNumElements);

  Analyzer analyzer = newTestAnalyzer();
  checkAnalysisConsistency(random(), analyzer, random().nextBoolean(), htmlish);
  analyzer.close();
}
Example #25
Source Project: solr-researcher Author: sematext File: EdismaxQueryConverter.java License: Apache License 2.0 | 5 votes |
/**
 * Runs the given text through the analyzer and returns the produced terms in order.
 *
 * <p>The token stream is obtained for the empty field name and is always closed, including
 * when consuming it fails, so the analyzer's reusable components are not left in an
 * unfinished state.
 *
 * @param text the text to analyze
 * @param analyzer the analyzer to apply
 * @return the term text of every token emitted by the analyzer, in emission order
 * @throws IOException if the token stream fails while being consumed
 */
protected String[] analyze(String text, Analyzer analyzer) throws IOException {
  List<String> result = new ArrayList<String>();
  TokenStream stream = analyzer.tokenStream("", new StringReader(text));
  try {
    CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
      // toString() copies exactly the first length() chars of the term buffer.
      result.add(termAtt.toString());
    }
    stream.end();
  } finally {
    stream.close();
  }
  return result.toArray(new String[result.size()]);
}
Example #26
Source Project: lucene-solr Author: apache File: TestBrazilianAnalyzer.java License: Apache License 2.0 | 5 votes |
public void testReusableTokenStream() throws Exception { Analyzer a = new BrazilianAnalyzer(); checkReuse(a, "boa", "boa"); checkReuse(a, "boainain", "boainain"); checkReuse(a, "boas", "boas"); checkReuse(a, "bôas", "boas"); // removes diacritic: different from snowball portugese a.close(); }
Example #27
Source Project: lucene-solr Author: apache File: TestSuggestField.java License: Apache License 2.0 | 5 votes |
/**
 * Indexes many documents that all suggest the same value "abc" with random weights and
 * checks that a prefix completion query returns one entry per document, ordered from the
 * highest weight to the lowest.
 */
@Test @Slow
public void testDupSuggestFieldValues() throws Exception {
  Analyzer analyzer = new MockAnalyzer(random());
  RandomIndexWriter indexWriter =
      new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));

  final int num = Math.min(1000, atLeast(100));
  int[] weights = new int[num];
  for (int docIndex = 0; docIndex < num; docIndex++) {
    Document document = new Document();
    int weight = random().nextInt(Integer.MAX_VALUE);
    weights[docIndex] = weight;
    document.add(new SuggestField("suggest_field", "abc", weight));
    indexWriter.addDocument(document);

    if (usually()) {
      indexWriter.commit();
    }
  }

  DirectoryReader reader = indexWriter.getReader();

  // Expected order: weights descending, i.e. the ascending sort read back to front.
  Entry[] expectedEntries = new Entry[num];
  Arrays.sort(weights);
  for (int rank = 0; rank < num; rank++) {
    expectedEntries[rank] = new Entry("abc", weights[num - 1 - rank]);
  }

  SuggestIndexSearcher suggestIndexSearcher = new SuggestIndexSearcher(reader);
  PrefixCompletionQuery query =
      new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc"));
  TopSuggestDocs lookupDocs = suggestIndexSearcher.suggest(query, num, false);
  assertSuggestions(lookupDocs, expectedEntries);

  reader.close();
  indexWriter.close();
}
Example #28
Source Project: russianmorphology Author: AKuznetsov File: AnalyzersTest.java License: Apache License 2.0 | 5 votes |
/**
 * Runs the {@link RussianAnalyzer} over the Russian test data resource and compares the
 * result with the expected-answer resource, using the {@code testAnalayzer} helper.
 */
@Test
public void shouldGiveCorrectWordsForRussian() throws IOException {
  final String answerPath = "/russian/russian-analyzer-answer.txt";
  final String testPath = "/russian/russian-analyzer-data.txt";

  Analyzer russianAnalyzer = new RussianAnalyzer();
  testAnalayzer(russianAnalyzer, answerPath, testPath);
}
Example #29
Source Project: lucene-solr Author: apache File: SearchImpl.java License: Apache License 2.0 | 5 votes |
/**
 * Parses a query expression with the classic {@link QueryParser}, configured from the
 * given {@link QueryParserConfig}.
 *
 * @param expression the query expression to parse
 * @param defField the default field for the parser
 * @param analyzer the analyzer for the parser
 * @param config source of the parser options applied before parsing
 * @return the parsed query
 * @throws LukeException if the expression cannot be parsed; the {@link ParseException} is
 *     kept as the cause
 */
private Query parseByClassicParser(String expression, String defField, Analyzer analyzer, QueryParserConfig config) {
  QueryParser classicParser = new QueryParser(defField, analyzer);

  switch (config.getDefaultOperator()) {
    case OR:
      classicParser.setDefaultOperator(QueryParser.Operator.OR);
      break;
    case AND:
      classicParser.setDefaultOperator(QueryParser.Operator.AND);
      break;
  }

  // The setters are applied in a fixed order; do not reorder them.
  classicParser.setSplitOnWhitespace(config.isSplitOnWhitespace());
  classicParser.setAutoGenerateMultiTermSynonymsPhraseQuery(config.isAutoGenerateMultiTermSynonymsPhraseQuery());
  classicParser.setAutoGeneratePhraseQueries(config.isAutoGeneratePhraseQueries());
  classicParser.setEnablePositionIncrements(config.isEnablePositionIncrements());
  classicParser.setAllowLeadingWildcard(config.isAllowLeadingWildcard());
  classicParser.setDateResolution(config.getDateResolution());
  classicParser.setFuzzyMinSim(config.getFuzzyMinSim());
  classicParser.setFuzzyPrefixLength(config.getFuzzyPrefixLength());
  classicParser.setLocale(config.getLocale());
  classicParser.setTimeZone(config.getTimeZone());
  classicParser.setPhraseSlop(config.getPhraseSlop());

  Query parsed;
  try {
    parsed = classicParser.parse(expression);
  } catch (ParseException e) {
    throw new LukeException(String.format(Locale.ENGLISH, "Failed to parse query expression: %s", expression), e);
  }
  return parsed;
}
Example #30
Source Project: elasticsearch-plugin-bundle Author: jprante File: SegmentationIcuTokenizerTests.java License: GNU Affero General Public License v3.0 | 5 votes |
/**
 * Checks the tokens and token types that the analyzer from {@code createAnalyzer()} produces
 * for a short Japanese input mixing kanji, hiragana and katakana.
 */
public void testJapanese() throws Exception {
  Analyzer analyzer = createAnalyzer();

  String[] expectedTokens = {"仮", "名", "遣", "い", "カタカナ"};
  String[] expectedTypes = {
    "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<HIRAGANA>", "<KATAKANA>"
  };
  assertAnalyzesTo(analyzer, "仮名遣い カタカナ", expectedTokens, expectedTypes);

  destroyAnalzyer(analyzer);
}