org.apache.lucene.analysis.Analyzer Java Examples

The following examples show how to use org.apache.lucene.analysis.Analyzer. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.

Example #1

Source File: TestDirectSpellChecker.java From lucene-solr with Apache License 2.0

7 votes

/**
 * Indexes the English spellings of 0..19 and checks that the scrambled term
 * "seevntene" yields exactly one suggestion, "seventeen".
 */
public void testTransposition2() throws Exception {
  DirectSpellChecker checker = new DirectSpellChecker();
  Directory directory = newDirectory();
  Analyzer mockAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, mockAnalyzer);

  // One document per number, with the spelled-out number in the "numbers" field.
  int number = 0;
  while (number < 20) {
    Document document = new Document();
    document.add(newTextField("numbers", English.intToEnglish(number), Field.Store.NO));
    indexWriter.addDocument(document);
    number++;
  }

  IndexReader reader = indexWriter.getReader();

  Term misspelled = new Term("numbers", "seevntene");
  SuggestWord[] suggestions =
      checker.suggestSimilar(misspelled, 2, reader, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX);
  assertEquals(1, suggestions.length);
  assertEquals("seventeen", suggestions[0].string);

  IOUtils.close(reader, indexWriter, directory, mockAnalyzer);
}

Example #2

Source File: TestQPHelper.java From lucene-solr with Apache License 2.0

7 votes

/**
 * Checks boost parsing with the standard query parser: boosted terms and
 * phrases must produce a {@link BoostQuery} carrying the parsed boost, while a
 * boosted stop word must collapse to a match-no-docs query that is not a
 * {@link BoostQuery}.
 */
public void testBoost() throws Exception {
  // Analyzer whose only stop word is "on".
  CharacterRunAutomaton stopSet = new CharacterRunAutomaton(Automata.makeString("on"));
  Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet);
  StandardQueryParser qp = new StandardQueryParser();
  qp.setAnalyzer(oneStopAnalyzer);

  Query q = qp.parse("on^1.0", "field");
  assertNotNull(q);
  q = qp.parse("\"hello\"^2.0", "field");
  assertNotNull(q);
  // assertEquals takes (expected, actual, delta); keep the expected boost first
  // so a failure message reports the values the right way round.
  assertEquals(2.0f, ((BoostQuery) q).getBoost(), 0.5f);
  q = qp.parse("hello^2.0", "field");
  assertNotNull(q);
  assertEquals(2.0f, ((BoostQuery) q).getBoost(), 0.5f);
  q = qp.parse("\"on\"^1.0", "field");
  assertNotNull(q);

  StandardQueryParser qp2 = new StandardQueryParser();
  qp2.setAnalyzer(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET));

  q = qp2.parse("the^3", "field");
  // "the" is a stop word so the result is an empty query:
  assertNotNull(q);
  assertMatchNoDocsQuery(q);
  assertFalse(q instanceof BoostQuery);
}

Example #3

Source File: MinHashTest.java From minhash with Apache License 2.0

6 votes

/**
 * Builds MinHash values for the sample texts with a 1-bit, 256-function,
 * seed-0 analyzer and compares the first text against texts 0..9, expecting
 * the fixed similarity values listed below.
 */
public void test_calculate_1bit_256funcs_seed0() throws IOException {
    final int hashBit = 1;
    final int seed = 0;
    final int num = 256;
    final Analyzer minhashAnalyzer = MinHash.createAnalyzer(hashBit, seed, num);
    final StringBuilder[] texts = createTexts();
    final byte[][] data = createMinHashes(minhashAnalyzer, texts);

    // expectedSimilarity[i] is the expected result of comparing data[0] with data[i].
    final float[] expectedSimilarity = {
        1.0f,
        0.90625f,
        0.82421875f,
        0.76953125f,
        0.703125f,
        0.625f,
        0.6015625f,
        0.55078125f,
        0.53125f,
        0.51171875f
    };

    for (int i = 0; i < expectedSimilarity.length; i++) {
        assertEquals(expectedSimilarity[i], MinHash.compare(data[0], data[i]));
    }
}

Example #4

Source File: UnstemmedGermanNormalizationTests.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0

6 votes

/**
 * Runs a German sentence through the "default" index analyzer configured by
 * {@code unstemmed.json} and checks the emitted tokens.
 */
public void testTwo() throws Exception {
    final String sentence = "So wird's was: das Elasticsearch-Buch erscheint beim O'Reilly-Verlag.";
    final String[] expectedTokens = new String[] {
            "wird's",
            "elasticsearch-buch",
            "elasticsearchbuch",
            "erscheint",
            "o'reilly-verlag",
            "o'reillyverlag"
    };

    // Load the analysis settings from the classpath resource.
    final String settingsResource = "unstemmed.json";
    final Settings analysisSettings = Settings.builder()
            .loadFromStream(settingsResource, getClass().getResourceAsStream(settingsResource), true)
            .build();

    final ESTestCase.TestAnalysis testAnalysis = ESTestCase.createTestAnalysis(
            new Index("test", "_na_"),
            analysisSettings,
            new BundlePlugin(Settings.EMPTY),
            new CommonAnalysisPlugin());

    final Analyzer defaultAnalyzer = testAnalysis.indexAnalyzers.get("default");
    assertTokenStreamContents(
            defaultAnalyzer.tokenStream(null, new StringReader(sentence)),
            expectedTokens);
}

Example #5

Source File: TestPhraseQuery.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Indexes "the stop words are here" with an English-stop-word analyzer and
 * checks that the exact phrase "stop words" matches the single document.
 */
public void testPhraseQueryWithStopAnalyzer() throws Exception {
  Directory dir = newDirectory();
  Analyzer analyzerWithStops =
      new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET);
  RandomIndexWriter indexWriter =
      new RandomIndexWriter(random(), dir, newIndexWriterConfig(analyzerWithStops));

  Document document = new Document();
  document.add(newTextField("field", "the stop words are here", Field.Store.YES));
  indexWriter.addDocument(document);

  IndexReader indexReader = indexWriter.getReader();
  indexWriter.close();

  IndexSearcher indexSearcher = newSearcher(indexReader);

  // valid exact phrase query
  PhraseQuery phrase = new PhraseQuery("field", "stop", "words");
  ScoreDoc[] scoreDocs = indexSearcher.search(phrase, 1000).scoreDocs;
  assertEquals(1, scoreDocs.length);
  QueryUtils.check(random(), phrase, indexSearcher);

  indexReader.close();
  dir.close();
}

Example #6

Source File: LanguagePrefixedTokenStream.java From SearchServices with GNU Lesser General Public License v3.0

6 votes

/**
 * Resolves the {@link Analyzer} to use for the given language.
 *
 * The field type is looked up by name in the index schema, trying each of the
 * following in order and stopping at the first one that is not null:
 *
 * <ul>
 *     <li>highlighted_text_ + locale (e.g. highlighted_text_en)</li>
 *     <li>text_ + locale (e.g. text_en)</li>
 *     <li>text___ (text general field)</li>
 * </ul>
 *
 * The query analyzer of that field type is returned in QUERY mode; in any
 * other mode the index analyzer is returned.
 *
 * @param language the language code.
 * @return the {@link Analyzer} associated with the given language.
 */
Analyzer analyzer(String language) {
    // Each lookup only runs when the previous one produced nothing.
    FieldType targetFieldType = indexSchema.getFieldTypeByName(highlightingFieldTypeName(language));
    if (targetFieldType == null) {
        targetFieldType = indexSchema.getFieldTypeByName(localisedFieldTypeName(language));
    }
    if (targetFieldType == null) {
        targetFieldType = indexSchema.getFieldTypeByName(FALLBACK_TEXT_FIELD_TYPE_NAME);
    }

    switch (mode) {
        case QUERY:
            return targetFieldType.getQueryAnalyzer();
        case INDEX:
        default:
            return targetFieldType.getIndexAnalyzer();
    }
}

Example #7

Source File: LuceneDocumentRetrievalExecutor.java From bioasq with Apache License 2.0

6 votes

/**
 * Reads the retrieval configuration from the UIMA context: hit count, query
 * string constructor, query analyzer and searched fields, the index location,
 * and the optional id/title/text field names and URI prefix.
 *
 * @param context the UIMA context supplying the configuration parameters
 * @throws ResourceInitializationException if the index cannot be opened
 */
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
  super.initialize(context);
  hits = UimaContextHelper.getConfigParameterIntValue(context, "hits", 100);

  // Component that builds the query string.
  constructor = UimaContextHelper.createObjectFromConfigParameter(
          context,
          "query-string-constructor",
          "query-string-constructor-params",
          BooleanBagOfPhraseQueryStringConstructor.class,
          QueryStringConstructor.class);

  // Lucene query parser over the configured fields.
  Analyzer queryAnalyzer = UimaContextHelper.createObjectFromConfigParameter(
          context,
          "query-analyzer",
          "query-analyzer-params",
          StandardAnalyzer.class,
          Analyzer.class);
  String[] searchFields = UimaContextHelper.getConfigParameterStringArrayValue(context, "fields");
  parser = new MultiFieldQueryParser(searchFields, queryAnalyzer);

  // Open the on-disk index; an I/O failure is reported as an initialization failure.
  String indexLocation = UimaContextHelper.getConfigParameterStringValue(context, "index");
  try {
    reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexLocation)));
  } catch (IOException openFailure) {
    throw new ResourceInitializationException(openFailure);
  }
  searcher = new IndexSearcher(reader);

  // Optional settings, each defaulting to null.
  idFieldName = UimaContextHelper.getConfigParameterStringValue(context, "id-field", null);
  titleFieldName = UimaContextHelper.getConfigParameterStringValue(context, "title-field", null);
  textFieldName = UimaContextHelper.getConfigParameterStringValue(context, "text-field", null);
  uriPrefix = UimaContextHelper.getConfigParameterStringValue(context, "uri-prefix", null);
}

Example #8

Source File: right_IndexWriter_1.42.java From gumtree-spoon-ast-diff with Apache License 2.0

6 votes

/**
 * Creates a writer over the given directory.
 *
 * The write lock is obtained first; then, under the commit lock, the segment
 * infos are either written (when {@code create} is true) or read from the
 * directory. If that step fails, the write lock obtained here is released
 * before the exception propagates, so a failed construction does not keep the
 * index locked.
 *
 * @param d        the directory holding the index
 * @param a        the analyzer to use
 * @param create   true to write fresh segment infos, false to read the existing ones
 * @param closeDir stored in the {@code closeDir} field
 * @throws IOException if the write lock cannot be obtained or the segment
 *                     infos cannot be written/read
 */
private IndexWriter(Directory d, Analyzer a, final boolean create, boolean closeDir)
  throws IOException {
    this.closeDir = closeDir;
    directory = d;
    analyzer = a;

    Lock writeLock = directory.makeLock(IndexWriter.WRITE_LOCK_NAME);
    if (!writeLock.obtain(WRITE_LOCK_TIMEOUT)) // obtain write lock
      throw new IOException("Index locked for write: " + writeLock);

    boolean success = false;
    try {
      synchronized (directory) {        // in- & inter-process sync
        new Lock.With(directory.makeLock(IndexWriter.COMMIT_LOCK_NAME), COMMIT_LOCK_TIMEOUT) {
            public Object doBody() throws IOException {
              if (create)
                segmentInfos.write(directory);
              else
                segmentInfos.read(directory);
              return null;
            }
          }.run();
      }
      // Only keep the lock in the field once initialization has succeeded, so a
      // failed construction never holds a reference to an already released lock.
      this.writeLock = writeLock;                   // save it
      success = true;
    } finally {
      if (!success)
        writeLock.release();                        // don't leave the index locked
    }
}

Example #9

Source File: IcuAnalysisTests.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0

6 votes

/**
 * Checks that, with empty settings, the bundle plugin registers the ICU char
 * filter, tokenizer, token filters and collation analyzer under their
 * expected names and factory types.
 */
public void testDefaultsIcuAnalysis() throws IOException {
    TestAnalysis testAnalysis = createTestAnalysis(
            new Index("test", "_na_"),
            Settings.EMPTY,
            new BundlePlugin(Settings.EMPTY));

    CharFilterFactory normalizerCharFilter = testAnalysis.charFilter.get("icu_normalizer");
    assertThat(normalizerCharFilter, instanceOf(IcuNormalizerCharFilterFactory.class));

    TokenizerFactory icuTokenizer = testAnalysis.tokenizer.get("icu_tokenizer");
    assertThat(icuTokenizer, instanceOf(IcuTokenizerFactory.class));

    TokenFilterFactory normalizerFilter = testAnalysis.tokenFilter.get("icu_normalizer");
    assertThat(normalizerFilter, instanceOf(IcuNormalizerTokenFilterFactory.class));

    TokenFilterFactory foldingFilter = testAnalysis.tokenFilter.get("icu_folding");
    assertThat(foldingFilter, instanceOf(IcuFoldingTokenFilterFactory.class));

    TokenFilterFactory transformFilter = testAnalysis.tokenFilter.get("icu_transform");
    assertThat(transformFilter, instanceOf(IcuTransformTokenFilterFactory.class));

    Analyzer collationAnalyzer = testAnalysis.indexAnalyzers.get("icu_collation");
    assertThat(collationAnalyzer, instanceOf(NamedAnalyzer.class));
}

Example #10

Source File: TestDocValuesIndexing.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Adds a document with one sorted doc-values field, then adds a second value
 * for the same field to the same document and expects the second
 * {@code addDocument} call to fail with {@link IllegalArgumentException},
 * leaving exactly one document in the index.
 */
public void testAddSortedTwice() throws IOException {
  Analyzer mockAnalyzer = new MockAnalyzer(random());

  Directory dir = newDirectory();
  // RandomIndexWriter is avoided on purpose: it might add more docvalues than we expect
  IndexWriterConfig config = newIndexWriterConfig(mockAnalyzer);
  config.setMergePolicy(newLogMergePolicy());
  IndexWriter indexWriter = new IndexWriter(dir, config);

  Document document = new Document();
  document.add(new SortedDocValuesField("dv", new BytesRef("foo!")));
  indexWriter.addDocument(document);

  // Second value for the same field on the same document.
  document.add(new SortedDocValuesField("dv", new BytesRef("bar!")));
  expectThrows(IllegalArgumentException.class, () -> indexWriter.addDocument(document));

  IndexReader reader = indexWriter.getReader();
  assertEquals(1, reader.numDocs());
  reader.close();
  indexWriter.close();
  dir.close();
}

Example #11

Source File: PayloadUtils.java From lucene-solr with Apache License 2.0

6 votes

/**
 * Determines the payload encoder name for a field type by inspecting the
 * token filter factories of its index analyzer.
 *
 * Only analyzers that are a {@link TokenizerChain} are inspected. The first
 * factory that is either a {@link DelimitedPayloadTokenFilterFactory} (whose
 * configured encoder argument is returned, possibly null) or a
 * {@link NumericPayloadTokenFilterFactory} (reported as "float") decides the
 * result.
 *
 * @param fieldType the field type whose index analyzer is examined
 * @return the encoder name, or null if none could be determined
 */
public static String getPayloadEncoder(FieldType fieldType) {
  // TODO: support custom payload encoding fields too somehow - maybe someone has a custom component that encodes payloads as floats
  Analyzer indexAnalyzer = fieldType.getIndexAnalyzer();
  if (!(indexAnalyzer instanceof TokenizerChain)) {
    return null;
  }

  // examine the indexing analysis chain for DelimitedPayloadTokenFilterFactory or NumericPayloadTokenFilterFactory
  TokenizerChain chain = (TokenizerChain) indexAnalyzer;
  for (TokenFilterFactory filterFactory : chain.getTokenFilterFactories()) {
    if (filterFactory instanceof DelimitedPayloadTokenFilterFactory) {
      return filterFactory.getOriginalArgs().get(DelimitedPayloadTokenFilterFactory.ENCODER_ATTR);
    }
    if (filterFactory instanceof NumericPayloadTokenFilterFactory) {
      // encodes using `PayloadHelper.encodeFloat(payload)`
      return "float";
    }
  }

  return null;
}

Example #12

Source File: WildcardQueryNodeProcessor.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Converts field nodes whose text contains wildcards into
 * {@link PrefixWildcardQueryNode} or {@link WildcardQueryNode}, after running
 * the text through the configured analyzer (if any). All other nodes are
 * returned unchanged.
 */
@Override
protected QueryNode postProcessNode(QueryNode node) throws QueryNodeException {
  // the old Lucene Parser ignores FuzzyQueryNode that are also PrefixWildcardQueryNode or WildcardQueryNode
  // we do the same here, also ignore empty terms
  boolean fieldLike = node instanceof FieldQueryNode || node instanceof FuzzyQueryNode;
  if (!fieldLike) {
    return node;
  }

  FieldQueryNode fieldNode = (FieldQueryNode) node;
  CharSequence termText = fieldNode.getText();

  // do not process wildcards for TermRangeQueryNode children and
  // QuotedFieldQueryNode to reproduce the old parser behavior;
  // empty terms are ignored as well
  if (fieldNode.getParent() instanceof TermRangeQueryNode
      || fieldNode instanceof QuotedFieldQueryNode
      || termText.length() <= 0) {
    return node;
  }

  // Code below simulates the old lucene parser behavior for wildcards
  if (!isWildcard(termText)) {
    return node;
  }

  Analyzer analyzer = getQueryConfigHandler().get(ConfigurationKeys.ANALYZER);
  if (analyzer != null) {
    termText = analyzeWildcard(analyzer, fieldNode.getFieldAsString(), termText.toString());
  }

  if (isPrefixWildcard(termText)) {
    return new PrefixWildcardQueryNode(
        fieldNode.getField(), termText, fieldNode.getBegin(), fieldNode.getEnd());
  }
  return new WildcardQueryNode(
      fieldNode.getField(), termText, fieldNode.getBegin(), fieldNode.getEnd());
}

Example #13

Source File: TestTeeSinkTokenFilter.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Indexes the same token stream twice in one field, once through a
 * {@link TeeSinkTokenFilter} and once through its sink, and checks the
 * offsets recorded in the term vector for both occurrences.
 */
public void testEndOffsetPositionWithTeeSinkTokenFilter() throws Exception {
  Directory directory = newDirectory();
  Analyzer whitespaceAnalyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
  IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig(whitespaceAnalyzer));
  Document document = new Document();

  // Source stream feeding both the tee and its sink.
  TokenStream source = whitespaceAnalyzer.tokenStream("field", "abcd   ");
  TeeSinkTokenFilter teeFilter = new TeeSinkTokenFilter(source);
  TokenStream sinkStream = teeFilter.newSinkTokenStream();

  // Term vectors with offsets and positions enabled.
  FieldType vectorType = new FieldType(TextField.TYPE_NOT_STORED);
  vectorType.setStoreTermVectors(true);
  vectorType.setStoreTermVectorOffsets(true);
  vectorType.setStoreTermVectorPositions(true);

  document.add(new Field("field", teeFilter, vectorType));
  document.add(new Field("field", sinkStream, vectorType));
  indexWriter.addDocument(document);
  indexWriter.close();

  IndexReader indexReader = DirectoryReader.open(directory);
  Terms termVector = indexReader.getTermVectors(0).terms("field");
  assertEquals(1, termVector.size());

  TermsEnum terms = termVector.iterator();
  terms.next();
  assertEquals(2, terms.totalTermFreq());

  PostingsEnum postings = terms.postings(null, PostingsEnum.ALL);
  assertTrue(postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  assertEquals(2, postings.freq());

  // First occurrence.
  postings.nextPosition();
  assertEquals(0, postings.startOffset());
  assertEquals(4, postings.endOffset());

  // Second occurrence.
  postings.nextPosition();
  assertEquals(8, postings.startOffset());
  assertEquals(12, postings.endOffset());

  assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc());
  indexReader.close();
  directory.close();
  whitespaceAnalyzer.close();
}

Example #14

Source File: TestSynonymGraphFilter.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Maps "a b" to "x" (with {@code false} passed to {@code add}) and checks the
 * analysis of "c a b": two tokens, "c" and "x", with the expected offsets,
 * types, position increments and position lengths.
 */
public void testBasicNotKeepOrigOneOutput() throws Exception {
  SynonymMap.Builder builder = new SynonymMap.Builder();
  add(builder, "a b", "x", false);

  Analyzer analyzer = getAnalyzer(builder, true);

  String[] expectedTerms = {"c", "x"};
  int[] expectedStartOffsets = {0, 2};
  int[] expectedEndOffsets = {1, 5};
  String[] expectedTypes = {"word", "SYNONYM"};
  int[] expectedPosIncrements = {1, 1};
  int[] expectedPosLengths = {1, 1};

  assertAnalyzesTo(
      analyzer,
      "c a b",
      expectedTerms,
      expectedStartOffsets,
      expectedEndOffsets,
      expectedTypes,
      expectedPosIncrements,
      expectedPosLengths);
  analyzer.close();
}

Example #15

Source File: TestPorterStemFilter.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Checks that the Porter stem filter maps an empty keyword token to an empty
 * term.
 */
public void testEmptyTerm() throws IOException {
  Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      // Whole input as one token, then stemmed.
      Tokenizer source = new KeywordTokenizer();
      PorterStemFilter stemmed = new PorterStemFilter(source);
      return new TokenStreamComponents(source, stemmed);
    }
  };
  checkOneTerm(analyzer, "", "");
  analyzer.close();
}

Example #16

Source File: TestNorwegianMinimalStemFilter.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Checks that the Norwegian minimal stem filter maps an empty keyword token
 * to an empty term.
 */
public void testEmptyTerm() throws IOException {
  Analyzer analyzer = new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      // Whole input as one token, then stemmed.
      Tokenizer source = new KeywordTokenizer();
      NorwegianMinimalStemFilter stemmed = new NorwegianMinimalStemFilter(source);
      return new TokenStreamComponents(source, stemmed);
    }
  };
  checkOneTerm(analyzer, "", "");
  analyzer.close();
}

Example #17

Source File: HTMLStripCharFilterTest.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Generates a random HTML-like string of up to 10000 elements and runs the
 * analysis consistency check on it with the test analyzer.
 */
public void testRandomBrokenHTML() throws Exception {
  final int elementLimit = 10000;
  final String htmlish = TestUtil.randomHtmlishString(random(), elementLimit);
  final Analyzer analyzer = newTestAnalyzer();
  checkAnalysisConsistency(random(), analyzer, random().nextBoolean(), htmlish);
  analyzer.close();
}

Example #18

Source File: EdismaxQueryConverter.java From solr-researcher with Apache License 2.0

5 votes

/**
 * Runs {@code text} through {@code analyzer} and returns the emitted terms in
 * order.
 *
 * The token stream is always closed, even when analysis fails part-way: an
 * analyzer may reuse its stream, and a stream left unclosed can make the next
 * {@code tokenStream(...)} call on the same analyzer fail.
 *
 * @param text     the text to analyze
 * @param analyzer the analyzer producing the token stream
 * @return the term text of every token, in stream order (empty if none)
 * @throws IOException if the token stream fails while being consumed
 */
protected String[] analyze(String text, Analyzer analyzer) throws IOException {
  List<String> result = new ArrayList<String>();
  TokenStream stream = analyzer.tokenStream("", new StringReader(text));
  try {
    CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
      result.add(termAtt.toString());
    }
    stream.end();
  } finally {
    // Release the stream on both the normal and the exceptional path.
    stream.close();
  }

  return result.toArray(new String[result.size()]);
}

Example #19

Source File: SynonymTokenFilterFactory.java From crate with Apache License 2.0

5 votes

/**
 * Builds the "synonyms" analyzer from the given tokenizer and char filters,
 * using the synonym variant ({@code getSynonymFilter}) of every token filter.
 *
 * @param tokenizer    the tokenizer factory for the analyzer
 * @param charFilters  the char filter factories, in order
 * @param tokenFilters the token filter factories, in order
 * @return the assembled analyzer
 */
protected Analyzer buildSynonymAnalyzer(TokenizerFactory tokenizer, List<CharFilterFactory> charFilters,
                                        List<TokenFilterFactory> tokenFilters) {
    final CharFilterFactory[] charFilterArray = charFilters.toArray(new CharFilterFactory[0]);
    final TokenFilterFactory[] synonymFilters = tokenFilters.stream()
        .map(TokenFilterFactory::getSynonymFilter)
        .toArray(TokenFilterFactory[]::new);
    return new CustomAnalyzer("synonyms", tokenizer, charFilterArray, synonymFilters);
}

Example #20

Source File: TestBrazilianAnalyzer.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Feeds several inputs through one {@link BrazilianAnalyzer} instance via
 * {@code checkReuse} and checks the resulting term for each.
 */
public void testReusableTokenStream() throws Exception {
  // Each row is {input, expected term}.
  final String[][] cases = {
      {"boa", "boa"},
      {"boainain", "boainain"},
      {"boas", "boas"},
      {"bôas", "boas"}, // removes diacritic: different from snowball portugese
  };

  Analyzer analyzer = new BrazilianAnalyzer();
  for (String[] inputAndExpected : cases) {
    checkReuse(analyzer, inputAndExpected[0], inputAndExpected[1]);
  }
  analyzer.close();
}

Example #21

Source File: DocumentDictionaryTest.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Indexes at least 1000 multi-valued suggestion documents and checks that a
 * {@link DocumentDictionary} over the index yields them in the same order,
 * with matching term, weight, payload and contexts.
 */
@Test
public void testMultiValuedField() throws IOException {
  Directory directory = newDirectory();
  Analyzer mockAnalyzer = new MockAnalyzer(random());
  IndexWriterConfig config = newIndexWriterConfig(random(), mockAnalyzer);
  config.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory, config);

  List<Suggestion> expectedSuggestions = indexMultiValuedDocuments(atLeast(1000), indexWriter);
  indexWriter.commit();
  indexWriter.close();

  IndexReader reader = DirectoryReader.open(directory);
  Dictionary dictionary = new DocumentDictionary(
      reader, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME, CONTEXT_FIELD_NAME);
  InputIterator entries = dictionary.getEntryIterator();
  Iterator<Suggestion> expectedIter = expectedSuggestions.iterator();

  // Walk the dictionary entries in lock-step with the expected suggestions.
  for (BytesRef term = entries.next(); term != null; term = entries.next()) {
    Suggestion expected = expectedIter.next();
    assertTrue(term.equals(expected.term));
    long weight = expected.weight;
    // A stored weight of -1 is compared against 0.
    assertEquals(entries.weight(), (weight != -1) ? weight : 0);
    assertEquals(entries.payload(), expected.payload);
    assertTrue(entries.contexts().equals(expected.contexts));
  }

  // Every expected suggestion must have been consumed.
  assertFalse(expectedIter.hasNext());
  IOUtils.close(reader, mockAnalyzer, directory);
}

Example #22

Source File: TestSuggestField.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Indexes the same suggestion value "abc" many times with random weights and
 * checks that a prefix completion query returns all entries ordered by
 * descending weight.
 */
@Test @Slow
public void testDupSuggestFieldValues() throws Exception {
  Analyzer mockAnalyzer = new MockAnalyzer(random());
  RandomIndexWriter indexWriter =
      new RandomIndexWriter(random(), dir, iwcWithSuggestField(mockAnalyzer, "suggest_field"));
  final int num = Math.min(1000, atLeast(100));
  int[] weights = new int[num];

  for (int docIndex = 0; docIndex < num; docIndex++) {
    Document suggestionDoc = new Document();
    weights[docIndex] = random().nextInt(Integer.MAX_VALUE);
    suggestionDoc.add(new SuggestField("suggest_field", "abc", weights[docIndex]));
    indexWriter.addDocument(suggestionDoc);

    if (usually()) {
      indexWriter.commit();
    }
  }

  DirectoryReader indexReader = indexWriter.getReader();

  // Expected entries: the weights from highest to lowest.
  Entry[] expected = new Entry[num];
  Arrays.sort(weights);
  for (int rank = 0; rank < num; rank++) {
    expected[rank] = new Entry("abc", weights[num - 1 - rank]);
  }

  SuggestIndexSearcher searcher = new SuggestIndexSearcher(indexReader);
  PrefixCompletionQuery prefixQuery =
      new PrefixCompletionQuery(mockAnalyzer, new Term("suggest_field", "abc"));
  TopSuggestDocs suggestions = searcher.suggest(prefixQuery, num, false);
  assertSuggestions(suggestions, expected);

  indexReader.close();
  indexWriter.close();
}

Example #23

Source File: AnalyzersTest.java From russianmorphology with Apache License 2.0

5 votes

/**
 * Runs the Russian analyzer over the bundled test data and compares the
 * result with the bundled answer file.
 */
@Test
public void shouldGiveCorrectWordsForRussian() throws IOException {
    final Analyzer russianAnalyzer = new RussianAnalyzer();
    final String expectedResource = "/russian/russian-analyzer-answer.txt";
    final String inputResource = "/russian/russian-analyzer-data.txt";

    testAnalayzer(russianAnalyzer, expectedResource, inputResource);
}

Example #24

Source File: SearchImpl.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Parses {@code expression} with a classic {@link QueryParser} configured
 * from {@code config}.
 *
 * @param expression the query expression to parse
 * @param defField   the default field for the parser
 * @param analyzer   the analyzer for the parser
 * @param config     the parser settings to apply
 * @return the parsed query
 * @throws LukeException if the expression cannot be parsed
 */
private Query parseByClassicParser(String expression, String defField, Analyzer analyzer,
                                   QueryParserConfig config) {
  final QueryParser classicParser = new QueryParser(defField, analyzer);

  // Translate the configured default operator to the parser's own enum.
  switch (config.getDefaultOperator()) {
    case OR:
      classicParser.setDefaultOperator(QueryParser.Operator.OR);
      break;
    case AND:
      classicParser.setDefaultOperator(QueryParser.Operator.AND);
      break;
  }

  // Copy the remaining settings one-to-one.
  classicParser.setSplitOnWhitespace(config.isSplitOnWhitespace());
  classicParser.setAutoGenerateMultiTermSynonymsPhraseQuery(config.isAutoGenerateMultiTermSynonymsPhraseQuery());
  classicParser.setAutoGeneratePhraseQueries(config.isAutoGeneratePhraseQueries());
  classicParser.setEnablePositionIncrements(config.isEnablePositionIncrements());
  classicParser.setAllowLeadingWildcard(config.isAllowLeadingWildcard());
  classicParser.setDateResolution(config.getDateResolution());
  classicParser.setFuzzyMinSim(config.getFuzzyMinSim());
  classicParser.setFuzzyPrefixLength(config.getFuzzyPrefixLength());
  classicParser.setLocale(config.getLocale());
  classicParser.setTimeZone(config.getTimeZone());
  classicParser.setPhraseSlop(config.getPhraseSlop());

  try {
    return classicParser.parse(expression);
  } catch (ParseException parseFailure) {
    final String message =
        String.format(Locale.ENGLISH, "Failed to parse query expression: %s", expression);
    throw new LukeException(message, parseFailure);
  }
}

Example #25

Source File: AnalyzingSuggesterTest.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Builds an exact-first {@link AnalyzingSuggester} from four inputs sharing
 * the prefix "a" and runs a lookup for "a" requesting four results; the test
 * passes when the lookup completes without throwing.
 */
public void testQueueExhaustion() throws Exception {
  Analyzer analyzer = new MockAnalyzer(random());
  Directory directory = getDirectory();
  AnalyzingSuggester analyzingSuggester = new AnalyzingSuggester(
      directory, "suggest", analyzer, analyzer, AnalyzingSuggester.EXACT_FIRST, 256, -1, true);

  Input[] inputs = new Input[] {
      new Input("a", 2),
      new Input("a b c", 3),
      new Input("a c a", 1),
      new Input("a c b", 1),
  };
  analyzingSuggester.build(new InputArrayIterator(inputs));

  analyzingSuggester.lookup("a", false, 4);
  IOUtils.close(analyzer, directory);
}

Example #26

Source File: TestArmenianAnalyzer.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Tests the exclusion set: a word in the set passes through unchanged, while
 * a word outside it is still stemmed.
 */
public void testExclude() throws IOException {
  CharArraySet excludedWords = new CharArraySet(asSet("արծիվներ"), false);
  Analyzer analyzer =
      new ArmenianAnalyzer(ArmenianAnalyzer.getDefaultStopSet(), excludedWords);
  // Excluded word: left as is.
  checkOneTerm(analyzer, "արծիվներ", "արծիվներ");
  // Not excluded: stemmed.
  checkOneTerm(analyzer, "արծիվ", "արծ");
  analyzer.close();
}

Example #27

Source File: SegmentationIcuTokenizerTests.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0

5 votes

/**
 * Checks how mixed Japanese text is segmented and which token type is
 * assigned to each piece.
 */
public void testJapanese() throws Exception {
    final String[] expectedTokens = { "仮", "名", "遣", "い", "カタカナ" };
    final String[] expectedTypes =
            { "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<HIRAGANA>", "<KATAKANA>" };

    Analyzer analyzer = createAnalyzer();
    assertAnalyzesTo(analyzer, "仮名遣い カタカナ", expectedTokens, expectedTypes);
    destroyAnalzyer(analyzer);
}

Example #28

Source File: TestCatalanAnalyzer.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Tests the exclusion set: a word in the set passes through unchanged, while
 * a word outside it is still stemmed.
 */
public void testExclude() throws IOException {
  CharArraySet excludedWords = new CharArraySet(asSet("llengües"), false);
  Analyzer analyzer =
      new CatalanAnalyzer(CatalanAnalyzer.getDefaultStopSet(), excludedWords);
  // Excluded word: left as is.
  checkOneTerm(analyzer, "llengües", "llengües");
  // Not excluded: stemmed.
  checkOneTerm(analyzer, "llengua", "llengu");
  analyzer.close();
}

Example #29

Source File: StandardIndexManager.java From nifi with Apache License 2.0

5 votes

/**
 * Opens the index directory and creates a new index writer for it, wrapped in
 * an {@link IndexWriterCount}.
 *
 * If any step after opening the directory fails, whether with an
 * {@link IOException} or an unchecked exception, every resource opened so far
 * (the directory and, once created, the analyzer) is closed before the
 * original exception is rethrown. Failures while closing are attached to it
 * as suppressed exceptions.
 *
 * @param indexDirectory the directory holding the index
 * @return the writer count wrapping the new event index writer
 * @throws IOException if the directory or the index writer cannot be opened
 */
private IndexWriterCount createWriter(final File indexDirectory) throws IOException {
    final List<Closeable> closeables = new ArrayList<>();
    final Directory directory = FSDirectory.open(indexDirectory.toPath());
    closeables.add(directory);

    try {
        final Analyzer analyzer = new StandardAnalyzer();
        closeables.add(analyzer);

        final IndexWriterConfig config = new IndexWriterConfig(analyzer);

        // Use the configured thread count for both max merges and max threads.
        final ConcurrentMergeScheduler mergeScheduler = new ConcurrentMergeScheduler();
        final int mergeThreads = repoConfig.getConcurrentMergeThreads();
        mergeScheduler.setMaxMergesAndThreads(mergeThreads, mergeThreads);
        config.setMergeScheduler(mergeScheduler);

        final IndexWriter indexWriter = new IndexWriter(directory, config);
        final EventIndexWriter eventIndexWriter = new LuceneEventIndexWriter(indexWriter, indexDirectory);

        final IndexWriterCount writerCount = new IndexWriterCount(eventIndexWriter, analyzer, directory, 1, false);
        logger.debug("Providing new index writer for {}", indexDirectory);
        return writerCount;
    } catch (final IOException | RuntimeException failure) {
        // Unchecked failures (e.g. from configuration) must not leak the
        // directory or analyzer either, so they take the same cleanup path.
        for (final Closeable closeable : closeables) {
            try {
                closeable.close();
            } catch (final IOException closeFailure) {
                failure.addSuppressed(closeFailure);
            }
        }

        throw failure;
    }
}

Example #30

Source File: MapperQueryParser.java From Elasticsearch with Apache License 2.0

5 votes

/**
 * Builds a prefix query for a single field.
 *
 * When the field has a mapped type, its search analyzer is installed for the
 * duration of the call (unless an analyzer is forced) and the field type gets
 * the first chance to build the query; otherwise, or if it returns null, the
 * possibly-analyzed prefix query is used. The previous analyzer is always
 * restored.
 *
 * @param field   the field name
 * @param termStr the prefix term
 * @return the prefix query, or null if a runtime failure occurred and the
 *         settings are lenient
 * @throws ParseException if building the query fails with a parse error
 */
private Query getPrefixQuerySingle(String field, String termStr) throws ParseException {
    currentFieldType = null;
    final Analyzer previousAnalyzer = getAnalyzer();
    try {
        currentFieldType = parseContext.fieldMapper(field);
        if (currentFieldType == null) {
            // Unmapped field: fall back to the raw field name.
            return getPossiblyAnalyzedPrefixQuery(field, termStr);
        }

        if (!forcedAnalyzer) {
            setAnalyzer(parseContext.getSearchAnalyzer(currentFieldType));
        }

        if (currentFieldType.useTermQueryWithQueryString()) {
            final Query fromFieldType =
                    currentFieldType.prefixQuery(termStr, multiTermRewriteMethod, parseContext);
            if (fromFieldType != null) {
                return fromFieldType;
            }
        }
        return getPossiblyAnalyzedPrefixQuery(currentFieldType.names().indexName(), termStr);
    } catch (RuntimeException failure) {
        if (!settings.lenient()) {
            throw failure;
        }
        return null;
    } finally {
        setAnalyzer(previousAnalyzer);
    }
}