Java Code Examples for org.apache.lucene.analysis.CharArraySet#EMPTY_SET

The following examples show how to use org.apache.lucene.analysis.CharArraySet#EMPTY_SET. You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: Analysis.java    From crate with Apache License 2.0 6 votes vote down vote up
/**
 * Resolves the word set configured under the setting {@code name}.
 * <ul>
 *   <li>If the setting has a value equal to {@code "_none_"}, {@link CharArraySet#EMPTY_SET} is returned.</li>
 *   <li>If the setting has any other value, its list form is resolved through {@code resolveNamedWords}.</li>
 *   <li>If the setting is absent, the list returned by {@code getWordList} is resolved instead, when there is one.</li>
 *   <li>Otherwise {@code defaultWords} is returned as-is.</li>
 * </ul>
 *
 * @param env          environment handed to {@code getWordList}
 * @param settings     settings to read {@code name} from
 * @param name         key of the setting holding the words
 * @param defaultWords result when neither the setting nor a word list is available
 * @param namedWords   named word sets handed to {@code resolveNamedWords}
 * @param ignoreCase   case flag handed to {@code resolveNamedWords}
 * @return the resolved word set
 */
public static CharArraySet parseWords(Environment env, Settings settings, String name, CharArraySet defaultWords,
                                      Map<String, Set<?>> namedWords, boolean ignoreCase) {
    final String configured = settings.get(name);
    if (configured == null) {
        // Nothing configured inline: try the word list helper before giving up on the default.
        final List<String> listedWords = getWordList(env, settings, name);
        if (listedWords == null) {
            return defaultWords;
        }
        return resolveNamedWords(listedWords, namedWords, ignoreCase);
    }
    if ("_none_".equals(configured)) {
        return CharArraySet.EMPTY_SET;
    }
    return resolveNamedWords(settings.getAsList(name), namedWords, ignoreCase);
}
 
Example 2
Source File: TestDutchAnalyzer.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a set passed as the second {@code DutchAnalyzer} constructor argument
 * (the exclusion table, going by the test name) is honored: "lichamelijk", which is in the
 * set, is expected unchanged, while "lichamelijke" is expected as "licham".
 * <p>
 * The check runs against two separately constructed analyzers. Each one is opened in a
 * try-with-resources block so it is closed even when the assertion fails.
 *
 * @throws IOException declared by the original test; propagated from the analysis helpers
 */
public void testExclusionTableViaCtor() throws IOException {
  CharArraySet set = new CharArraySet( 1, true);
  set.add("lichamelijk");
  try (DutchAnalyzer a = new DutchAnalyzer( CharArraySet.EMPTY_SET, set)) {
    assertAnalyzesTo(a, "lichamelijk lichamelijke", new String[] { "lichamelijk", "licham" });
  }

  // Second analyzer, built with the same arguments as the first.
  try (DutchAnalyzer a = new DutchAnalyzer( CharArraySet.EMPTY_SET, set)) {
    assertAnalyzesTo(a, "lichamelijk lichamelijke", new String[] { "lichamelijk", "licham" });
  }
}
 
Example 3
Source File: StandardHtmlStripAnalyzerProvider.java    From crate with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the provider and its {@code StandardHtmlStripAnalyzer}.
 * Stop words are read through {@code Analysis.parseStopWords}, with
 * {@link CharArraySet#EMPTY_SET} supplied as the default.
 *
 * @param indexSettings forwarded to the superclass constructor
 * @param env           environment handed to the stop-word parsing
 * @param name          forwarded to the superclass constructor
 * @param settings      settings the stop words are parsed from
 */
StandardHtmlStripAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    final CharArraySet configuredStopWords =
        Analysis.parseStopWords(env, settings, CharArraySet.EMPTY_SET);
    analyzer = new StandardHtmlStripAnalyzer(configuredStopWords);
    analyzer.setVersion(version);
}
 
Example 4
Source File: TestThaiAnalyzer.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Analyzes a Thai sentence with a {@code ThaiAnalyzer} built on an empty stop set and checks
 * the eight expected terms together with two parallel {@code int} arrays (presumably the
 * start and end offsets of each term; confirm against {@code assertAnalyzesTo}).
 * <p>
 * The analyzer is opened in a try-with-resources block so it is closed even when the
 * assertion fails.
 *
 * @throws Exception declared by the original test; propagated from the analysis helpers
 */
public void testOffsets() throws Exception {
  try (Analyzer analyzer = new ThaiAnalyzer(CharArraySet.EMPTY_SET)) {
    assertAnalyzesTo(analyzer, "การที่ได้ต้องแสดงว่างานดี",
        new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี" },
        new int[] { 0, 3, 6, 9, 13, 17, 20, 23 },
        new int[] { 3, 6, 9, 13, 17, 20, 23, 25 });
  }
}
 
Example 5
Source File: StandardAnalyzerProvider.java    From crate with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the provider and its {@code StandardAnalyzer}.
 * Stop words are read through {@code Analysis.parseStopWords}, with
 * {@link CharArraySet#EMPTY_SET} supplied as the default. The maximum token length comes from
 * the {@code max_token_length} setting and falls back to
 * {@code StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH}.
 *
 * @param indexSettings forwarded to the superclass constructor
 * @param env           environment handed to the stop-word parsing
 * @param name          forwarded to the superclass constructor
 * @param settings      settings the stop words and token length are read from
 */
public StandardAnalyzerProvider(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name, settings);
    final CharArraySet configuredStopWords =
        Analysis.parseStopWords(env, settings, CharArraySet.EMPTY_SET);
    final int tokenLengthLimit =
        settings.getAsInt("max_token_length", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
    standardAnalyzer = new StandardAnalyzer(configuredStopWords);
    standardAnalyzer.setVersion(version);
    standardAnalyzer.setMaxTokenLength(tokenLengthLimit);
}
 
Example 6
Source File: TestThaiAnalyzer.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the same {@code ThaiAnalyzer} instance (built on an empty stop set) over three inputs
 * in a row: the empty string, a Thai sentence, and mixed Thai/Latin text containing an
 * e-mail address.
 * <p>
 * The analyzer is opened in a try-with-resources block so it is closed even when an
 * assertion fails.
 *
 * @throws Exception declared by the original test; propagated from the analysis helpers
 */
public void testReusableTokenStream() throws Exception {
  try (ThaiAnalyzer analyzer = new ThaiAnalyzer(CharArraySet.EMPTY_SET)) {
    assertAnalyzesTo(analyzer, "", new String[] {});

    assertAnalyzesTo(
        analyzer,
        "การที่ได้ต้องแสดงว่างานดี",
        new String[] { "การ", "ที่", "ได้", "ต้อง", "แสดง", "ว่า", "งาน", "ดี"});

    // The input must carry a literal e-mail address: the expected terms "xyz" and
    // "demo.com" below can only be produced from "xyz@demo.com", not from an
    // obfuscation placeholder left behind by a web scraper.
    assertAnalyzesTo(
        analyzer,
        "บริษัทชื่อ XY&Z - คุยกับ xyz@demo.com",
        new String[] { "บริษัท", "ชื่อ", "xy", "z", "คุย", "กับ", "xyz", "demo.com" });
  }
}
 
Example 7
Source File: CJKBigramFilterTests.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Test fixture: assigns {@code analyzer} an anonymous {@code Analyzer} whose component chain is
 * an {@code IcuTokenizer} followed by a {@code CJKBigramFilter} and a {@code StopFilter}
 * configured with {@link CharArraySet#EMPTY_SET}.
 */
@Before
public void up() {
    analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            final Tokenizer tokenizer = new IcuTokenizer(
                    AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY,
                    new DefaultIcuTokenizerConfig(false, true));
            final TokenStream bigrams = new CJKBigramFilter(tokenizer);
            final TokenStream filtered = new StopFilter(bigrams, CharArraySet.EMPTY_SET);
            return new TokenStreamComponents(tokenizer, filtered);
        }
    };
}
 
Example 8
Source File: BulgarianAnalyzer.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Builds an analyzer with the given stop words.
 * <p>
 * Delegates to the two-argument constructor, passing {@link CharArraySet#EMPTY_SET} as the
 * second argument (presumably the stem exclusion set; confirm against that constructor).
 *
 * @param stopwords a stopword set
 */
public BulgarianAnalyzer(CharArraySet stopwords) {
  this(stopwords, CharArraySet.EMPTY_SET);
}
 
Example 9
Source File: TestDutchAnalyzer.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that the default stem overrides are used even if you use a non-default
 * constructor: the term "fiets" is expected to come out as "fiets".
 * <p>
 * The analyzer is opened in a try-with-resources block so it is closed even when the
 * check fails.
 *
 * @throws IOException declared by the original test; propagated from the analysis helpers
 */
public void testStemOverrides() throws IOException {
  try (DutchAnalyzer a = new DutchAnalyzer( CharArraySet.EMPTY_SET)) {
    checkOneTerm(a, "fiets", "fiets");
  }
}
 
Example 10
Source File: UkrainianMorfologikAnalyzer.java    From lucene-solr with Apache License 2.0 2 votes vote down vote up
/**
 * Builds an analyzer with the given stop words.
 * <p>
 * Delegates to the two-argument constructor, passing {@link CharArraySet#EMPTY_SET} as the
 * second argument (presumably the stem exclusion set; confirm against that constructor).
 *
 * @param stopwords a stopword set
 */
public UkrainianMorfologikAnalyzer(CharArraySet stopwords) {
  this(stopwords, CharArraySet.EMPTY_SET);
}
 
Example 11
Source File: IndonesianAnalyzer.java    From lucene-solr with Apache License 2.0 2 votes vote down vote up
/**
 * Builds an analyzer with the given stop words.
 * <p>
 * Delegates to the two-argument constructor, passing {@link CharArraySet#EMPTY_SET} as the
 * second argument (presumably the stem exclusion set; confirm against that constructor).
 *
 * @param stopwords
 *          a stopword set
 */
public IndonesianAnalyzer(CharArraySet stopwords){
  this(stopwords, CharArraySet.EMPTY_SET);
}
 
Example 12
Source File: IrishAnalyzer.java    From lucene-solr with Apache License 2.0 2 votes vote down vote up
/**
 * Builds an analyzer with the given stop words.
 * <p>
 * Delegates to the two-argument constructor, passing {@link CharArraySet#EMPTY_SET} as the
 * second argument (presumably the stem exclusion set; confirm against that constructor).
 *
 * @param stopwords a stopword set
 */
public IrishAnalyzer(CharArraySet stopwords) {
  this(stopwords, CharArraySet.EMPTY_SET);
}
 
Example 13
Source File: CatalanAnalyzer.java    From lucene-solr with Apache License 2.0 2 votes vote down vote up
/**
 * Builds an analyzer with the given stop words.
 * <p>
 * Delegates to the two-argument constructor, passing {@link CharArraySet#EMPTY_SET} as the
 * second argument (presumably the stem exclusion set; confirm against that constructor).
 *
 * @param stopwords a stopword set
 */
public CatalanAnalyzer(CharArraySet stopwords) {
  this(stopwords, CharArraySet.EMPTY_SET);
}
 
Example 14
Source File: GermanAnalyzer.java    From lucene-solr with Apache License 2.0 2 votes vote down vote up
/**
 * Builds an analyzer with the given stop words.
 * <p>
 * Delegates to the two-argument constructor, passing {@link CharArraySet#EMPTY_SET} as the
 * second argument (presumably the stem exclusion set; confirm against that constructor).
 *
 * @param stopwords
 *          a stopword set
 */
public GermanAnalyzer(CharArraySet stopwords) {
  this(stopwords, CharArraySet.EMPTY_SET);
}
 
Example 15
Source File: PolishAnalyzer.java    From lucene-solr with Apache License 2.0 2 votes vote down vote up
/**
 * Builds an analyzer with the given stop words.
 * <p>
 * Delegates to the two-argument constructor, passing {@link CharArraySet#EMPTY_SET} as the
 * second argument (presumably the stem exclusion set; confirm against that constructor).
 *
 * @param stopwords a stopword set
 */
public PolishAnalyzer(CharArraySet stopwords) {
  this(stopwords, CharArraySet.EMPTY_SET);
}
 
Example 16
Source File: SmartChineseAnalyzer.java    From lucene-solr with Apache License 2.0 2 votes vote down vote up
/**
 * <p>
 * Creates a new SmartChineseAnalyzer that uses either the default stopword list or no
 * stopwords at all.
 * </p>
 * <p>
 * The bundled default stopword list is simply a list of punctuation, so when it is not
 * used, punctuation will not be removed from the text!
 * </p>
 *
 * @param useDefaultStopWords {@code true} to use the default stopword list, {@code false}
 *                            to use {@link CharArraySet#EMPTY_SET}
 */
public SmartChineseAnalyzer(boolean useDefaultStopWords) {
  if (useDefaultStopWords) {
    stopWords = DefaultSetHolder.DEFAULT_STOP_SET;
  } else {
    stopWords = CharArraySet.EMPTY_SET;
  }
}
 
Example 17
Source File: SoraniAnalyzer.java    From lucene-solr with Apache License 2.0 2 votes vote down vote up
/**
 * Builds an analyzer with the given stop words.
 * <p>
 * Delegates to the two-argument constructor, passing {@link CharArraySet#EMPTY_SET} as the
 * second argument (presumably the stem exclusion set; confirm against that constructor).
 *
 * @param stopwords a stopword set
 */
public SoraniAnalyzer(CharArraySet stopwords) {
  this(stopwords, CharArraySet.EMPTY_SET);
}
 
Example 18
Source File: BlendedInfixSuggesterTest.java    From lucene-solr with Apache License 2.0 2 votes vote down vote up
/**
 * Builds a {@code BlendedInfixSuggester} over two inputs that both contain the term "you" and
 * checks, for every {@code lookup} overload exercised below, that asking for 1 result yields
 * exactly 1 response and asking for 2 results yields exactly 2.
 * <p>
 * The analyzer and the suggester are opened in a try-with-resources block so both are
 * closed (suggester first, then analyzer) even when an assertion fails.
 *
 * @throws IOException if building, querying or closing the suggester fails
 */
public void testSuggesterCountForAllLookups() throws IOException {

    Input[] keys = new Input[]{
        new Input("lend me your ears", 1),
        new Input("as you sow so shall you reap", 1),
    };

    Path tempDir = createTempDir("BlendedInfixSuggesterTest");

    // BlenderType.LINEAR is used by default (remove position*10%)
    try (Analyzer a = new StandardAnalyzer(CharArraySet.EMPTY_SET);
         BlendedInfixSuggester suggester = new BlendedInfixSuggester(newFSDirectory(tempDir), a)) {
        suggester.build(new InputArrayIterator(keys));

        String term = "you";

        // lookup(key, onlyMorePopular-style flag, num)
        List<Lookup.LookupResult> responses = suggester.lookup(term, false, 1);
        assertEquals(1, responses.size());

        responses = suggester.lookup(term, false, 2);
        assertEquals(2, responses.size());

        // lookup(key, num, flag, flag)
        responses = suggester.lookup(term, 1, false, false);
        assertEquals(1, responses.size());

        responses = suggester.lookup(term, 2, false, false);
        assertEquals(2, responses.size());

        // The casts on null select the intended overload.
        responses = suggester.lookup(term, (Map<BytesRef, BooleanClause.Occur>) null, 1, false, false);
        assertEquals(1, responses.size());

        responses = suggester.lookup(term, (Map<BytesRef, BooleanClause.Occur>) null, 2, false, false);
        assertEquals(2, responses.size());

        responses = suggester.lookup(term, (Set<BytesRef>) null, 1, false, false);
        assertEquals(1, responses.size());

        responses = suggester.lookup(term, (Set<BytesRef>) null, 2, false, false);
        assertEquals(2, responses.size());

        responses = suggester.lookup(term, null, false, 1);
        assertEquals(1, responses.size());

        responses = suggester.lookup(term, null, false, 2);
        assertEquals(2, responses.size());

        responses = suggester.lookup(term, (BooleanQuery) null, 1, false, false);
        assertEquals(1, responses.size());

        responses = suggester.lookup(term, (BooleanQuery) null, 2, false, false);
        assertEquals(2, responses.size());
    }
}
 
Example 19
Source File: GalicianAnalyzer.java    From lucene-solr with Apache License 2.0 2 votes vote down vote up
/**
 * Builds an analyzer with the given stop words.
 * <p>
 * Delegates to the two-argument constructor, passing {@link CharArraySet#EMPTY_SET} as the
 * second argument (presumably the stem exclusion set; confirm against that constructor).
 *
 * @param stopwords a stopword set
 */
public GalicianAnalyzer(CharArraySet stopwords) {
  this(stopwords, CharArraySet.EMPTY_SET);
}
 
Example 20
Source File: BengaliAnalyzer.java    From lucene-solr with Apache License 2.0 2 votes vote down vote up
/**
 * Builds an analyzer with the given stop words.
 * <p>
 * Delegates to the two-argument constructor, passing {@link CharArraySet#EMPTY_SET} as the
 * second argument (presumably the stem exclusion set; confirm against that constructor).
 *
 * @param stopwords a stopword set
 */
public BengaliAnalyzer(CharArraySet stopwords) {
  this(stopwords, CharArraySet.EMPTY_SET);
}