Java Code Examples for org.apache.lucene.analysis.standard.StandardTokenizerFactory

The following examples show how to use org.apache.lucene.analysis.standard.StandardTokenizerFactory. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: Quelea — Source File: SongSearchIndex.java — License: GNU General Public License v3.0 (6 votes)
/**
 * Create a new empty search index.
 * <p>
 * The analyzer lower-cases tokens and folds accented characters to their
 * ASCII equivalents so searches are case- and accent-insensitive. The index
 * itself is a memory-mapped directory backed by a freshly created temp
 * directory, so its contents are rebuilt on every run.
 *
 * @throws RuntimeException if the underlying Lucene index cannot be created
 */
public SongSearchIndex() {
    songs = new HashMap<>();
    try {
        analyzer = CustomAnalyzer.builder()
                .withTokenizer(StandardTokenizerFactory.class)
                .addTokenFilter(LowerCaseFilterFactory.class)
                .addTokenFilter(ASCIIFoldingFilterFactory.class)
                .build();
        index = new MMapDirectory(Files.createTempDirectory("quelea-mmap-song").toAbsolutePath());
    }
    catch(IOException ex) {
        // Pass the exception to the logger so the stack trace is recorded
        // even if the rethrown RuntimeException is swallowed upstream.
        LOGGER.log(Level.SEVERE, "Couldn't create song search index", ex);
        throw new RuntimeException("Couldn't create song search index", ex);
    }
}
 
Example 2
Source Project: ambari-logsearch — Source File: SolrUtil.java — License: Apache License 2.0 (6 votes)
/**
 * Applies the escaping/wildcard strategy appropriate to the Solr field type
 * of the target field.
 * <p>
 * Numeric fields get number-specific escaping (or {@code null} when that
 * yields nothing usable); standard-tokenized fields get tokenizer-aware
 * escaping; keyword/string fields are searched without a trailing asterisk;
 * path-hierarchy fields pass through untouched; anything else gets generic
 * query-character escaping.
 *
 * @param str               the raw search term
 * @param fieldType         the Solr field type name; blank means "no type info"
 * @param fieldTypeMetaData serialized field-type metadata used to inspect the type
 * @return the escaped/adjusted search string, {@code null} for an unusable
 *         numeric value, or {@code str} unchanged when no type is known
 */
public static String putWildCardByType(String str, String fieldType, String fieldTypeMetaData) {
  Map<String, Object> fieldTypeInfoMap = getFieldTypeInfoMap(fieldTypeMetaData);
  if (StringUtils.isBlank(fieldType)) {
    // No type information available — leave the term untouched.
    return str;
  }
  if (isSolrFieldNumber(fieldTypeInfoMap)) {
    String escaped = putEscapeCharacterForNumber(str, fieldTypeInfoMap);
    return StringUtils.isNotBlank(escaped) ? escaped : null;
  }
  if (checkTokenizer(StandardTokenizerFactory.class, fieldTypeInfoMap)) {
    return escapeForStandardTokenizer(str);
  }
  if (checkTokenizer(KeywordTokenizerFactory.class, fieldTypeInfoMap) || "string".equalsIgnoreCase(fieldType)) {
    return makeSolrSearchStringWithoutAsterisk(str);
  }
  if (checkTokenizer(PathHierarchyTokenizerFactory.class, fieldTypeInfoMap)) {
    return str;
  }
  return escapeQueryChars(str);
}
 
Example 3
/**
 * Builds the default analyzer: standard tokenization followed by CJK width
 * normalization, ASCII folding (originals discarded), lower-casing,
 * stop-word removal and English possessive stripping, plus the shared
 * underscore-removal filter.
 *
 * @return the configured (unbuilt) analyzer builder
 * @throws IOException if a filter factory cannot be initialized
 */
private Builder createDefaultAnalyzerBuilder() throws IOException {
    // Filter order matters: width/folding normalization must run before
    // lower-casing and stop-word filtering.
    Builder builder = CustomAnalyzer.builder().withTokenizer(StandardTokenizerFactory.class);
    builder.addTokenFilter(CJKWidthFilterFactory.class);
    builder.addTokenFilter(ASCIIFoldingFilterFactory.class, "preserveOriginal", "false");
    builder.addTokenFilter(LowerCaseFilterFactory.class);
    builder.addTokenFilter(StopFilterFactory.class, "words", STOP_WORDS);
    builder.addTokenFilter(EnglishPossessiveFilterFactory.class);
    addTokenFilterForUnderscoreRemovalAroundToken(builder);
    return builder;
}
 
Example 4
/**
 * Builds the artist-field analyzer. Identical pipeline to the default
 * analyzer except that it uses the artist-specific stop-word list.
 *
 * @return the configured (unbuilt) analyzer builder
 * @throws IOException if a filter factory cannot be initialized
 */
private Builder createArtistAnalyzerBuilder() throws IOException {
    Builder builder = CustomAnalyzer.builder().withTokenizer(StandardTokenizerFactory.class);
    builder.addTokenFilter(CJKWidthFilterFactory.class);
    builder.addTokenFilter(ASCIIFoldingFilterFactory.class, "preserveOriginal", "false");
    builder.addTokenFilter(LowerCaseFilterFactory.class);
    // Artist names use their own stop-word set (e.g. leading articles).
    builder.addTokenFilter(StopFilterFactory.class, "words", STOP_WORDS_ARTIST);
    builder.addTokenFilter(EnglishPossessiveFilterFactory.class);
    addTokenFilterForUnderscoreRemovalAroundToken(builder);
    return builder;
}
 
Example 5
Source Project: Quelea — Source File: BibleSearchIndex.java — License: GNU General Public License v3.0 (5 votes)
/**
 * Create a new empty search index.
 * <p>
 * The analyzer lower-cases tokens and folds accented characters to their
 * ASCII equivalents so searches are case- and accent-insensitive. The index
 * is a memory-mapped directory backed by a freshly created temp directory.
 *
 * @throws RuntimeException if the underlying Lucene index cannot be created
 */
public BibleSearchIndex() {
    chapters = new HashMap<>();
    try {
        analyzer = CustomAnalyzer.builder()
                .withTokenizer(StandardTokenizerFactory.class)
                .addTokenFilter(LowerCaseFilterFactory.class)
                .addTokenFilter(ASCIIFoldingFilterFactory.class)
                .build();
        index = new MMapDirectory(Files.createTempDirectory("quelea-mmap-bible").toAbsolutePath());
    } catch (IOException ex) {
        // Message fixed from the copy-pasted "song search index"; the
        // exception is also passed to the logger so the trace isn't lost.
        LOGGER.log(Level.SEVERE, "Couldn't create bible search index", ex);
        throw new RuntimeException("Couldn't create bible search index", ex);
    }
}
 
Example 6
Source Project: airsonic — Source File: AnalyzerFactory.java — License: GNU General Public License v3.0 (5 votes)
/**
 * Builds the default analyzer pipeline: standard tokenizer, CJK width
 * normalization, ASCII folding (originals discarded), lower-casing,
 * stop-word removal, English possessive stripping, and the shared
 * underscore-removal filter.
 *
 * @return the configured (unbuilt) analyzer builder
 * @throws IOException if a filter factory cannot be initialized
 */
private Builder createDefaultAnalyzerBuilder() throws IOException {
    // Order is significant: normalization filters must precede
    // lower-casing and stop-word removal.
    Builder builder = CustomAnalyzer.builder().withTokenizer(StandardTokenizerFactory.class);
    builder.addTokenFilter(CJKWidthFilterFactory.class);
    builder.addTokenFilter(ASCIIFoldingFilterFactory.class, "preserveOriginal", "false");
    builder.addTokenFilter(LowerCaseFilterFactory.class);
    builder.addTokenFilter(StopFilterFactory.class, "words", STOP_WORDS);
    builder.addTokenFilter(EnglishPossessiveFilterFactory.class);
    addTokenFilterForUnderscoreRemovalAroundToken(builder);
    return builder;
}
 
Example 7
Source Project: airsonic — Source File: AnalyzerFactory.java — License: GNU General Public License v3.0 (5 votes)
/**
 * Builds the artist-field analyzer — the same pipeline as the default
 * analyzer, but using the artist-specific stop-word list.
 *
 * @return the configured (unbuilt) analyzer builder
 * @throws IOException if a filter factory cannot be initialized
 */
private Builder createArtistAnalyzerBuilder() throws IOException {
    Builder builder = CustomAnalyzer.builder().withTokenizer(StandardTokenizerFactory.class);
    builder.addTokenFilter(CJKWidthFilterFactory.class);
    builder.addTokenFilter(ASCIIFoldingFilterFactory.class, "preserveOriginal", "false");
    builder.addTokenFilter(LowerCaseFilterFactory.class);
    builder.addTokenFilter(StopFilterFactory.class, "words", STOP_WORDS_ARTIST);
    builder.addTokenFilter(EnglishPossessiveFilterFactory.class);
    addTokenFilterForUnderscoreRemovalAroundToken(builder);
    return builder;
}
 
Example 8
Source Project: lucene-solr — Source File: TestCustomAnalyzer.java — License: Apache License 2.0 (5 votes)
/**
 * Verifies that assigning a tokenizer twice on the same builder is
 * rejected with {@code AlreadySetException} — the tokenizer slot is
 * write-once.
 */
public void testSetTokenizerTwice() throws Exception {
  expectThrows(AlreadySetException.class,
      () -> CustomAnalyzer.builder()
          .withTokenizer("whitespace")
          .withTokenizer(StandardTokenizerFactory.class)
          .build());
}
 
Example 9
Source Project: cia — Source File: DataSearchAnalysisConfigurer.java — License: Apache License 2.0 (5 votes)
/**
 * Registers the custom Lucene analyzers used for searching: a lower-cased
 * trigram analyzer plus language-specific stemming analyzers for Swedish
 * and English.
 *
 * @param context the Lucene analysis configuration context to register with
 */
@Override
public void configure(LuceneAnalysisConfigurationContext context) {
	// "ngram": fixed-size (3) lower-cased n-grams for substring-style matching.
	context.analyzer("ngram").custom()
			.tokenizer(StandardTokenizerFactory.class)
			.tokenFilter(LowerCaseFilterFactory.class)
			.tokenFilter(NGramFilterFactory.class)
			.param("minGramSize", "3")
			.param("maxGramSize", "3");

	// "se": Swedish light stemming over lower-cased standard tokens.
	context.analyzer("se").custom()
			.tokenizer(StandardTokenizerFactory.class)
			.tokenFilter(LowerCaseFilterFactory.class)
			.tokenFilter(SwedishLightStemFilterFactory.class);

	// "en": Porter stemming for English over lower-cased standard tokens.
	context.analyzer("en").custom()
			.tokenizer(StandardTokenizerFactory.class)
			.tokenFilter(LowerCaseFilterFactory.class)
			.tokenFilter(PorterStemFilterFactory.class);
}