Java Code Examples for org.apache.lucene.analysis.charfilter.NormalizeCharMap

The following examples show how to use org.apache.lucene.analysis.charfilter.NormalizeCharMap. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: lucene-solr   Source File: TestSimplePatternTokenizer.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks the offsets reported by {@code SimplePatternTokenizer} when a
 * {@code MappingCharFilter} rewrites the {@code &uuml;} entity to {@code ü} before tokenizing.
 * The expected offsets are expressed against the raw, unmapped input.
 */
public void testOffsetCorrection() throws Exception {
  // The raw input has to contain the literal 6-character "&uuml;" entity: that is the only
  // key registered in the char map below, and it makes each "G&uuml;nther" 12 characters long,
  // which is what the asserted offsets (0-12 and 13-25) and INPUT.length() rely on.
  final String INPUT = "G&uuml;nther G&uuml;nther is here";

  // create MappingCharFilter
  NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
  builder.add("&uuml;", "ü");
  NormalizeCharMap normMap = builder.build();
  CharFilter charStream = new MappingCharFilter(normMap, new StringReader(INPUT));

  // create SimplePatternTokenizer
  Tokenizer stream = new SimplePatternTokenizer("Günther");
  stream.setReader(charStream);
  assertTokenStreamContents(stream,
      new String[] { "Günther", "Günther" },  // token text after mapping
      new int[] { 0, 13 },                    // start of each "G&uuml;nther" in INPUT
      new int[] { 12, 25 },                   // end of each "G&uuml;nther" in INPUT
      INPUT.length());
}
 
Example 2
/**
 * Checks the offsets reported by {@code SimplePatternSplitTokenizer} when a
 * {@code MappingCharFilter} rewrites the {@code &uuml;} entity to {@code ü} before tokenizing.
 * The expected offsets are expressed against the raw, unmapped input.
 */
public void testOffsetCorrection() throws Exception {
  // Each "G&uuml;nther" occupies 12 characters of the raw input.
  final String INPUT = "G&uuml;nther G&uuml;nther is here";

  // create MappingCharFilter
  NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
  builder.add("&uuml;", "ü");
  NormalizeCharMap normMap = builder.build();
  CharFilter charStream = new MappingCharFilter(normMap, new StringReader(INPUT));

  // create SimplePatternSplitTokenizer
  Tokenizer stream = new SimplePatternSplitTokenizer("Günther");
  stream.setReader(charStream);
  assertTokenStreamContents(stream,
      new String[] { " ", " is here" },  // the text left between/after the "Günther" matches
      new int[] { 12, 25 },              // each piece starts where a raw "G&uuml;nther" ends
      new int[] { 13, 33 },              // 33 == INPUT.length()
      INPUT.length());
}
 
Example 3
Source Project: Elasticsearch   Source File: MappingCharFilterFactory.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates the factory and builds its {@link NormalizeCharMap} from the configured mapping rules.
 *
 * <p>The rules are looked up through {@code Analysis.getWordList} under the {@code mappings}
 * key (presumably this also resolves {@code mappings_path}, as the error message suggests;
 * confirm against {@code Analysis}).
 *
 * @throws IllegalArgumentException if no rule list is configured
 */
@Inject
public MappingCharFilterFactory(Index index, IndexSettingsService indexSettingsService, Environment env, @Assisted String name, @Assisted Settings settings) {
    super(index, indexSettingsService.getSettings(), name);

    final List<String> mappingRules = Analysis.getWordList(env, settings, "mappings");
    if (mappingRules == null) {
        throw new IllegalArgumentException("mapping requires either `mappings` or `mappings_path` to be configured");
    }

    // Feed every rule into a fresh builder, then keep the finished map.
    final NormalizeCharMap.Builder charMapBuilder = new NormalizeCharMap.Builder();
    parseRules(mappingRules, charMapBuilder);
    this.normMap = charMapBuilder.build();
}
 
Example 4
Source Project: Elasticsearch   Source File: MappingCharFilterFactory.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Turns MappingCharFilter-style rule strings into entries of the supplied char-map builder.
 *
 * <p>Each rule is matched against {@code rulePattern}; capture groups 1 and 2 are trimmed,
 * passed through {@code parseString}, and added to {@code map} as a match/replacement pair.
 *
 * @param rules the raw rule strings to parse
 * @param map   the builder that receives one entry per rule
 * @throws RuntimeException if a rule does not match the pattern, or if either side of the
 *                          rule parses to {@code null}
 */
private void parseRules(List<String> rules, NormalizeCharMap.Builder map) {
    for (final String rule : rules) {
        final Matcher ruleMatcher = rulePattern.matcher(rule);
        if (!ruleMatcher.find()) {
            throw new RuntimeException("Invalid Mapping Rule : [" + rule + "]");
        }

        final String match = parseString(ruleMatcher.group(1).trim());
        final String replacement = parseString(ruleMatcher.group(2).trim());
        final boolean bothSidesParsed = match != null && replacement != null;
        if (!bothSidesParsed) {
            throw new RuntimeException("Invalid Mapping Rule : [" + rule + "]. Illegal mapping.");
        }

        map.add(match, replacement);
    }
}
 
Example 5
Source Project: lucene-solr   Source File: TestPathHierarchyTokenizer.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a Windows-style path whose backslashes are mapped to {@code /} by a
 * {@code MappingCharFilter} is split by {@code PathHierarchyTokenizer} into its cumulative
 * path prefixes.
 */
public void testNormalizeWinDelimToLinuxDelim() throws Exception {
  // Char map with a single rule: backslash -> forward slash.
  NormalizeCharMap.Builder mapBuilder = new NormalizeCharMap.Builder();
  mapBuilder.add("\\", "/");
  NormalizeCharMap backslashToSlash = mapBuilder.build();

  String path = "c:\\a\\b\\c";
  Reader mappedReader = new MappingCharFilter(backslashToSlash, new StringReader(path));

  PathHierarchyTokenizer tokenizer =
      new PathHierarchyTokenizer(newAttributeFactory(), DEFAULT_DELIMITER, DEFAULT_DELIMITER, DEFAULT_SKIP);
  tokenizer.setReader(mappedReader);

  // Expected values, in the argument order assertTokenStreamContents takes them.
  // NOTE(review): the three int arrays are presumably start offsets, end offsets and
  // position increments; confirm against the test base class.
  String[] expectedTerms = {"c:", "c:/a", "c:/a/b", "c:/a/b/c"};
  int[] expectedStarts = {0, 0, 0, 0};
  int[] expectedEnds = {2, 4, 6, 8};
  int[] expectedIncrements = {1, 0, 0, 0};

  assertTokenStreamContents(tokenizer,
      expectedTerms,
      expectedStarts,
      expectedEnds,
      expectedIncrements,
      path.length());
}
 
Example 6
Source Project: crate   Source File: MappingCharFilterFactory.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates the factory and builds its {@link NormalizeCharMap} from the configured mapping rules.
 *
 * <p>The rules are looked up through {@code Analysis.getWordList} under the {@code mappings}
 * key (presumably this also resolves {@code mappings_path}, as the error message suggests;
 * confirm against {@code Analysis}).
 *
 * @throws IllegalArgumentException if no rule list is configured
 */
MappingCharFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
    super(indexSettings, name);

    final List<String> configuredRules = Analysis.getWordList(env, settings, "mappings");
    if (configuredRules == null) {
        throw new IllegalArgumentException("mapping requires either `mappings` or `mappings_path` to be configured");
    }

    // Parse every rule into a fresh builder, then keep the finished map.
    final NormalizeCharMap.Builder mapBuilder = new NormalizeCharMap.Builder();
    parseRules(configuredRules, mapBuilder);
    this.normMap = mapBuilder.build();
}
 
Example 7
Source Project: crate   Source File: MappingCharFilterFactory.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Parses a list of MappingCharFilter-style rules and registers each one with the given
 * normalize-char-map builder.
 *
 * <p>A rule must match {@code rulePattern}; its first and second capture groups are trimmed
 * and decoded by {@code parseString} to obtain the text to match and its replacement.
 *
 * @param rules the raw rule strings to parse
 * @param map   the builder that receives one entry per rule
 * @throws RuntimeException if a rule does not match the pattern, or if either decoded side
 *                          is {@code null}
 */
private void parseRules(List<String> rules, NormalizeCharMap.Builder map) {
    for (String rawRule : rules) {
        Matcher matcher = rulePattern.matcher(rawRule);
        boolean wellFormed = matcher.find();
        if (!wellFormed) {
            throw new RuntimeException("Invalid Mapping Rule : [" + rawRule + "]");
        }

        String source = parseString(matcher.group(1).trim());
        String target = parseString(matcher.group(2).trim());
        if (source == null || target == null) {
            throw new RuntimeException("Invalid Mapping Rule : [" + rawRule + "]. Illegal mapping.");
        }

        map.add(source, target);
    }
}