Java Code Examples for com.ibm.icu.text.RuleBasedCollator#getRules()

The following examples show how to use com.ibm.icu.text.RuleBasedCollator#getRules(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: TestICUCollationField.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a temporary Solr home containing a {@code collection1} core with the
 * ICU collation config/schema and a freshly generated {@code customrules.dat}.
 *
 * <p>Ugly: but what to do? We want to test custom sort, which reads rules in as a resource.
 * These are largish files, and jvm-specific (as our documentation says, you should always
 * look out for jvm differences with collation).
 * So it's preferable to create this file on-the-fly.
 *
 * <p>After writing the rules, the method reads them back through
 * {@code ICUCollationField.createFromRules} and asserts that the resulting collator
 * equals the one the rules were taken from.
 *
 * @return absolute path of the temporary Solr home directory
 * @throws Exception if the directories or files cannot be created, or the rules fail to parse
 */
public static String setupSolrHome() throws Exception {
  String tmpFile = createTempDir().toFile().getAbsolutePath();
  // make data and conf dirs
  new File(tmpFile  + "/collection1", "data").mkdirs();
  File confDir = new File(tmpFile + "/collection1", "conf");
  confDir.mkdirs();
  
  // copy over configuration files
  FileUtils.copyFile(getFile("analysis-extras/solr/collection1/conf/solrconfig-icucollate.xml"), new File(confDir, "solrconfig.xml"));
  FileUtils.copyFile(getFile("analysis-extras/solr/collection1/conf/schema-icucollate.xml"), new File(confDir, "schema.xml"));
  
  // generate custom collation rules (DIN 5007-2), saving to customrules.dat
  RuleBasedCollator baseCollator = (RuleBasedCollator) Collator.getInstance(new ULocale("de", "DE"));

  // Each umlaut is tailored to sort with its two-letter expansion, once per case.
  // The upper-case UE rule must target upper-case U + combining diaeresis, in line
  // with the AE and OE pairs.
  String DIN5007_2_tailorings =
    "& ae , a\u0308 & AE , A\u0308"+
    "& oe , o\u0308 & OE , O\u0308"+
    "& ue , u\u0308 & UE , U\u0308";

  RuleBasedCollator tailoredCollator = new RuleBasedCollator(baseCollator.getRules() + DIN5007_2_tailorings);
  String tailoredRules = tailoredCollator.getRules();
  final String osFileName = "customrules.dat";
  // try-with-resources so the stream is closed even when the write fails
  try (FileOutputStream os = new FileOutputStream(new File(confDir, osFileName))) {
    IOUtils.write(tailoredRules, os, "UTF-8");
  }

  // Randomly read the rules back either through a loader built from the rule string
  // itself or through a filesystem loader rooted at the conf directory.
  final ResourceLoader loader;
  if (random().nextBoolean()) {
    loader = new StringMockResourceLoader(tailoredRules);
  } else {
    loader = new FilesystemResourceLoader(confDir.toPath());
  }
  final Collator readCollator = ICUCollationField.createFromRules(osFileName, loader);
  assertEquals(tailoredCollator, readCollator);

  return tmpFile;
}
 
Example 2
Source File: TestICUCollationFieldDocValues.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a temporary Solr home containing a {@code collection1} core with the
 * ICU collation config, the doc-values schema, and a freshly generated
 * {@code customrules.dat}.
 *
 * <p>Ugly: but what to do? We want to test custom sort, which reads rules in as a resource.
 * These are largish files, and jvm-specific (as our documentation says, you should always
 * look out for jvm differences with collation).
 * So it's preferable to create this file on-the-fly.
 *
 * @return absolute path of the temporary Solr home directory
 * @throws Exception if the directories or files cannot be created, or the rules fail to parse
 */
public static String setupSolrHome() throws Exception {
  File tmpFile = createTempDir().toFile();
  
  // make data and conf dirs
  new File(tmpFile + "/collection1", "data").mkdirs();
  File confDir = new File(tmpFile + "/collection1", "conf");
  confDir.mkdirs();
  
  // copy over configuration files
  FileUtils.copyFile(getFile("analysis-extras/solr/collection1/conf/solrconfig-icucollate.xml"), new File(confDir, "solrconfig.xml"));
  FileUtils.copyFile(getFile("analysis-extras/solr/collection1/conf/schema-icucollate-dv.xml"), new File(confDir, "schema.xml"));
  
  // generate custom collation rules (DIN 5007-2), saving to customrules.dat
  RuleBasedCollator baseCollator = (RuleBasedCollator) Collator.getInstance(new ULocale("de", "DE"));

  // Each umlaut is tailored to sort with its two-letter expansion, once per case.
  // The upper-case UE rule must target upper-case U + combining diaeresis, in line
  // with the AE and OE pairs.
  String DIN5007_2_tailorings =
    "& ae , a\u0308 & AE , A\u0308"+
    "& oe , o\u0308 & OE , O\u0308"+
    "& ue , u\u0308 & UE , U\u0308";

  RuleBasedCollator tailoredCollator = new RuleBasedCollator(baseCollator.getRules() + DIN5007_2_tailorings);
  String tailoredRules = tailoredCollator.getRules();
  // try-with-resources so the stream is closed even when the write fails
  try (FileOutputStream os = new FileOutputStream(new File(confDir, "customrules.dat"))) {
    IOUtils.write(tailoredRules, os, "UTF-8");
  }

  return tmpFile.getAbsolutePath();
}
 
Example 3
Source File: IcuCollationAnalyzerTests.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Checks that an {@code icu_collation} analyzer configured with custom DIN 5007-2
 * rules at primary strength produces the same output for a word written with an
 * umlaut and the same word written with the two-letter expansion.
 *
 * @throws Exception if the analysis setup or token stream processing fails
 */
public void testCustomRules() throws Exception {
    RuleBasedCollator baseCollator = (RuleBasedCollator) Collator.getInstance(new ULocale("de_DE"));
    // Each umlaut is tailored to sort with its two-letter expansion, once per case.
    // The upper-case UE rule must target upper-case U + combining diaeresis, in line
    // with the AE and OE pairs.
    String DIN5007_2_tailorings =
            "& ae , a\u0308 & AE , A\u0308"
            + "& oe , o\u0308 & OE , O\u0308"
            + "& ue , u\u0308 & UE , U\u0308";

    // Round-trip through a collator so the analyzer receives the full rule set
    // (base German rules plus the tailorings) as a single string.
    RuleBasedCollator tailoredCollator = new RuleBasedCollator(baseCollator.getRules() + DIN5007_2_tailorings);
    String tailoredRules = tailoredCollator.getRules();

    Settings settings = Settings.builder()
            .put("index.analysis.analyzer.myAnalyzer.type", "icu_collation")
            .put("index.analysis.analyzer.myAnalyzer.rules", tailoredRules)
            .put("index.analysis.analyzer.myAnalyzer.strength", "primary")
            .build();
    ESTestCase.TestAnalysis analysis = ESTestCase.createTestAnalysis(new Index("test", "_na_"),
            settings,
            new BundlePlugin(Settings.EMPTY));
    Analyzer analyzer = analysis.indexAnalyzers.get("myAnalyzer");

    String germanUmlaut = "Töne";
    TokenStream tsUmlaut = analyzer.tokenStream(null, germanUmlaut);
    BytesRef b1 = bytesFromTokenStream(tsUmlaut);

    String germanExpandedUmlaut = "Toene";
    TokenStream tsExpanded = analyzer.tokenStream(null, germanExpandedUmlaut);
    BytesRef b2 = bytesFromTokenStream(tsExpanded);

    // Both spellings are expected to compare as equal under the tailored rules.
    assertTrue(compare(b1.bytes, b2.bytes) == 0);
}