Java Code Examples for com.ibm.icu.text.Collator#getInstance()

The following examples show how to use com.ibm.icu.text.Collator#getInstance(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. Related API usage is listed in the sidebar.
Example 1
Source File: QueryExecutor.java    From birt with Eclipse Public License 1.0 6 votes vote down vote up
/**
 * Copies the comparison settings of this data set's design onto the data
 * set: a collator built from the design's compare locale (or
 * <code>null</code> when the design has no compare locale) and the nulls
 * ordering. The collator is also stored in the data set's script scope
 * under the name "compare_locale".
 * <p>
 * Does nothing when there is no session, or when the engine has no design
 * for this data set.
 *
 * @throws DataException
 *             declared for the calls made here
 */
private void initializeCollator( ) throws DataException
{
	if ( session == null )
	{
		return;
	}

	IBaseDataSetDesign design = ( (DataEngineImpl) this.session.getEngine( ) ).getDataSetDesign( getDataSetName( ) );
	if ( design == null )
	{
		return;
	}

	String nullsOrdering = design.getNullsOrdering( );

	// A design without a compare locale yields no collator at all.
	Collator compareCollator = null;
	if ( design.getCompareLocale( ) != null )
	{
		compareCollator = Collator.getInstance( design.getCompareLocale( ) );
	}

	dataSet.setCompareLocale( compareCollator );
	dataSet.setNullest( nullsOrdering );

	dataSet.getScriptScope( ).put( "compare_locale",
			dataSet.getScriptScope( ),
			compareCollator );
}
 
Example 2
Source File: ResultSetWrapper.java    From birt with Eclipse Public License 1.0 6 votes vote down vote up
/**
 * Builds the collator used to sort the given series definition.
 *
 * @param sd
 *            the series definition supplying sort locale and sort strength
 * @return <code>null</code> when a negative sort strength is set (the
 *         ASCII(-1) case, where the default String comparison is used
 *         instead of a collator); otherwise a collator for the sort locale,
 *         or for the default locale when none is given, with the sort
 *         strength applied if one is set
 */
private Collator createCollator(SeriesDefinition sd )
{
	// Negative strength means "ASCII": callers fall back to plain String
	// comparison, so no collator is produced.
	if ( sd.isSetSortStrength( ) && sd.getSortStrength( ) < 0 )
	{
		return null;
	}

	Collator result = sd.getSortLocale( ) == null
			? Collator.getInstance( )
			: Collator.getInstance( new ULocale( sd.getSortLocale( ) ) );

	if ( sd.isSetSortStrength( ) )
	{
		result.setStrength( sd.getSortStrength( ) );
	}

	return result;
}
 
Example 3
Source File: TestICUCollationKeyAnalyzer.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Repeatedly builds an {@code ICUCollationKeyAnalyzer} around a German
 * collator at IDENTICAL strength and runs {@code assertThreadSafe} on it.
 * A fresh collator and analyzer are created on every iteration.
 */
public void testThreadSafe() throws Exception {
  int iters = 20 * RANDOM_MULTIPLIER;
  for (int i = 0; i < iters; i++) {
    Locale locale = Locale.GERMAN;
    Collator collator = Collator.getInstance(locale);
    collator.setStrength(Collator.IDENTICAL);
    // try-with-resources: the analyzer is closed even when the assertion
    // fails, instead of being leaked on the failure path.
    try (Analyzer a = new ICUCollationKeyAnalyzer(collator)) {
      assertThreadSafe(a);
    }
  }
}
 
Example 4
Source File: GroupCalculationUtil.java    From birt with Eclipse Public License 1.0 5 votes vote down vote up
/**
 * Sets the sort conditions and prepares one collator and one compare hint
 * per sort key.
 * 
 * @param sortKeys
 *            the sort keys; its length determines how many entries of the
 *            other arrays are read
 * @param sortOrderings
 *            the sort directions, stored as given
 * @param sortStrength
 *            per-key sort strength; a key whose strength equals
 *            <code>ISortDefinition.ASCII_SORT_STRENGTH</code> gets no
 *            collator (<code>null</code>)
 * @param sortLocale
 *            per-key locale used to obtain the collator for keys that are
 *            not ASCII-sorted
 */
void setSortCondition( Object[] sortKeys, boolean[] sortOrderings, int[] sortStrength, ULocale[] sortLocale )
{
	this.sortKeys = sortKeys;
	this.sortDirections = sortOrderings;

	final int keyCount = this.sortKeys.length;
	this.comparator = new Collator[keyCount];
	this.compareHints = new CompareHints[keyCount];

	for ( int idx = 0; idx < keyCount; idx++ )
	{
		if ( sortStrength[idx] == ISortDefinition.ASCII_SORT_STRENGTH )
		{
			// ASCII sort: no collator for this key.
			this.comparator[idx] = null;
		}
		else
		{
			this.comparator[idx] = Collator.getInstance( sortLocale[idx] );
		}
		this.compareHints[idx] = new CompareHints( this.comparator[idx], null );
	}
}
 
Example 5
Source File: QueryExecutor.java    From birt with Eclipse Public License 1.0 5 votes vote down vote up
/**
 * Builds the collator for a sort definition.
 *
 * @param sd
 *            the sort definition supplying sort strength and sort locale
 * @return <code>null</code> when the sort strength is -1; otherwise a
 *         collator for the definition's sort locale (falling back to the
 *         session's engine-context locale when the definition has none)
 *         with the definition's strength applied
 */
private Collator createCollator( ISortDefinition sd )
{
	if ( sd.getSortStrength( ) == -1 )
	{
		return null;
	}

	Collator result = Collator.getInstance( sd.getSortLocale( ) != null
			? sd.getSortLocale( )
			: session.getEngineContext( ).getLocale( ) );
	result.setStrength( sd.getSortStrength( ) );
	return result;
}
 
Example 6
Source File: IcuCollationAnalyzerTests.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Checks that an "icu_collation" analyzer configured with custom rules
 * (German base rules plus DIN 5007-2 tailorings) at primary strength
 * produces the same bytes for "Töne" and its expanded form "Toene".
 */
public void testCustomRules() throws Exception {
    RuleBasedCollator baseCollator = (RuleBasedCollator) Collator.getInstance(new ULocale("de_DE"));
    // DIN 5007-2: each umlaut sorts with its two-letter expansion. The last
    // rule must tailor the UPPERCASE U-umlaut after "UE"; the original text
    // repeated the lowercase one, which re-tailored the lowercase umlaut and
    // left the uppercase umlaut untailored.
    String DIN5007_2_tailorings =
            "& ae , a\u0308 & AE , A\u0308"
                    + "& oe , o\u0308 & OE , O\u0308"
                    + "& ue , u\u0308 & UE , U\u0308";

    RuleBasedCollator tailoredCollator = new RuleBasedCollator(baseCollator.getRules() + DIN5007_2_tailorings);
    String tailoredRules = tailoredCollator.getRules();

    Settings settings = Settings.builder()
            .put("index.analysis.analyzer.myAnalyzer.type", "icu_collation")
            .put("index.analysis.analyzer.myAnalyzer.rules", tailoredRules)
            .put("index.analysis.analyzer.myAnalyzer.strength", "primary")
            .build();
    ESTestCase.TestAnalysis analysis = ESTestCase.createTestAnalysis(new Index("test", "_na_"),
            settings,
            new BundlePlugin(Settings.EMPTY));
    Analyzer analyzer = analysis.indexAnalyzers.get("myAnalyzer");

    String germanUmlaut = "Töne";
    TokenStream tsUmlaut = analyzer.tokenStream(null, germanUmlaut);
    BytesRef b1 = bytesFromTokenStream(tsUmlaut);

    String germanExpandedUmlaut = "Toene";
    TokenStream tsExpanded = analyzer.tokenStream(null, germanExpandedUmlaut);
    BytesRef b2 = bytesFromTokenStream(tsExpanded);

    // Equal bytes mean both spellings collate identically.
    assertTrue(compare(b1.bytes, b2.bytes) == 0);
}
 
Example 7
Source File: IcuCollationKeyAnalyzerTests.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 5 votes vote down vote up
CollationParameters(Locale locale) {
    collator = Collator.getInstance(locale);
    analyzer = new IcuCollationKeyAnalyzer(collator);
    firstRangeBeginning = new BytesRef(collator.getCollationKey(firstRangeBeginningOriginal).toByteArray());
    firstRangeEnd = new BytesRef(collator.getCollationKey(firstRangeEndOriginal).toByteArray());
    secondRangeBeginning = new BytesRef(collator.getCollationKey(secondRangeBeginningOriginal).toByteArray());
    secondRangeEnd = new BytesRef(collator.getCollationKey(secondRangeEndOriginal).toByteArray());
}
 
Example 8
Source File: IcuCollationKeyAnalyzerTests.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Runs {@code assertThreadSafe} twenty times, each time against a new
 * {@code IcuCollationKeyAnalyzer} wrapping a fresh German collator set to
 * IDENTICAL strength.
 */
public void testThreadSafe() throws Exception {
    final int rounds = 20;
    final Locale german = Locale.GERMAN;
    for (int round = 0; round < rounds; round++) {
        // A new collator per round: strength is set on the instance itself.
        Collator germanCollator = Collator.getInstance(german);
        germanCollator.setStrength(Collator.IDENTICAL);
        assertThreadSafe(Randomness.get(), new IcuCollationKeyAnalyzer(germanCollator));
    }
}
 
Example 9
Source File: GenerateToStringDialog.java    From Eclipse-Postfix-Code-Completion with Eclipse Public License 1.0 5 votes vote down vote up
/**
 * Sorts the four binding arrays (fields, methods, inherited fields,
 * inherited methods) by binding name, comparing names with a collator
 * obtained from <code>Collator.getInstance()</code>.
 */
public void sort() {
	// One collator instance, shared by all four sorts through the comparator.
	final Collator nameCollator= Collator.getInstance();
	Comparator<IBinding> byName= new Comparator<IBinding>() {
		public int compare(IBinding left, IBinding right) {
			return nameCollator.compare(left.getName(), right.getName());
		}
	};
	Arrays.sort(fFields, byName);
	Arrays.sort(fMethods, byName);
	Arrays.sort(fInheritedFields, byName);
	Arrays.sort(fInheritedMethods, byName);
}
 
Example 10
Source File: TestICUCollationFieldDocValues.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a temporary Solr home with a "collection1" core (data and conf
 * directories), copies the ICU collation solrconfig and doc-values schema
 * into it, and generates "customrules.dat" with custom collation rules
 * (German base rules plus DIN 5007-2 tailorings).
 * <p>
 * Ugly: but what to do? We want to test custom sort, which reads rules in as a resource.
 * These are largish files, and jvm-specific (as our documentation says, you should always
 * look out for jvm differences with collation).
 * So it's preferable to create this file on-the-fly.
 *
 * @return the absolute path of the temporary Solr home directory
 */
public static String setupSolrHome() throws Exception {
  File tmpFile = createTempDir().toFile();
  
  // make data and conf dirs
  new File(tmpFile + "/collection1", "data").mkdirs();
  File confDir = new File(tmpFile + "/collection1", "conf");
  confDir.mkdirs();
  
  // copy over configuration files
  FileUtils.copyFile(getFile("analysis-extras/solr/collection1/conf/solrconfig-icucollate.xml"), new File(confDir, "solrconfig.xml"));
  FileUtils.copyFile(getFile("analysis-extras/solr/collection1/conf/schema-icucollate-dv.xml"), new File(confDir, "schema.xml"));
  
  // generate custom collation rules (DIN 5007-2), saving to customrules.dat
  RuleBasedCollator baseCollator = (RuleBasedCollator) Collator.getInstance(new ULocale("de", "DE"));

  // The last rule tailors the UPPERCASE U-umlaut after "UE"; the original
  // text repeated the lowercase umlaut there, which re-tailored the lowercase
  // one and left the uppercase umlaut untailored.
  String DIN5007_2_tailorings =
    "& ae , a\u0308 & AE , A\u0308"+
    "& oe , o\u0308 & OE , O\u0308"+
    "& ue , u\u0308 & UE , U\u0308";

  RuleBasedCollator tailoredCollator = new RuleBasedCollator(baseCollator.getRules() + DIN5007_2_tailorings);
  String tailoredRules = tailoredCollator.getRules();
  // try-with-resources so the stream is closed even if the write fails
  try (FileOutputStream os = new FileOutputStream(new File(confDir, "customrules.dat"))) {
    IOUtils.write(tailoredRules, os, "UTF-8");
  }

  return tmpFile.getAbsolutePath();
}
 
Example 11
Source File: TestICUCollationField.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a temporary Solr home with a "collection1" core (data and conf
 * directories), copies the ICU collation solrconfig and schema into it,
 * generates "customrules.dat" with custom collation rules (German base
 * rules plus DIN 5007-2 tailorings), and verifies that
 * {@code ICUCollationField.createFromRules} reads back a collator equal to
 * the one the rules were taken from.
 * <p>
 * Ugly: but what to do? We want to test custom sort, which reads rules in as a resource.
 * These are largish files, and jvm-specific (as our documentation says, you should always
 * look out for jvm differences with collation).
 * So it's preferable to create this file on-the-fly.
 *
 * @return the absolute path of the temporary Solr home directory
 */
public static String setupSolrHome() throws Exception {
  String tmpFile = createTempDir().toFile().getAbsolutePath();
  // make data and conf dirs
  new File(tmpFile  + "/collection1", "data").mkdirs();
  File confDir = new File(tmpFile + "/collection1", "conf");
  confDir.mkdirs();
  
  // copy over configuration files
  FileUtils.copyFile(getFile("analysis-extras/solr/collection1/conf/solrconfig-icucollate.xml"), new File(confDir, "solrconfig.xml"));
  FileUtils.copyFile(getFile("analysis-extras/solr/collection1/conf/schema-icucollate.xml"), new File(confDir, "schema.xml"));
  
  // generate custom collation rules (DIN 5007-2), saving to customrules.dat
  RuleBasedCollator baseCollator = (RuleBasedCollator) Collator.getInstance(new ULocale("de", "DE"));

  // The last rule tailors the UPPERCASE U-umlaut after "UE"; the original
  // text repeated the lowercase umlaut there, which re-tailored the lowercase
  // one and left the uppercase umlaut untailored.
  String DIN5007_2_tailorings =
    "& ae , a\u0308 & AE , A\u0308"+
    "& oe , o\u0308 & OE , O\u0308"+
    "& ue , u\u0308 & UE , U\u0308";

  RuleBasedCollator tailoredCollator = new RuleBasedCollator(baseCollator.getRules() + DIN5007_2_tailorings);
  String tailoredRules = tailoredCollator.getRules();
  final String osFileName = "customrules.dat";
  // try-with-resources so the stream is closed even if the write fails
  try (FileOutputStream os = new FileOutputStream(new File(confDir, osFileName))) {
    IOUtils.write(tailoredRules, os, "UTF-8");
  }

  // Read the rules back through one of two loaders, chosen at random:
  // from the in-memory string, or from the file just written.
  final ResourceLoader loader;
  if (random().nextBoolean()) {
    loader = new StringMockResourceLoader(tailoredRules);
  } else {
    loader = new FilesystemResourceLoader(confDir.toPath());
  }
  final Collator readCollator = ICUCollationField.createFromRules(osFileName, loader);
  assertEquals(tailoredCollator, readCollator);

  return tmpFile;
}
 
Example 12
Source File: TestICUCollationDocValuesField.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Indexes documents that each carry a random string twice: once as a
 * stored string field ("field") and once as an ICU collation doc-values
 * field ("collated"). It then hands random start/end strings, together
 * with their collation-key bytes, to {@code doTestRanges}.
 */
public void testRanges() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
  // A single Document and its two Field instances are reused for every
  // added document; only the field values change per iteration.
  Document doc = new Document();
  Field field = newField("field", "", StringField.TYPE_STORED);
  Collator collator = Collator.getInstance(); // uses -Dtests.locale
  // Randomly run at PRIMARY strength; otherwise the collator's strength is
  // left as obtained.
  if (random().nextBoolean()) {
    collator.setStrength(Collator.PRIMARY);
  }
  ICUCollationDocValuesField collationField = new ICUCollationDocValuesField("collated", collator);
  doc.add(field);
  doc.add(collationField);
  
  int numDocs = atLeast(500);
  for (int i = 0; i < numDocs; i++) {
    // Same value goes into both the stored field and the collated field.
    String value = TestUtil.randomSimpleString(random());
    field.setStringValue(value);
    collationField.setStringValue(value);
    iw.addDocument(doc);
  }
  
  // Obtain the reader before closing the writer.
  IndexReader ir = iw.getReader();
  iw.close();
  IndexSearcher is = newSearcher(ir);
  
  int numChecks = atLeast(100);
  for (int i = 0; i < numChecks; i++) {
    String start = TestUtil.randomSimpleString(random());
    String end = TestUtil.randomSimpleString(random());
    // Range bounds are the collation-key bytes of the raw strings.
    BytesRef lowerVal = new BytesRef(collator.getCollationKey(start).toByteArray());
    BytesRef upperVal = new BytesRef(collator.getCollationKey(end).toByteArray());
    doTestRanges(is, start, end, lowerVal, upperVal, collator);
  }
  
  ir.close();
  dir.close();
}
 
Example 13
Source File: SortMembersOperation.java    From Eclipse-Postfix-Code-Completion with Eclipse Public License 1.0 4 votes vote down vote up
/**
 * Creates the comparator: records the field-sorting flag, obtains a
 * collator via <code>Collator.getInstance()</code> and fetches the member
 * order preference cache from the Java plug-in.
 *
 * @param doNotSortFields value stored in <code>fDoNotSortFields</code>
 */
public DefaultJavaElementComparator(boolean doNotSortFields) {
	this.fDoNotSortFields= doNotSortFields;
	this.fCollator= Collator.getInstance();
	this.fMemberOrderCache= JavaPlugin.getDefault().getMemberOrderPreferenceCache();
}
 
Example 14
Source File: TestICUPortabilityBug.java    From database with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Unit test for ICU generation of Unicode sort keys. Builds the raw
 * collation key of a fixed string with an en_US collator (tertiary
 * strength, no decomposition) and compares it, without its trailing nul
 * byte, against the bytes known for the running ICU version.
 * <pre>
 * Input   : "__globalRowStore"
 * 
 * Expected: [7, -124, 7, -124, 53, 63, 69, 43, 41, 63, 75, 69, 85, 77, 79, 69, 75, 49, 1, 20, 1, 126, -113, -124, -113, 8]
 * </pre>
 * Only ICU 3.6 and 4.8 have known expectations; any other version fails
 * the test.
 */
public void test_ICU_Unicode_SortKey() {

    final String input = "__globalRowStore";

    // Buffer the sort key is written into.
    final RawCollationKey raw = new RawCollationKey(128);

    // Collator setup: locale, strength and decomposition mode.
    final RuleBasedCollator collator = (RuleBasedCollator) Collator
            .getInstance(new Locale("en", "US"));
    collator.setStrength(Collator.TERTIARY);
    collator.setDecomposition(Collator.NO_DECOMPOSITION);

    collator.getRawCollationKey(input, raw);

    // Copy the key out of the buffer, dropping the trailing nul byte.
    final byte[] actual = Arrays.copyOf(raw.bytes, raw.size - 1);

    if (log.isInfoEnabled()) {
        log.info("Actual  : " + Arrays.toString(actual));
    }

    // The expected sort key depends on the runtime ICU version.
    final int icuMajor = VersionInfo.ICU_VERSION.getMajor();
    final int icuMinor = VersionInfo.ICU_VERSION.getMinor();

    final byte[] expected;
    if (icuMajor == 3 && icuMinor == 6) {
        // bigdata was initially deployed against v3.6.
        expected = new byte[] { 7, -124, 7, -124, 53, 63, 69, 43, 41, 63,
                75, 69, 85, 77, 79, 69, 75, 49, 1, 20, 1, 126, -113, -124,
                -113, 8 };
    } else if (icuMajor == 4 && icuMinor == 8) {
        // The next bundled version was 4.8.
        expected = new byte[] { 6, 12, 6, 12, 51, 61, 67, 41, 39, 61, 73,
                67, 83, 75, 77, 67, 73, 47, 1, 20, 1, 126, -113, -124,
                -113, 8 };
    } else {
        throw new AssertionFailedError("Not an expected ICU version: "
                + VersionInfo.ICU_VERSION);
    }

    if (log.isInfoEnabled()) {
        log.info("Expected: " + Arrays.toString(expected));
    }

    final boolean keysMatch = Arrays.equals(expected, actual);
    if (!keysMatch) {
        fail("Expected: " + Arrays.toString(expected) + ", " + //
                "Actual: " + Arrays.toString(actual));
    }

}
 
Example 15
Source File: TupleComparator.java    From birt with Eclipse Public License 1.0 4 votes vote down vote up
/**
 * Creates a comparator over the given group keys, using the collator
 * returned by <code>Collator.getInstance( )</code>.
 *
 * @param keys
 *            the group keys, stored as the sort keys
 */
TupleComparator( GroupKey[] keys )
{
	this.iaSortKeys = keys;
	this.collator = Collator.getInstance( );
}
 
Example 16
Source File: CollatorObject.java    From es6draft with MIT License 4 votes vote down vote up
/**
 * Builds the ICU collator described by this object's locale, usage,
 * numeric, caseFirst, sensitivity and ignorePunctuation settings.
 *
 * @return the configured collator
 */
private Collator createCollator() {
    ULocale collatorLocale = ULocale.forLanguageTag(this.locale);
    if ("search".equals(usage)) {
        // "search" usage cannot be set through unicode extensions (u-co-search), handle here:
        collatorLocale = collatorLocale.setKeywordValue("collation", "search");
    }

    RuleBasedCollator result = (RuleBasedCollator) Collator.getInstance(collatorLocale);
    result.setDecomposition(Collator.CANONICAL_DECOMPOSITION);
    result.setNumericCollation(numeric);
    applyCaseFirst(result);
    applySensitivity(result);
    result.setAlternateHandlingShifted(ignorePunctuation);
    return result;
}

/** Applies the caseFirst setting ("upper", "lower" or "false") to the collator. */
private void applyCaseFirst(RuleBasedCollator target) {
    switch (caseFirst) {
    case "upper":
        target.setUpperCaseFirst(true);
        break;
    case "lower":
        target.setLowerCaseFirst(true);
        break;
    case "false":
        // Clear whichever case-first flag the collator currently has set.
        if (target.isLowerCaseFirst()) {
            target.setLowerCaseFirst(false);
        }
        if (target.isUpperCaseFirst()) {
            target.setUpperCaseFirst(false);
        }
        break;
    default:
        throw new AssertionError();
    }
}

/** Maps the sensitivity setting ("base", "accent", "case", "variant") to a collator strength. */
private void applySensitivity(RuleBasedCollator target) {
    switch (sensitivity) {
    case "base":
        target.setStrength(Collator.PRIMARY);
        break;
    case "accent":
        target.setStrength(Collator.SECONDARY);
        break;
    case "case":
        // Primary strength plus the case level.
        target.setStrength(Collator.PRIMARY);
        target.setCaseLevel(true);
        break;
    case "variant":
        target.setStrength(Collator.TERTIARY);
        break;
    default:
        throw new AssertionError();
    }
}
 
Example 17
Source File: QueryExecutor.java    From birt with Eclipse Public License 1.0 4 votes vote down vote up
/**
 * Executes the query once. Repeated calls after a successful execution
 * return immediately (after logging the start message).
 * <p>
 * Before running the query, the data set design's compare locale and nulls
 * ordering are published to the event handler's application context as
 * compare hints, provided the context, the design and the data set session
 * are all available.
 *
 * @param eventHandler
 *            the handler that receives the executor helper and is passed to
 *            the odi query execution
 * @throws DataException
 *             declared for the calls made here
 */
public void execute( IEventHandler eventHandler ) throws DataException
{
	logger.logp( Level.FINER,
			QueryExecutor.class.getName( ),
			"execute",
			"Start to execute" );

	if ( this.isExecuted )
	{
		return;
	}

	ExecutorHelper helper = new ExecutorHelper( this.parentHelper );

	eventHandler.setExecutorHelper( helper );

	if ( eventHandler.getAppContext( ) != null
			&& this.dataSet.getDesign( ) != null
			&& dataSet.getSession( ) != null )
	{
		String nullsOrdering = this.dataSet.getDesign( ).getNullsOrdering( );

		// No compare locale on the design means no collator in the hints.
		Collator compareCollator = null;
		if ( this.dataSet.getDesign( ).getCompareLocale( ) != null )
		{
			compareCollator = Collator.getInstance( this.dataSet.getDesign( )
					.getCompareLocale( ) );
		}

		eventHandler.getAppContext( )
				.put( "org.eclipse.birt.data.engine.expression.compareHints",
						new CompareHints( compareCollator, nullsOrdering ) );
	}

	// Execute the query
	odiResult = executeOdiQuery( eventHandler );

	helper.setScriptable( this.dataSet.getJSResultRowObject( ) );

	resetComputedColumns( );

	// Bind the row object to the odi result set
	this.dataSet.setResultSet( odiResult, false );

	// Calculate aggregate values
	//this.aggrTable.calculate( odiResult, getQueryScope( ) );

	this.isExecuted = true;

	logger.logp( Level.FINER,
			QueryExecutor.class.getName( ),
			"execute",
			"Finish executing" );
}
 
Example 18
Source File: ModelUtil.java    From birt with Eclipse Public License 1.0 4 votes vote down vote up
/**
 * Sorts a list of property definitions in place by their localized display
 * name and returns the same list.
 * <p>
 * The name used for sorting is assumed to be "groupName.displayName", in
 * which "groupName" is the localized name of the property group, if any,
 * and "displayName" is the localized name of the property. That is,
 * properties without groups sort by their property display names, while
 * properties with groups sort first by group name within the overall list,
 * then by property name within the group.
 * <p>
 * For example, given the groups "G" and "R" and the properties "alpha",
 * "G.beta", "G.sigma", "iota", "R.delta", "R.epsilon" and "theta", the
 * properties returned are assumed to be sorted into that order.
 * <p>
 * Comparison is done with a <code>Collator</code> for the current thread's
 * locale; sorting in English ignores case.
 * 
 * @param propDefns
 *            a list that contains PropertyDefns.
 * @return the list of <code>PropertyDefn</code> s, sorted by their display
 *         name.
 */

public static List<IPropertyDefn> sortPropertiesByLocalizedName(
		List<IPropertyDefn> propDefns )
{
	// Locale-sensitive String comparison for the current locale.
	ULocale currentLocale = ThreadResources.getLocale( );
	Collator sorter = Collator.getInstance( currentLocale );

	// Sorting in English should ignore case: at PRIMARY strength only
	// primary differences (such as different base letters, "a" vs "b")
	// are significant. Which language features count as primary is
	// locale dependent.
	if ( ULocale.ENGLISH.equals( currentLocale ) )
	{
		sorter.setStrength( Collator.PRIMARY );
	}

	// Precompute one CollationKey per property; comparing keys is generally
	// faster than calling Collator.compare for every comparison.
	final Map<PropertyDefn, CollationKey> collationKeys = new HashMap<PropertyDefn, CollationKey>( );
	for ( IPropertyDefn candidate : propDefns )
	{
		PropertyDefn defn = (PropertyDefn) candidate;
		collationKeys.put( defn,
				sorter.getCollationKey( defn.getDisplayName( ) ) );
	}

	Collections.sort( propDefns, new Comparator<IPropertyDefn>( ) {

		public int compare( IPropertyDefn o1, IPropertyDefn o2 )
		{
			// The relative order of two keys is the relative order of the
			// display names they were built from.
			CollationKey leftKey = collationKeys.get( (PropertyDefn) o1 );
			CollationKey rightKey = collationKeys.get( (PropertyDefn) o2 );
			return leftKey.compareTo( rightKey );
		}
	} );

	return propDefns;
}
 
Example 19
Source File: GlobalizationPreferences.java    From fitnotifications with Apache License 2.0 3 votes vote down vote up
/**
 * Returns a collator for the available collator locale, falling back to
 * the root locale when none is available.
 * This function can be overridden by subclasses to use different heuristics.
 * <b>It MUST return a 'safe' value,
 * one whose modification will not affect this object.</b>
 *
 * @draft ICU 3.6
 * @provisional This API might change or be removed in a future release.
 */
protected Collator guessCollator() {
    final ULocale available = getAvailableLocale(TYPE_COLLATOR);
    return Collator.getInstance(available == null ? ULocale.ROOT : available);
}
 
Example 20
Source File: ICUCollationField.java    From lucene-solr with Apache License 2.0 2 votes vote down vote up
/**
 * Builds a locale from the given locale ID and returns the collator
 * obtained for that locale.
 */
private Collator createFromLocale(String localeID) {
  final ULocale locale = new ULocale(localeID);
  return Collator.getInstance(locale);
}