com.ibm.icu.text.Collator Java Examples

The following examples show how to use com.ibm.icu.text.Collator. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: AccessorClassCreator.java    From Eclipse-Postfix-Code-Completion with Eclipse Public License 1.0 6 votes vote down vote up
/**
 * Builds the static field text for every substitution whose state changed (or whose
 * accessor was renamed) and whose current state is {@link NLSSubstitution#EXTERNALIZED}.
 * Each key contributes at most one field, and the fields are ordered by key using the
 * default collator.
 *
 * @return the text produced by calling {@code appendStaticField} for each selected substitution
 */
private String createStaticFields() {
	List<NLSSubstitution> externalized= new ArrayList<NLSSubstitution>();
	HashSet<String> seenKeys= new HashSet<String>();
	for (NLSSubstitution candidate : fNLSSubstitutions) {
		int state= candidate.getState();
		boolean touched= candidate.hasStateChanged() || candidate.isAccessorRename();
		if (!touched || state != NLSSubstitution.EXTERNALIZED)
			continue;
		// HashSet#add reports whether the key was new, which filters out duplicate keys.
		if (seenKeys.add(candidate.getKey()))
			externalized.add(candidate);
	}

	Collections.sort(externalized, new Comparator<NLSSubstitution>() {
		// Created once per sort and reused for every comparison.
		private Collator fKeyCollator= Collator.getInstance();
		public int compare(NLSSubstitution left, NLSSubstitution right) {
			return fKeyCollator.compare(left.getKey(), right.getKey());
		}
	});

	StringBuffer fields= new StringBuffer();
	for (NLSSubstitution substitution : externalized) {
		appendStaticField(fields, substitution);
	}
	return fields.toString();
}
 
Example #2
Source File: CollationBuilder.java    From fitnotifications with Apache License 2.0 6 votes vote down vote up
/**
 * Makes and inserts a new tailored node into the list, after the one at index.
 * Following nodes of weaker strength are skipped so that collation order is
 * maintained ("postpone insertion").
 *
 * @param index index of the node after which the new node is to be inserted
 * @param strength strength of the new node
 * @return the new node's index
 */
private int insertTailoredNodeAfter(int index, int strength) {
    assert(0 <= index && index < nodes.size());
    if(strength >= Collator.SECONDARY) {
        index = findCommonNode(index, Collator.SECONDARY);
        if(strength >= Collator.TERTIARY) {
            index = findCommonNode(index, Collator.TERTIARY);
        }
    }
    // Postpone insertion:
    // Walk forward until the list ends (next index 0) or the next node has
    // a strength at least as strong as the new one; insert just before it.
    int nextIndex = nextIndexFromNode(nodes.elementAti(index));
    while(nextIndex != 0) {
        long nextNode = nodes.elementAti(nextIndex);
        if(strengthFromNode(nextNode) <= strength) { break; }
        // The next node has a weaker (larger) strength than the new one: step over it.
        index = nextIndex;
        nextIndex = nextIndexFromNode(nextNode);
    }
    long tailoredNode = IS_TAILORED | nodeFromStrength(strength);
    return insertNodeBetween(index, nextIndex, tailoredNode);
}
 
Example #3
Source File: CollationBuilder.java    From fitnotifications with Apache License 2.0 6 votes vote down vote up
/**
 * Finds or inserts the node for each of a root CE's weights, from the primary
 * weight down to the requested level/strength.
 *
 * @param ce the root collation element
 * @param strength the weakest level for which a node is needed
 * @return the index of the node for the weakest requested level
 */
private int findOrInsertNodeForRootCE(long ce, int strength) {
    assert((int)(ce >>> 56) != Collation.UNASSIGNED_IMPLICIT_BYTE);
    // Root CEs must have common=zero quaternary weights (for which we never insert any nodes).
    assert((ce & 0xc0) == 0);

    // Primary weight: upper 32 bits.
    int index = findOrInsertNodeForPrimary(ce >>> 32);
    if(strength < Collator.SECONDARY) {
        return index;
    }

    // Secondary weight: upper half of the lower 32 bits.
    int lower32 = (int)ce;
    index = findOrInsertWeakNode(index, lower32 >>> 16, Collator.SECONDARY);
    if(strength < Collator.TERTIARY) {
        return index;
    }

    // Tertiary weight: masked out of the lower 32 bits.
    return findOrInsertWeakNode(index, lower32 & Collation.ONLY_TERTIARY_MASK,
                                Collator.TERTIARY);
}
 
Example #4
Source File: CollatorConstructor.java    From es6draft with MIT License 6 votes vote down vote up
/**
 * Collects the collation types available for {@code locale}.
 *
 * @return a list whose first element is {@code null}, followed by the names of all
 *         collation types other than 'standard' and 'search'; for search usage the
 *         list contains only {@code null}
 */
private List<String> getCollationInfo() {
    // FIXME: spec issue? Search collators cannot specify any other collation type through a Unicode extension
    // value. Does this apply to all Collator implementations or just when implemented with ICU? If the former,
    // does it make sense to change the spec to make it more clear that %Collator%.[[SearchLocaleData]] always
    // returns a list containing only `null` for the "co" extension key?
    if (usage == CollatorUsage.Search) {
        return Collections.singletonList((String) null);
    }

    String[] keywordValues = Collator.getKeywordValuesForLocale("collation", locale, false);
    ArrayList<String> collations = new ArrayList<>(keywordValues.length);
    // null must be first value, cf. 10.2.3
    collations.add(null);
    for (String keywordValue : keywordValues) {
        CollationType type = CollationType.forName(keywordValue);
        // 'standard' and 'search' must not be elements of 'co' array, cf. 10.2.3
        boolean excluded = type == CollationType.standard || type == CollationType.search;
        if (!excluded) {
            collations.add(type.getName());
        }
    }
    return collations;
}
 
Example #5
Source File: CleanUpRegistry.java    From Eclipse-Postfix-Code-Completion with Eclipse Public License 1.0 6 votes vote down vote up
/**
 * Lazily reads the tab page descriptors from the extension registry and stores
 * them, sorted by name, in {@code fPageDescriptors}. Does nothing if the
 * descriptors have already been loaded.
 */
private synchronized void ensurePagesRegistered() {
	if (fPageDescriptors != null)
		return;
	
	ArrayList<CleanUpTabPageDescriptor> result= new ArrayList<CleanUpTabPageDescriptor>();

	IExtensionPoint point= Platform.getExtensionRegistry().getExtensionPoint(JavaPlugin.getPluginId(), EXTENSION_POINT_NAME);
	IConfigurationElement[] elements= point.getConfigurationElements();
	for (int i= 0; i < elements.length; i++) {
		IConfigurationElement element= elements[i];

		if (TABPAGE_CONFIGURATION_ELEMENT_NAME.equals(element.getName())) {
			result.add(new CleanUpTabPageDescriptor(element));
		}
	}

	fPageDescriptors= result.toArray(new CleanUpTabPageDescriptor[result.size()]);
	Arrays.sort(fPageDescriptors, new Comparator<CleanUpTabPageDescriptor>() {
		// Obtain the collator once per sort rather than once per comparison.
		private final Collator fCollator= Collator.getInstance();
		public int compare(CleanUpTabPageDescriptor o1, CleanUpTabPageDescriptor o2) {
			String name1= o1.getName();
			String name2= o2.getName();
			// Ampersands are stripped so they do not influence the ordering.
			return fCollator.compare(name1.replaceAll("&", ""), name2.replaceAll("&", "")); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
		}
	});
}
 
Example #6
Source File: AddImportOnSelectionAction.java    From Eclipse-Postfix-Code-Completion with Eclipse Public License 1.0 6 votes vote down vote up
/**
 * Orders two names by their position in the qualified type name history,
 * higher position first; names with the same position are ordered by the
 * default collator.
 *
 * @param o1 the first name
 * @param o2 the second name
 * @return 0 for equal strings, otherwise a negative or positive value as described above
 */
public int compare(String o1, String o2) {
	if (o1.equals(o2))
		return 0;

	History history= QualifiedTypeNameHistory.getDefault();
	int firstPosition= history.getPosition(o1);
	int secondPosition= history.getPosition(o2);

	if (firstPosition != secondPosition)
		return firstPosition > secondPosition ? -1 : 1;

	// Same history position: fall back to collation order.
	return Collator.getInstance().compare(o1, o2);
}
 
Example #7
Source File: CollationData.java    From fitnotifications with Apache License 2.0 6 votes vote down vote up
/**
 * Looks up the entry of {@code scriptsIndex} for a script or special reorder code.
 *
 * @param script a script code, or a reorder code at or above {@code Collator.ReorderCodes.FIRST}
 * @return the {@code scriptsIndex} entry, or 0 if the code is outside the covered ranges
 */
private int getScriptIndex(int script) {
    if(script < 0) {
        return 0;
    }
    if(script < numScripts) {
        return scriptsIndex[script];
    }
    if(script < Collator.ReorderCodes.FIRST) {
        return 0;
    }
    // Special reorder codes are stored after the regular scripts.
    int specialOffset = script - Collator.ReorderCodes.FIRST;
    return specialOffset < MAX_NUM_SPECIAL_REORDER_CODES
            ? scriptsIndex[numScripts + specialOffset]
            : 0;
}
 
Example #8
Source File: BirtComp.java    From birt with Eclipse Public License 1.0 6 votes vote down vote up
/**
 * Evaluates whether {@code args[0]} lies between {@code args[1]} and
 * {@code args[2]} (both bounds inclusive), comparing with the collator stored
 * under the "compare_locale" property of the context. When {@code mode} is
 * false the result is negated.
 *
 * @param args exactly three values: the candidate, the lower bound, the upper bound
 * @param context the script function context supplying the collator
 * @return {@link Boolean#TRUE} or {@link Boolean#FALSE}
 * @throws IllegalArgumentException if {@code args} is null or does not hold three
 *         values, or if a comparison fails (the original exception is kept as the cause)
 */
public Object execute( Object[] args, IScriptFunctionContext context ) throws BirtException
{
	Collator collator = (Collator) context.findProperty( "compare_locale" );
	if ( args == null || args.length != 3 )
		throw new IllegalArgumentException( MessageFormat.format( WRONG_ARGUMENT,
				new Object[]{
					BETWEEN
				} ) );
	
	try
	{
		boolean inRange = compare( args[0], args[1], collator ) >= 0
				&& compare( args[0], args[2], collator ) <= 0;
		return Boolean.valueOf( this.mode ? inRange : !inRange );
	}
	catch ( BirtException e )
	{
		// Chain the cause so the original stack trace is not lost.
		throw new IllegalArgumentException( e.getLocalizedMessage( ), e );
	}
}
 
Example #9
Source File: CollationRuleParser.java    From fitnotifications with Apache License 2.0 6 votes vote down vote up
/**
 * Gets a script or reorder code from its string representation.
 * Special reorder code names are tried first, then script property value
 * names, and finally the word "others"; all name matching here is case-insensitive
 * for the special names and "others".
 *
 * @param word the name to look up
 * @return the script/reorder code, or
 * -1 if not recognized
 */
public static int getReorderCode(String word) {
    int specialIndex = 0;
    for(String specialName : gSpecialReorderCodes) {
        if(word.equalsIgnoreCase(specialName)) {
            return Collator.ReorderCodes.FIRST + specialIndex;
        }
        ++specialIndex;
    }

    int script;
    try {
        script = UCharacter.getPropertyValueEnum(UProperty.SCRIPT, word);
    } catch (IllegalIcuArgumentException e) {
        // not a script name: continue with the remaining checks
        script = -1;
    }
    if(script >= 0) {
        return script;
    }

    // "others" is the same as Zzzz = USCRIPT_UNKNOWN
    return word.equalsIgnoreCase("others") ? Collator.ReorderCodes.OTHERS : -1;
}
 
Example #10
Source File: CollationSpecifier.java    From sql-layer with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Configures strength and case level of a collator from the case and accent
 * sensitivity of a specifier:
 * <ul>
 * <li>case + accent sensitive: TERTIARY, case level off</li>
 * <li>case sensitive only: PRIMARY, case level on</li>
 * <li>accent sensitive only: SECONDARY, case level off</li>
 * <li>neither: PRIMARY, case level off</li>
 * </ul>
 *
 * @param collator the collator to configure
 * @param specifier supplies the case/accent sensitivity flags
 */
private static void setCollatorStrength(RuleBasedCollator collator, CollationSpecifier specifier) {
    boolean caseSensitive = specifier.caseSensitive();
    boolean accentSensitive = specifier.accentSensitive();

    if (caseSensitive) {
        if (accentSensitive) {
            collator.setStrength(Collator.TERTIARY);
            collator.setCaseLevel(false);
        }
        else {
            collator.setCaseLevel(true);
            collator.setStrength(Collator.PRIMARY);
        }
    }
    else {
        collator.setStrength(accentSensitive ? Collator.SECONDARY : Collator.PRIMARY);
        collator.setCaseLevel(false);
    }
}
 
Example #11
Source File: CollationFastLatinBuilder.java    From fitnotifications with Apache License 2.0 6 votes vote down vote up
/**
 * Writes the header unit to {@code result}, reserves one slot per special
 * reordering group, and records the group boundary primaries needed later
 * (last primary of each special group, first digit primary, first and last
 * Latin primary).
 *
 * @param data the collation data to read group boundaries from
 * @return false if a special group's last primary, the first digit primary or
 *         the first Latin primary is reported as 0 (missing data); true otherwise
 */
private boolean loadGroups(CollationData data) {
    headerLength = 1 + NUM_SPECIAL_GROUPS;
    // Header unit: version in the upper byte, header length in the lower byte.
    result.append((char)((CollationFastLatin.VERSION << 8) | headerLength));

    // The first few reordering groups should be special groups
    // (space, punct, ..., digit) followed by Latn, then Grek and other scripts.
    int group = 0;
    while(group < NUM_SPECIAL_GROUPS) {
        lastSpecialPrimaries[group] = data.getLastPrimaryForGroup(Collator.ReorderCodes.FIRST + group);
        if(lastSpecialPrimaries[group] == 0) {
            // missing data
            return false;
        }
        result.append(0);  // reserve a slot for this group
        ++group;
    }

    firstDigitPrimary = data.getFirstPrimaryForGroup(Collator.ReorderCodes.DIGIT);
    firstLatinPrimary = data.getFirstPrimaryForGroup(UScript.LATIN);
    lastLatinPrimary = data.getLastPrimaryForGroup(UScript.LATIN);
    // A zero first primary means missing data.
    return firstDigitPrimary != 0 && firstLatinPrimary != 0;
}
 
Example #12
Source File: QueryExecutor.java    From birt with Eclipse Public License 1.0 6 votes vote down vote up
/**
 * Copies the null ordering and the compare-locale collator of the data set
 * design onto {@code dataSet}, and publishes the collator in the data set's
 * script scope under the name "compare_locale". Does nothing when there is no
 * session or no design for the data set name.
 *
 * @throws DataException declared for the calls made while resolving the design
 */
private void initializeCollator( ) throws DataException
{
	if ( session == null )
	{
		return;
	}

	IBaseDataSetDesign design = ( (DataEngineImpl) this.session.getEngine( ) ).getDataSetDesign( getDataSetName( ) );
	if ( design == null )
	{
		return;
	}

	String nullOrdering = design.getNullsOrdering( );
	// No compare locale on the design means no collator (null is passed on as is).
	Collator collator = null;
	if ( design.getCompareLocale( ) != null )
	{
		collator = Collator.getInstance( design.getCompareLocale( ) );
	}

	dataSet.setCompareLocale( collator );
	dataSet.setNullest( nullOrdering );

	dataSet.getScriptScope( ).put( "compare_locale",
			dataSet.getScriptScope( ),
			collator );
}
 
Example #13
Source File: IcuCollationAttributeFactory.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Create a new ICU collated term attribute implementation.
 * The given collator is cloned and the clone is stored, not the argument itself.
 *
 * @param collator Collation key generator
 * @throws UnsupportedOperationException if the collator cannot be cloned
 */
IcuCollatedTermAttributeImpl(Collator collator) {
    // clone the collator: see http://userguide.icu-project.org/collation/architecture
    Collator privateCopy;
    try {
        privateCopy = (Collator) collator.clone();
    } catch (CloneNotSupportedException e) {
        throw new UnsupportedOperationException(e);
    }
    this.collator = privateCopy;
}
 
Example #14
Source File: PropertyFileDocumentModel.java    From Eclipse-Postfix-Code-Completion with Eclipse Public License 1.0 5 votes vote down vote up
/**
 * Adds a key/value pair to the model at the position chosen by
 * {@code findInsertPosition} and returns the text edit that inserts it.
 * The leading white space of the pair currently at that position either stays
 * in front of that pair or is handed over to the new pair, depending on which
 * neighbouring key the new key is closer to.
 *
 * @param keyValuePair the pair to insert
 * @return the edit inserting the pair's text at the computed offset
 */
private InsertEdit insert(KeyValuePair keyValuePair) {
	KeyValuePairModell inserted= new KeyValuePairModell(keyValuePair);
	int index= findInsertPosition(inserted);
	KeyValuePairModell successor= fKeyValuePairs.get(index);
	int offset= successor.fOffset;
	String prefix= ""; //$NON-NLS-1$

	if (successor instanceof LastKeyValuePair && ((LastKeyValuePair) successor).needsNewLine()) {
		LastKeyValuePair last= (LastKeyValuePair) successor;
		prefix= fLineDelimiter;
		last.resetNeedsNewLine();
		offset-= successor.fLeadingWhiteSpaces;
	} else if (index > 0) {
		String beforeKey= fKeyValuePairs.get(index - 1).fKey;
		String afterKey= successor.fKey;
		String key= keyValuePair.fKey;
		int distBefore= NLSUtil.invertDistance(key, beforeKey);
		int distAfter= NLSUtil.invertDistance(key, afterKey);
		// Ties between the two distances are broken by the collation order of the neighbours.
		boolean attachToPredecessor= distBefore > distAfter
				|| (distBefore == distAfter && Collator.getInstance().compare(beforeKey, afterKey) < 0);
		if (attachToPredecessor) {
			offset-= successor.fLeadingWhiteSpaces;
		} else {
			//insert it before afterKey -> move the leading white spaces to the inserted pair
			inserted.fLeadingWhiteSpaces= successor.fLeadingWhiteSpaces;
			successor.fLeadingWhiteSpaces= 0;
		}
	}

	String text= prefix + inserted.getKeyValueText();
	inserted.fOffset= offset;
	inserted.fLength= text.length();
	fKeyValuePairs.add(index, inserted);
	return new InsertEdit(offset, text);
}
 
Example #15
Source File: QuickTemplateProcessor.java    From Eclipse-Postfix-Code-Completion with Eclipse Public License 1.0 5 votes vote down vote up
/**
 * Sorts the proposals in place by their display string, using the default collator.
 *
 * @param proposals the proposals to sort
 */
private void sort(ArrayList<IJavaCompletionProposal> proposals) {
	Collections.sort(proposals, new Comparator<IJavaCompletionProposal>() {
		// Obtain the collator once per sort rather than once per comparison.
		private final Collator fCollator= Collator.getInstance();
		public int compare(IJavaCompletionProposal p1, IJavaCompletionProposal p2) {
			return fCollator.compare(p1.getDisplayString(), p2.getDisplayString());
		}
	});
}
 
Example #16
Source File: IcuCollationKeyAnalyzerTests.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Sets up the collator and analyzer for a locale and pre-computes the
 * collation-key bytes of the four range boundary strings.
 *
 * @param locale the locale whose collator is used
 */
CollationParameters(Locale locale) {
    Collator localeCollator = Collator.getInstance(locale);
    collator = localeCollator;
    analyzer = new IcuCollationKeyAnalyzer(localeCollator);

    // Each boundary is stored as the byte form of its collation key.
    firstRangeBeginning = new BytesRef(
            localeCollator.getCollationKey(firstRangeBeginningOriginal).toByteArray());
    firstRangeEnd = new BytesRef(
            localeCollator.getCollationKey(firstRangeEndOriginal).toByteArray());
    secondRangeBeginning = new BytesRef(
            localeCollator.getCollationKey(secondRangeBeginningOriginal).toByteArray());
    secondRangeEnd = new BytesRef(
            localeCollator.getCollationKey(secondRangeEndOriginal).toByteArray());
}
 
Example #17
Source File: IcuCollationKeyAnalyzerTests.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Runs the thread-safety check 20 times, each time with a fresh German
 * collator set to IDENTICAL strength.
 */
public void testThreadSafe() throws Exception {
    final int rounds = 20;
    int round = 0;
    while (round < rounds) {
        Collator germanCollator = Collator.getInstance(Locale.GERMAN);
        germanCollator.setStrength(Collator.IDENTICAL);
        assertThreadSafe(Randomness.get(), new IcuCollationKeyAnalyzer(germanCollator));
        round++;
    }
}
 
Example #18
Source File: IcuCollationKeyFieldMapper.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Creates the field mapper and stores the collator together with the settings
 * it was built from. The collator is expected to be frozen already; this is
 * checked by an assertion only.
 *
 * @param collatorSettings the settings kept alongside the collator
 * @param collator the (frozen) collator to keep
 */
protected IcuCollationKeyFieldMapper(String simpleName, MappedFieldType fieldType, MappedFieldType defaultFieldType,
                                     Settings indexSettings, MultiFields multiFields,
                                     CopyTo copyTo, Settings collatorSettings, Collator collator) {
    super(simpleName, fieldType, defaultFieldType, indexSettings, multiFields, copyTo);
    assert collator.isFrozen();
    this.getDVField = SortedSetDocValuesField::new;
    this.collator = collator;
    this.collatorSettings = collatorSettings;
}
 
Example #19
Source File: IcuCollationAnalyzerTests.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Builds an "icu_collation" analyzer from German rules extended with DIN 5007-2
 * tailorings and checks that, at primary strength, "Töne" and "Toene" produce
 * equal bytes.
 */
public void testCustomRules() throws Exception {
    RuleBasedCollator germanCollator = (RuleBasedCollator) Collator.getInstance(new ULocale("de_DE"));
    // NOTE(review): the last pair resets on "UE" but tailors a lowercase u + U+0308,
    // unlike the AE/OE pairs which use the uppercase letter - confirm this is intended.
    String DIN5007_2_tailorings =
            "& ae , a\u0308 & AE , A\u0308"
            + "& oe , o\u0308 & OE , O\u0308"
            + "& ue , u\u0308 & UE , u\u0308";

    String tailoredRules =
            new RuleBasedCollator(germanCollator.getRules() + DIN5007_2_tailorings).getRules();

    Settings.Builder builder = Settings.builder();
    builder.put("index.analysis.analyzer.myAnalyzer.type", "icu_collation");
    builder.put("index.analysis.analyzer.myAnalyzer.rules", tailoredRules);
    builder.put("index.analysis.analyzer.myAnalyzer.strength", "primary");
    Settings settings = builder.build();

    ESTestCase.TestAnalysis analysis = ESTestCase.createTestAnalysis(new Index("test", "_na_"),
            settings,
            new BundlePlugin(Settings.EMPTY));
    Analyzer analyzer = analysis.indexAnalyzers.get("myAnalyzer");

    // Umlaut form and its expanded spelling must compare equal.
    BytesRef umlautBytes = bytesFromTokenStream(analyzer.tokenStream(null, "Töne"));
    BytesRef expandedBytes = bytesFromTokenStream(analyzer.tokenStream(null, "Toene"));

    assertTrue(compare(umlautBytes.bytes, expandedBytes.bytes) == 0);
}
 
Example #20
Source File: AggregationCellProviderWrapper.java    From birt with Eclipse Public License 1.0 5 votes vote down vote up
/**
 * Compares two aggregation cell view providers by display name using the
 * default collator. A null display name is treated as the empty string, and
 * the operands are swapped when {@code ascending} is false.
 *
 * @param arg0 the first provider
 * @param arg1 the second provider
 * @return the collator's comparison result for the two display names
 */
public int compare( Object arg0, Object arg1 )
{
	assert ( arg0 instanceof IAggregationCellViewProvider );
	assert ( arg1 instanceof IAggregationCellViewProvider );

	String leftName = ( (IAggregationCellViewProvider) arg0 ).getViewDisplayName( );
	String rightName = ( (IAggregationCellViewProvider) arg1 ).getViewDisplayName( );
	leftName = leftName == null ? "" : leftName;//$NON-NLS-1$
	rightName = rightName == null ? "" : rightName;//$NON-NLS-1$

	return ascending
			? Collator.getInstance( ).compare( leftName, rightName )
			: Collator.getInstance( ).compare( rightName, leftName );
}
 
Example #21
Source File: TestICUCollationField.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Ugly: but what to do? We want to test custom sort, which reads rules in as a resource.
 * These are largish files, and jvm-specific (as our documentation says, you should always
 * look out for jvm differences with collation).
 * So it's preferable to create this file on-the-fly.
 *
 * @return the absolute path of the temporary directory that was populated
 */
public static String setupSolrHome() throws Exception {
  String tmpFile = createTempDir().toFile().getAbsolutePath();
  // make data and conf dirs
  new File(tmpFile  + "/collection1", "data").mkdirs();
  File confDir = new File(tmpFile + "/collection1", "conf");
  confDir.mkdirs();
  
  // copy over configuration files
  FileUtils.copyFile(getFile("analysis-extras/solr/collection1/conf/solrconfig-icucollate.xml"), new File(confDir, "solrconfig.xml"));
  FileUtils.copyFile(getFile("analysis-extras/solr/collection1/conf/schema-icucollate.xml"), new File(confDir, "schema.xml"));
  
  // generate custom collation rules (DIN 5007-2), saving to customrules.dat
  RuleBasedCollator baseCollator = (RuleBasedCollator) Collator.getInstance(new ULocale("de", "DE"));

  // NOTE(review): the last pair resets on "UE" but tailors a lowercase u + U+0308,
  // unlike the AE/OE pairs which use the uppercase letter - confirm this is intended.
  String DIN5007_2_tailorings =
    "& ae , a\u0308 & AE , A\u0308"+
    "& oe , o\u0308 & OE , O\u0308"+
    "& ue , u\u0308 & UE , u\u0308";

  RuleBasedCollator tailoredCollator = new RuleBasedCollator(baseCollator.getRules() + DIN5007_2_tailorings);
  String tailoredRules = tailoredCollator.getRules();
  final String osFileName = "customrules.dat";
  // try-with-resources closes the stream even when the write fails
  try (FileOutputStream os = new FileOutputStream(new File(confDir, osFileName))) {
    IOUtils.write(tailoredRules, os, "UTF-8");
  }

  // Randomly read the rules back either from memory or from the file just written.
  final ResourceLoader loader;
  if (random().nextBoolean()) {
    loader = new StringMockResourceLoader(tailoredRules);
  } else {
    loader = new FilesystemResourceLoader(confDir.toPath());
  }
  final Collator readCollator = ICUCollationField.createFromRules(osFileName, loader);
  assertEquals(tailoredCollator, readCollator);

  return tmpFile;
}
 
Example #22
Source File: AkCollatorFactoryTest.java    From sql-layer with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Starts NTHREADS threads that each fetch the collator held by the "sv_se_ci"
 * AkCollator, then checks that every thread obtained a non-null instance and
 * that no two threads obtained the same instance.
 */
@Test
public void uniquePerThread() throws Exception {
    final Collator[] collatorsSeen = new Collator[NTHREADS];
    final AtomicInteger nextSlot = new AtomicInteger();

    Thread[] workers = new Thread[NTHREADS];
    for (int t = 0; t < NTHREADS; t++) {
        workers[t] = new Thread(new Runnable() {
            public void run() {
                // Each worker claims its own slot in the shared array.
                int slot = nextSlot.getAndIncrement();
                AkCollatorICU akCollator = (AkCollatorICU) (AkCollatorFactory.getAkCollator("sv_se_ci"));
                collatorsSeen[slot] = akCollator.collator.get();
            }
        });
    }
    for (Thread worker : workers) {
        worker.start();
    }
    for (Thread worker : workers) {
        worker.join();
    }

    for (int current = 0; current < NTHREADS; current++) {
        assertNotNull("Null", collatorsSeen[current]);
        // Compare against every earlier slot: identity must differ.
        for (int earlier = 0; earlier < current; earlier++) {
            assertTrue("Not unique", collatorsSeen[current] != collatorsSeen[earlier]);
        }
    }
}
 
Example #23
Source File: AkCollatorFactory.java    From sql-layer with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Construct an actual ICU Collator given a collation specifier. The collator
 * created for a specifier is kept in {@code sourceMap}, keyed by the
 * specifier's string form; every call returns a thawed clone of that entry.
 * The result is a Collator that must be used in a thread-private manner.
 *
 * @param specifier describes the collation to build
 * @return a thawed clone of the collator registered for the specifier
 */
static synchronized Collator forScheme(final CollationSpecifier specifier) {
    final String key = specifier.toString();
    RuleBasedCollator master = (RuleBasedCollator) sourceMap.get(key);
    if (master == null) {
        // First request for this specifier: build the collator and remember it.
        master = specifier.createCollator();
        sourceMap.put(key, master);
    }
    return master.cloneAsThawed();
}
 
Example #24
Source File: SortingTestCase.java    From vespa with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the Arabic collator rules, both for a collator obtained directly and
 * for the one inside the sorter parsed from "uca(a,ar)", and then checks a few
 * orderings through that sorter.
 */
@Test
@Ignore
public void requireThatArabicSortCorrect() {
    requireThatArabicHasCorrectRules(Collator.getInstance(new ULocale("ar")));

    Sorting sorting = Sorting.fromString("uca(a,ar)");
    assertEquals(1, sorting.fieldOrders().size());

    Sorting.FieldOrder order = sorting.fieldOrders().get(0);
    assertTrue(order.getSorter() instanceof Sorting.UcaSorter);
    Sorting.UcaSorter ucaSorter = (Sorting.UcaSorter) order.getSorter();
    requireThatArabicHasCorrectRules(ucaSorter.getCollator());

    Sorting.AttributeSorter attributeSorter = order.getSorter();
    assertTrue(attributeSorter.compare("a", "b") < 0);
    assertTrue(attributeSorter.compare("a", "aس") < 0);
    assertTrue(attributeSorter.compare("س", "a") < 0);
}
 
Example #25
Source File: SortingTestCase.java    From vespa with Apache License 2.0 5 votes vote down vote up
/**
 * Asserts the expected UCA version, collator version and reorder codes
 * (Han only) of a Chinese collator, and that its rule string is not empty.
 *
 * @param col the collator to check; must be a RuleBasedCollator
 */
private void requireThatChineseHasCorrectRules(Collator col) {
    assertEquals("8.0.0.0", col.getUCAVersion().toString());
    assertEquals("153.64.29.0", col.getVersion().toString());

    final int[] expectedReorderCodes = {UScript.HAN};
    assertEquals(Arrays.toString(expectedReorderCodes), Arrays.toString(col.getReorderCodes()));

    RuleBasedCollator ruleBased = (RuleBasedCollator) col;
    assertNotEquals("", ruleBased.getRules());
}
 
Example #26
Source File: SortingTestCase.java    From vespa with Apache License 2.0 5 votes vote down vote up
/**
 * Asserts the expected UCA version, collator version, reorder codes (Arabic
 * only), a few orderings and the exact rule string of an Arabic collator.
 *
 * @param col the collator to check; must be a RuleBasedCollator
 */
private void requireThatArabicHasCorrectRules(Collator col) {
    assertEquals("6.2.0.0", col.getUCAVersion().toString());
    assertEquals("58.0.0.6", col.getVersion().toString());

    final int[] expectedReorderCodes = {UScript.ARABIC};
    assertEquals(Arrays.toString(expectedReorderCodes), Arrays.toString(col.getReorderCodes()));

    assertTrue(col.compare("a", "b") < 0);
    assertTrue(col.compare("a", "aس") < 0);
    assertFalse(col.compare("س", "a") < 0);

    RuleBasedCollator ruleBased = (RuleBasedCollator) col;
    assertEquals(" [reorder Arab]&ت<<ة<<<ﺔ<<<ﺓ&ي<<ى<<<ﯨ<<<ﯩ<<<ﻰ<<<ﻯ<<<ﲐ<<<ﱝ", ruleBased.getRules());
    // Same ordering check as above, repeated after the rules have been read.
    assertFalse(col.compare("س", "a") < 0);
}
 
Example #27
Source File: TextCollatorRegistryICU.java    From fdb-record-layer with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the text collator registered for a locale/strength pair, creating
 * and registering it in {@code collators} if it is absent.
 *
 * @param locale the locale name; {@code DEFAULT_LOCALE} selects the default collator
 * @param strength the ICU strength to set on a newly created collator
 * @return the collator wrapper for the pair
 */
@Override
@Nonnull
public TextCollator getTextCollator(@Nonnull String locale, int strength) {
    return MapUtils.computeIfAbsent(collators, Pair.of(locale, strength), key -> {
        final Collator icuCollator;
        if (DEFAULT_LOCALE.equals(locale)) {
            icuCollator = Collator.getInstance();
        } else {
            icuCollator = Collator.getInstance(new ULocale(locale));
        }
        icuCollator.setStrength(strength);
        return new TextCollatorICU(icuCollator);
    });
}
 
Example #28
Source File: Sorting.java    From vespa with Apache License 2.0 5 votes vote down vote up
/**
 * Maps a {@code Strength} value to the matching ICU collator strength constant.
 * {@code UNDEFINED}, and any value without a dedicated case, maps to
 * {@code Collator.PRIMARY}.
 *
 * @param strength the strength to translate
 * @return the corresponding {@code Collator} strength constant
 */
static private int strength2Collator(Strength strength) {
    switch (strength) {
        case SECONDARY:
            return Collator.SECONDARY;
        case TERTIARY:
            return Collator.TERTIARY;
        case QUATERNARY:
            return Collator.QUATERNARY;
        case IDENTICAL:
            return Collator.IDENTICAL;
        case PRIMARY:
        case UNDEFINED:
        default:
            return Collator.PRIMARY;
    }
}
 
Example #29
Source File: TestICUCollationFieldDocValues.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Ugly: but what to do? We want to test custom sort, which reads rules in as a resource.
 * These are largish files, and jvm-specific (as our documentation says, you should always
 * look out for jvm differences with collation).
 * So it's preferable to create this file on-the-fly.
 *
 * @return the absolute path of the temporary directory that was populated
 */
public static String setupSolrHome() throws Exception {
  File tmpFile = createTempDir().toFile();
  
  // make data and conf dirs
  new File(tmpFile + "/collection1", "data").mkdirs();
  File confDir = new File(tmpFile + "/collection1", "conf");
  confDir.mkdirs();
  
  // copy over configuration files
  FileUtils.copyFile(getFile("analysis-extras/solr/collection1/conf/solrconfig-icucollate.xml"), new File(confDir, "solrconfig.xml"));
  FileUtils.copyFile(getFile("analysis-extras/solr/collection1/conf/schema-icucollate-dv.xml"), new File(confDir, "schema.xml"));
  
  // generate custom collation rules (DIN 5007-2), saving to customrules.dat
  RuleBasedCollator baseCollator = (RuleBasedCollator) Collator.getInstance(new ULocale("de", "DE"));

  // NOTE(review): the last pair resets on "UE" but tailors a lowercase u + U+0308,
  // unlike the AE/OE pairs which use the uppercase letter - confirm this is intended.
  String DIN5007_2_tailorings =
    "& ae , a\u0308 & AE , A\u0308"+
    "& oe , o\u0308 & OE , O\u0308"+
    "& ue , u\u0308 & UE , u\u0308";

  RuleBasedCollator tailoredCollator = new RuleBasedCollator(baseCollator.getRules() + DIN5007_2_tailorings);
  String tailoredRules = tailoredCollator.getRules();
  // try-with-resources closes the stream even when the write fails
  try (FileOutputStream os = new FileOutputStream(new File(confDir, "customrules.dat"))) {
    IOUtils.write(tailoredRules, os, "UTF-8");
  }

  return tmpFile.getAbsolutePath();
}
 
Example #30
Source File: SortingTestCase.java    From vespa with Apache License 2.0 5 votes vote down vote up
/**
 * Checks the Chinese collator rules, both for a collator obtained directly and
 * for the one inside the sorter parsed from "uca(a,zh)", and then checks a few
 * orderings through that sorter.
 */
@Test
public void requireThatChineseSortCorrect() {
    requireThatChineseHasCorrectRules(Collator.getInstance(new ULocale("zh")));

    Sorting sorting = Sorting.fromString("uca(a,zh)");
    assertEquals(1, sorting.fieldOrders().size());

    Sorting.FieldOrder order = sorting.fieldOrders().get(0);
    assertTrue(order.getSorter() instanceof Sorting.UcaSorter);
    Sorting.UcaSorter ucaSorter = (Sorting.UcaSorter) order.getSorter();
    requireThatChineseHasCorrectRules(ucaSorter.getCollator());

    Sorting.AttributeSorter attributeSorter = order.getSorter();
    assertTrue(attributeSorter.compare("a", "b") < 0);
    assertTrue(attributeSorter.compare("a", "a\u81EA") < 0);
    assertTrue(attributeSorter.compare("\u81EA", "a") < 0);
}