Java Code Examples for com.ibm.icu.text.Collator#IDENTICAL

The following examples show how to use com.ibm.icu.text.Collator#IDENTICAL. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: CollationSettings.java    From fitnotifications with Apache License 2.0 5 votes vote down vote up
/**
 * Stores a new collation strength in the strength bits of {@code options}.
 *
 * @param value one of Collator.PRIMARY, SECONDARY, TERTIARY, QUATERNARY or IDENTICAL
 * @throws IllegalArgumentException if {@code value} is none of those constants
 */
public void setStrength(int value) {
    boolean isKnownStrength =
            value == Collator.PRIMARY ||
            value == Collator.SECONDARY ||
            value == Collator.TERTIARY ||
            value == Collator.QUATERNARY ||
            value == Collator.IDENTICAL;
    if(!isKnownStrength) {
        throw new IllegalArgumentException("illegal strength value " + value);
    }
    // Clear the previous strength bits, then put the new strength in their place.
    options = (options & ~STRENGTH_MASK) | (value << STRENGTH_SHIFT);
}
 
Example 2
Source File: CollationRuleParser.java    From fitnotifications with Apache License 2.0 5 votes vote down vote up
/**
 * Parses one rule chain: a reset followed by a sequence of relations.
 * Stops at the first position that is neither a relation operator nor a '#' comment.
 *
 * @throws ParseException declared by the parsing helpers this method calls
 */
private void parseRuleChain() throws ParseException {
    final int resetStrength = parseResetAndPosition();
    // A reset strength below IDENTICAL marks a reset-before rule chain.
    final boolean isResetBefore = resetStrength < Collator.IDENTICAL;
    boolean isFirstRelation = true;
    while(true) {
        final int result = parseRelationOperator();
        if(result < 0) {
            // No relation operator here.
            final boolean atComment =
                    ruleIndex < rules.length() && rules.charAt(ruleIndex) == 0x23;
            if(atComment) {
                // '#' starts a comment, until the end of the line
                ruleIndex = skipComment(ruleIndex + 1);
                continue;
            }
            if(isFirstRelation) {
                setParseError("reset not followed by a relation");
            }
            return;
        }
        final int strength = result & STRENGTH_MASK;
        if(isResetBefore) {
            if(isFirstRelation && strength != resetStrength) {
                setParseError("reset-before strength differs from its first relation");
                return;
            }
            if(!isFirstRelation && strength < resetStrength) {
                setParseError("reset-before strength followed by a stronger relation");
                return;
            }
        }
        // The operator length is packed above OFFSET_SHIFT; step over the operator.
        final int afterOperator = ruleIndex + (result >> OFFSET_SHIFT);
        if((result & STARRED_FLAG) != 0) {
            parseStarredCharacters(strength, afterOperator);
        } else {
            parseRelationStrings(strength, afterOperator);
        }
        isFirstRelation = false;
    }
}
 
Example 3
Source File: CollationRuleParser.java    From fitnotifications with Apache License 2.0 5 votes vote down vote up
/**
 * Parses a reset, including an optional "[before n]" prefix, and reports it to the sink.
 * On success, advances {@code ruleIndex} past the reset position.
 *
 * @return the reset strength: Collator.PRIMARY + (n - 1) for a "before n" reset (n = 1..3),
 *         Collator.IDENTICAL for a plain reset, or UCOL_DEFAULT if the reset position is
 *         missing or the sink rejects the reset (only reachable if setParseError() returns
 *         normally -- NOTE(review): confirm whether it always throws)
 * @throws ParseException declared; presumably raised by setParseError() or the
 *         position-parsing helpers -- confirm
 */
private int parseResetAndPosition() throws ParseException {
    // Start one past ruleIndex (presumably skipping the reset operator -- confirm with caller).
    int i = skipWhiteSpace(ruleIndex + 1);
    int j;
    char c;
    int resetStrength;
    // The condition below relies on short-circuit evaluation: j and c are assigned
    // inside it and only used by later operands or inside the if-body.
    // It requires, in order: the BEFORE keyword at i, a white space character right after it,
    // then (after more optional white space) a digit '1'..'3' (0x31..0x33)
    // immediately followed by ']' (0x5d).
    if(rules.regionMatches(i, BEFORE, 0, BEFORE.length()) &&
            (j = i + BEFORE.length()) < rules.length() &&
            PatternProps.isWhiteSpace(rules.charAt(j)) &&
            ((j = skipWhiteSpace(j + 1)) + 1) < rules.length() &&
            0x31 <= (c = rules.charAt(j)) && c <= 0x33 &&
            rules.charAt(j + 1) == 0x5d) {
        // &[before n] with n=1 or 2 or 3
        // Map the digit to a strength: '1' -> PRIMARY, '2' -> PRIMARY + 1, '3' -> PRIMARY + 2.
        resetStrength = Collator.PRIMARY + (c - 0x31);
        // Continue after the digit and the closing ']'.
        i = skipWhiteSpace(j + 2);
    } else {
        // No "before" prefix: i still points at the start of the reset position.
        resetStrength = Collator.IDENTICAL;
    }
    if(i >= rules.length()) {
        setParseError("reset without position");
        return UCOL_DEFAULT;
    }
    // The reset position is either a bracketed special position or a tailoring string;
    // both helpers receive rawBuilder and return the index where parsing continues.
    if(rules.charAt(i) == 0x5b) {  // '['
        i = parseSpecialPosition(i, rawBuilder);
    } else {
        i = parseTailoringString(i, rawBuilder);
    }
    try {
        sink.addReset(resetStrength, rawBuilder);
    } catch(Exception e) {
        // Any sink failure is reported as a parse error with the original exception attached.
        setParseError("adding reset failed", e);
        return UCOL_DEFAULT;
    }
    // Commit the new position only after the sink accepted the reset.
    ruleIndex = i;
    return resetStrength;
}
 
Example 4
Source File: CollationBuilder.java    From fitnotifications with Apache License 2.0 5 votes vote down vote up
/**
 * Determines the strength level of a collation element.
 *
 * @param ce the 64-bit collation element
 * @return the strength stored in a temporary CE; otherwise PRIMARY if the most significant
 *         byte of {@code ce} is non-zero, SECONDARY if the most significant byte of its
 *         lower 32 bits is non-zero, TERTIARY if any other bit is set,
 *         and IDENTICAL if {@code ce} is zero
 */
private static int ceStrength(long ce) {
    if(isTempCE(ce)) {
        return strengthFromTempCE(ce);
    }
    if((ce & 0xff00000000000000L) != 0) {
        return Collator.PRIMARY;
    }
    if(((int)ce & 0xff000000) != 0) {
        return Collator.SECONDARY;
    }
    if(ce != 0) {
        return Collator.TERTIARY;
    }
    return Collator.IDENTICAL;
}
 
Example 5
Source File: Sorting.java    From vespa with Apache License 2.0 5 votes vote down vote up
/**
 * Translates a {@code Strength} value into the matching Collator strength constant.
 * UNDEFINED, and any value without an explicit mapping, translates to Collator.PRIMARY.
 *
 * @param strength the strength to translate
 * @return the corresponding Collator strength constant
 */
static private int strength2Collator(Strength strength) {
    final int collatorStrength;
    switch (strength) {
        case SECONDARY:
            collatorStrength = Collator.SECONDARY;
            break;
        case TERTIARY:
            collatorStrength = Collator.TERTIARY;
            break;
        case QUATERNARY:
            collatorStrength = Collator.QUATERNARY;
            break;
        case IDENTICAL:
            collatorStrength = Collator.IDENTICAL;
            break;
        case PRIMARY:
        case UNDEFINED:
        default:
            collatorStrength = Collator.PRIMARY;
            break;
    }
    return collatorStrength;
}
 
Example 6
Source File: CollationRuleParser.java    From fitnotifications with Apache License 2.0 4 votes vote down vote up
/**
 * Recognizes a relation operator at the current rule position, after skipping white space.
 * Accepted operators are "<", "<<", "<<<", "<<<<", ";", "," and "=";
 * the "<" forms and "=" may be followed by "*", which sets STARRED_FLAG.
 * Updates {@code ruleIndex} to the position after the skipped white space.
 *
 * @return the operator length shifted left by OFFSET_SHIFT, combined with the strength
 *         (and possibly STARRED_FLAG); or UCOL_DEFAULT if there is no relation operator
 */
private int parseRelationOperator() {
    ruleIndex = skipWhiteSpace(ruleIndex);
    if(ruleIndex >= rules.length()) { return UCOL_DEFAULT; }
    int pos = ruleIndex;
    final char operator = rules.charAt(pos++);
    int strength;
    boolean starAllowed;
    if(operator == 0x3c) {  // '<'
        // Count up to three further '<' characters following the first one.
        int extra = 0;
        while(extra < 3 && pos < rules.length() && rules.charAt(pos) == 0x3c) {
            ++pos;
            ++extra;
        }
        switch(extra) {
        case 0:  // <
            strength = Collator.PRIMARY;
            break;
        case 1:  // <<
            strength = Collator.SECONDARY;
            break;
        case 2:  // <<<
            strength = Collator.TERTIARY;
            break;
        default:  // <<<<
            strength = Collator.QUATERNARY;
            break;
        }
        starAllowed = true;
    } else if(operator == 0x3b) {  // ';' same as <<
        strength = Collator.SECONDARY;
        starAllowed = false;
    } else if(operator == 0x2c) {  // ',' same as <<<
        strength = Collator.TERTIARY;
        starAllowed = false;
    } else if(operator == 0x3d) {  // '='
        strength = Collator.IDENTICAL;
        starAllowed = true;
    } else {
        return UCOL_DEFAULT;
    }
    if(starAllowed && pos < rules.length() && rules.charAt(pos) == 0x2a) {  // '*'
        ++pos;
        strength |= STARRED_FLAG;
    }
    return ((pos - ruleIndex) << OFFSET_SHIFT) | strength;
}
 
Example 7
Source File: CollationBuilder.java    From fitnotifications with Apache License 2.0 4 votes vote down vote up
/**
 * Implements CollationRuleParser.Sink.
 *
 * <p>Normalizes the prefix and string with {@code nfd}, rejects contractions involving
 * conjoining Jamo that the checks below do not allow, and, for any strength other than
 * Collator.IDENTICAL, inserts a new tailored node and replaces the last entry of
 * {@code ces} with a temporary CE that refers to it. Then appends the CEs of the optional
 * extension and registers the mapping via addWithClosure().
 * {@code cesLength} is restored to its pre-extension value before returning.
 *
 * @param strength the relation strength (compared against the Collator strength constants)
 * @param prefix the context prefix; may be empty
 * @param str the string being tailored
 * @param extension the expansion extension; may be empty
 * @throws UnsupportedOperationException for contractions starting with Jamo L or V,
 *         contractions ending with Jamo L or L+V, a primary relation after a non-temporary
 *         CE whose upper 32 bits are zero, or a quaternary relation after a zero CE
 * @throws IllegalArgumentException if the extension makes {@code cesLength} exceed
 *         Collation.MAX_EXPANSION_LENGTH
 */
@Override
void addRelation(int strength, CharSequence prefix, CharSequence str, CharSequence extension) {
    String nfdPrefix;
    if(prefix.length() == 0) {
        nfdPrefix = "";
    } else {
        nfdPrefix = nfd.normalize(prefix);
    }
    String nfdString = nfd.normalize(str);

    // The runtime code decomposes Hangul syllables on the fly,
    // with recursive processing but without making the Jamo pieces visible for matching.
    // It does not work with certain types of contextual mappings.
    int nfdLength = nfdString.length();
    // Only strings of two or more code units are checked here.
    if(nfdLength >= 2) {
        char c = nfdString.charAt(0);
        if(Hangul.isJamoL(c) || Hangul.isJamoV(c)) {
            // While handling a Hangul syllable, contractions starting with Jamo L or V
            // would not see the following Jamo of that syllable.
            throw new UnsupportedOperationException(
                    "contractions starting with conjoining Jamo L or V not supported");
        }
        c = nfdString.charAt(nfdLength - 1);
        if(Hangul.isJamoL(c) ||
                (Hangul.isJamoV(c) && Hangul.isJamoL(nfdString.charAt(nfdLength - 2)))) {
            // A contraction ending with Jamo L or L+V would require
            // generating Hangul syllables in addTailComposites() (588 for a Jamo L),
            // or decomposing a following Hangul syllable on the fly, during contraction matching.
            throw new UnsupportedOperationException(
                    "contractions ending with conjoining Jamo L or L+V not supported");
        }
        // A Hangul syllable completely inside a contraction is ok.
    }
    // Note: If there is a prefix, then the parser checked that
    // both the prefix and the string begin with NFC boundaries (not Jamo V or T).
    // Therefore: prefix.isEmpty() || !isJamoVOrT(nfdString.charAt(0))
    // (While handling a Hangul syllable, prefixes on Jamo V or T
    // would not see the previous Jamo of that syllable.)

    // An IDENTICAL relation skips node insertion and leaves ces untouched here.
    if(strength != Collator.IDENTICAL) {
        // Find the node index after which we insert the new tailored node.
        int index = findOrInsertNodeForCEs(strength);
        assert(cesLength > 0);
        long ce = ces[cesLength - 1];
        if(strength == Collator.PRIMARY && !isTempCE(ce) && (ce >>> 32) == 0) {
            // There is no primary gap between ignorables and the space-first-primary.
            throw new UnsupportedOperationException(
                    "tailoring primary after ignorables not supported");
        }
        if(strength == Collator.QUATERNARY && ce == 0) {
            // The CE data structure does not support non-zero quaternary weights
            // on tertiary ignorables.
            throw new UnsupportedOperationException(
                    "tailoring quaternary after tertiary ignorables not supported");
        }
        // Insert the new tailored node.
        index = insertTailoredNodeAfter(index, strength);
        // Strength of the temporary CE:
        // The new relation may yield a stronger CE but not a weaker one.
        int tempStrength = ceStrength(ce);
        if(strength < tempStrength) { tempStrength = strength; }
        // Replace the last CE with a temporary CE that refers to the new node.
        ces[cesLength - 1] = tempCEFromIndexAndStrength(index, tempStrength);
    }

    setCaseBits(nfdString);

    // Remember the length so that the extension CEs can be dropped again at the end.
    int cesLengthBeforeExtension = cesLength;
    if(extension.length() != 0) {
        String nfdExtension = nfd.normalize(extension);
        // getCEs() receives the current length and returns the new total length.
        cesLength = dataBuilder.getCEs(nfdExtension, ces, cesLength);
        if(cesLength > Collation.MAX_EXPANSION_LENGTH) {
            throw new IllegalArgumentException(
                    "extension string adds too many collation elements (more than 31 total)");
        }
    }
    int ce32 = Collation.UNASSIGNED_CE32;
    // Only when normalization changed the prefix or the string,
    // and neither original is flagged by ignorePrefix()/ignoreString().
    if((!nfdPrefix.contentEquals(prefix) || !nfdString.contentEquals(str)) &&
            !ignorePrefix(prefix) && !ignoreString(str)) {
        // Map from the original input to the CEs.
        // We do this in case the canonical closure is incomplete,
        // so that it is possible to explicitly provide the missing mappings.
        ce32 = addIfDifferent(prefix, str, ces, cesLength, ce32);
    }
    addWithClosure(nfdPrefix, nfdString, ces, cesLength, ce32);
    // Drop the extension CEs again.
    cesLength = cesLengthBeforeExtension;
}
 
Example 8
Source File: TestICUUnicodeKeyBuilder.java    From database with GNU General Public License v2.0 4 votes vote down vote up
/**
     * Test examines the behavior when the
     * {@link SuccessorUtil#successor(String)} of an Unicode string is formed by
     * appending a <code>nul</code> character and reports an error if the
     * resulting byte[] when the key are formed compares as equal to the
     * original string from which the successor was formed.
     * <p>
     * Each collation strength is tried in ascending order and the weakest
     * strength at which the successor test passes is reported. The test fails
     * only if no strength passes.
     * <p>
     * Note: Since {@link Collator#IDENTICAL} appears to be required to
     * differentiate a trailing nul character (i.e., the successor of some
     * Unicode string), then I would strongly recommend that you form the sort
     * key first and then its successor (by appending a trailing nul).
     */
    public void test_keyBuilder_unicode_trailingNuls() {

        /*
         * Setup for US English.
         */

        final Properties properties = new Properties();

        properties.setProperty(Options.USER_LANGUAGE, Locale.US.getLanguage());

        properties.setProperty(Options.USER_COUNTRY, Locale.US.getCountry());

        // Listed from weakest to strongest so that the first success is the minimum.
        final int[] strengths = new int[] {
                Collator.PRIMARY,
                Collator.SECONDARY,
                Collator.TERTIARY,
                Collator.QUATERNARY,
                Collator.IDENTICAL,
                };

        // -1 means that no strength has passed the successor test yet.
        int minStrength = -1;

        for (final int strength : strengths) {

            // Configure the strength under test (not a fixed strength),
            // so that each iteration really exercises a different level.
            properties.setProperty(Options.STRENGTH, "" + strength);

            if (!doSuccessorTest("Hello World!", properties)) {

                log.warn("Collator does not differentiate trailing nul characters at strength="+strength);

            } else if (minStrength == -1) {

                // Keep only the first (weakest) strength that succeeds.
                minStrength = strength;

            }

        }

        assertFalse(
                "Collator will not differentiate trailing nul characters at any strength.",
                minStrength == -1);

        // Symbolic name of the minimum strength; falls back to the numeric value.
        final String minStrengthName;
        if (minStrength == Collator.PRIMARY) {
            minStrengthName = "PRIMARY";
        } else if (minStrength == Collator.SECONDARY) {
            minStrengthName = "SECONDARY";
        } else if (minStrength == Collator.TERTIARY) {
            minStrengthName = "TERTIARY";
        } else if (minStrength == Collator.QUATERNARY) {
            minStrengthName = "QUATERNARY";
        } else if (minStrength == Collator.IDENTICAL) {
            minStrengthName = "IDENTICAL";
        } else {
            minStrengthName = "" + minStrength;
        }

        System.err
                .println("Minimum strength ("+minStrength+") to differentiate trailing nul character is: "
                        + minStrengthName);

    }