Java Code Examples for android.icu.lang.UCharacter#getIntPropertyValue()

The following examples show how to use android.icu.lang.UCharacter#getIntPropertyValue(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. Related API usage is listed in the sidebar.
Example 1
Source File: KhmerBreakEngine.java    From j2objc with Apache License 2.0 5 votes vote down vote up
/**
 * Reports whether this engine accepts the given code point for the given break kind.
 *
 * @param c the code point to classify
 * @param breakType the break kind; compared against BreakIterator.KIND_WORD and
 *                  BreakIterator.KIND_LINE
 * @return true only when breakType is a word or line break and the SCRIPT property
 *         of c equals UScript.KHMER; false otherwise
 */
public boolean handles(int c, int breakType) {
    final boolean wordOrLine =
            breakType == BreakIterator.KIND_WORD || breakType == BreakIterator.KIND_LINE;
    if (!wordOrLine) {
        // Other break kinds are never claimed by this engine.
        return false;
    }
    return UCharacter.getIntPropertyValue(c, UProperty.SCRIPT) == UScript.KHMER;
}
 
Example 2
Source File: BurmeseBreakEngine.java    From j2objc with Apache License 2.0 5 votes vote down vote up
/**
 * Reports whether this engine accepts the given code point for the given break kind.
 *
 * @param c the code point to classify
 * @param breakType the break kind; compared against BreakIterator.KIND_WORD and
 *                  BreakIterator.KIND_LINE
 * @return true only when breakType is a word or line break and the SCRIPT property
 *         of c equals UScript.MYANMAR; false otherwise
 */
@Override
public boolean handles(int c, int breakType) {
    final boolean wordOrLine =
            breakType == BreakIterator.KIND_WORD || breakType == BreakIterator.KIND_LINE;
    if (!wordOrLine) {
        // Other break kinds are never claimed by this engine.
        return false;
    }
    return UCharacter.getIntPropertyValue(c, UProperty.SCRIPT) == UScript.MYANMAR;
}
 
Example 3
Source File: UnhandledBreakEngine.java    From j2objc with Apache License 2.0 5 votes vote down vote up
/**
 * Widens the set of unhandled characters recorded for the given breakType so that
 * it also contains every character whose script matches that of c.
 * May be called concurrently with handles() or findBreaks().
 * Must not be called concurrently with itself.
 *
 * @param c the code point whose script should be added; ignored when equal to DONE32
 * @param breakType index into fHandled; ignored when outside [0, fHandled.length())
 */
public void handleChar(int c, int breakType) {
    if (breakType < 0 || breakType >= fHandled.length() || c == DONE32) {
        return;
    }
    UnicodeSet current = fHandled.get(breakType);
    if (current.contains(c)) {
        // Already covered; nothing to add.
        return;
    }
    int scriptOfC = UCharacter.getIntPropertyValue(c, UProperty.SCRIPT);
    // The replacement set is assembled in full before it is stored, and the old set
    // is never mutated (presumably so concurrent readers only ever observe a
    // complete set; confirm against fHandled's type).
    UnicodeSet expanded = new UnicodeSet();
    expanded.applyIntPropertyValue(UProperty.SCRIPT, scriptOfC);
    expanded.addAll(current);
    fHandled.set(breakType, expanded);
}
 
Example 4
Source File: LaoBreakEngine.java    From j2objc with Apache License 2.0 5 votes vote down vote up
/**
 * Reports whether this engine accepts the given code point for the given break kind.
 *
 * @param c the code point to classify
 * @param breakType the break kind; compared against BreakIterator.KIND_WORD and
 *                  BreakIterator.KIND_LINE
 * @return true only when breakType is a word or line break and the SCRIPT property
 *         of c equals UScript.LAO; false otherwise
 */
public boolean handles(int c, int breakType) {
    final boolean wordOrLine =
            breakType == BreakIterator.KIND_WORD || breakType == BreakIterator.KIND_LINE;
    if (!wordOrLine) {
        // Other break kinds are never claimed by this engine.
        return false;
    }
    return UCharacter.getIntPropertyValue(c, UProperty.SCRIPT) == UScript.LAO;
}
 
Example 5
Source File: ThaiBreakEngine.java    From j2objc with Apache License 2.0 5 votes vote down vote up
/**
 * Reports whether this engine accepts the given code point for the given break kind.
 *
 * @param c the code point to classify
 * @param breakType the break kind; compared against BreakIterator.KIND_WORD and
 *                  BreakIterator.KIND_LINE
 * @return true only when breakType is a word or line break and the SCRIPT property
 *         of c equals UScript.THAI; false otherwise
 */
public boolean handles(int c, int breakType) {
    final boolean wordOrLine =
            breakType == BreakIterator.KIND_WORD || breakType == BreakIterator.KIND_LINE;
    if (!wordOrLine) {
        // Other break kinds are never claimed by this engine.
        return false;
    }
    return UCharacter.getIntPropertyValue(c, UProperty.SCRIPT) == UScript.THAI;
}
 
Example 6
Source File: UCharacterTest.java    From j2objc with Apache License 2.0 5 votes vote down vote up
/**
 * Checks inputs for which UCharacter.getIntPropertyValue(ch, type) is expected to
 * return 0: negative property selectors, and HANGUL_SYLLABLE_TYPE queries for the
 * five code points immediately below Normalizer2Impl.Hangul.JAMO_L_BASE and
 * immediately below Normalizer2Impl.Hangul.HANGUL_BASE.
 * Each mismatch is reported through errln().
 */
@Test
public void TestGetIntPropertyValue(){
    /* Testing UCharacter.getIntPropertyValue(ch, type) */
    // Negative property selectors: every one is expected to yield 0.
    int[] negative_cases = {-100,-50,-10,-5,-2,-1};
    for (int type : negative_cases) {
        if (UCharacter.getIntPropertyValue(0, type) != 0) {
            errln("UCharacter.getIntPropertyValue(ch, type) was supposed to return 0 " +
                    "when passing a negative value of " + type);
        }
    }

    // Code points just below Normalizer2Impl.Hangul.JAMO_L_BASE.
    for (int ch = Normalizer2Impl.Hangul.JAMO_L_BASE - 5;
            ch < Normalizer2Impl.Hangul.JAMO_L_BASE; ch++) {
        if (UCharacter.getIntPropertyValue(ch, UProperty.HANGUL_SYLLABLE_TYPE) != 0) {
            // The leading space before "and" keeps the code point readable in the message.
            errln("UCharacter.getIntPropertyValue(ch, type) was supposed to return 0 " +
                    "when passing ch: " + ch + " and type of UProperty.HANGUL_SYLLABLE_TYPE");
        }
    }

    // Code points just below Normalizer2Impl.Hangul.HANGUL_BASE.
    for (int ch = Normalizer2Impl.Hangul.HANGUL_BASE - 5;
            ch < Normalizer2Impl.Hangul.HANGUL_BASE; ch++) {
        if (UCharacter.getIntPropertyValue(ch, UProperty.HANGUL_SYLLABLE_TYPE) != 0) {
            errln("UCharacter.getIntPropertyValue(ch, type) was supposed to return 0 " +
                    "when passing ch: " + ch + " and type of UProperty.HANGUL_SYLLABLE_TYPE");
        }
    }
}
 
Example 7
Source File: RBBITableBuilder.java    From j2objc with Apache License 2.0 4 votes vote down vote up
/**
 * Extends the followPos sets of leaf nodes that can end a rule match so that a match
 * may continue into the second character of any chain-enabled rule that starts with
 * the same character class.
 *
 * @param tree root of the node tree that is searched for end-marker nodes, leaf
 *             nodes and rule root nodes
 */
void calcChainedFollowPos(RBBINode tree) {
    // Collect the end-marker nodes and the leaf nodes of the tree.
    List<RBBINode> endMarkers = new ArrayList<RBBINode>();
    List<RBBINode> leaves = new ArrayList<RBBINode>();
    tree.findNodes(endMarkers, RBBINode.endMark);
    tree.findNodes(leaves, RBBINode.leafChar);

    // Leaf nodes that can begin a match of a rule with inbound chaining enabled:
    // the union of the firstPosition sets of every such rule root.
    List<RBBINode> ruleRoots = new ArrayList<RBBINode>();
    addRuleRootNodes(ruleRoots, tree);

    Set<RBBINode> chainStarts = new HashSet<RBBINode>();
    for (RBBINode root : ruleRoots) {
        if (!root.fChainIn) {
            continue;
        }
        chainStarts.addAll(root.fFirstPosSet);
    }

    for (RBBINode leaf : leaves) {
        // A leaf can end an overall rule match when its followPos set contains
        // at least one end-marker node.
        boolean canEndMatch = false;
        for (RBBINode marker : endMarkers) {
            if (leaf.fFollowPos.contains(marker)) {
                canEndMatch = true;
                break;
            }
        }
        if (!canEndMatch) {
            continue;
        }

        // Line-break specific hack: never chain from a node whose value corresponds
        // to the $CM character class.
        // TODO:  Add rule syntax for this behavior, get specifics out of here and
        //        into the rule file.
        if (fRB.fLBCMNoChain) {
            // -1 occurs with sets containing only the {eof} marker string.
            int firstChar = this.fRB.fSetBuilder.getFirstChar(leaf.fVal);
            if (firstChar != -1
                    && UCharacter.getIntPropertyValue(firstChar, UProperty.LINE_BREAK)
                            == UCharacter.LineBreak.COMBINING_MARK) {
                continue;
            }
        }

        // For every match-start leaf sharing this leaf's character class, merge the
        // start node's followPos into this leaf's followPos. That lets a match
        // ending here transition to the second character of a match that begins
        // with the start node.
        for (RBBINode start : chainStarts) {
            if (start.fType == RBBINode.leafChar && leaf.fVal == start.fVal) {
                leaf.fFollowPos.addAll(start.fFollowPos);
            }
        }
    }
}
 
Example 8
Source File: UnicodeSet.java    From j2objc with Apache License 2.0 4 votes vote down vote up
/**
 * Tests a code point against the property/value pair held by this object.
 *
 * @param ch the code point to test
 * @return true when UCharacter.getIntPropertyValue(ch, prop) equals value
 */
@Override
public boolean contains(int ch) {
    final int actual = UCharacter.getIntPropertyValue(ch, prop);
    return actual == value;
}
 
Example 9
Source File: BasicTest.java    From j2objc with Apache License 2.0 4 votes vote down vote up
/**
 * Cross-checks per-code-point normalization properties against the Normalizer API.
 * For a sample of code points below 0x110000 it compares the four *_QUICK_CHECK
 * property values with Normalizer.quickCheck() on the one-code-point string, and the
 * lead/trail canonical combining class properties with getCombiningClass() of the
 * first/last code point of the NFD form. Mismatches are reported through errln().
 */
@Test
public void TestQuickCheckPerCP() {
    // Sanity-check the maximum values of the properties under test first.
    final boolean maxValuesAsExpected =
        UCharacter.getIntPropertyMaxValue(UProperty.NFD_QUICK_CHECK)==1 && // YES
        UCharacter.getIntPropertyMaxValue(UProperty.NFKD_QUICK_CHECK)==1 &&
        UCharacter.getIntPropertyMaxValue(UProperty.NFC_QUICK_CHECK)==2 && // MAYBE
        UCharacter.getIntPropertyMaxValue(UProperty.NFKC_QUICK_CHECK)==2 &&
        UCharacter.getIntPropertyMaxValue(UProperty.LEAD_CANONICAL_COMBINING_CLASS)==UCharacter.getIntPropertyMaxValue(UProperty.CANONICAL_COMBINING_CLASS) &&
        UCharacter.getIntPropertyMaxValue(UProperty.TRAIL_CANONICAL_COMBINING_CLASS)==UCharacter.getIntPropertyMaxValue(UProperty.CANONICAL_COMBINING_CLASS);
    if (!maxValuesAsExpected) {
        errln("wrong result from one of the u_getIntPropertyMaxValue(UCHAR_NF*_QUICK_CHECK) or UCHAR_*_CANONICAL_COMBINING_CLASS");
    }

    /*
     * Compare the quick check property values for some code points
     * to the quick check results for checking same-code point strings.
     * The step expression skips some code points between samples.
     */
    for (int c = 0; c < 0x110000; c = (20*c)/19+1) {
        final String s = UTF16.valueOf(c);
        int fromProperty;
        int fromQuickCheck;

        fromProperty = UCharacter.getIntPropertyValue(c, UProperty.NFC_QUICK_CHECK);
        fromQuickCheck = qcToInt(Normalizer.quickCheck(s, Normalizer.NFC));
        if (fromProperty != fromQuickCheck) {
            errln("getIntPropertyValue(NFC)="+fromProperty+" != "+fromQuickCheck+"=quickCheck(NFC) for U+"+Integer.toHexString(c));
        }

        fromProperty = UCharacter.getIntPropertyValue(c, UProperty.NFD_QUICK_CHECK);
        fromQuickCheck = qcToInt(Normalizer.quickCheck(s, Normalizer.NFD));
        if (fromProperty != fromQuickCheck) {
            errln("getIntPropertyValue(NFD)="+fromProperty+" != "+fromQuickCheck+"=quickCheck(NFD) for U+"+Integer.toHexString(c));
        }

        fromProperty = UCharacter.getIntPropertyValue(c, UProperty.NFKC_QUICK_CHECK);
        fromQuickCheck = qcToInt(Normalizer.quickCheck(s, Normalizer.NFKC));
        if (fromProperty != fromQuickCheck) {
            errln("getIntPropertyValue(NFKC)="+fromProperty+" != "+fromQuickCheck+"=quickCheck(NFKC) for U+"+Integer.toHexString(c));
        }

        fromProperty = UCharacter.getIntPropertyValue(c, UProperty.NFKD_QUICK_CHECK);
        fromQuickCheck = qcToInt(Normalizer.quickCheck(s, Normalizer.NFKD));
        if (fromProperty != fromQuickCheck) {
            errln("getIntPropertyValue(NFKD)="+fromProperty+" != "+fromQuickCheck+"=quickCheck(NFKD) for U+"+Integer.toHexString(c));
        }

        // First and last code points of the NFD form of the sample.
        final String decomposed = Normalizer.normalize(s, Normalizer.NFD);
        final int first = UTF16.charAt(decomposed, 0);
        final int last = UTF16.charAt(decomposed, decomposed.length()-1);

        final int leadFromProperty = UCharacter.getIntPropertyValue(c, UProperty.LEAD_CANONICAL_COMBINING_CLASS);
        final int leadFromNfd = UCharacter.getCombiningClass(first);
        final int trailFromProperty = UCharacter.getIntPropertyValue(c, UProperty.TRAIL_CANONICAL_COMBINING_CLASS);
        final int trailFromNfd = UCharacter.getCombiningClass(last);

        if (leadFromProperty != leadFromNfd) {
            errln("getIntPropertyValue(lccc)="+leadFromProperty+" != "+leadFromNfd+"=getCombiningClass(lead) for U+"+Integer.toHexString(c));
        }
        if (trailFromProperty != trailFromNfd) {
            errln("getIntPropertyValue(tccc)="+trailFromProperty+" != "+trailFromNfd+"=getCombiningClass(trail) for U+"+Integer.toHexString(c));
        }
    }
}
 
Example 10
Source File: UCharacterTest.java    From j2objc with Apache License 2.0 4 votes vote down vote up
/**
 * Tests the mirroring APIs: isMirrored() and getMirror() on fixed samples, the
 * round trip of getMirror() over every code point in [:Bidi_Mirrored:], the
 * agreement of getBidiPairedBracket() with getMirror(), and the code points whose
 * mirrored status is expected to be off per Unicode Corrigendum #6.
 */
@Test
public void TestMirror()
{
    final boolean isMirroredOk =
          UCharacter.isMirrored(0x28) && UCharacter.isMirrored(0xbb) &&
          UCharacter.isMirrored(0x2045) && UCharacter.isMirrored(0x232a)
          && !UCharacter.isMirrored(0x27) &&
          !UCharacter.isMirrored(0x61) && !UCharacter.isMirrored(0x284)
          && !UCharacter.isMirrored(0x3400);
    if (!isMirroredOk) {
        errln("isMirrored() does not work correctly");
    }

    final boolean getMirrorOk =
          UCharacter.getMirror(0x3c) == 0x3e &&
          UCharacter.getMirror(0x5d) == 0x5b &&
          UCharacter.getMirror(0x208d) == 0x208e &&
          UCharacter.getMirror(0x3017) == 0x3016 &&

          UCharacter.getMirror(0xbb) == 0xab &&
          UCharacter.getMirror(0x2215) == 0x29F5 &&
          UCharacter.getMirror(0x29F5) == 0x2215 && /* large delta between the code points */

          /* code points expected to map to themselves */
          UCharacter.getMirror(0x2e) == 0x2e &&
          UCharacter.getMirror(0x6f3) == 0x6f3 &&
          UCharacter.getMirror(0x301c) == 0x301c &&
          UCharacter.getMirror(0xa4ab) == 0xa4ab &&

          /* see Unicode Corrigendum #6 at http://www.unicode.org/versions/corrigendum6.html */
          UCharacter.getMirror(0x2018) == 0x2018 &&
          UCharacter.getMirror(0x201b) == 0x201b &&
          UCharacter.getMirror(0x301d) == 0x301d;
    if (!getMirrorOk) {
        errln("getMirror() does not work correctly");
    }

    /* verify that Bidi_Mirroring_Glyph roundtrips */
    UnicodeSet mirroredSet = new UnicodeSet("[:Bidi_Mirrored:]");
    UnicodeSetIterator ranges = new UnicodeSetIterator(mirroredSet);
    while (ranges.nextRange()) {
        int cp = ranges.codepoint;
        if (cp < 0) {
            // Stop at the first range whose start is negative.
            break;
        }
        final int rangeEnd = ranges.codepointEnd;
        do {
            final int mirror = UCharacter.getMirror(cp);
            final int mirrorOfMirror = UCharacter.getMirror(mirror);
            if (mirrorOfMirror != cp) {
                errln("getMirror() does not roundtrip: U+"+hex(cp)+"->U+"+hex(mirror)+"->U+"+hex(mirrorOfMirror));
            }
            final int pairedBracket = UCharacter.getBidiPairedBracket(cp);
            final boolean noBracketType =
                    UCharacter.getIntPropertyValue(cp, UProperty.BIDI_PAIRED_BRACKET_TYPE)==UCharacter.BidiPairedBracketType.NONE;
            if (noBracketType) {
                // Without a bracket type the paired bracket must be the code point itself.
                if (pairedBracket != cp) {
                    errln("u_getBidiPairedBracket(U+"+hex(cp)+") != self for bpt(c)==None");
                }
            } else if (pairedBracket != mirror) {
                // Otherwise it must agree with the mirroring glyph.
                errln("u_getBidiPairedBracket(U+"+hex(cp)+") != U+"+hex(mirror)+" = bmg(c)'");
            }
        } while (++cp <= rangeEnd);
    }

    // verify that Unicode Corrigendum #6 reverts mirrored status of the following
    final boolean corrigendumConflict =
        UCharacter.isMirrored(0x2018) ||
        UCharacter.isMirrored(0x201d) ||
        UCharacter.isMirrored(0x201f) ||
        UCharacter.isMirrored(0x301e);
    if (corrigendumConflict) {
        errln("Unicode Corrigendum #6 conflict, one or more of 2018/201d/201f/301e has mirrored property");
    }
}
 
Example 11
Source File: UCharacterTest.java    From j2objc with Apache License 2.0 4 votes vote down vote up
/**
 * Test the property values API.  See JB#2410.
 * Covers the minimum value of every int property, the min/max of
 * GENERAL_CATEGORY_MASK, the maximum for an invalid property, and script lookup
 * for an invalid code point.
 */
@Test
public void TestPropertyValues() {
    /* Min should be 0 for everything. */
    /* UProperty.BLOCK is the one tolerated exception for now; the original notes
       cite JB#2478 and JB#2487. TODO Update this once that is resolved. */
    for (int prop = UProperty.INT_START; prop < UProperty.INT_LIMIT; ++prop) {
        final int minValue = UCharacter.getIntPropertyMinValue(prop);
        if (minValue == 0 || prop == UProperty.BLOCK) {
            continue;
        }
        final String longName = UCharacter.getPropertyName(prop, UProperty.NameChoice.LONG);
        errln("FAIL: UCharacter.getIntPropertyMinValue(" + longName + ") = " +
              minValue + ", exp. 0");
    }

    /* GENERAL_CATEGORY_MASK: min is expected to be 0 and max -1. */
    final boolean maskMinWrong =
            UCharacter.getIntPropertyMinValue(UProperty.GENERAL_CATEGORY_MASK) != 0;
    if (maskMinWrong
            || UCharacter.getIntPropertyMaxValue(UProperty.GENERAL_CATEGORY_MASK) != -1) {
        errln("error: UCharacter.getIntPropertyMin/MaxValue("
              + "UProperty.GENERAL_CATEGORY_MASK) is wrong");
    }

    /* Max should be -1 for invalid properties. */
    final int maxForInvalid = UCharacter.getIntPropertyMaxValue(-1);
    if (maxForInvalid != -1) {
        errln("FAIL: UCharacter.getIntPropertyMaxValue(-1) = " +
              maxForInvalid + ", exp. -1");
    }

    /* Script should return 0 for an invalid code point. If the API
       throws an exception then that's fine too. */
    for (int variant = 0; variant < 2; ++variant) {
        try {
            int script = 0;
            String desc = null;
            if (variant == 0) {
                script = UScript.getScript(-1);
                desc = "UScript.getScript(-1)";
            } else if (variant == 1) {
                script = UCharacter.getIntPropertyValue(-1, UProperty.SCRIPT);
                desc = "UCharacter.getIntPropertyValue(-1, UProperty.SCRIPT)";
            }
            if (script != 0) {
                errln("FAIL: " + desc + " = " + script + ", exp. 0");
            }
        } catch (IllegalArgumentException ignored) {
            // Deliberately ignored: rejecting the invalid code point is acceptable.
        }
    }
}
 
Example 12
Source File: WordIterator.java    From android_9.0.0_r45 with Apache License 2.0 3 votes vote down vote up
/**
 * Tells whether a code point is punctuation that may only appear in the middle of a word.
 *
 * The check is currently locale-independent. It accepts every character whose Word_Break
 * property is MidLetter, MidNumLet, or Single_Quote in the Unicode word breaking algorithm
 * (see UAX #29 "Unicode Text Segmentation" at http://unicode.org/reports/tr29/). Per rules
 * WB6 and WB7 of UAX #29, such characters prevent a word break when they sit inside a word,
 * but they do become word breaks at the end of a word (according to rule WB999, which breaks
 * anywhere that is not otherwise prohibited).
 *
 * @param locale the locale to consider the codepoint in. Presently ignored.
 * @param codePoint the codepoint to check.
 * @return True if the codepoint is a mid-word punctuation.
 */
public static boolean isMidWordPunctuation(Locale locale, int codePoint) {
    final int wordBreak = UCharacter.getIntPropertyValue(codePoint, UProperty.WORD_BREAK);
    if (wordBreak == UCharacter.WordBreak.MIDLETTER) {
        return true;
    }
    if (wordBreak == UCharacter.WordBreak.MIDNUMLET) {
        return true;
    }
    return wordBreak == UCharacter.WordBreak.SINGLE_QUOTE;
}