Java Code Examples for com.ibm.icu.lang.UCharacter#getIntPropertyValue()

The following examples show how to use com.ibm.icu.lang.UCharacter#getIntPropertyValue(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out related API usage in the sidebar.
Example 1
Source File: PrintUtils.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Reports whether a code point is full-width, judged by its East Asian Width
 * property (written against Unicode Standard version 12.0.0).
 * See http://unicode.org/reports/tr11/
 *
 * @param codePoint the Unicode code point to classify
 * @return {@code true} for FULLWIDTH and WIDE; {@code false} for NEUTRAL,
 *         AMBIGUOUS, HALFWIDTH and NARROW
 * @throws RuntimeException if the property lookup yields any other value
 */
public static boolean isFullWidth(int codePoint) {
	final int width = UCharacter.getIntPropertyValue(codePoint, UProperty.EAST_ASIAN_WIDTH);
	switch (width) {
		// The two categories that are treated as full-width.
		case UCharacter.EastAsianWidth.FULLWIDTH:
		case UCharacter.EastAsianWidth.WIDE:
			return true;
		// Every other known category is treated as not full-width.
		case UCharacter.EastAsianWidth.NEUTRAL:
		case UCharacter.EastAsianWidth.AMBIGUOUS:
		case UCharacter.EastAsianWidth.HALFWIDTH:
		case UCharacter.EastAsianWidth.NARROW:
			return false;
		default:
			throw new RuntimeException("unknown UProperty.EAST_ASIAN_WIDTH: " + width);
	}
}
 
Example 2
Source File: KhmerBreakEngine.java    From fitnotifications with Apache License 2.0 5 votes vote down vote up
/**
 * Tells whether this engine applies to a character for a given break type.
 *
 * @param c         code point to test
 * @param breakType break kind in use; only {@code BreakIterator.KIND_WORD}
 *                  and {@code BreakIterator.KIND_LINE} are accepted
 * @return {@code true} when the break type is accepted and the script
 *         property of {@code c} is {@code UScript.KHMER}
 */
public boolean handles(int c, int breakType) {
    final boolean wordOrLine =
            (breakType == BreakIterator.KIND_WORD) || (breakType == BreakIterator.KIND_LINE);
    if (!wordOrLine) {
        return false;
    }
    return UCharacter.getIntPropertyValue(c, UProperty.SCRIPT) == UScript.KHMER;
}
 
Example 3
Source File: BurmeseBreakEngine.java    From fitnotifications with Apache License 2.0 5 votes vote down vote up
/**
 * Tells whether this engine applies to a character for a given break type.
 *
 * @param c         code point to test
 * @param breakType break kind in use
 * @return {@code true} only for word or line breaking when the script
 *         property of {@code c} is {@code UScript.MYANMAR}
 */
@Override
public boolean handles(int c, int breakType) {
    // Short-circuit evaluation keeps the script lookup off the path for other break types.
    return (breakType == BreakIterator.KIND_WORD || breakType == BreakIterator.KIND_LINE)
            && UCharacter.getIntPropertyValue(c, UProperty.SCRIPT) == UScript.MYANMAR;
}
 
Example 4
Source File: UnhandledBreakEngine.java    From fitnotifications with Apache License 2.0 5 votes vote down vote up
/**
 * Records a character as handled for the given break type by adding every
 * character sharing its script property value to {@code fHandled[breakType]}.
 *
 * <p>Nothing happens when {@code breakType} is not a valid index into
 * {@code fHandled}, when {@code c} equals {@code DONE32}, or when the set
 * for that break type already contains {@code c}.
 *
 * @param c         code point to record
 * @param breakType index into {@code fHandled}
 */
public synchronized void handleChar(int c, int breakType) {
    final boolean indexInRange = breakType >= 0 && breakType < fHandled.length;
    if (!indexInRange || c == DONE32) {
        return;
    }
    if (fHandled[breakType].contains(c)) {
        return;
    }
    final int scriptCode = UCharacter.getIntPropertyValue(c, UProperty.SCRIPT);
    fHandled[breakType].applyIntPropertyValue(UProperty.SCRIPT, scriptCode);
}
 
Example 5
Source File: LaoBreakEngine.java    From fitnotifications with Apache License 2.0 5 votes vote down vote up
/**
 * Tells whether this engine applies to a character for a given break type.
 *
 * @param c         code point to test
 * @param breakType break kind in use
 * @return {@code true} only for word or line breaking when the script
 *         property of {@code c} is {@code UScript.LAO}
 */
public boolean handles(int c, int breakType) {
    final boolean supportedKind =
            breakType == BreakIterator.KIND_WORD || breakType == BreakIterator.KIND_LINE;
    // The script is looked up only once the break type has been accepted.
    return supportedKind
            && UCharacter.getIntPropertyValue(c, UProperty.SCRIPT) == UScript.LAO;
}
 
Example 6
Source File: ThaiBreakEngine.java    From fitnotifications with Apache License 2.0 5 votes vote down vote up
/**
 * Tells whether this engine applies to a character for a given break type.
 *
 * @param c         code point to test
 * @param breakType break kind in use
 * @return {@code true} only for word or line breaking when the script
 *         property of {@code c} is {@code UScript.THAI}
 */
public boolean handles(int c, int breakType) {
    // Reject anything that is neither word nor line breaking up front.
    if (breakType != BreakIterator.KIND_WORD && breakType != BreakIterator.KIND_LINE) {
        return false;
    }
    final int scriptCode = UCharacter.getIntPropertyValue(c, UProperty.SCRIPT);
    return scriptCode == UScript.THAI;
}
 
Example 7
Source File: CharacterPropertiesImpl.java    From trekarta with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Returns the inclusion set for an integer-valued property, building and
 * caching it in {@code inclusions} on first use.
 *
 * <p>The set is derived from the inclusions of the property's data source:
 * each code point in those ranges whose property value differs from the
 * previously seen value is added, so the result marks where the value changes.
 *
 * @param prop property selector; asserted to lie in
 *             {@code [UProperty.INT_START, UProperty.INT_LIMIT)}
 * @return the cached (compacted) inclusion set for {@code prop}
 */
private static UnicodeSet getIntPropInclusions(int prop) {
    assert(UProperty.INT_START <= prop && prop < UProperty.INT_LIMIT);

    // Int-property slots sit after the per-source slots in the cache array.
    final int slot = UCharacterProperty.SRC_COUNT + prop - UProperty.INT_START;
    if (inclusions[slot] != null) {
        return inclusions[slot];
    }

    final int source = UCharacterProperty.INSTANCE.getSource(prop);
    final UnicodeSet sourceInclusions = getInclusionsForSource(source);

    final UnicodeSet changePoints = new UnicodeSet(0, 0);
    final int rangeCount = sourceInclusions.getRangeCount();
    int lastValue = 0;
    for (int r = 0; r < rangeCount; ++r) {
        final int last = sourceInclusions.getRangeEnd(r);
        for (int cp = sourceInclusions.getRangeStart(r); cp <= last; ++cp) {
            // TODO: Get a UCharacterProperty.IntProperty to avoid the property dispatch.
            final int current = UCharacter.getIntPropertyValue(cp, prop);
            if (current == lastValue) {
                continue;
            }
            changePoints.add(cp);
            lastValue = current;
        }
    }

    // Compact for caching.
    final UnicodeSet compacted = changePoints.compact();
    inclusions[slot] = compacted;
    return compacted;
}
 
Example 8
Source File: RBBITableBuilder.java    From fitnotifications with Apache License 2.0 4 votes vote down vote up
/**
 * Extends followPos sets so that a match ending at one leaf can chain into
 * another rule that starts with the same character class.
 *
 * <p>For every leaf node whose followPos set contains an end-marker node,
 * the followPos of each chain-in start leaf with the same {@code fVal} is
 * merged into that leaf's followPos. When {@code fRB.fLBCMNoChain} is set,
 * leaves whose first character has the LINE_BREAK value COMBINING_MARK are
 * skipped.
 *
 * @param tree root of the parse tree whose nodes are searched and updated
 */
void calcChainedFollowPos(RBBINode tree) {

           List<RBBINode> endMarkerNodes = new ArrayList<RBBINode>();
           List<RBBINode> leafNodes      = new ArrayList<RBBINode>();

            // Get a list of all end-marker nodes.
           tree.findNodes(endMarkerNodes, RBBINode.endMark);

           // Get a list of all leaf nodes.
           tree.findNodes(leafNodes, RBBINode.leafChar);

           // Collect all leaf nodes that can start matches for rules
           // with inbound chaining enabled, which is the union of the
           // firstPosition sets from each of the rule root nodes.

           List<RBBINode> ruleRootNodes = new ArrayList<RBBINode>();
           addRuleRootNodes(ruleRootNodes, tree);

           Set<RBBINode> matchStartNodes = new HashSet<RBBINode>();
           for (RBBINode node: ruleRootNodes) {
               if (node.fChainIn) {
                   matchStartNodes.addAll(node.fFirstPosSet);
               }
           }

           // Iterate over all leaf nodes, extending the followPos of those
           // that can end a match.
           for (RBBINode tNode : leafNodes) {
               RBBINode endNode = null;

               // Identify leaf nodes that correspond to overall rule match positions.
               //   These include an endMarkerNode in their followPos sets.
               for (RBBINode endMarkerNode : endMarkerNodes) {
                   if (tNode.fFollowPos.contains(endMarkerNode)) {
                       endNode = tNode;
                       break;
                   }
               }
               if (endNode == null) {
                   // node wasn't an end node.  Try again with the next.
                   continue;
               }

               // We've got a node that can end a match.

               // Line Break Specific hack:  If this node's val corresponds to the $CM char class,
               //                            don't chain from it.
               // TODO:  Add rule syntax for this behavior, get specifics out of here and
               //        into the rule file.
               if (fRB.fLBCMNoChain) {
                   int c = this.fRB.fSetBuilder.getFirstChar(endNode.fVal);
                   if (c != -1) {
                       // c == -1 occurs with sets containing only the {eof} marker string.
                       int cLBProp = UCharacter.getIntPropertyValue(c, UProperty.LINE_BREAK);
                       if (cLBProp == UCharacter.LineBreak.COMBINING_MARK) {
                           continue;
                       }
                   }
               }


               // Now iterate over the nodes that can start a match, looking for ones
               //   with the same char class as our ending node.
               for (RBBINode startNode : matchStartNodes) {
                   // Only leaf-character nodes are considered as chain targets.
                   if (startNode.fType != RBBINode.leafChar) {
                       continue;
                   }

                   if (endNode.fVal == startNode.fVal) {
                       // The end val (character class) of one possible match is the
                       //   same as the start of another.

                       // Add all nodes from the followPos of the start node to the
                       //  followPos set of the end node, which will have the effect of
                       //  letting matches transition from a match state at endNode
                       //  to the second char of a match starting with startNode.
                       endNode.fFollowPos.addAll(startNode.fFollowPos);
                   }
               }
           }
       }
 
Example 9
Source File: UnicodeSet.java    From fitnotifications with Apache License 2.0 4 votes vote down vote up
/**
 * Tests a code point against this filter's property/value pair.
 *
 * @param ch code point to test
 * @return {@code true} when the value of property {@code prop} for
 *         {@code ch} equals {@code value}
 */
@Override
public boolean contains(int ch) {
    final int actual = UCharacter.getIntPropertyValue(ch, prop);
    return actual == value;
}
 
Example 10
Source File: RuleBasedBreakIterator.java    From fitnotifications with Apache License 2.0 4 votes vote down vote up
/**
 * Finds or creates the language break engine responsible for a character.
 *
 * <p>An already registered engine whose {@code handles(c, fBreakType)} is
 * true is returned directly. Otherwise an engine is chosen from the script
 * property of {@code c} and, unless it is {@code fUnhandledBreakEngine},
 * registered in {@code fBreakEngines} under that script.
 *
 * @param c the character for which a break engine is needed
 * @return the engine to use for {@code c}; {@code null} if constructing an
 *         engine threw an {@code IOException}
 */
private LanguageBreakEngine getLanguageBreakEngine(int c) {

        // We have a dictionary character.
        // Does an already instantiated break engine handle it?
        for (LanguageBreakEngine candidate : fBreakEngines.values()) {
            if (candidate.handles(c, fBreakType)) {
                return candidate;
            }
        }

        // if we don't have an existing engine, build one.
        int script = UCharacter.getIntPropertyValue(c, UProperty.SCRIPT);
        if (script == UScript.KATAKANA || script == UScript.HIRAGANA) {
            // Katakana, Hiragana and Han are handled by the same dictionary engine.
            // Fold them together for mapping from script -> engine.
            script = UScript.HAN;
        }

        // NOTE(review): with the check below commented out, the value fetched here
        // is overwritten on every path through the following block.
        LanguageBreakEngine eng = fBreakEngines.get(script);
        /*
        if (eng != null && !eng.handles(c, fBreakType)) {
            fUnhandledBreakEngine.handleChar(c, getBreakType());
            eng = fUnhandledBreakEngine;
        } else  */  {
            try {
                switch (script) {
                case UScript.THAI:
                    eng = new ThaiBreakEngine();
                    break;
                case UScript.LAO:
                    eng = new LaoBreakEngine();
                    break;
                case UScript.MYANMAR:
                    eng = new BurmeseBreakEngine();
                    break;
                case UScript.KHMER:
                    eng = new KhmerBreakEngine();
                    break;
                case UScript.HAN:
                    // A CJK engine is created for word breaking only; other break
                    // types fall back to the unhandled engine.
                    if (getBreakType() == KIND_WORD) {
                        eng = new CjkBreakEngine(false);
                    }
                    else {
                        fUnhandledBreakEngine.handleChar(c, getBreakType());
                        eng = fUnhandledBreakEngine;
                    }
                    break;
                case UScript.HANGUL:
                    // Same as HAN, but the CJK engine is constructed with 'true'.
                    if (getBreakType() == KIND_WORD) {
                        eng = new CjkBreakEngine(true);
                    } else {
                        fUnhandledBreakEngine.handleChar(c, getBreakType());
                        eng = fUnhandledBreakEngine;
                    }
                    break;
                default:
                    // No dedicated engine for this script: record the character
                    // with the unhandled engine and use that.
                    fUnhandledBreakEngine.handleChar(c, getBreakType());
                    eng = fUnhandledBreakEngine;
                    break;
                }
            } catch (IOException e) {
                // Engine construction failed; the caller receives null.
                eng = null;
            }
        }

        // The shared unhandled engine is never registered under a script.
        if (eng != null && eng != fUnhandledBreakEngine) {
            LanguageBreakEngine existingEngine = fBreakEngines.putIfAbsent(script, eng);
            if (existingEngine != null) {
                // There was a race & another thread was first to register an engine for this script.
                // Use theirs and discard the one we just created.
                eng = existingEngine;
            }
            // assert eng.handles(c, fBreakType);
        }
        return eng;
    }
 
Example 11
Source File: UnicodeSet.java    From trekarta with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Checks whether a code point carries the property value this filter selects.
 *
 * @param ch code point to test
 * @return {@code true} if looking up property {@code prop} for {@code ch}
 *         yields {@code value}, {@code false} otherwise
 */
@Override
public boolean contains(int ch) {
    final int propertyValue = UCharacter.getIntPropertyValue(ch, prop);
    return propertyValue == value;
}
 
Example 12
Source File: UnicodeData.java    From es6draft with MIT License 4 votes vote down vote up
/**
 * Checks whether a code point has the given value for this object's property.
 *
 * @param codePoint code point to test
 * @param value     property value to compare against
 * @return {@code true} if the value of property {@code propertyId} for
 *         {@code codePoint} equals {@code value}
 */
public boolean has(int codePoint, int value) {
    final int actual = UCharacter.getIntPropertyValue(codePoint, propertyId);
    return actual == value;
}