com.ibm.icu.lang.UProperty Java Examples

The following examples show how to use com.ibm.icu.lang.UProperty. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: UnicodeDataTest.java    From es6draft with MIT License 6 votes vote down vote up
/**
 * Verifies that {@code isBinaryProperty} accepts the name of every ICU binary property.
 *
 * <p>For each property id in {@code [UProperty.BINARY_START, UProperty.BINARY_LIMIT)} the
 * short name and then the long name are looked up; every non-null name is handed to
 * {@code isBinaryProperty}. The test succeeds when none of those calls throws.
 */
@SuppressWarnings("deprecation")
@Test
public void testAllICUBinaryProperties() {
    final int[] nameChoices = { UProperty.NameChoice.SHORT, UProperty.NameChoice.LONG };
    for (int property = UProperty.BINARY_START; property < UProperty.BINARY_LIMIT; ++property) {
        for (int nameChoice : nameChoices) {
            String name = UCharacter.getPropertyName(property, nameChoice);
            if (name == null) {
                continue;
            }
            // Does not throw; the return value is deliberately ignored.
            isBinaryProperty(name);
        }
    }
}
 
Example #2
Source File: ICUTokenizerFactory.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a new ICUTokenizerFactory.
 *
 * <p>When the {@code RULEFILES} argument is present it is split into entries of the form
 * {@code script:resourcePath}. Each script name is resolved to its ICU
 * {@code UProperty.SCRIPT} value and mapped to the (trimmed) resource path in
 * {@code tailored}.
 *
 * @param args factory arguments; {@code cjkAsWords} and {@code myanmarAsWords} both
 *             default to {@code true}
 * @throws IllegalArgumentException if a rule-file entry lacks the {@code ':'} separator,
 *         or if {@code args} is not empty after the known parameters have been read
 */
public ICUTokenizerFactory(Map<String,String> args) {
  super(args);
  tailored = new HashMap<>();
  String rulefilesArg = get(args, RULEFILES);
  if (rulefilesArg != null) {
    List<String> scriptAndResourcePaths = splitFileNames(rulefilesArg);
    for (String scriptAndResourcePath : scriptAndResourcePaths) {
      int colonPos = scriptAndResourcePath.indexOf(':');
      if (colonPos < 0) {
        // Without this check substring(0, -1) would fail with an opaque
        // StringIndexOutOfBoundsException that hides the offending entry.
        throw new IllegalArgumentException(
            "Invalid " + RULEFILES + " entry (expected 'script:resourcePath'): " + scriptAndResourcePath);
      }
      String scriptCode = scriptAndResourcePath.substring(0, colonPos).trim();
      String resourcePath = scriptAndResourcePath.substring(colonPos + 1).trim();
      tailored.put(UCharacter.getPropertyValueEnum(UProperty.SCRIPT, scriptCode), resourcePath);
    }
  }
  cjkAsWords = getBoolean(args, "cjkAsWords", true);
  myanmarAsWords = getBoolean(args, "myanmarAsWords", true);
  if (!args.isEmpty()) {
    throw new IllegalArgumentException("Unknown parameters: " + args);
  }
}
 
Example #3
Source File: UBiDiProps.java    From fitnotifications with Apache License 2.0 6 votes vote down vote up
/**
 * Extracts one field from the packed word stored at {@code indexes[IX_MAX_VALUES]}.
 *
 * @param which a {@code UProperty} selector
 * @return the masked (and, where applicable, right-shifted) field for
 *         {@code BIDI_CLASS}, {@code JOINING_GROUP}, {@code JOINING_TYPE} or
 *         {@code BIDI_PAIRED_BRACKET_TYPE}; {@code -1} for any other selector
 */
public final int getMaxValue(int which) {
    final int packed = indexes[IX_MAX_VALUES];
    final int mask;
    final int shift;
    switch (which) {
    case UProperty.BIDI_CLASS:
        // The class field sits in the low bits, so no shift is needed.
        mask = CLASS_MASK;
        shift = 0;
        break;
    case UProperty.JOINING_GROUP:
        mask = MAX_JG_MASK;
        shift = MAX_JG_SHIFT;
        break;
    case UProperty.JOINING_TYPE:
        mask = JT_MASK;
        shift = JT_SHIFT;
        break;
    case UProperty.BIDI_PAIRED_BRACKET_TYPE:
        mask = BPT_MASK;
        shift = BPT_SHIFT;
        break;
    default:
        return -1; /* undefined */
    }
    return (packed & mask) >> shift;
}
 
Example #4
Source File: CollationRuleParser.java    From fitnotifications with Apache License 2.0 6 votes vote down vote up
/**
 * Converts the textual form of a script or reorder code into its integer code.
 *
 * <p>Lookup order: the special reorder-code names (case-insensitive), then the ICU
 * {@code UProperty.SCRIPT} value names, then the literal word "others".
 *
 * @param word the name to resolve
 * @return the script/reorder code, or -1 if not recognized
 */
public static int getReorderCode(String word) {
    int specialIndex = 0;
    while (specialIndex < gSpecialReorderCodes.length) {
        if (word.equalsIgnoreCase(gSpecialReorderCodes[specialIndex])) {
            return Collator.ReorderCodes.FIRST + specialIndex;
        }
        ++specialIndex;
    }

    int scriptCode;
    try {
        scriptCode = UCharacter.getPropertyValueEnum(UProperty.SCRIPT, word);
    } catch (IllegalIcuArgumentException e) {
        // Not a script name; continue with the remaining checks.
        scriptCode = -1;
    }
    if (scriptCode >= 0) {
        return scriptCode;
    }

    // "others" is the same as Zzzz = USCRIPT_UNKNOWN
    return word.equalsIgnoreCase("others") ? Collator.ReorderCodes.OTHERS : -1;
}
 
Example #5
Source File: UBiDiProps.java    From trekarta with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Reads the packed word at {@code indexes[IX_MAX_VALUES]} and returns the field that
 * corresponds to the requested property.
 *
 * @param which a {@code UProperty} selector
 * @return the extracted field for the four supported selectors, otherwise {@code -1}
 */
public final int getMaxValue(int which) {
    final int maxValues = indexes[IX_MAX_VALUES];
    if (which == UProperty.BIDI_CLASS) {
        return maxValues & CLASS_MASK;
    }
    if (which == UProperty.JOINING_GROUP) {
        return (maxValues & MAX_JG_MASK) >> MAX_JG_SHIFT;
    }
    if (which == UProperty.JOINING_TYPE) {
        return (maxValues & JT_MASK) >> JT_SHIFT;
    }
    if (which == UProperty.BIDI_PAIRED_BRACKET_TYPE) {
        return (maxValues & BPT_MASK) >> BPT_SHIFT;
    }
    return -1; /* undefined */
}
 
Example #6
Source File: PrintUtils.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Tells whether a code point is full width, according to Unicode Standard version 12.0.0.
 * See http://unicode.org/reports/tr11/
 *
 * @param codePoint the code point to classify
 * @return {@code true} when its East Asian Width is FULLWIDTH or WIDE, {@code false} for
 *         NEUTRAL, AMBIGUOUS, HALFWIDTH and NARROW
 * @throws RuntimeException if ICU reports a width value outside those six
 */
public static boolean isFullWidth(int codePoint) {
	final int width = UCharacter.getIntPropertyValue(codePoint, UProperty.EAST_ASIAN_WIDTH);
	switch (width) {
		case UCharacter.EastAsianWidth.FULLWIDTH:
		case UCharacter.EastAsianWidth.WIDE:
			return true;
		case UCharacter.EastAsianWidth.NEUTRAL:
		case UCharacter.EastAsianWidth.AMBIGUOUS:
		case UCharacter.EastAsianWidth.HALFWIDTH:
		case UCharacter.EastAsianWidth.NARROW:
			return false;
		default:
			throw new RuntimeException("unknown UProperty.EAST_ASIAN_WIDTH: " + width);
	}
}
 
Example #7
Source File: UnicodeData.java    From es6draft with MIT License 6 votes vote down vote up
/**
 * Tests whether {@code valueAlias} is an exact (not loosely matched) name of a value of
 * this property.
 *
 * <p>The alias is first resolved to a value id; the alias is then compared against the
 * short name and against each successive long-name choice of that value.
 *
 * @param valueAlias the candidate value name
 * @return {@code true} on an exact match, {@code false} once an
 *         {@link IllegalArgumentException} is raised by any of the lookups
 */
public boolean isValue(String valueAlias) {
    // Don't allow loose matching.
    try {
        final int valueId = UCharacter.getPropertyValueEnum(propertyId, valueAlias);
        String candidate = UCharacter.getPropertyValueName(propertyId, valueId, UProperty.NameChoice.SHORT);
        int nextChoice = UProperty.NameChoice.LONG;
        // No explicit upper bound: the loop ends either on a match or when a lookup
        // throws (presumably once the name choices are exhausted -- confirm against ICU).
        while (candidate == null || !candidate.equals(valueAlias)) {
            candidate = UCharacter.getPropertyValueName(propertyId, valueId, nextChoice++);
        }
        return true;
    } catch (IllegalArgumentException e) {
        return false;
    }
}
 
Example #8
Source File: UnicodeData.java    From es6draft with MIT License 5 votes vote down vote up
/**
 * Resolves a property name to its {@code Property}, accepting exact names only.
 *
 * <p>The name is resolved to a property id, and then must equal either the short name
 * or one of the successive long-name choices of that id.
 *
 * @param name the property name to resolve
 * @return the result of {@code BinaryProperty.forId} for ids in
 *         {@code [UProperty.BINARY_START, BINARY_PROPERTY_LIMIT)}, otherwise the result of
 *         {@code EnumProperty.forId}; {@code null} for {@code GENERAL_CATEGORY_MASK} or
 *         when a lookup throws {@link IllegalArgumentException}
 */
static Property from(String name) {
    // Don't allow loose matching.
    int property;
    try {
        property = UCharacter.getPropertyEnum(name);
        // Filter out synthetic names.
        if (property == UProperty.GENERAL_CATEGORY_MASK) {
            return null;
        }
        String candidate = UCharacter.getPropertyName(property, UProperty.NameChoice.SHORT);
        int nextChoice = UProperty.NameChoice.LONG;
        // Ends on an exact match, or via the exception handler below when a lookup
        // throws (presumably once the name choices run out -- confirm against ICU).
        while (candidate == null || !candidate.equals(name)) {
            candidate = UCharacter.getPropertyName(property, nextChoice++);
        }
    } catch (IllegalArgumentException e) {
        return null;
    }
    final boolean isBinary = property >= UProperty.BINARY_START && property < BINARY_PROPERTY_LIMIT;
    if (isBinary) {
        return BinaryProperty.forId(property);
    }
    return EnumProperty.forId(property);
}
 
Example #9
Source File: UnicodeData.java    From es6draft with MIT License 5 votes vote down vote up
/**
 * Finds the {@code BinaryProperty} constant with the given ICU property id.
 *
 * @param propertyId the ICU property id
 * @return the matching constant, or {@code null} when the id is outside
 *         {@code [UProperty.BINARY_START, BINARY_PROPERTY_LIMIT)} or no constant has it
 */
static BinaryProperty forId(int propertyId) {
    if (propertyId < UProperty.BINARY_START || propertyId >= BINARY_PROPERTY_LIMIT) {
        return null;
    }
    for (BinaryProperty candidate : BinaryProperty.values()) {
        if (candidate.propertyId == propertyId) {
            return candidate;
        }
    }
    return null;
}
 
Example #10
Source File: UCharacterProperty.java    From trekarta with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Returns the maximum value of a binary or integer property.
 *
 * @param which a {@code UProperty} selector
 * @return 1 for selectors below {@code INT_START} that lie in the binary range, the
 *         value from the matching {@code intProps} entry for selectors in
 *         {@code [INT_START, INT_LIMIT)}, and -1 otherwise
 */
public int getIntPropertyMaxValue(int which) {
    if (which >= UProperty.INT_START) {
        if (which < UProperty.INT_LIMIT) {
            return intProps[which - UProperty.INT_START].getMaxValue(which);
        }
        return -1; // undefined
    }
    final boolean isBinary = UProperty.BINARY_START <= which && which < UProperty.BINARY_LIMIT;
    // maximum TRUE for all binary properties
    return isBinary ? 1 : -1;
}
 
Example #11
Source File: UCharacterProperty.java    From trekarta with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Returns the value of a binary, integer or general-category-mask property for a code point.
 *
 * @param c     the code point
 * @param which a {@code UProperty} selector
 * @return 1 or 0 for a binary property, the {@code intProps} value for an integer
 *         property, the category mask for {@code GENERAL_CATEGORY_MASK}, and 0 for any
 *         other selector
 */
public int getIntPropertyValue(int c, int which) {
    if (which < UProperty.INT_START) {
        final boolean isBinary = UProperty.BINARY_START <= which && which < UProperty.BINARY_LIMIT;
        // Selectors below INT_START that are not binary properties are undefined -> 0.
        return (isBinary && binProps[which].contains(c)) ? 1 : 0;
    }
    if (which < UProperty.INT_LIMIT) {
        return intProps[which - UProperty.INT_START].getValue(c);
    }
    if (which == UProperty.GENERAL_CATEGORY_MASK) {
        return getMask(getType(c));
    }
    return 0; // undefined
}
 
Example #12
Source File: UCharacterProperty.java    From trekarta with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Tests whether a code point has the given binary property.
 *
 * @param c     the code point
 * @param which a {@code UProperty} selector
 * @return {@code false} when {@code which} is outside
 *         {@code [UProperty.BINARY_START, UProperty.BINARY_LIMIT)}, otherwise the answer of
 *         the matching {@code binProps} entry
 */
public boolean hasBinaryProperty(int c, int which) {
    // Short-circuit evaluation keeps binProps from being indexed by an unknown property.
    return UProperty.BINARY_START <= which
            && which < UProperty.BINARY_LIMIT
            && binProps[which].contains(c);
}
 
Example #13
Source File: UPropertyAliases.java    From trekarta with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Looks up the value enum for one of a property's value names. Does not throw.
 *
 * @param property the property enum
 * @param alias    one of the property's value names
 * @return value enum, or UProperty.UNDEFINED if not defined for that property
 */
public int getPropertyValueEnumNoThrow(int property, CharSequence alias) {
    final int propertyIndex = findProperty(property);
    if (propertyIndex == 0) {
        return UProperty.UNDEFINED;
    }
    final int valueMapStart = valueMaps[propertyIndex + 1];
    if (valueMapStart == 0) {
        return UProperty.UNDEFINED;
    }
    // valueMapStart is where the property's valueMap begins;
    // its first word is the BytesTrie offset.
    final int bytesTrieOffset = valueMaps[valueMapStart];
    return getPropertyOrValueEnum(bytesTrieOffset, alias);
}
 
Example #14
Source File: UnicodeData.java    From es6draft with MIT License 5 votes vote down vote up
/**
 * Finds the {@code EnumProperty} constant with the given ICU property id.
 *
 * <p>Only ids in {@code [UProperty.INT_START, INT_PROPERTY_LIMIT)}, plus
 * {@code GENERAL_CATEGORY_MASK} and {@code SCRIPT_EXTENSIONS}, are considered.
 *
 * @param propertyId the ICU property id
 * @return {@code General_Category} for {@code UProperty.GENERAL_CATEGORY}, otherwise the
 *         first constant whose {@code propertyId} matches, or {@code null} if none does
 */
static EnumProperty forId(int propertyId) {
    final boolean inIntRange = propertyId >= UProperty.INT_START && propertyId < INT_PROPERTY_LIMIT;
    final boolean isSpecial = propertyId == UProperty.GENERAL_CATEGORY_MASK
            || propertyId == UProperty.SCRIPT_EXTENSIONS;
    if (!inIntRange && !isSpecial) {
        return null;
    }
    if (propertyId == UProperty.GENERAL_CATEGORY) {
        return General_Category;
    }
    for (EnumProperty candidate : EnumProperty.values()) {
        if (candidate.propertyId == propertyId) {
            return candidate;
        }
    }
    return null;
}
 
Example #15
Source File: UnicodeDataTest.java    From es6draft with MIT License 5 votes vote down vote up
/**
 * Checks the value range reported by ICU for every integer-valued property: the minimum
 * is non-negative, does not exceed the maximum, and the maximum stays below 512.
 */
@SuppressWarnings("deprecation")
@Test
public void testLimits() {
    // integer valued properties
    int property = UProperty.INT_START;
    while (property < UProperty.INT_LIMIT) {
        final int lowest = UCharacter.getIntPropertyMinValue(property);
        final int highest = UCharacter.getIntPropertyMaxValue(property);

        assertTrue(String.format("min=%d", lowest), 0 <= lowest);
        assertTrue(String.format("min=%d, max=%d", lowest, highest), highest >= lowest);
        assertTrue(String.format("max=%d", highest), 512 > highest); // BINARY_MASK in UEncoding
        ++property;
    }
}
 
Example #16
Source File: UPropertyAliases.java    From trekarta with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Looks up {@code alias} in the {@code BytesTrie} that starts at {@code bytesTrieOffset}
 * within {@code bytesTries}.
 *
 * @return the trie value when {@code containsName} finds the alias, otherwise
 *         {@code UProperty.UNDEFINED}
 */
private int getPropertyOrValueEnum(int bytesTrieOffset, CharSequence alias) {
    final BytesTrie nameTrie = new BytesTrie(bytesTries, bytesTrieOffset);
    return containsName(nameTrie, alias) ? nameTrie.getValue() : UProperty.UNDEFINED;
}
 
Example #17
Source File: KhmerBreakEngine.java    From fitnotifications with Apache License 2.0 5 votes vote down vote up
/**
 * Reports whether this engine handles the given character for the given break type.
 *
 * @return {@code true} only for word or line breaking when the character's
 *         {@code UProperty.SCRIPT} value is {@code UScript.KHMER}
 */
public boolean handles(int c, int breakType) {
    final boolean supportedKind =
            breakType == BreakIterator.KIND_WORD || breakType == BreakIterator.KIND_LINE;
    // The script is only looked up for supported break kinds.
    return supportedKind && UCharacter.getIntPropertyValue(c, UProperty.SCRIPT) == UScript.KHMER;
}
 
Example #18
Source File: CharacterPropertiesImpl.java    From trekarta with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Returns the cached inclusions set for an integer property, building it on first use.
 *
 * <p>The set is seeded with the range [0, 0]. Every code point of the property's source
 * inclusions is then scanned, and each one whose property value differs from the
 * previously scanned value is added. The compacted result is stored in
 * {@code inclusions} at index {@code SRC_COUNT + prop - INT_START}.
 *
 * @param prop an integer property, {@code INT_START <= prop < INT_LIMIT}
 */
private static UnicodeSet getIntPropInclusions(int prop) {
    assert(UProperty.INT_START <= prop && prop < UProperty.INT_LIMIT);
    final int slot = UCharacterProperty.SRC_COUNT + prop - UProperty.INT_START;
    final UnicodeSet cached = inclusions[slot];
    if (cached != null) {
        return cached;
    }

    final UnicodeSet sourceSet =
            getInclusionsForSource(UCharacterProperty.INSTANCE.getSource(prop));
    final UnicodeSet valueStarts = new UnicodeSet(0, 0);
    final int rangeCount = sourceSet.getRangeCount();
    int lastValue = 0;
    for (int range = 0; range < rangeCount; ++range) {
        final int lastCodePoint = sourceSet.getRangeEnd(range);
        int codePoint = sourceSet.getRangeStart(range);
        while (codePoint <= lastCodePoint) {
            // TODO: Get a UCharacterProperty.IntProperty to avoid the property dispatch.
            final int current = UCharacter.getIntPropertyValue(codePoint, prop);
            if (current != lastValue) {
                valueStarts.add(codePoint);
                lastValue = current;
            }
            ++codePoint;
        }
    }

    // Compact for caching.
    final UnicodeSet compacted = valueStarts.compact();
    inclusions[slot] = compacted;
    return compacted;
}
 
Example #19
Source File: CharacterPropertiesImpl.java    From trekarta with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Returns the inclusions set for a property.
 * The returned UnicodeSet is mutable -- do not modify!
 *
 * @param prop a {@code UProperty} selector; integer properties
 *             ({@code INT_START <= prop < INT_LIMIT}) use their per-property set, all
 *             others use the set of the property's source
 */
public static synchronized UnicodeSet getInclusionsForProperty(int prop) {
    final boolean isIntProperty = UProperty.INT_START <= prop && prop < UProperty.INT_LIMIT;
    if (isIntProperty) {
        return getIntPropInclusions(prop);
    }
    return getInclusionsForSource(UCharacterProperty.INSTANCE.getSource(prop));
}
 
Example #20
Source File: BurmeseBreakEngine.java    From fitnotifications with Apache License 2.0 5 votes vote down vote up
/**
 * Reports whether this engine handles the given character for the given break type.
 *
 * @return {@code true} only for word or line breaking when the character's
 *         {@code UProperty.SCRIPT} value is {@code UScript.MYANMAR}
 */
@Override
public boolean handles(int c, int breakType) {
    if (breakType != BreakIterator.KIND_WORD && breakType != BreakIterator.KIND_LINE) {
        return false;
    }
    return UCharacter.getIntPropertyValue(c, UProperty.SCRIPT) == UScript.MYANMAR;
}
 
Example #21
Source File: UCharacterProperty.java    From fitnotifications with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the largest value a binary or integer property can take.
 *
 * @param which a {@code UProperty} selector
 * @return 1 for a binary property, the {@code intProps} maximum for an integer property,
 *         or -1 when the selector is neither
 */
public int getIntPropertyMaxValue(int which) {
    if (which >= UProperty.INT_START) {
        return which < UProperty.INT_LIMIT
                ? intProps[which - UProperty.INT_START].getMaxValue(which)
                : -1; // undefined
    }
    // maximum TRUE for all binary properties; anything else below INT_START is undefined
    return (UProperty.BINARY_START <= which && which < UProperty.BINARY_LIMIT) ? 1 : -1;
}
 
Example #22
Source File: ThaiBreakEngine.java    From fitnotifications with Apache License 2.0 5 votes vote down vote up
/**
 * Reports whether this engine handles the given character for the given break type.
 *
 * @return {@code true} only for word or line breaking when the character's
 *         {@code UProperty.SCRIPT} value is {@code UScript.THAI}
 */
public boolean handles(int c, int breakType) {
    switch (breakType) {
    case BreakIterator.KIND_WORD:
    case BreakIterator.KIND_LINE:
        return UCharacter.getIntPropertyValue(c, UProperty.SCRIPT) == UScript.THAI;
    default:
        return false;
    }
}
 
Example #23
Source File: SpoofChecker.java    From fitnotifications with Apache License 2.0 5 votes vote down vote up
/**
 * Adds to {@code allowedChars} the characters of every script that
 * {@code UScript.getCode} reports for {@code locale}.
 *
 * @param locale       the locale whose scripts are looked up
 * @param allowedChars the set that accumulates the result
 */
private void addScriptChars(ULocale locale, UnicodeSet allowedChars) {
    final int[] scriptCodes = UScript.getCode(locale);
    if (scriptCodes == null) {
        // It's an unknown script.
        // Maybe they asked for the script of "zxx", which refers to no linguistic content.
        // Maybe they asked for the script of a newer locale that we don't know in the older version of ICU.
        return;
    }
    final UnicodeSet scriptChars = new UnicodeSet();
    for (int scriptCode : scriptCodes) {
        scriptChars.applyIntPropertyValue(UProperty.SCRIPT, scriptCode);
        allowedChars.addAll(scriptChars);
    }
}
 
Example #24
Source File: SpoofChecker.java    From fitnotifications with Apache License 2.0 5 votes vote down vote up
/**
 * Limit characters that are acceptable in identifiers being checked to those normally used with the languages
 * associated with the specified locales. Any previously specified list of locales is replaced by the new
 * settings.
 *
 * A set of languages is determined from the locale(s), and from those a set of acceptable Unicode scripts is
 * determined. Characters from this set of scripts, along with characters from the "common" and "inherited"
 * Unicode Script categories will be permitted.
 *
 * Supplying an empty (or null) set removes all restrictions; characters from any script will be allowed,
 * and the {@link #CHAR_LIMIT} test is disabled.
 *
 * The {@link #CHAR_LIMIT} test is automatically enabled for this SpoofChecker when calling this function with a
 * non-empty list of locales.
 *
 * The Unicode Set of characters that will be allowed is accessible via the {@link #getAllowedChars} function.
 * setAllowedLocales() will <i>replace</i> any previously applied set of allowed characters.
 *
 * Adjustments, such as additions or deletions of certain classes of characters, can be made to the result of
 * {@link #setAllowedLocales} by fetching the resulting set with {@link #getAllowedChars}, manipulating it with
 * the Unicode Set API, then resetting the spoof detectors limits with {@link #setAllowedChars}.
 *
 * @param locales
 *            A Set of ULocales, from which the language and associated script are extracted. If the locales Set
 *            is null or empty, no restrictions will be placed on the allowed characters.
 *
 * @return self
 * @stable ICU 4.6
 */
public Builder setAllowedLocales(Set<ULocale> locales) {
    fAllowedCharsSet.clear();

    // A null set is documented as "no restrictions", so it must not be iterated.
    if (locales != null) {
        for (ULocale locale : locales) {
            // Add the script chars for this locale to the accumulating set
            // of allowed chars.
            addScriptChars(locale, fAllowedCharsSet);
        }
    }

    // If our caller provided no locales (null or empty), we disable the
    // allowed characters checking
    fAllowedLocales.clear();
    if (locales == null || locales.size() == 0) {
        fAllowedCharsSet.add(0, 0x10ffff);
        fChecks &= ~CHAR_LIMIT;
        return this;
    }

    // Add all common and inherited characters to the set of allowed
    // chars.
    UnicodeSet tempSet = new UnicodeSet();
    tempSet.applyIntPropertyValue(UProperty.SCRIPT, UScript.COMMON);
    fAllowedCharsSet.addAll(tempSet);
    tempSet.applyIntPropertyValue(UProperty.SCRIPT, UScript.INHERITED);
    fAllowedCharsSet.addAll(tempSet);

    // Store the updated spoof checker state. fAllowedLocales was already
    // cleared above and has not been touched since.
    fAllowedLocales.addAll(locales);
    fChecks |= CHAR_LIMIT;
    return this;
}
 
Example #25
Source File: UPropertyAliases.java    From fitnotifications with Apache License 2.0 5 votes vote down vote up
/**
 * Resolves a property or value alias through the {@code BytesTrie} located at
 * {@code bytesTrieOffset} in {@code bytesTries}.
 *
 * @return the value held by the trie if {@code containsName} matches the alias,
 *         {@code UProperty.UNDEFINED} otherwise
 */
private int getPropertyOrValueEnum(int bytesTrieOffset, CharSequence alias) {
    final BytesTrie aliasTrie = new BytesTrie(bytesTries, bytesTrieOffset);
    if (!containsName(aliasTrie, alias)) {
        return UProperty.UNDEFINED;
    }
    return aliasTrie.getValue();
}
 
Example #26
Source File: UPropertyAliases.java    From fitnotifications with Apache License 2.0 5 votes vote down vote up
/**
 * Maps one of a property's value names to its value enum. Does not throw.
 *
 * @param property the property enum
 * @param alias    a value name of that property
 * @return value enum, or UProperty.UNDEFINED if not defined for that property
 */
public int getPropertyValueEnumNoThrow(int property, CharSequence alias) {
    final int propertySlot = findProperty(property);
    // A zero slot, or a zero valueMap start, is answered with UNDEFINED.
    final int valueMapStart = (propertySlot == 0) ? 0 : valueMaps[propertySlot + 1];
    if (valueMapStart == 0) {
        return UProperty.UNDEFINED;
    }
    // valueMapStart is the start of the property's valueMap,
    // where the first word is the BytesTrie offset.
    return getPropertyOrValueEnum(valueMaps[valueMapStart], alias);
}
 
Example #27
Source File: UCharacterProperty.java    From fitnotifications with Apache License 2.0 5 votes vote down vote up
/**
 * Checks a code point against a binary property.
 *
 * @param c     the code point
 * @param which a {@code UProperty} selector
 * @return the result of the matching {@code binProps} entry, or {@code false} when
 *         {@code which} is not in {@code [UProperty.BINARY_START, UProperty.BINARY_LIMIT)}
 */
public boolean hasBinaryProperty(int c, int which) {
    final boolean knownBinary = UProperty.BINARY_START <= which && which < UProperty.BINARY_LIMIT;
    if (knownBinary) {
        return binProps[which].contains(c);
    }
    // not a known binary property
    return false;
}
 
Example #28
Source File: LaoBreakEngine.java    From fitnotifications with Apache License 2.0 5 votes vote down vote up
/**
 * Reports whether this engine handles the given character for the given break type.
 *
 * @return {@code true} only for word or line breaking when the character's
 *         {@code UProperty.SCRIPT} value is {@code UScript.LAO}
 */
public boolean handles(int c, int breakType) {
    final boolean wordOrLine =
            breakType == BreakIterator.KIND_WORD || breakType == BreakIterator.KIND_LINE;
    if (!wordOrLine) {
        return false;
    }
    final int scriptOfChar = UCharacter.getIntPropertyValue(c, UProperty.SCRIPT);
    return scriptOfChar == UScript.LAO;
}
 
Example #29
Source File: UnhandledBreakEngine.java    From fitnotifications with Apache License 2.0 5 votes vote down vote up
/**
 * Records a character as handled for a break type by applying the character's script to
 * the corresponding {@code fHandled} set.
 *
 * <p>Nothing happens when {@code breakType} is not a valid index into {@code fHandled},
 * when {@code c} is {@code DONE32}, or when the set already contains {@code c}.
 *
 * @param c         the code point
 * @param breakType index into {@code fHandled}
 */
public synchronized void handleChar(int c, int breakType) {
    final boolean validBreakType = breakType >= 0 && breakType < fHandled.length;
    if (!validBreakType || c == DONE32) {
        return;
    }
    if (fHandled[breakType].contains(c)) {
        return;
    }
    final int scriptOfChar = UCharacter.getIntPropertyValue(c, UProperty.SCRIPT);
    fHandled[breakType].applyIntPropertyValue(UProperty.SCRIPT, scriptOfChar);
}
 
Example #30
Source File: UCharacterProperty.java    From fitnotifications with Apache License 2.0 5 votes vote down vote up
/**
 * Looks up the integer value of a property for a code point.
 *
 * @param c     the code point
 * @param which a {@code UProperty} selector
 * @return 1/0 for binary properties, the {@code intProps} value for integer properties,
 *         the general-category mask for {@code GENERAL_CATEGORY_MASK}, otherwise 0
 */
public int getIntPropertyValue(int c, int which) {
    if (which >= UProperty.INT_LIMIT) {
        // Beyond the integer range only the general-category mask is supported.
        return which == UProperty.GENERAL_CATEGORY_MASK ? getMask(getType(c)) : 0;
    }
    if (which >= UProperty.INT_START) {
        return intProps[which - UProperty.INT_START].getValue(c);
    }
    if (UProperty.BINARY_START <= which && which < UProperty.BINARY_LIMIT) {
        return binProps[which].contains(c) ? 1 : 0;
    }
    return 0; // undefined
}