com.ibm.icu.lang.UCharacter Java Examples

Source File: UCharacterName.java From trekarta with GNU General Public License v3.0

6 votes

/**
* Determines the extended type of a code point.
* Non-characters are reported as NON_CHARACTER_, and the general
* surrogate category is split into a lead and a trail variant.
* @param ch code point to classify
* @return NON_CHARACTER_, LEAD_SURROGATE_, TRAIL_SURROGATE_ or the
*         value of UCharacter.getType(ch)
*/
private static int getType(int ch)
{
    if (UCharacterUtility.isNonCharacter(ch)) {
        // non-characters get their own pseudo category
        return NON_CHARACTER_;
    }
    final int category = UCharacter.getType(ch);
    if (category != UCharacterCategory.SURROGATE) {
        return category;
    }
    // surrogate: everything up to the last lead value is a lead surrogate
    return (ch <= UTF16.LEAD_SURROGATE_MAX_VALUE) ? LEAD_SURROGATE_
                                                  : TRAIL_SURROGATE_;
}

Source File: SpoofChecker.java From fitnotifications with Apache License 2.0

6 votes

/**
 * Collects the numerics of a string into {@code result}, according to UTS 39 section 5.3.
 * The set is cleared first; afterwards it holds one representative (the zero digit)
 * for each kind of decimal digit that occurs in {@code input}.
 *
 * @param input  the string to scan, code point by code point
 * @param result the set that receives the representatives
 */
private void getNumerics(String input, UnicodeSet result) {
    result.clear();

    int offset = 0;
    while (offset < input.length()) {
        final int cp = Character.codePointAt(input, offset);
        offset += Character.charCount(cp);

        if (UCharacter.getType(cp) != UCharacterCategory.DECIMAL_DIGIT_NUMBER) {
            continue;
        }
        // The zero of a decimal digit run sits at (digit - numeric value);
        // it serves as the representative used for comparison.
        final int zeroDigit = cp - UCharacter.getNumericValue(cp);
        result.add(zeroDigit);
    }
}

Source File: CollationRuleParser.java From fitnotifications with Apache License 2.0

6 votes

/**
 * Maps the textual name of a script or special reorder group to its code.
 * Special reorder names and the word "others" are matched case-insensitively;
 * anything else is looked up as a value of the SCRIPT property.
 * @param word the name to resolve
 * @return the script/reorder code, or
 * -1 if not recognized
 */
public static int getReorderCode(String word) {
    int specialIndex = 0;
    for (String special : gSpecialReorderCodes) {
        if (word.equalsIgnoreCase(special)) {
            return Collator.ReorderCodes.FIRST + specialIndex;
        }
        ++specialIndex;
    }

    int scriptCode = -1;
    try {
        scriptCode = UCharacter.getPropertyValueEnum(UProperty.SCRIPT, word);
    } catch (IllegalIcuArgumentException notAScriptName) {
        // not a script name; keep looking below
    }
    if (scriptCode >= 0) {
        return scriptCode;
    }

    // "others" is the same as Zzzz = USCRIPT_UNKNOWN
    return word.equalsIgnoreCase("others") ? Collator.ReorderCodes.OTHERS : -1;
}

Source File: RuleBasedNumberFormat.java From fitnotifications with Apache License 2.0

6 votes

/**
 * Applies the capitalization display context to a formatted result.
 * Only a non-empty result that starts with a lowercase code point is touched,
 * and only when the active capitalization context asks for title casing.
 *
 * @param result the formatted text, may be null
 * @return the title-cased text, or {@code result} unchanged
 */
private String adjustForContext(String result) {
    if (result == null || result.length() == 0
            || !UCharacter.isLowerCase(result.codePointAt(0))) {
        return result;
    }

    final DisplayContext context = getContext(DisplayContext.Type.CAPITALIZATION);
    final boolean needsTitleCase =
            context == DisplayContext.CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE
            || (context == DisplayContext.CAPITALIZATION_FOR_UI_LIST_OR_MENU && capitalizationForListOrMenu)
            || (context == DisplayContext.CAPITALIZATION_FOR_STANDALONE && capitalizationForStandAlone);
    if (!needsTitleCase) {
        return result;
    }

    if (capitalizationBrkIter == null) {
        // lazily created; should only be missing after deserialization and the like
        capitalizationBrkIter = BreakIterator.getSentenceInstance(locale);
    }
    final int options = UCharacter.TITLECASE_NO_LOWERCASE | UCharacter.TITLECASE_NO_BREAK_ADJUSTMENT;
    return UCharacter.toTitleCase(locale, result, capitalizationBrkIter, options);
}

Source File: Utility.java From fitnotifications with Apache License 2.0

6 votes

/**
 * Reads a Unicode identifier out of {@code str}, starting at {@code pos[0]}.
 * The first code point must be an identifier start; following code points
 * are consumed for as long as they are identifier parts.
 * @param str the string to parse
 * @param pos INPUT-OUTPUT parameter.  On INPUT, pos[0] is the
 * first character to examine.  It must be less than str.length(),
 * and it must not point to a whitespace character.  On
 * OUTPUT, the position after the last parsed character; it is left
 * untouched when null is returned.
 * @return the Unicode identifier, or null if the code point at
 * pos[0] cannot start an identifier.
 */
public static String parseUnicodeIdentifier(String str, int[] pos) {
    // assert(pos[0] < str.length());
    final StringBuilder identifier = new StringBuilder();
    int index = pos[0];
    while (index < str.length()) {
        final int cp = Character.codePointAt(str, index);
        final boolean atStart = identifier.length() == 0;
        if (atStart && !UCharacter.isUnicodeIdentifierStart(cp)) {
            return null;
        }
        if (!atStart && !UCharacter.isUnicodeIdentifierPart(cp)) {
            break;
        }
        identifier.appendCodePoint(cp);
        index += UTF16.getCharCount(cp);
    }
    pos[0] = index;
    return identifier.toString();
}

Source File: BreakIteratorWrapper.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0

6 votes

/**
 * Derives a word status for the text between two break positions.
 * The span is scanned code point by code point; the first digit or letter
 * found decides the status.
 *
 * @param current offset of the span start, relative to {@code start}, or BreakIterator.DONE
 * @param next    offset of the span end, relative to {@code start}, or BreakIterator.DONE
 * @return WORD_NUMBER if a digit is found first, WORD_LETTER if a letter is found first,
 *         WORD_NONE if neither occurs or either position is DONE
 */
private int calcStatus(int current, int next) {
    if (current == BreakIterator.DONE || next == BreakIterator.DONE) {
        return RuleBasedBreakIterator.WORD_NONE;
    }
    int begin = start + current;
    int end = start + next;
    int codepoint;
    for (int i = begin; i < end; i += UTF16.getCharCount(codepoint)) {
        // Read at the loop position i; reading at begin would re-test the
        // first code point on every pass and never look at the rest of the span.
        codepoint = UTF16.charAt(text, 0, end, i);
        if (UCharacter.isDigit(codepoint)) {
            return RuleBasedBreakIterator.WORD_NUMBER;
        } else if (UCharacter.isLetter(codepoint)) {
            return RuleBasedBreakIterator.WORD_LETTER;
        }
    }
    return RuleBasedBreakIterator.WORD_NONE;
}

Source File: UCharacterName.java From fitnotifications with Apache License 2.0

6 votes

/**
* Looks up the name of a Unicode code point.
* The algorithmic name is tried first; when that yields nothing, the
* extended name (for EXTENDED_CHAR_NAME) or the group name (for any other
* choice) is used instead.
*
* @param ch the code point for which to get the name.
* @param choice Selector for which name to get.
* @return the name found, or null when ch lies outside
*         UCharacter.MIN_VALUE..UCharacter.MAX_VALUE or choice is greater
*         than UCharacterNameChoice.CHAR_NAME_CHOICE_COUNT
*/
public String getName(int ch, int choice)
{
    final boolean validCodePoint = ch >= UCharacter.MIN_VALUE
                                   && ch <= UCharacter.MAX_VALUE;
    if (!validCodePoint
        || choice > UCharacterNameChoice.CHAR_NAME_CHOICE_COUNT) {
        return null;
    }

    final String algorithmicName = getAlgName(ch, choice);
    if (algorithmicName != null && algorithmicName.length() != 0) {
        return algorithmicName;
    }

    // no algorithmic name: fall back to the stored names
    if (choice == UCharacterNameChoice.EXTENDED_CHAR_NAME) {
        return getExtendedName(ch);
    }
    return getGroupName(ch, choice);
}

Source File: Locale.java From tcl-regex-java with Apache License 2.0

6 votes

/**
 * Resolves a collating element, given either as a single character or as
 * the name of a character.
 * Surrogate pairs are not handled yet: the function returns an int, but
 * callers cannot deal with values outside the BMP.
 *
 * @param what a one-character string, a key of CNAME, or a character name
 * @return the character value, or -1 if the name is not known
 * @throws RegexException if the name resolves to a code point above 0xffff
 */
static int element(String what) throws RegexException {
    if (what.length() == 1) {
        return what.charAt(0);
    }

    if (CNAME.containsKey(what)) {
        return CNAME.get(what);
    }

    final int codePoint = UCharacter.getCharFromName(what);
    if (codePoint == -1) {
        return -1;
    }
    if (codePoint > 0xffff) {
        // a named non-BMP character cannot be represented by the callers
        final String message = String.format(
                "Limitation: cannot handle equivalence outside of the BMP: %s not possible.",
                what);
        throw new RegexException(message);
    }
    return codePoint;
}

Source File: RBBIRuleScanner.java From fitnotifications with Apache License 2.0

6 votes

/**
 * Returns a copy of the rule source with '#' comments and ISO control
 * characters removed.
 * A comment runs from '#' up to and including the next CR, LF or NEL.
 * A comment that reaches the end of the input without a line terminator
 * is dropped completely.
 *
 * @param rules the rule source text
 * @return the stripped rule text
 */
static String stripRules(String rules) {
    StringBuilder strippedRules = new StringBuilder();
    int rulesLength = rules.length();
    for (int idx = 0; idx < rulesLength;) {
        char ch = rules.charAt(idx++);
        if (ch == '#') {
            // Consume the comment body through its line terminator.
            boolean terminated = false;
            while (idx < rulesLength && !terminated) {
                ch = rules.charAt(idx++);
                terminated = (ch == '\r' || ch == '\n' || ch == chNEL);
            }
            if (!terminated) {
                // The comment ran to the end of the input; without this exit the
                // '#' or the last comment character would be appended below.
                break;
            }
        }
        if (!UCharacter.isISOControl(ch)) {
            strippedRules.append(ch);
        }
    }
    return strippedRules.toString();
}

Source File: UCharacterName.java From fitnotifications with Apache License 2.0

6 votes

/**
* Classifies a code point into its extended character type.
* @param ch code point to be tested
* @return NON_CHARACTER_ for non-characters, LEAD_SURROGATE_ or
*         TRAIL_SURROGATE_ for surrogates, otherwise the type reported by
*         UCharacter.getType(ch)
*/
private static int getType(int ch)
{
    // non-characters are answered with the dedicated pseudo type
    if (UCharacterUtility.isNonCharacter(ch)) {
        return NON_CHARACTER_;
    }

    final int generalType = UCharacter.getType(ch);
    if (generalType == UCharacterCategory.SURROGATE) {
        // refine the surrogate category into lead and trail
        final boolean isLead = ch <= UTF16.LEAD_SURROGATE_MAX_VALUE;
        return isLead ? LEAD_SURROGATE_ : TRAIL_SURROGATE_;
    }
    return generalType;
}

Source File: AlphabeticIndex.java From fitnotifications with Apache License 2.0

6 votes

/**
 * Returns a copy of {@code item} with a CGJ placed between its characters,
 * except between the two halves of a surrogate pair.
 * Input must have more than 2 codepoints; an empty string is not accepted.
 * <p>This is used to test whether contractions sort differently from their components.
 */
private String separated(String item) {
    final StringBuilder joined = new StringBuilder();
    char previous = item.charAt(0);
    joined.append(previous);

    final int length = item.length();
    int index = 1;
    while (index < length) {
        final char current = item.charAt(index++);
        final boolean insideSurrogatePair =
                UCharacter.isHighSurrogate(previous) && UCharacter.isLowSurrogate(current);
        if (!insideSurrogatePair) {
            joined.append(CGJ);
        }
        joined.append(current);
        previous = current;
    }
    return joined.toString();
}

Source File: UnicodeDataTest.java From es6draft with MIT License

6 votes

/**
 * Feeds the short and the long name of every ICU binary property to
 * {@code isBinaryProperty}; the test passes as long as no call throws.
 */
@SuppressWarnings("deprecation")
@Test
public void testAllICUBinaryProperties() {
    final int[] nameChoices = { UProperty.NameChoice.SHORT, UProperty.NameChoice.LONG };
    for (int property = UProperty.BINARY_START; property < UProperty.BINARY_LIMIT; ++property) {
        for (int nameChoice : nameChoices) {
            String name = UCharacter.getPropertyName(property, nameChoice);
            if (name == null) {
                continue;
            }
            // Does not throw.
            isBinaryProperty(name);
        }
    }
}

Source File: TransliteratorParser.java From fitnotifications with Apache License 2.0

6 votes

/**
 * Implement SymbolTable API.  Extracts a symbol reference name that
 * begins at the current parse position: one identifier-start character
 * followed by identifier-part characters, never reading at or past
 * {@code limit}.
 * On success the parse position is moved behind the name; otherwise it
 * is left alone and null is returned.
 */
@Override
public String parseReference(String text, ParsePosition pos, int limit) {
    final int begin = pos.getIndex();
    int end = begin;
    for (; end < limit; ++end) {
        final char c = text.charAt(end);
        final boolean accepted =
            (end != begin || UCharacter.isUnicodeIdentifierStart(c))
            && UCharacter.isUnicodeIdentifierPart(c);
        if (!accepted) {
            break;
        }
    }
    if (end == begin) { // No valid name chars
        return null;
    }
    pos.setIndex(end);
    return text.substring(begin, end);
}

Source File: Trie.java From fitnotifications with Apache License 2.0

6 votes

/**
* Internal trie getter from a code point.
* Picks the lookup routine by code point range and returns the offset to
* the data which the code point points to.
* @param ch codepoint
* @return offset to data, or -1 if ch is negative or greater than
*         UCharacter.MAX_VALUE
*/
protected final int getCodePointOffset(int ch)
{
    // if ((ch >> 16) == 0) slower
    if (ch < 0) {
        return -1;
    }
    if (ch < UTF16.LEAD_SURROGATE_MIN_VALUE) {
        // fastpath for the part of the BMP below surrogates (D800) where getRawOffset() works
        return getRawOffset(0, (char)ch);
    }
    if (ch < UTF16.SUPPLEMENTARY_MIN_VALUE) {
        // remaining BMP code points
        return getBMPOffset((char)ch);
    }
    if (ch > UCharacter.MAX_VALUE) {
        // beyond the last code point: signal the error with -1
        return -1;
    }
    // supplementary code point: addressed through its lead surrogate and
    // the masked low bits of the code point
    final char lead = UTF16.getLeadSurrogate(ch);
    final char trailBits = (char)(ch & SURROGATE_MASK_);
    return getSurrogateOffset(lead, trailBits);
}

Source File: UCharacterName.java From trekarta with GNU General Public License v3.0

6 votes

/**
* Retrieves the name of a Unicode code point.
* An algorithmic name takes precedence. If there is none, the result comes
* from the extended names when <code>choice</code> is EXTENDED_CHAR_NAME
* and from the group names for every other choice.
*
* @param ch the code point for which to get the name.
* @param choice Selector for which name to get.
* @return the name, or null if ch is not within UCharacter.MIN_VALUE and
*         UCharacter.MAX_VALUE or if choice exceeds
*         UCharacterNameChoice.CHAR_NAME_CHOICE_COUNT
*/
public String getName(int ch, int choice)
{
    if (ch < UCharacter.MIN_VALUE) {
        return null;
    }
    if (ch > UCharacter.MAX_VALUE) {
        return null;
    }
    if (choice > UCharacterNameChoice.CHAR_NAME_CHOICE_COUNT) {
        return null;
    }

    String name = getAlgName(ch, choice);
    final boolean haveName = name != null && name.length() != 0;
    if (!haveName) {
        // getting normal character name
        name = (choice == UCharacterNameChoice.EXTENDED_CHAR_NAME)
                   ? getExtendedName(ch)
                   : getGroupName(ch, choice);
    }
    return name;
}

Source File: IntTrieBuilder.java From fitnotifications with Apache License 2.0

6 votes

/**
 * Stores a 32 bit value for a code point in the table data.
 * @param ch codepoint whose data is to be set
 * @param value value to store
 * @return true if the value was stored; false if the table has been
 *         compacted, ch is negative or above UCharacter.MAX_VALUE, or
 *         getDataBlock(ch) reports a negative block
 */
public boolean setValue(int ch, int value) 
{
    final boolean validCodePoint = ch >= 0 && ch <= UCharacter.MAX_VALUE;
    if (m_isCompacted_ || !validCodePoint) {
        return false;
    }

    final int blockStart = getDataBlock(ch);
    if (blockStart >= 0) {
        m_data_[blockStart + (ch & MASK_)] = value;
        return true;
    }
    return false;
}

Source File: UTR30DataFileGenerator.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0

6 votes

/**
 * Appends the expansion of one mapping rule to {@code builder}.
 * When the right-hand side matches NUMERIC_VALUE_PATTERN every code point
 * gets its own line, mapped to 0x30 plus its numeric value and annotated
 * with its name; otherwise each range is written once, mapped to the
 * right-hand side verbatim. Strings inside the set are logged as errors.
 *
 * @param builder       receives the generated lines
 * @param leftHandSide  UnicodeSet pattern, parsed with IGNORE_SPACE
 * @param rightHandSide mapping target
 */
private static void expandSingleRule(StringBuilder builder, String leftHandSide, String rightHandSide) {
    final UnicodeSet sourceSet = new UnicodeSet(leftHandSide, UnicodeSet.IGNORE_SPACE);
    final boolean perCodePoint = NUMERIC_VALUE_PATTERN.matcher(rightHandSide).matches();
    final UnicodeSetIterator ranges = new UnicodeSetIterator(sourceSet);
    while (ranges.nextRange()) {
        if (ranges.codepoint == UnicodeSetIterator.IS_STRING) {
            logger.error("ERROR: String '" + ranges.getString() + "' found in UnicodeSet");
            continue;
        }
        if (!perCodePoint) {
            // one line for the whole range
            builder.append(String.format(Locale.ROOT, "%04X", ranges.codepoint));
            if (ranges.codepointEnd > ranges.codepoint) {
                builder.append("..").append(String.format(Locale.ROOT, "%04X", ranges.codepointEnd));
            }
            builder.append('>').append(rightHandSide).append("\n");
            continue;
        }
        // one line per code point
        for (int cp = ranges.codepoint; cp <= ranges.codepointEnd; ++cp) {
            builder.append(String.format(Locale.ROOT, "%04X", cp))
                   .append('>')
                   .append(String.format(Locale.ROOT, "%04X", 0x30 + UCharacter.getNumericValue(cp)))
                   .append("   # ")
                   .append(UCharacter.getName(cp))
                   .append("\n");
        }
    }
}

Source File: IntTrieBuilder.java From fitnotifications with Apache License 2.0

6 votes

/**
 * Reads the 32 bit value stored for a code point.
 * @param ch  code point for which data is to be retrieved.
 * @param inBlockZero  Output parameter, may be null. inBlockZero[0] is set to
 *                      true if the index entry for ch is 0, or if no lookup
 *                      is done; otherwise to false.
 * @return the 32 bit data value, or 0 if the table has been compacted or
 *         ch is negative or above UCharacter.MAX_VALUE.
 */
public int getValue(int ch, boolean [] inBlockZero) 
{
    final boolean wantsFlag = inBlockZero != null;

    // valid, uncompacted trie and valid c?
    if (m_isCompacted_ || ch > UCharacter.MAX_VALUE || ch < 0) {
        if (wantsFlag) {
            inBlockZero[0] = true;
        }
        return 0;
    }

    final int blockStart = m_index_[ch >> SHIFT_];
    if (wantsFlag) {
        inBlockZero[0] = blockStart == 0;
    }
    final int dataOffset = Math.abs(blockStart) + (ch & MASK_);
    return m_data_[dataOffset];
}

Source File: UCharacterName.java From trekarta with GNU General Public License v3.0

5 votes

/**
* Stores the information for accessing the algorithmic names.
* Nothing is stored unless all arguments pass validation.
* @param rangestart starting code point that lies within this name group
* @param rangeend end code point that lies within this name group
* @param type algorithm type, either TYPE_0_ or TYPE_1_. One kind uses
*        the code point as part of its name, the other uses variant
*        postfix strings
* @param variant algorithmic variant
* @return true if UCharacter.MIN_VALUE <= rangestart <= rangeend <=
*         UCharacter.MAX_VALUE and the type is known, false otherwise
*/
boolean setInfo(int rangestart, int rangeend, byte type, byte variant)
{
    final boolean validRange = rangestart >= UCharacter.MIN_VALUE
                               && rangestart <= rangeend
                               && rangeend <= UCharacter.MAX_VALUE;
    final boolean validType = type == TYPE_0_ || type == TYPE_1_;
    if (!validRange || !validType) {
        return false;
    }

    m_rangestart_ = rangestart;
    m_rangeend_ = rangeend;
    m_type_ = type;
    m_variant_ = variant;
    return true;
}

Source File: StringFormatSpecifierImpl.java From birt with Eclipse Public License 1.0

5 votes

/**
 * Converts the case of a string according to a one-character option.
 * 
 * @param val
 *            string to be handled
 * @param option
 *            '&lt;' for lower case, '&gt;' for upper case; any other
 *            character leaves the string untouched
 * @param locale
 *            locale passed to the case conversion
 * @return the converted string, or <code>val</code> itself for an
 *         unrecognized option
 */
private String handleCase( String val, char option, ULocale locale )
{
	switch ( option )
	{
		case '<' :
			return UCharacter.toLowerCase( locale, val );
		case '>' :
			return UCharacter.toUpperCase( locale, val );
		default :
			return val;
	}
}

Source File: UnicodeDataTest.java From es6draft with MIT License

5 votes

/**
 * Checks, for every integer valued property, that the minimum is not
 * negative, does not exceed the maximum, and that the maximum stays below 512.
 */
@SuppressWarnings("deprecation")
@Test
public void testLimits() {
    int property = UProperty.INT_START;
    while (property < UProperty.INT_LIMIT) {
        final int min = UCharacter.getIntPropertyMinValue(property);
        final int max = UCharacter.getIntPropertyMaxValue(property);

        final String minMessage = String.format("min=%d", min);
        assertTrue(minMessage, min >= 0);

        final String rangeMessage = String.format("min=%d, max=%d", min, max);
        assertTrue(rangeMessage, min <= max);

        // upper bound: BINARY_MASK in UEncoding
        final String maxMessage = String.format("max=%d", max);
        assertTrue(maxMessage, max < 512);

        ++property;
    }
}

Source File: GenerateUTR30DataFiles.java From lucene-solr with Apache License 2.0

5 votes

/**
 * Writes the expanded form of a single mapping rule into {@code builder}.
 * If the right-hand side matches NUMERIC_VALUE_PATTERN, each code point of
 * the set is emitted on its own line, mapped to 0x30 plus its numeric value
 * and followed by its name as a comment. Otherwise every range is emitted
 * once with the right-hand side copied as is. A string element in the set
 * is reported on System.err and the JVM is asked to exit with status 1.
 */
private static void expandSingleRule
    (StringBuilder builder, String leftHandSide, String rightHandSide)
    throws IllegalArgumentException {
  final UnicodeSet lhsSet = new UnicodeSet(leftHandSide, UnicodeSet.IGNORE_SPACE);
  final boolean mapToDigit = NUMERIC_VALUE_PATTERN.matcher(rightHandSide).matches();
  final UnicodeSetIterator iter = new UnicodeSetIterator(lhsSet);
  while (iter.nextRange()) {
    if (iter.codepoint == UnicodeSetIterator.IS_STRING) {
      System.err.println("ERROR: String '" + iter.getString() + "' found in UnicodeSet");
      System.exit(1);
      continue;
    }
    if (mapToDigit) {
      for (int cp = iter.codepoint ; cp <= iter.codepointEnd ; ++cp) {
        final String source = String.format(Locale.ROOT, "%04X", cp);
        builder.append(source).append('>');
        final String target = String.format(Locale.ROOT, "%04X", 0x30 + UCharacter.getNumericValue(cp));
        builder.append(target);
        builder.append("   # ").append(UCharacter.getName(cp));
        builder.append("\n");
      }
      continue;
    }
    builder.append(String.format(Locale.ROOT, "%04X", iter.codepoint));
    final boolean isRange = iter.codepointEnd > iter.codepoint;
    if (isRange) {
      builder.append("..").append(String.format(Locale.ROOT, "%04X", iter.codepointEnd));
    }
    builder.append('>').append(rightHandSide).append("\n");
  }
}

Source File: MCRLanguageDetector.java From mycore with GNU General Public License v3.0

5 votes

/**
 * Adds one score per code point of {@code text} to the script that the
 * code point belongs to.
 * The text is walked by code point, so a supplementary character (stored as
 * a surrogate pair) is scored exactly once.
 *
 * @param text   the text to analyse
 * @param scores score table keyed by UScript code, updated through increaseScoreFor
 */
private static void buildScores(String text, Map<Integer, AtomicInteger> scores) {
    try {
        char[] chararray = text.toCharArray();
        for (int i = 0; i < chararray.length; ) {
            int codePoint = UCharacter.codePointAt(chararray, i);
            increaseScoreFor(scores, UScript.getScript(codePoint));
            // Step over both halves of a surrogate pair; stepping by one would
            // score the trail surrogate again as a lone surrogate.
            i += Character.charCount(codePoint);
        }
    } catch (Exception ignored) {
        // NOTE(review): failures are deliberately swallowed here, apparently as
        // best-effort scoring - confirm that losing the remaining scores is acceptable.
    }
}

Source File: TrieBuilder.java From fitnotifications with Apache License 2.0

5 votes

/**
 * Tells whether a code point maps to the zero block of the trie.
 * @param ch codepoint to look up
 * @return true if the index entry for ch is 0; also true if the trie has
 *         been compacted or ch lies outside UCharacter.MIN_VALUE to
 *         UCharacter.MAX_VALUE
 */
public boolean isInZeroBlock(int ch) 
{
    final boolean validCodePoint = ch >= UCharacter.MIN_VALUE
                                   && ch <= UCharacter.MAX_VALUE;
    // only a valid code point of an uncompacted trie is actually looked up
    return m_isCompacted_
           || !validCodePoint
           || m_index_[ch >> SHIFT_] == 0;
}

Source File: Utility.java From fitnotifications with Apache License 2.0

5 votes

/**
 * Parse an unsigned 31-bit integer at the given offset.  Use
 * UCharacter.digit() to parse individual characters into digits.
 * Digits are read code point by code point, so digits outside the BMP
 * are consumed as a whole.
 * @param text the text to be parsed
 * @param pos INPUT-OUTPUT parameter.  On entry, pos[0] is the
 * offset within text at which to start parsing; it should point
 * to a valid digit.  On exit, pos[0] is the offset after the last
 * parsed character.  If the parse failed, it will be unchanged on
 * exit.  Must be >= 0 on entry.
 * @param radix the radix in which to parse; must be >= 2 and <=
 * 36.
 * @return a non-negative parsed number, or -1 upon parse failure.
 * Parse fails if there are no digits, that is, if pos[0] does not
 * point to a valid digit on entry, or if the number to be parsed
 * does not fit into a 31-bit unsigned integer.
 */
public static int parseNumber(String text, int[] pos, int radix) {
    // assert(pos[0] >= 0);
    // assert(radix >= 2);
    // assert(radix <= 36);
    int n = 0;
    int p = pos[0];
    while (p < text.length()) {
        int ch = Character.codePointAt(text, p);
        int d = UCharacter.digit(ch, radix);
        if (d < 0) {
            break;
        }
        // Accumulate in 64 bits: a 32-bit product can wrap around to a
        // non-negative value (e.g. "4294967296" wraps to 0), so testing the
        // sign of an int result does not catch every overflow.
        long next = (long) radix * n + d;
        if (next > Integer.MAX_VALUE) {
            return -1;
        }
        n = (int) next;
        // advance past the whole code point, which may be a surrogate pair
        p += Character.charCount(ch);
    }
    if (p == pos[0]) {
        return -1;
    }
    pos[0] = p;
    return n;
}

Source File: Utility.java From fitnotifications with Apache License 2.0

5 votes

/**
 * Parse an integer at pos, either of the form \d+ or of the form
 * 0x[0-9A-Fa-f]+ or 0[0-7]+, that is, in standard decimal, hex,
 * or octal format.
 * Zero digits are accepted anywhere, including directly after the
 * prefix (e.g. "0x0041" or "007").
 * @param rule the text to parse
 * @param pos INPUT-OUTPUT parameter.  On input, the first
 * character to parse.  On output, the character after the last
 * parsed character.  It is left unchanged if no digit was parsed
 * or if the number is too large.
 * @param limit offset in rule at which digit parsing stops
 * @return the parsed value, or 0 if no digit was parsed or the
 * number does not fit into a non-negative int
 */
public static int parseInteger(String rule, int[] pos, int limit) {
    int count = 0;
    int value = 0;
    int p = pos[0];
    int radix = 10;

    if (rule.regionMatches(true, p, "0x", 0, 2)) {
        p += 2;
        radix = 16;
    } else if (p < limit && rule.charAt(p) == '0') {
        // the leading zero of an octal number counts as a parsed digit
        p++;
        count = 1;
        radix = 8;
    }

    while (p < limit) {
        int d = UCharacter.digit(rule.charAt(p++), radix);
        if (d < 0) {
            --p;
            break;
        }
        ++count;
        // Compute in 64 bits so that too many input digits are detected
        // reliably. Comparing a wrapped int against the previous value
        // misses some overflows and also rejects a zero digit while the
        // value is still 0.
        long v = ((long) value * radix) + d;
        if (v > Integer.MAX_VALUE) {
            return 0;
        }
        value = (int) v;
    }
    if (count > 0) {
        pos[0] = p;
    }
    return value;
}

Source File: TrieIterator.java From fitnotifications with Apache License 2.0

5 votes

/**
* <p>Advances the iteration and reports whether an element was produced.</p>
* <p>The next set of codepoints with the same value type is calculated
* during this call and returned in the argument element. The BMP is
* tried first while the next codepoint is below the supplementary
* range; if that yields nothing, the supplementary calculation runs.</p>
* @param element return result
* @return false if the next codepoint is already beyond
*         UCharacter.MAX_VALUE, true otherwise.
* @see com.ibm.icu.util.RangeValueIterator.Element
*/
@Override
public final boolean next(Element element)
{
    if (m_nextCodepoint_ > UCharacter.MAX_VALUE) {
        return false;
    }
    final boolean stillInBMP =
        m_nextCodepoint_ < UCharacter.SUPPLEMENTARY_MIN_VALUE;
    if (!stillInBMP || !calculateNextBMPElement(element)) {
        calculateNextSupplementaryElement(element);
    }
    return true;
}

Source File: XMLRecordReader.java From fitnotifications with Apache License 2.0

5 votes

/**
 * Reads the next tag from the input.
 * Unless a tag start is already pending, whitespace is skipped up to the
 * next '<'; any other character is reported on System.err and ends the
 * search. The tag text is everything up to the closing '>' or the end of
 * the input.
 *
 * @return the text between '<' and '>', or null if no tag start was found
 */
private String readNextTag() {
    while (!atTag) {
        final int c = readChar();
        if (c == '<') {
            atTag = true;
            break;
        }
        if (c == -1) {
            break;
        }
        if (!UCharacter.isWhitespace(c)) {
            System.err.println("Unexpected non-whitespace character "
                    + Integer.toHexString(c));
            break;
        }
    }

    if (!atTag) {
        return null;
    }

    atTag = false;
    final StringBuilder tag = new StringBuilder();
    for (int c = readChar(); c != '>' && c != -1; c = readChar()) {
        tag.append((char) c);
    }
    // System.err.println("read tag: '" + tag.toString() + "'");
    return tag.toString();
}

Source File: IntTrieBuilder.java From fitnotifications with Apache License 2.0

5 votes

/**
 * Reads the 32 bit value stored for a code point.
 * @param ch codepoint whose data is to be retrieved
 * @return the 32 bit data, or 0 if the table has been compacted or ch is
 *         negative or above UCharacter.MAX_VALUE
 */
public int getValue(int ch) 
{
    final boolean validCodePoint = ch >= 0 && ch <= UCharacter.MAX_VALUE;
    if (m_isCompacted_ || !validCodePoint) {
        return 0;
    }

    // the index entry is used by magnitude as the start of the data block
    final int blockStart = Math.abs(m_index_[ch >> SHIFT_]);
    return m_data_[blockStart + (ch & MASK_)];
}

Source File: StringPrep.java From trekarta with GNU General Public License v3.0

5 votes

/**
 * Builds a StringPrep profile from its binary data.
 * The indexes, the trie and the mapping data are read in that order, then
 * the options and version numbers are decoded.
 *
 * @param bytes the profile data
 * @throws IOException if normalization is turned on and the Unicode version
 *         reported by UCharacter is lower than both the profile's Unicode
 *         version and its normalization corrections version
 */
private StringPrep(ByteBuffer bytes) throws IOException {
    final StringPrepDataReader dataReader = new StringPrepDataReader(bytes);

    // the reads below must stay in this order: indexes, trie, mapping data
    indexes = dataReader.readIndexes(INDEX_TOP);
    sprepTrie = new CharTrie(bytes, null);

    // indexes[INDEX_MAPPING_DATA_SIZE] stores the size of mappingData in bytes
    mappingData = dataReader.read(indexes[INDEX_MAPPING_DATA_SIZE]/2);

    // decode the options
    doNFKC    = (indexes[OPTIONS] & NORMALIZATION_ON) > 0;
    checkBiDi = (indexes[OPTIONS] & CHECK_BIDI_ON) > 0;

    // decode the versions
    sprepUniVer = getVersionInfo(dataReader.getUnicodeVersion());
    normCorrVer = getVersionInfo(indexes[NORM_CORRECTNS_LAST_UNI_VERSION]);

    final VersionInfo normUniVer = UCharacter.getUnicodeVersion();
    // Reject the profile when the available Unicode data is older than both
    // the profile itself and its normalization corrections, and normalization is on.
    if (normUniVer.compareTo(sprepUniVer) < 0
            && normUniVer.compareTo(normCorrVer) < 0
            && doNFKC) {
        throw new IOException("Normalization Correction version not supported");
    }

    if (checkBiDi) {
        bdp = UBiDiProps.INSTANCE;
    }
}

com.ibm.icu.lang.UCharacter Java Examples