Java Code Examples for com.ibm.icu.text.UTF16

The following examples show how to use com.ibm.icu.text.UTF16. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source Project: fitnotifications   Author: abhijitvalluri   File: CharTrie.java    License: Apache License 2.0 6 votes vote down vote up
/**
* Looks up the trie value stored for a code point.
* When the general offset lookup reports a negative offset, the trie's
* initial value is returned instead.
* @param ch codepoint
* @return value stored for the code point, or the initial value
*/
public final char getCodePointValue(int ch)
{
    final boolean belowSurrogates =
            ch >= 0 && ch < UTF16.LEAD_SURROGATE_MIN_VALUE;

    if (!belowSurrogates) {
        // U+D800 and above (and negative input) use the general lookup
        final int generalOffset = getCodePointOffset(ch);
        if (generalOffset < 0) {
            // negative offset signals an error: fall back to the default
            return m_initialValue_;
        }
        return m_data_[generalOffset];
    }

    // fastpath for U+0000..U+D7FF: inlined copy of getRawOffset()
    final int blockStart =
            m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_;
    return m_data_[blockStart + (ch & INDEX_STAGE_3_MASK_)];
}
 
Example #2
Source Project: fitnotifications   Author: abhijitvalluri   File: Trie.java    License: Apache License 2.0 6 votes vote down vote up
/**
* Internal trie getter from a code point.
* Maps a code point to the offset of its data, choosing the lookup
* routine by range: below the surrogates, the rest of the BMP, or
* supplementary.
* @param ch codepoint
* @return offset to data, or -1 if ch is negative or above
*         UCharacter.MAX_VALUE
*/
protected final int getCodePointOffset(int ch)
{
    if (ch < 0) {
        return -1;
    }
    if (ch < UTF16.LEAD_SURROGATE_MIN_VALUE) {
        // part of the BMP below the surrogates (D800): getRawOffset() works directly
        return getRawOffset(0, (char)ch);
    }
    if (ch < UTF16.SUPPLEMENTARY_MIN_VALUE) {
        // remaining BMP code points
        return getBMPOffset((char)ch);
    }
    if (ch > UCharacter.MAX_VALUE) {
        // out of range
        return -1;
    }
    // supplementary: look up by lead surrogate plus the masked low bits
    final char lead = UTF16.getLeadSurrogate(ch);
    final char trailBits = (char)(ch & SURROGATE_MASK_);
    return getSurrogateOffset(lead, trailBits);
}
 
Example #3
Source Project: fitnotifications   Author: abhijitvalluri   File: ReplaceableUCharacterIterator.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Returns the code point at the current index, combining a lead
 * surrogate with the following trail surrogate when both are present.
 * The iteration index is the same on return as on entry.
 * @return current codepoint
 */
@Override
public int currentCodePoint(){
    // charAt is avoided here because it behaves differently when the
    // index points at a trail surrogate
    final int first = current();
    if (!UTF16.isLeadSurrogate((char)first)) {
        return first;
    }

    // peek at the following code unit, then step back so the index is unchanged
    next();
    final int second = current();
    previous();

    if (!UTF16.isTrailSurrogate((char)second)) {
        // unpaired lead surrogate: report it as is
        return first;
    }
    return Character.toCodePoint((char)first, (char)second);
}
 
Example #4
Source Project: fitnotifications   Author: abhijitvalluri   File: TrieIterator.java    License: Apache License 2.0 6 votes vote down vote up
/**
* Checks whether we are at the start of an initial block, i.e. whether
* m_nextIndex_ is zero or negative.
* If we are, the rest of the codepoints in this initial block have the
* same value, so the whole block is skipped: m_nextCodepoint_ is advanced
* by TRAIL_SURROGATE_COUNT_ - 1, m_nextIndex_ is recomputed from the
* folding offset of the new lead surrogate, and m_nextBlockIndex_ is set
* to DATA_BLOCK_LENGTH_.
* This is used only for the supplementary codepoints because
* the offset to the trail indexes could be 0.
* @return true if we were at the start of an initial block (state was
*         advanced), false otherwise (state untouched).
* @throws NullPointerException if the trie's m_dataManipulate_ is null
*/
private final boolean checkNullNextTrailIndex()
{
    if (m_nextIndex_ <= 0) {
        // jump to the last codepoint covered by the current lead surrogate
        m_nextCodepoint_ += TRAIL_SURROGATE_COUNT_ - 1;
        int nextLead  = UTF16.getLeadSurrogate(m_nextCodepoint_);
        // start of the index block for that lead surrogate
        int leadBlock =
               m_trie_.m_index_[nextLead >> Trie.INDEX_STAGE_1_SHIFT_] <<
                                               Trie.INDEX_STAGE_2_SHIFT_;
        if (m_trie_.m_dataManipulate_ == null) {
            throw new NullPointerException(
                        "The field DataManipulate in this Trie is null");
        }
        // the lead surrogate's trie value encodes the folding offset
        m_nextIndex_ = m_trie_.m_dataManipulate_.getFoldingOffset(
                           m_trie_.getValue(leadBlock +
                               (nextLead & Trie.INDEX_STAGE_3_MASK_)));
        // step back by one; NOTE(review): presumably the caller increments
        // m_nextIndex_ again before using it -- confirm against the caller
        m_nextIndex_ --;
        m_nextBlockIndex_ =  DATA_BLOCK_LENGTH_;
        return true;
    }
    return false;
}
 
Example #5
Source Project: fitnotifications   Author: abhijitvalluri   File: Utility.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Escapes a string for display: code points in U+0020..U+007F are
 * copied through (with backslash doubled), everything else becomes a
 * backslash-u escape with 4 hex digits (up to U+FFFF) or a backslash-U
 * escape with 8 hex digits (above U+FFFF).
 */
public static final String escape(String s) {
    final StringBuilder out = new StringBuilder();
    int pos = 0;
    while (pos < s.length()) {
        final int cp = Character.codePointAt(s, pos);
        pos += UTF16.getCharCount(cp);

        if (cp < ' ' || cp > 0x007F) {
            // outside the pass-through range: emit a Unicode escape
            final boolean isBmp = cp <= 0xFFFF;
            out.append(isBmp ? "\\u" : "\\U").append(hex(cp, isBmp ? 4 : 8));
        } else if (cp == '\\') {
            out.append("\\\\"); // a literal backslash becomes two
        } else {
            out.append((char)cp);
        }
    }
    return out.toString();
}
 
Example #6
Source Project: fitnotifications   Author: abhijitvalluri   File: Utility.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Converts a string to separated groups of hex digits, one group per
 * code unit (or per code point when useCodePoints is true), and appends
 * the output to the given Appendable. The separator is written between
 * groups, never before the first one.
 * E.g., hex("AB", 4, ",", ...) => "0041,0042".
 * An IOException from the Appendable is rethrown wrapped in an
 * IllegalIcuArgumentException.
 */
public static <S extends CharSequence, U extends CharSequence, T extends Appendable> T hex(S s, int width, U separator, boolean useCodePoints, T result) {
    try {
        if (!useCodePoints) {
            // one group per UTF-16 code unit
            int idx = 0;
            while (idx < s.length()) {
                if (idx > 0) {
                    result.append(separator);
                }
                result.append(hex(s.charAt(idx), width));
                idx++;
            }
            return result;
        }

        // one group per code point; step by the code point's width
        int pos = 0;
        while (pos < s.length()) {
            final int codePoint = Character.codePointAt(s, pos);
            if (pos > 0) {
                result.append(separator);
            }
            result.append(hex(codePoint,width));
            pos += UTF16.getCharCount(codePoint);
        }
        return result;
    } catch (IOException e) {
        throw new IllegalIcuArgumentException(e);
    }
}
 
Example #7
Source Project: fitnotifications   Author: abhijitvalluri   File: Utility.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Parses a Unicode identifier from the given string at the given
 * position.  Returns the identifier, or null if the first code point
 * examined is not an identifier start.
 * @param str the string to parse
 * @param pos INPUT-OUTPUT parameter.  On INPUT, pos[0] is the
 * first character to examine.  It must be less than str.length(),
 * and it must not point to a whitespace character.  On OUTPUT, the
 * position after the last parsed character; left unchanged when null
 * is returned.
 * @return the Unicode identifier, or null if there is no valid
 * identifier at pos[0].
 */
public static String parseUnicodeIdentifier(String str, int[] pos) {
    // assert(pos[0] < str.length());
    final StringBuilder ident = new StringBuilder();
    int cursor = pos[0];
    while (cursor < str.length()) {
        final int cp = Character.codePointAt(str, cursor);
        final boolean atStart = ident.length() == 0;
        final boolean accepted = atStart
                ? UCharacter.isUnicodeIdentifierStart(cp)
                : UCharacter.isUnicodeIdentifierPart(cp);
        if (!accepted) {
            if (atStart) {
                // nothing collected yet: no identifier here
                return null;
            }
            break;
        }
        ident.appendCodePoint(cp);
        cursor += UTF16.getCharCount(cp);
    }
    pos[0] = cursor;
    return ident.toString();
}
 
Example #8
Source Project: fitnotifications   Author: abhijitvalluri   File: IntTrie.java    License: Apache License 2.0 6 votes vote down vote up
/**
* Looks up the trie value stored for a code point.
* When the general offset lookup reports a negative offset, the trie's
* initial value is returned instead.
* @param ch codepoint
* @return value stored for the code point, or the initial value
*/
public final int getCodePointValue(int ch)
{
    final boolean belowSurrogates =
            ch >= 0 && ch < UTF16.LEAD_SURROGATE_MIN_VALUE;

    if (!belowSurrogates) {
        // U+D800 and above (and negative input) use the general lookup
        final int generalOffset = getCodePointOffset(ch);
        if (generalOffset < 0) {
            return m_initialValue_;
        }
        return m_data_[generalOffset];
    }

    // fastpath for U+0000..U+D7FF: inlined copy of getRawOffset()
    final int blockStart =
            m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_;
    return m_data_[blockStart + (ch & INDEX_STAGE_3_MASK_)];
}
 
Example #9
Source Project: fitnotifications   Author: abhijitvalluri   File: IntTrie.java    License: Apache License 2.0 6 votes vote down vote up
/**
* Gets the value associated with a pair of surrogates.
* @param lead a lead surrogate
* @param trail a trail surrogate
* @return the stored value when the surrogate offset is positive,
*         otherwise the trie's initial value
* @throws IllegalArgumentException if lead is not a lead surrogate or
*         trail is not a trail surrogate
*/
public final int getSurrogateValue(char lead, char trail)
{
    final boolean validPair =
            UTF16.isLeadSurrogate(lead) && UTF16.isTrailSurrogate(trail);
    if (!validPair) {
        throw new IllegalArgumentException(
            "Argument characters do not form a supplementary character");
    }

    // fold position for the lead/trail pair
    final int foldedOffset = getSurrogateOffset(lead, trail);

    // a non-positive offset is treated as "no data": use the initial value
    return (foldedOffset > 0) ? m_data_[foldedOffset] : m_initialValue_;
}
 
Example #10
Source Project: fitnotifications   Author: abhijitvalluri   File: UCharacterName.java    License: Apache License 2.0 6 votes vote down vote up
/**
* Gets the extended type of a character: the UCharacter general
* category, except that noncharacters and lead/trail surrogates are
* reported with their own extended type constants.
* @param ch character to be tested
* @return extended type it is associated with
*/
private static int getType(int ch)
{
    if (UCharacterUtility.isNonCharacter(ch)) {
        // noncharacters are checked first and never reach the category lookup
        return NON_CHARACTER_;
    }

    final int category = UCharacter.getType(ch);
    if (category != UCharacterCategory.SURROGATE) {
        return category;
    }

    // split the surrogate category into lead and trail
    return (ch <= UTF16.LEAD_SURROGATE_MAX_VALUE)
            ? LEAD_SURROGATE_
            : TRAIL_SURROGATE_;
}
 
Example #11
Source Project: fitnotifications   Author: abhijitvalluri   File: CharacterIteration.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Completes a code point whose first code unit has already been read.
 * If {@code lead} is at most LEAD_SURROGATE_MAX_VALUE, the next code
 * unit is read from the iterator; a trail surrogate is combined with
 * {@code lead} into a supplementary code point, anything else is
 * pushed back with {@code previous()}.
 * NOTE(review): presumably callers only pass values at or above
 * LEAD_SURROGATE_MIN_VALUE; there is no lower-bound check -- confirm.
 * @param ci iterator positioned on the code unit {@code lead} was read from
 * @param lead the code unit already read
 * @return DONE32 if {@code lead} is DONE and the iterator is at its end,
 *         the combined code point for a surrogate pair, otherwise {@code lead}
 */
public static int nextTrail32(CharacterIterator ci, int lead) {
    if (lead == CharacterIterator.DONE && ci.getIndex() >= ci.getEndIndex()) {
        return DONE32;
    }
    if (lead > UTF16.LEAD_SURROGATE_MAX_VALUE) {
        return lead;
    }

    final char following = ci.next();
    if (!UTF16.isTrailSurrogate(following)) {
        // not a pair: undo the look-ahead
        ci.previous();
        return lead;
    }
    return ((lead - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10)
            + (following - UTF16.TRAIL_SURROGATE_MIN_VALUE)
            + UTF16.SUPPLEMENTARY_MIN_VALUE;
}
 
Example #12
Source Project: fitnotifications   Author: abhijitvalluri   File: CharacterIteration.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Moves the iterator back by one code point and returns it.
 * A trail surrogate preceded by a lead surrogate is returned as the
 * combined supplementary code point, with the iterator left on the lead.
 * @param ci iterator to move backwards
 * @return DONE32 if the iterator is already at its begin index,
 *         otherwise the code point (or lone code unit) stepped over
 */
public static int previous32(CharacterIterator ci) {
    if (ci.getIndex() <= ci.getBeginIndex()) {
        return DONE32;
    }

    final char last = ci.previous();
    if (!UTF16.isTrailSurrogate(last) || ci.getIndex() <= ci.getBeginIndex()) {
        // ordinary code unit, or a trail surrogate with nothing before it
        return last;
    }

    final char before = ci.previous();
    if (UTF16.isLeadSurrogate(before)) {
        return ((before - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10)
                + (last - UTF16.TRAIL_SURROGATE_MIN_VALUE)
                + UTF16.SUPPLEMENTARY_MIN_VALUE;
    }

    // unpaired trail surrogate: undo the extra step back
    ci.next();
    return last;
}
 
Example #13
Source Project: fitnotifications   Author: abhijitvalluri   File: CharacterIteration.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Returns the code point at the iterator's current position.
 * For a lead surrogate the following code unit is peeked at with
 * {@code next()} and then undone with {@code previous()}.
 * @param ci iterator to read from
 * @return the code point at the current position; DONE32 if the current
 *         code unit is DONE and the index is at or past the end index
 */
public static int current32(CharacterIterator ci) {
    final char unit = ci.current();
    if (unit < UTF16.LEAD_SURROGATE_MIN_VALUE) {
        // common case: below the surrogate range
        return unit;
    }

    if (!UTF16.isLeadSurrogate(unit)) {
        // DONE is only an end marker when the index really is at the end
        if (unit == CharacterIterator.DONE && ci.getIndex() >= ci.getEndIndex()) {
            return DONE32;
        }
        return unit;
    }

    // lead surrogate: peek at the next unit and step back
    final int following = (int)ci.next();
    ci.previous();
    if (!UTF16.isTrailSurrogate((char)following)) {
        return unit;
    }
    return ((unit - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10)
            + (following - UTF16.TRAIL_SURROGATE_MIN_VALUE)
            + UTF16.SUPPLEMENTARY_MIN_VALUE;
}
 
Example #14
Source Project: lucene-solr   Author: apache   File: BreakIteratorWrapper.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Returns true if the current text represents an emoji character or sequence.
 * Only the first code point of the segment is examined, plus the code
 * unit right after it for members of EMOJI_RK.
 */
private boolean isEmoji(int current, int next) {
  final int begin = start + current;
  final int end = start + next;
  final int codepoint = UTF16.charAt(text, 0, end, begin);

  if (!EMOJI.contains(codepoint)) {
    return false;
  }
  if (!EMOJI_RK.contains(codepoint)) {
    return true;
  }

  // in EmojiRK: only treated as emoji when an emoji presentation
  // selector (U+FE0F) or keycap (U+20E3) follows inside the segment
  final int trailer = begin + Character.charCount(codepoint);
  if (trailer >= end) {
    return false;
  }
  return text[trailer] == 0xFE0F || text[trailer] == 0x20E3;
}
 
Example #15
Source Project: trekarta   Author: andreynovikov   File: CharTrie.java    License: GNU General Public License v3.0 6 votes vote down vote up
/**
* Looks up the trie value stored for a code point.
* When the general offset lookup reports a negative offset, the trie's
* initial value is returned instead.
* @param ch codepoint
* @return value stored for the code point, or the initial value
*/
public final char getCodePointValue(int ch)
{
    final boolean belowSurrogates =
            ch >= 0 && ch < UTF16.LEAD_SURROGATE_MIN_VALUE;

    if (!belowSurrogates) {
        // U+D800 and above (and negative input) use the general lookup
        final int generalOffset = getCodePointOffset(ch);
        if (generalOffset < 0) {
            // negative offset signals an error: fall back to the default
            return m_initialValue_;
        }
        return m_data_[generalOffset];
    }

    // fastpath for U+0000..U+D7FF: inlined copy of getRawOffset()
    final int blockStart =
            m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_;
    return m_data_[blockStart + (ch & INDEX_STAGE_3_MASK_)];
}
 
Example #16
Source Project: trekarta   Author: andreynovikov   File: Trie.java    License: GNU General Public License v3.0 6 votes vote down vote up
/**
* Internal trie getter from a code point.
* Maps a code point to the offset of its data, choosing the lookup
* routine by range: below the surrogates, the rest of the BMP, or
* supplementary.
* @param ch codepoint
* @return offset to data, or -1 if ch is negative or above
*         UCharacter.MAX_VALUE
*/
protected final int getCodePointOffset(int ch)
{
    if (ch < 0) {
        return -1;
    }
    if (ch < UTF16.LEAD_SURROGATE_MIN_VALUE) {
        // part of the BMP below the surrogates (D800): getRawOffset() works directly
        return getRawOffset(0, (char)ch);
    }
    if (ch < UTF16.SUPPLEMENTARY_MIN_VALUE) {
        // remaining BMP code points
        return getBMPOffset((char)ch);
    }
    if (ch > UCharacter.MAX_VALUE) {
        // out of range
        return -1;
    }
    // supplementary: look up by lead surrogate plus the masked low bits
    final char lead = UTF16.getLeadSurrogate(ch);
    final char trailBits = (char)(ch & SURROGATE_MASK_);
    return getSurrogateOffset(lead, trailBits);
}
 
Example #17
Source Project: trekarta   Author: andreynovikov   File: ReplaceableUCharacterIterator.java    License: GNU General Public License v3.0 6 votes vote down vote up
/**
 * Returns the code point at the current index, combining a lead
 * surrogate with the following trail surrogate when both are present.
 * The iteration index is the same on return as on entry.
 * @return current codepoint
 */
@Override
public int currentCodePoint(){
    // charAt is avoided here because it behaves differently when the
    // index points at a trail surrogate
    final int first = current();
    if (!UTF16.isLeadSurrogate((char)first)) {
        return first;
    }

    // peek at the following code unit, then step back so the index is unchanged
    next();
    final int second = current();
    previous();

    if (!UTF16.isTrailSurrogate((char)second)) {
        // unpaired lead surrogate: report it as is
        return first;
    }
    return Character.toCodePoint((char)first, (char)second);
}
 
Example #18
Source Project: trekarta   Author: andreynovikov   File: TrieIterator.java    License: GNU General Public License v3.0 6 votes vote down vote up
/**
* Checks whether we are at the start of an initial block, i.e. whether
* m_nextIndex_ is zero or negative.
* If we are, the rest of the codepoints in this initial block have the
* same value, so the whole block is skipped: m_nextCodepoint_ is advanced
* by TRAIL_SURROGATE_COUNT_ - 1, m_nextIndex_ is recomputed from the
* folding offset of the new lead surrogate, and m_nextBlockIndex_ is set
* to DATA_BLOCK_LENGTH_.
* This is used only for the supplementary codepoints because
* the offset to the trail indexes could be 0.
* @return true if we were at the start of an initial block (state was
*         advanced), false otherwise (state untouched).
* @throws NullPointerException if the trie's m_dataManipulate_ is null
*/
private final boolean checkNullNextTrailIndex()
{
    if (m_nextIndex_ <= 0) {
        // jump to the last codepoint covered by the current lead surrogate
        m_nextCodepoint_ += TRAIL_SURROGATE_COUNT_ - 1;
        int nextLead  = UTF16.getLeadSurrogate(m_nextCodepoint_);
        // start of the index block for that lead surrogate
        int leadBlock =
               m_trie_.m_index_[nextLead >> Trie.INDEX_STAGE_1_SHIFT_] <<
                                               Trie.INDEX_STAGE_2_SHIFT_;
        if (m_trie_.m_dataManipulate_ == null) {
            throw new NullPointerException(
                        "The field DataManipulate in this Trie is null");
        }
        // the lead surrogate's trie value encodes the folding offset
        m_nextIndex_ = m_trie_.m_dataManipulate_.getFoldingOffset(
                           m_trie_.getValue(leadBlock +
                               (nextLead & Trie.INDEX_STAGE_3_MASK_)));
        // step back by one; NOTE(review): presumably the caller increments
        // m_nextIndex_ again before using it -- confirm against the caller
        m_nextIndex_ --;
        m_nextBlockIndex_ =  DATA_BLOCK_LENGTH_;
        return true;
    }
    return false;
}
 
Example #19
Source Project: trekarta   Author: andreynovikov   File: Utility.java    License: GNU General Public License v3.0 6 votes vote down vote up
/**
 * Escapes a string for display: code points in U+0020..U+007F are
 * copied through (with backslash doubled), everything else becomes a
 * backslash-u escape with 4 hex digits (up to U+FFFF) or a backslash-U
 * escape with 8 hex digits (above U+FFFF).
 */
public static final String escape(String s) {
    final StringBuilder out = new StringBuilder();
    int pos = 0;
    while (pos < s.length()) {
        final int cp = Character.codePointAt(s, pos);
        pos += UTF16.getCharCount(cp);

        if (cp < ' ' || cp > 0x007F) {
            // outside the pass-through range: emit a Unicode escape
            final boolean isBmp = cp <= 0xFFFF;
            out.append(isBmp ? "\\u" : "\\U").append(hex(cp, isBmp ? 4 : 8));
        } else if (cp == '\\') {
            out.append("\\\\"); // a literal backslash becomes two
        } else {
            out.append((char)cp);
        }
    }
    return out.toString();
}
 
Example #20
Source Project: trekarta   Author: andreynovikov   File: Utility.java    License: GNU General Public License v3.0 6 votes vote down vote up
/**
 * Converts a string to separated groups of hex digits, one group per
 * code unit (or per code point when useCodePoints is true), and appends
 * the output to the given Appendable. The separator is written between
 * groups, never before the first one.
 * E.g., hex("AB", 4, ",", ...) => "0041,0042".
 * An IOException from the Appendable is rethrown wrapped in an
 * IllegalIcuArgumentException.
 */
public static <S extends CharSequence, U extends CharSequence, T extends Appendable> T hex(S s, int width, U separator, boolean useCodePoints, T result) {
    try {
        if (!useCodePoints) {
            // one group per UTF-16 code unit
            int idx = 0;
            while (idx < s.length()) {
                if (idx > 0) {
                    result.append(separator);
                }
                result.append(hex(s.charAt(idx), width));
                idx++;
            }
            return result;
        }

        // one group per code point; step by the code point's width
        int pos = 0;
        while (pos < s.length()) {
            final int codePoint = Character.codePointAt(s, pos);
            if (pos > 0) {
                result.append(separator);
            }
            result.append(hex(codePoint,width));
            pos += UTF16.getCharCount(codePoint);
        }
        return result;
    } catch (IOException e) {
        throw new IllegalIcuArgumentException(e);
    }
}
 
Example #21
Source Project: trekarta   Author: andreynovikov   File: Utility.java    License: GNU General Public License v3.0 6 votes vote down vote up
/**
 * Parses a Unicode identifier from the given string at the given
 * position.  Returns the identifier, or null if the first code point
 * examined is not an identifier start.
 * @param str the string to parse
 * @param pos INPUT-OUTPUT parameter.  On INPUT, pos[0] is the
 * first character to examine.  It must be less than str.length(),
 * and it must not point to a whitespace character.  On OUTPUT, the
 * position after the last parsed character; left unchanged when null
 * is returned.
 * @return the Unicode identifier, or null if there is no valid
 * identifier at pos[0].
 */
public static String parseUnicodeIdentifier(String str, int[] pos) {
    // assert(pos[0] < str.length());
    final StringBuilder ident = new StringBuilder();
    int cursor = pos[0];
    while (cursor < str.length()) {
        final int cp = Character.codePointAt(str, cursor);
        final boolean atStart = ident.length() == 0;
        final boolean accepted = atStart
                ? UCharacter.isUnicodeIdentifierStart(cp)
                : UCharacter.isUnicodeIdentifierPart(cp);
        if (!accepted) {
            if (atStart) {
                // nothing collected yet: no identifier here
                return null;
            }
            break;
        }
        ident.appendCodePoint(cp);
        cursor += UTF16.getCharCount(cp);
    }
    pos[0] = cursor;
    return ident.toString();
}
 
Example #22
Source Project: trekarta   Author: andreynovikov   File: UCharacterName.java    License: GNU General Public License v3.0 6 votes vote down vote up
/**
* Gets the extended type of a character: the UCharacter general
* category, except that noncharacters and lead/trail surrogates are
* reported with their own extended type constants.
* @param ch character to be tested
* @return extended type it is associated with
*/
private static int getType(int ch)
{
    if (UCharacterUtility.isNonCharacter(ch)) {
        // noncharacters are checked first and never reach the category lookup
        return NON_CHARACTER_;
    }

    final int category = UCharacter.getType(ch);
    if (category != UCharacterCategory.SURROGATE) {
        return category;
    }

    // split the surrogate category into lead and trail
    return (ch <= UTF16.LEAD_SURROGATE_MAX_VALUE)
            ? LEAD_SURROGATE_
            : TRAIL_SURROGATE_;
}
 
Example #23
/**
 * Classifies the text between two break positions.
 * Scans the segment code point by code point and reports the status of
 * the first digit or letter found.
 * @param current break position where the segment starts (relative to start)
 * @param next break position where the segment ends (relative to start)
 * @return WORD_NUMBER if the first digit-or-letter code point is a digit,
 *         WORD_LETTER if it is a letter, WORD_NONE if the segment holds
 *         neither or either position is BreakIterator.DONE
 */
private int calcStatus(int current, int next) {
    if (current == BreakIterator.DONE || next == BreakIterator.DONE) {
        return RuleBasedBreakIterator.WORD_NONE;
    }
    int begin = start + current;
    int end = start + next;
    int codepoint;
    for (int i = begin; i < end; i += UTF16.getCharCount(codepoint)) {
        // read at the loop cursor i; reading at begin would re-examine
        // the first code point on every iteration
        codepoint = UTF16.charAt(text, 0, end, i);
        if (UCharacter.isDigit(codepoint)) {
            return RuleBasedBreakIterator.WORD_NUMBER;
        } else if (UCharacter.isLetter(codepoint)) {
            return RuleBasedBreakIterator.WORD_LETTER;
        }
    }
    return RuleBasedBreakIterator.WORD_NONE;
}
 
Example #24
Source Project: fitnotifications   Author: abhijitvalluri   File: StringTokenizer.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Gets the index of the next delimiter at or after offset.
 * Delimiter membership is tested with the delims lookup table when it
 * is available, otherwise with m_delimiters_.
 * @param offset to the source string
 * @return offset of the immediate next delimiter, otherwise
 *         (- source string length - 1) if offset is negative or there
 *         are no more delimiters from offset onwards
 */
private int getNextDelimiter(int offset)
{
    final int notFound = -1 - m_length_;
    if (offset < 0) {
        return notFound;
    }

    int pos = offset;
    // do-while: the position at offset is always examined
    do {
        final int cp = UTF16.charAt(m_source_, pos);
        final boolean isDelimiter = (delims == null)
                ? m_delimiters_.contains(cp)
                : (cp < delims.length && delims[cp]);
        if (isDelimiter) {
            break;
        }
        pos ++;
    } while (pos < m_length_);

    return (pos < m_length_) ? pos : notFound;
}
 
Example #25
Source Project: fitnotifications   Author: abhijitvalluri   File: StringTokenizer.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Gets the index of the next non-delimiter at or after offset.
 * Delimiter membership is tested with the delims lookup table when it
 * is available, otherwise with m_delimiters_.
 * @param offset to the source string
 * @return offset of the immediate next non-delimiter, otherwise
 *         (- source string length - 1) if offset is negative or there
 *         are no more non-delimiters from offset onwards
 */
private int getNextNonDelimiter(int offset)
{
    final int notFound = -1 - m_length_;
    if (offset < 0) {
        return notFound;
    }

    int pos = offset;
    // do-while: the position at offset is always examined
    do {
        final int cp = UTF16.charAt(m_source_, pos);
        final boolean isDelimiter = (delims == null)
                ? m_delimiters_.contains(cp)
                : (cp < delims.length && delims[cp]);
        if (!isDelimiter) {
            break;
        }
        pos ++;
    } while (pos < m_length_);

    return (pos < m_length_) ? pos : notFound;
}
 
Example #26
Source Project: fitnotifications   Author: abhijitvalluri   File: CharsTrie.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Traverses the trie from the current state for the
 * one or two UTF-16 code units for this input code point.
 * A supplementary code point is fed as lead then trail surrogate; the
 * trail is only fed when the lead's result says more input can follow.
 * @param cp A Unicode code point 0..0x10ffff.
 * @return The match/value Result.
 * @stable ICU 4.8
 */
public Result nextForCodePoint(int cp) {
    if (cp <= 0xffff) {
        // single code unit
        return next(cp);
    }
    if (!next(UTF16.getLeadSurrogate(cp)).hasNext()) {
        // the lead surrogate already ended the match
        return Result.NO_MATCH;
    }
    return next(UTF16.getTrailSurrogate(cp));
}
 
Example #27
Source Project: fitnotifications   Author: abhijitvalluri   File: Trie.java    License: Apache License 2.0 5 votes vote down vote up
/**
* Gets the offset to data which the BMP character points to.
* Treats a lead surrogate as a normal code point: lead surrogates are
* looked up starting at LEAD_INDEX_OFFSET_, everything else at 0.
* @param ch BMP character
* @return offset to data
*/
protected final int getBMPOffset(char ch)
{
    if (ch < UTF16.LEAD_SURROGATE_MIN_VALUE
            || ch > UTF16.LEAD_SURROGATE_MAX_VALUE) {
        // not a lead surrogate: plain lookup
        return getRawOffset(0, ch);
    }
    return getRawOffset(LEAD_INDEX_OFFSET_, ch);
}
 
Example #28
Source Project: fitnotifications   Author: abhijitvalluri   File: Normalizer2Impl.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Gets the decomposition for one code point.
 * Algorithmic one-to-one mappings are followed in a loop (the mapped
 * code point is looked up again); Hangul syllables are decomposed via
 * Hangul.decompose; other mappings are read from extraData.
 * @param c code point
 * @return c's decomposition, if it has one; returns null if it does not have a decomposition
 */
public String getDecomposition(int c) {
    // result of the last algorithmic mapping, or -1 if none was applied yet
    int decomp=-1;
    // only assigned (and only read) when c>=minDecompNoCP, via the short-circuit below
    int norm16;
    for(;;) {
        if(c<minDecompNoCP || isDecompYes(norm16=getNorm16(c))) {
            // c does not decompose
        } else if(isHangul(norm16)) {
            // Hangul syllable: decompose algorithmically
            StringBuilder buffer=new StringBuilder();
            Hangul.decompose(c, buffer);
            return buffer.toString();
        } else if(isDecompNoAlgorithmic(norm16)) {
            // algorithmic mapping: remember the result and re-examine it
            decomp=c=mapAlgorithmic(c, norm16);
            continue;
        } else {
            // c decomposes, get everything from the variable-length extra data
            // the masked first unit is the mapping length; the mapping text follows it
            int length=extraData.charAt(norm16++)&MAPPING_LENGTH_MASK;
            return extraData.substring(norm16, norm16+length);
        }
        // reached only when the current c does not decompose further
        if(decomp<0) {
            // the original code point had no decomposition at all
            return null;
        } else {
            // the last algorithmic mapping is the final decomposition
            return UTF16.valueOf(decomp);
        }
    }
}
 
Example #29
Source Project: fitnotifications   Author: abhijitvalluri   File: CollationFCD.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Tests the lcccIndex/lcccBits tables for a code point.
 * Supplementary code points are tested via their lead surrogate.
 * @param c code point; may be negative, e.g., Collation.SENTINEL_CP
 * @return false for anything below U+0300 (the first character with
 *         lccc!=0), otherwise whether the table bit for c is set
 */
static boolean mayHaveLccc(int c) {
    // Handles all of Unicode 0..10FFFF, plus negative input.
    if (c < 0x300) {
        return false;
    }
    final int unit = (c > 0xffff) ? UTF16.getLeadSurrogate(c) : c;

    // one index entry per 32 code units; entry 0 means no bits are set
    final int bitsIndex = lcccIndex[unit >> 5];
    if (bitsIndex == 0) {
        return false;
    }
    return (lcccBits[bitsIndex] & (1 << (unit & 0x1f))) != 0;
}
 
Example #30
Source Project: fitnotifications   Author: abhijitvalluri   File: RuleCharacterIterator.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Skips ahead past any ignored characters, as indicated by the given
 * options.  This is useful in conjunction with the lookahead() method.
 *
 * Currently, this only has an effect for SKIP_WHITESPACE: the position
 * is advanced while the current code point is pattern white space.
 * @param options one or more of the following options, bitwise-OR-ed
 * together: PARSE_VARIABLES, PARSE_ESCAPES, SKIP_WHITESPACE.
 */
public void skipIgnored(int options) {
    if ((options & SKIP_WHITESPACE) == 0) {
        return;
    }
    int cp = _current();
    while (PatternProps.isWhiteSpace(cp)) {
        // step over the whole code point, one or two code units
        _advance(UTF16.getCharCount(cp));
        cp = _current();
    }
}