com.ibm.icu.text.UTF16 Java Examples
The following examples show how to use
com.ibm.icu.text.UTF16.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: IntTrie.java From fitnotifications with Apache License 2.0 | 6 votes |
/** * Get the value associated with a pair of surrogates. * @param lead a lead surrogate * @param trail a trail surrogate */ public final int getSurrogateValue(char lead, char trail) { if (!UTF16.isLeadSurrogate(lead) || !UTF16.isTrailSurrogate(trail)) { throw new IllegalArgumentException( "Argument characters do not form a supplementary character"); } // get fold position for the next trail surrogate int offset = getSurrogateOffset(lead, trail); // get the real data from the folded lead/trail units if (offset > 0) { return m_data_[offset]; } // return m_initialValue_ if there is an error return m_initialValue_; }
Example #2
Source File: Utility.java From trekarta with GNU General Public License v3.0 | 6 votes |
/**
 * Appends the hex form of every unit of {@code s} to {@code result},
 * placing {@code separator} between consecutive groups.
 * E.g., hex("AB", ...) with a "," separator => "0041,0042".
 *
 * @param s the text to convert
 * @param width passed through to the single-value {@code hex} helper for each group
 * @param separator text appended before every group except the first
 * @param useCodePoints if true, iterate by code point; otherwise by UTF-16 code unit
 * @param result the Appendable receiving the output
 * @return {@code result}, to allow chaining
 * @throws IllegalIcuArgumentException wrapping any IOException raised by {@code result}
 */
public static <S extends CharSequence, U extends CharSequence, T extends Appendable> T hex(S s, int width, U separator, boolean useCodePoints, T result) {
    try {
        int i = 0;
        if (useCodePoints) {
            while (i < s.length()) {
                final int cp = Character.codePointAt(s, i);
                if (i != 0) {
                    result.append(separator);
                }
                result.append(hex(cp, width));
                // Step over one or two code units depending on the code point.
                i += UTF16.getCharCount(cp);
            }
        } else {
            while (i < s.length()) {
                if (i != 0) {
                    result.append(separator);
                }
                result.append(hex(s.charAt(i), width));
                ++i;
            }
        }
        return result;
    } catch (IOException e) {
        throw new IllegalIcuArgumentException(e);
    }
}
Example #3
Source File: Utility.java From trekarta with GNU General Public License v3.0 | 6 votes |
/** * Convert characters outside the range U+0020 to U+007F to * Unicode escapes, and convert backslash to a double backslash. */ public static final String escape(String s) { StringBuilder buf = new StringBuilder(); for (int i=0; i<s.length(); ) { int c = Character.codePointAt(s, i); i += UTF16.getCharCount(c); if (c >= ' ' && c <= 0x007F) { if (c == '\\') { buf.append("\\\\"); // That is, "\\" } else { buf.append((char)c); } } else { boolean four = c <= 0xFFFF; buf.append(four ? "\\u" : "\\U"); buf.append(hex(c, four ? 4 : 8)); } } return buf.toString(); }
Example #4
Source File: TrieIterator.java From trekarta with GNU General Public License v3.0 | 6 votes |
/**
 * Handles the case where the next trail index is not positive, i.e. the
 * iteration is at the start of an initial block whose remaining code
 * points all share the same value. In that case m_nextCodepoint_ and the
 * related index members are advanced past the block.
 * Only used for supplementary code points, because the offset to the
 * trail indexes can be 0.
 *
 * @return true if the iterator was at the start of an initial block and
 *         has been advanced; false if nothing was changed
 * @throws NullPointerException if the trie's DataManipulate field is null
 */
private final boolean checkNullNextTrailIndex() {
    if (m_nextIndex_ > 0) {
        return false;
    }

    // Skip to the last code point covered by the current lead surrogate.
    m_nextCodepoint_ += TRAIL_SURROGATE_COUNT_ - 1;
    final int lead = UTF16.getLeadSurrogate(m_nextCodepoint_);
    final int blockStart =
            m_trie_.m_index_[lead >> Trie.INDEX_STAGE_1_SHIFT_]
                    << Trie.INDEX_STAGE_2_SHIFT_;

    if (m_trie_.m_dataManipulate_ == null) {
        throw new NullPointerException(
                "The field DataManipulate in this Trie is null");
    }

    m_nextIndex_ = m_trie_.m_dataManipulate_.getFoldingOffset(
            m_trie_.getValue(blockStart + (lead & Trie.INDEX_STAGE_3_MASK_)));
    m_nextIndex_--;
    m_nextBlockIndex_ = DATA_BLOCK_LENGTH_;
    return true;
}
Example #5
Source File: ReplaceableUCharacterIterator.java From trekarta with GNU General Public License v3.0 | 6 votes |
/** * Returns the current codepoint * @return current codepoint */ @Override public int currentCodePoint(){ // cannot use charAt due to it different // behaviour when index is pointing at a // trail surrogate, check for surrogates int ch = current(); if(UTF16.isLeadSurrogate((char)ch)){ // advance the index to get the next code point next(); // due to post increment semantics current() after next() // actually returns the next char which is what we want int ch2 = current(); // current should never change the current index so back off previous(); if(UTF16.isTrailSurrogate((char)ch2)){ // we found a surrogate pair return Character.toCodePoint((char)ch, (char)ch2); } } return ch; }
Example #6
Source File: Utility.java From trekarta with GNU General Public License v3.0 | 6 votes |
/** * Parse a Unicode identifier from the given string at the given * position. Return the identifier, or null if there is no * identifier. * @param str the string to parse * @param pos INPUT-OUPUT parameter. On INPUT, pos[0] is the * first character to examine. It must be less than str.length(), * and it must not point to a whitespace character. That is, must * have pos[0] < str.length(). On * OUTPUT, the position after the last parsed character. * @return the Unicode identifier, or null if there is no valid * identifier at pos[0]. */ public static String parseUnicodeIdentifier(String str, int[] pos) { // assert(pos[0] < str.length()); StringBuilder buf = new StringBuilder(); int p = pos[0]; while (p < str.length()) { int ch = Character.codePointAt(str, p); if (buf.length() == 0) { if (UCharacter.isUnicodeIdentifierStart(ch)) { buf.appendCodePoint(ch); } else { return null; } } else { if (UCharacter.isUnicodeIdentifierPart(ch)) { buf.appendCodePoint(ch); } else { break; } } p += UTF16.getCharCount(ch); } pos[0] = p; return buf.toString(); }
Example #7
Source File: Trie.java From trekarta with GNU General Public License v3.0 | 6 votes |
/** * Internal trie getter from a code point. * Could be faster(?) but longer with * if((c32)<=0xd7ff) { (result)=_TRIE_GET_RAW(trie, data, 0, c32); } * Gets the offset to data which the codepoint points to * @param ch codepoint * @return offset to data */ protected final int getCodePointOffset(int ch) { // if ((ch >> 16) == 0) slower if (ch < 0) { return -1; } else if (ch < UTF16.LEAD_SURROGATE_MIN_VALUE) { // fastpath for the part of the BMP below surrogates (D800) where getRawOffset() works return getRawOffset(0, (char)ch); } else if (ch < UTF16.SUPPLEMENTARY_MIN_VALUE) { // BMP codepoint return getBMPOffset((char)ch); } else if (ch <= UCharacter.MAX_VALUE) { // look at the construction of supplementary characters // trail forms the ends of it. return getSurrogateOffset(UTF16.getLeadSurrogate(ch), (char)(ch & SURROGATE_MASK_)); } else { // return -1 if there is an error, in this case we return return -1; } }
Example #8
Source File: CharTrie.java From trekarta with GNU General Public License v3.0 | 6 votes |
/** * Gets the value associated with the codepoint. * If no value is associated with the codepoint, a default value will be * returned. * @param ch codepoint * @return offset to data */ public final char getCodePointValue(int ch) { int offset; // fastpath for U+0000..U+D7FF if(0 <= ch && ch < UTF16.LEAD_SURROGATE_MIN_VALUE) { // copy of getRawOffset() offset = (m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_) + (ch & INDEX_STAGE_3_MASK_); return m_data_[offset]; } // handle U+D800..U+10FFFF offset = getCodePointOffset(ch); // return -1 if there is an error, in this case we return the default // value: m_initialValue_ return (offset >= 0) ? m_data_[offset] : m_initialValue_; }
Example #9
Source File: BreakIteratorWrapper.java From lucene-solr with Apache License 2.0 | 6 votes |
/** Returns true if the current text represents emoji character or sequence */ private boolean isEmoji(int current, int next) { int begin = start + current; int end = start + next; int codepoint = UTF16.charAt(text, 0, end, begin); if (EMOJI.contains(codepoint)) { if (EMOJI_RK.contains(codepoint)) { // if its in EmojiRK, we don't treat it as emoji unless there is evidence it forms emoji sequence, // an emoji presentation selector or keycap follows. int trailer = begin + Character.charCount(codepoint); return trailer < end && (text[trailer] == 0xFE0F || text[trailer] == 0x20E3); } else { return true; } } return false; }
Example #10
Source File: CharacterIteration.java From fitnotifications with Apache License 2.0 | 6 votes |
/**
 * Returns the code point at the iterator's current position without
 * changing that position.
 *
 * @param ci the iterator to read from
 * @return the supplementary code point when the current unit is a lead
 *         surrogate followed by a trail surrogate; DONE32 when the current
 *         unit is CharacterIterator.DONE and the index is at or past the
 *         end index; otherwise the current unit itself
 */
public static int current32(CharacterIterator ci) {
    final char lead = ci.current();
    if (lead < UTF16.LEAD_SURROGATE_MIN_VALUE) {
        return lead;
    }

    if (UTF16.isLeadSurrogate(lead)) {
        // Peek at the next unit, then restore the position.
        final char trail = ci.next();
        ci.previous();
        if (!UTF16.isTrailSurrogate(trail)) {
            return lead;
        }
        return ((lead - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10)
                + (trail - UTF16.TRAIL_SURROGATE_MIN_VALUE)
                + UTF16.SUPPLEMENTARY_MIN_VALUE;
    }

    // DONE is only an end marker when the index is actually at the end.
    if (lead == CharacterIterator.DONE && ci.getIndex() >= ci.getEndIndex()) {
        return DONE32;
    }
    return lead;
}
Example #11
Source File: CharacterIteration.java From fitnotifications with Apache License 2.0 | 6 votes |
/**
 * Moves the iterator back by one code point and returns that code point.
 *
 * @param ci the iterator to move
 * @return DONE32 if the iterator is already at or before its begin index;
 *         the combined supplementary code point if the two preceding units
 *         form a surrogate pair; otherwise the single preceding unit
 */
public static int previous32(CharacterIterator ci) {
    if (ci.getIndex() <= ci.getBeginIndex()) {
        return DONE32;
    }

    final char trail = ci.previous();
    if (!UTF16.isTrailSurrogate(trail) || ci.getIndex() <= ci.getBeginIndex()) {
        return trail;
    }

    final char lead = ci.previous();
    if (UTF16.isLeadSurrogate(lead)) {
        return ((lead - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10)
                + (trail - UTF16.TRAIL_SURROGATE_MIN_VALUE)
                + UTF16.SUPPLEMENTARY_MIN_VALUE;
    }

    // Not a pair: undo the second step back and return the lone unit.
    ci.next();
    return trail;
}
Example #12
Source File: CharacterIteration.java From fitnotifications with Apache License 2.0 | 6 votes |
/**
 * Given an already-read unit {@code lead}, tries to combine it with the
 * next unit of the iterator into a supplementary code point.
 *
 * @param ci the iterator positioned on {@code lead}
 * @param lead the unit already read from the iterator
 * @return DONE32 if {@code lead} is CharacterIterator.DONE and the index
 *         is at or past the end index; the combined code point if
 *         {@code lead} is not above the lead-surrogate maximum and the
 *         next unit is a trail surrogate (the iterator then stays on the
 *         trail); otherwise {@code lead} with the iterator position
 *         restored
 */
public static int nextTrail32(CharacterIterator ci, int lead) {
    if (lead == CharacterIterator.DONE && ci.getIndex() >= ci.getEndIndex()) {
        return DONE32;
    }
    if (lead > UTF16.LEAD_SURROGATE_MAX_VALUE) {
        return lead;
    }

    final char trail = ci.next();
    if (!UTF16.isTrailSurrogate(trail)) {
        // Not a pair: step back so the iterator is where it started.
        ci.previous();
        return lead;
    }
    return ((lead - UTF16.LEAD_SURROGATE_MIN_VALUE) << 10)
            + (trail - UTF16.TRAIL_SURROGATE_MIN_VALUE)
            + UTF16.SUPPLEMENTARY_MIN_VALUE;
}
Example #13
Source File: UCharacterName.java From fitnotifications with Apache License 2.0 | 6 votes |
/** * Gets the character extended type * @param ch character to be tested * @return extended type it is associated with */ private static int getType(int ch) { if (UCharacterUtility.isNonCharacter(ch)) { // not a character we return a invalid category count return NON_CHARACTER_; } int result = UCharacter.getType(ch); if (result == UCharacterCategory.SURROGATE) { if (ch <= UTF16.LEAD_SURROGATE_MAX_VALUE) { result = LEAD_SURROGATE_; } else { result = TRAIL_SURROGATE_; } } return result; }
Example #14
Source File: IntTrie.java From fitnotifications with Apache License 2.0 | 6 votes |
/** * Gets the value associated with the codepoint. * If no value is associated with the codepoint, a default value will be * returned. * @param ch codepoint * @return offset to data */ public final int getCodePointValue(int ch) { int offset; // fastpath for U+0000..U+D7FF if(0 <= ch && ch < UTF16.LEAD_SURROGATE_MIN_VALUE) { // copy of getRawOffset() offset = (m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_) + (ch & INDEX_STAGE_3_MASK_); return m_data_[offset]; } // handle U+D800..U+10FFFF offset = getCodePointOffset(ch); return (offset >= 0) ? m_data_[offset] : m_initialValue_; }
Example #15
Source File: Utility.java From fitnotifications with Apache License 2.0 | 6 votes |
/** * Parse a Unicode identifier from the given string at the given * position. Return the identifier, or null if there is no * identifier. * @param str the string to parse * @param pos INPUT-OUPUT parameter. On INPUT, pos[0] is the * first character to examine. It must be less than str.length(), * and it must not point to a whitespace character. That is, must * have pos[0] < str.length(). On * OUTPUT, the position after the last parsed character. * @return the Unicode identifier, or null if there is no valid * identifier at pos[0]. */ public static String parseUnicodeIdentifier(String str, int[] pos) { // assert(pos[0] < str.length()); StringBuilder buf = new StringBuilder(); int p = pos[0]; while (p < str.length()) { int ch = Character.codePointAt(str, p); if (buf.length() == 0) { if (UCharacter.isUnicodeIdentifierStart(ch)) { buf.appendCodePoint(ch); } else { return null; } } else { if (UCharacter.isUnicodeIdentifierPart(ch)) { buf.appendCodePoint(ch); } else { break; } } p += UTF16.getCharCount(ch); } pos[0] = p; return buf.toString(); }
Example #16
Source File: Utility.java From fitnotifications with Apache License 2.0 | 6 votes |
/**
 * Appends the hex form of every unit of {@code s} to {@code result},
 * placing {@code separator} between consecutive groups.
 * E.g., hex("AB", ...) with a "," separator => "0041,0042".
 *
 * @param s the text to convert
 * @param width passed through to the single-value {@code hex} helper for each group
 * @param separator text appended before every group except the first
 * @param useCodePoints if true, iterate by code point; otherwise by UTF-16 code unit
 * @param result the Appendable receiving the output
 * @return {@code result}, to allow chaining
 * @throws IllegalIcuArgumentException wrapping any IOException raised by {@code result}
 */
public static <S extends CharSequence, U extends CharSequence, T extends Appendable> T hex(S s, int width, U separator, boolean useCodePoints, T result) {
    try {
        int i = 0;
        if (useCodePoints) {
            while (i < s.length()) {
                final int cp = Character.codePointAt(s, i);
                if (i != 0) {
                    result.append(separator);
                }
                result.append(hex(cp, width));
                // Step over one or two code units depending on the code point.
                i += UTF16.getCharCount(cp);
            }
        } else {
            while (i < s.length()) {
                if (i != 0) {
                    result.append(separator);
                }
                result.append(hex(s.charAt(i), width));
                ++i;
            }
        }
        return result;
    } catch (IOException e) {
        throw new IllegalIcuArgumentException(e);
    }
}
Example #17
Source File: Trie.java From fitnotifications with Apache License 2.0 | 6 votes |
/** * Internal trie getter from a code point. * Could be faster(?) but longer with * if((c32)<=0xd7ff) { (result)=_TRIE_GET_RAW(trie, data, 0, c32); } * Gets the offset to data which the codepoint points to * @param ch codepoint * @return offset to data */ protected final int getCodePointOffset(int ch) { // if ((ch >> 16) == 0) slower if (ch < 0) { return -1; } else if (ch < UTF16.LEAD_SURROGATE_MIN_VALUE) { // fastpath for the part of the BMP below surrogates (D800) where getRawOffset() works return getRawOffset(0, (char)ch); } else if (ch < UTF16.SUPPLEMENTARY_MIN_VALUE) { // BMP codepoint return getBMPOffset((char)ch); } else if (ch <= UCharacter.MAX_VALUE) { // look at the construction of supplementary characters // trail forms the ends of it. return getSurrogateOffset(UTF16.getLeadSurrogate(ch), (char)(ch & SURROGATE_MASK_)); } else { // return -1 if there is an error, in this case we return return -1; } }
Example #18
Source File: UCharacterName.java From trekarta with GNU General Public License v3.0 | 6 votes |
/** * Gets the character extended type * @param ch character to be tested * @return extended type it is associated with */ private static int getType(int ch) { if (UCharacterUtility.isNonCharacter(ch)) { // not a character we return a invalid category count return NON_CHARACTER_; } int result = UCharacter.getType(ch); if (result == UCharacterCategory.SURROGATE) { if (ch <= UTF16.LEAD_SURROGATE_MAX_VALUE) { result = LEAD_SURROGATE_; } else { result = TRAIL_SURROGATE_; } } return result; }
Example #19
Source File: ReplaceableUCharacterIterator.java From fitnotifications with Apache License 2.0 | 6 votes |
/** * Returns the current codepoint * @return current codepoint */ @Override public int currentCodePoint(){ // cannot use charAt due to it different // behaviour when index is pointing at a // trail surrogate, check for surrogates int ch = current(); if(UTF16.isLeadSurrogate((char)ch)){ // advance the index to get the next code point next(); // due to post increment semantics current() after next() // actually returns the next char which is what we want int ch2 = current(); // current should never change the current index so back off previous(); if(UTF16.isTrailSurrogate((char)ch2)){ // we found a surrogate pair return Character.toCodePoint((char)ch, (char)ch2); } } return ch; }
Example #20
Source File: BreakIteratorWrapper.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 6 votes |
/**
 * Computes the rule status for the span between {@code current} and
 * {@code next} (both relative to {@code start}).
 * The span is scanned code point by code point; the first digit or
 * letter found decides the status.
 *
 * @param current start of the span, or BreakIterator.DONE
 * @param next end of the span (exclusive), or BreakIterator.DONE
 * @return WORD_NONE if either boundary is DONE or the span contains no
 *         digit or letter; WORD_NUMBER if a digit is found first;
 *         WORD_LETTER if a letter is found first
 */
private int calcStatus(int current, int next) {
    if (current == BreakIterator.DONE || next == BreakIterator.DONE) {
        return RuleBasedBreakIterator.WORD_NONE;
    }
    int begin = start + current;
    int end = start + next;
    int codepoint;
    for (int i = begin; i < end; i += UTF16.getCharCount(codepoint)) {
        // Read at the loop index. Reading at 'begin' on every pass would
        // classify only the first code point of the span, so spans that
        // start with punctuation or a symbol were always reported as
        // WORD_NONE regardless of what followed.
        codepoint = UTF16.charAt(text, 0, end, i);
        if (UCharacter.isDigit(codepoint)) {
            return RuleBasedBreakIterator.WORD_NUMBER;
        } else if (UCharacter.isLetter(codepoint)) {
            return RuleBasedBreakIterator.WORD_LETTER;
        }
    }
    return RuleBasedBreakIterator.WORD_NONE;
}
Example #21
Source File: CharTrie.java From fitnotifications with Apache License 2.0 | 6 votes |
/** * Gets the value associated with the codepoint. * If no value is associated with the codepoint, a default value will be * returned. * @param ch codepoint * @return offset to data */ public final char getCodePointValue(int ch) { int offset; // fastpath for U+0000..U+D7FF if(0 <= ch && ch < UTF16.LEAD_SURROGATE_MIN_VALUE) { // copy of getRawOffset() offset = (m_index_[ch >> INDEX_STAGE_1_SHIFT_] << INDEX_STAGE_2_SHIFT_) + (ch & INDEX_STAGE_3_MASK_); return m_data_[offset]; } // handle U+D800..U+10FFFF offset = getCodePointOffset(ch); // return -1 if there is an error, in this case we return the default // value: m_initialValue_ return (offset >= 0) ? m_data_[offset] : m_initialValue_; }
Example #22
Source File: TrieIterator.java From fitnotifications with Apache License 2.0 | 6 votes |
/**
 * Handles the case where the next trail index is not positive, i.e. the
 * iteration is at the start of an initial block whose remaining code
 * points all share the same value. In that case m_nextCodepoint_ and the
 * related index members are advanced past the block.
 * Only used for supplementary code points, because the offset to the
 * trail indexes can be 0.
 *
 * @return true if the iterator was at the start of an initial block and
 *         has been advanced; false if nothing was changed
 * @throws NullPointerException if the trie's DataManipulate field is null
 */
private final boolean checkNullNextTrailIndex() {
    if (m_nextIndex_ > 0) {
        return false;
    }

    // Skip to the last code point covered by the current lead surrogate.
    m_nextCodepoint_ += TRAIL_SURROGATE_COUNT_ - 1;
    final int lead = UTF16.getLeadSurrogate(m_nextCodepoint_);
    final int blockStart =
            m_trie_.m_index_[lead >> Trie.INDEX_STAGE_1_SHIFT_]
                    << Trie.INDEX_STAGE_2_SHIFT_;

    if (m_trie_.m_dataManipulate_ == null) {
        throw new NullPointerException(
                "The field DataManipulate in this Trie is null");
    }

    m_nextIndex_ = m_trie_.m_dataManipulate_.getFoldingOffset(
            m_trie_.getValue(blockStart + (lead & Trie.INDEX_STAGE_3_MASK_)));
    m_nextIndex_--;
    m_nextBlockIndex_ = DATA_BLOCK_LENGTH_;
    return true;
}
Example #23
Source File: Utility.java From fitnotifications with Apache License 2.0 | 6 votes |
/** * Convert characters outside the range U+0020 to U+007F to * Unicode escapes, and convert backslash to a double backslash. */ public static final String escape(String s) { StringBuilder buf = new StringBuilder(); for (int i=0; i<s.length(); ) { int c = Character.codePointAt(s, i); i += UTF16.getCharCount(c); if (c >= ' ' && c <= 0x007F) { if (c == '\\') { buf.append("\\\\"); // That is, "\\" } else { buf.append((char)c); } } else { boolean four = c <= 0xFFFF; buf.append(four ? "\\u" : "\\U"); buf.append(hex(c, four ? 4 : 8)); } } return buf.toString(); }
Example #24
Source File: ScriptIterator.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Advances to the next script run.
 * On success, scriptStart/scriptLimit delimit the run and scriptCode
 * holds its script.
 *
 * @return true if there is another script run, false otherwise.
 */
boolean next() {
    if (scriptLimit >= limit) {
        return false;
    }

    scriptCode = UScript.COMMON;
    scriptStart = scriptLimit;

    while (index < limit) {
        final int cp = UTF16.charAt(text, start, limit, index - start);
        final int cpScript = getScript(cp);

        /*
         * From UTR #24: Implementations that determine the boundaries between
         * characters of given scripts should never break between a non-spacing
         * mark and its base character. Thus for boundary determinations and
         * similar sorts of processing, a non-spacing mark — whatever its script
         * value — should inherit the script value of its base character.
         */
        final boolean staysInRun = isSameScript(scriptCode, cpScript)
                || UCharacter.getType(cp) == ECharacterCategory.NON_SPACING_MARK;
        if (!staysInRun) {
            break;
        }

        index += UTF16.getCharCount(cp);

        // Inherited or Common becomes the script code of the surrounding text.
        if (scriptCode <= UScript.INHERITED && cpScript > UScript.INHERITED) {
            scriptCode = cpScript;
        }
    }

    scriptLimit = index;
    return true;
}
Example #25
Source File: CharsTrie.java From fitnotifications with Apache License 2.0 | 5 votes |
/**
 * Traverses the trie from the current state for the
 * one or two UTF-16 code units for this input code point.
 * A supplementary code point is fed as lead then trail surrogate; the
 * trail is only consumed if the lead step reports that more input can
 * follow.
 *
 * @param cp A Unicode code point 0..0x10ffff.
 * @return The match/value Result.
 * @stable ICU 4.8
 */
public Result nextForCodePoint(int cp) {
    if (cp <= 0xffff) {
        return next(cp);
    }
    if (!next(UTF16.getLeadSurrogate(cp)).hasNext()) {
        return Result.NO_MATCH;
    }
    return next(UTF16.getTrailSurrogate(cp));
}
Example #26
Source File: Trie.java From trekarta with GNU General Public License v3.0 | 5 votes |
/** * Gets the offset to data which the BMP character points to * Treats a lead surrogate as a normal code point. * @param ch BMP character * @return offset to data */ protected final int getBMPOffset(char ch) { return (ch >= UTF16.LEAD_SURROGATE_MIN_VALUE && ch <= UTF16.LEAD_SURROGATE_MAX_VALUE) ? getRawOffset(LEAD_INDEX_OFFSET_, ch) : getRawOffset(0, ch); // using a getRawOffset(ch) makes no diff }
Example #27
Source File: StringTokenizer.java From fitnotifications with Apache License 2.0 | 5 votes |
/**
 * Gets the index of the first non-delimiter at or after {@code offset}.
 *
 * @param offset to the source string
 * @return offset of the immediate next non-delimiter, otherwise
 *         (- source string length - 1) if {@code offset} is negative or
 *         the scan reaches the end of the source without finding one
 */
private int getNextNonDelimiter(int offset) {
    if (offset >= 0) {
        int idx = offset;
        if (delims == null) {
            // No lookup table: ask the delimiter set directly.
            do {
                if (!m_delimiters_.contains(UTF16.charAt(m_source_, idx))) {
                    break;
                }
                idx++;
            } while (idx < m_length_);
        } else {
            // Table lookup; code points beyond the table are non-delimiters.
            do {
                final int cp = UTF16.charAt(m_source_, idx);
                final boolean isDelimiter = cp < delims.length && delims[cp];
                if (!isDelimiter) {
                    break;
                }
                idx++;
            } while (idx < m_length_);
        }
        if (idx < m_length_) {
            return idx;
        }
    }
    return -1 - m_length_;
}
Example #28
Source File: StringTokenizer.java From fitnotifications with Apache License 2.0 | 5 votes |
/**
 * Gets the index of the first delimiter at or after {@code offset}.
 *
 * @param offset to the source string
 * @return offset of the immediate next delimiter, otherwise
 *         (- source string length - 1) if {@code offset} is negative or
 *         the scan reaches the end of the source without finding one
 */
private int getNextDelimiter(int offset) {
    if (offset >= 0) {
        int idx = offset;
        if (delims == null) {
            // No lookup table: ask the delimiter set directly.
            do {
                if (m_delimiters_.contains(UTF16.charAt(m_source_, idx))) {
                    break;
                }
                idx++;
            } while (idx < m_length_);
        } else {
            // Table lookup; code points beyond the table are non-delimiters.
            do {
                final int cp = UTF16.charAt(m_source_, idx);
                final boolean isDelimiter = cp < delims.length && delims[cp];
                if (isDelimiter) {
                    break;
                }
                idx++;
            } while (idx < m_length_);
        }
        if (idx < m_length_) {
            return idx;
        }
    }
    return -1 - m_length_;
}
Example #29
Source File: ScriptIterator.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Advances to the next script run.
 * On success, scriptStart/scriptLimit delimit the run and scriptCode
 * holds its script.
 *
 * @return true if there is another script run, false otherwise.
 */
boolean next() {
    if (scriptLimit >= limit) {
        return false;
    }

    scriptCode = UScript.COMMON;
    scriptStart = scriptLimit;

    while (index < limit) {
        final int cp = UTF16.charAt(text, start, limit, index - start);
        final int cpScript = getScript(cp);

        /*
         * From UTR #24: Implementations that determine the boundaries between
         * characters of given scripts should never break between a non-spacing
         * mark and its base character. Thus for boundary determinations and
         * similar sorts of processing, a non-spacing mark — whatever its script
         * value — should inherit the script value of its base character.
         */
        final boolean staysInRun = isSameScript(scriptCode, cpScript)
                || UCharacter.getType(cp) == ECharacterCategory.NON_SPACING_MARK;
        if (!staysInRun) {
            break;
        }

        index += UTF16.getCharCount(cp);

        // Inherited or Common becomes the script code of the surrounding text.
        if (scriptCode <= UScript.INHERITED && cpScript > UScript.INHERITED) {
            scriptCode = cpScript;
        }
    }

    scriptLimit = index;
    return true;
}
Example #30
Source File: Trie.java From fitnotifications with Apache License 2.0 | 5 votes |
/** * Gets the offset to data which the BMP character points to * Treats a lead surrogate as a normal code point. * @param ch BMP character * @return offset to data */ protected final int getBMPOffset(char ch) { return (ch >= UTF16.LEAD_SURROGATE_MIN_VALUE && ch <= UTF16.LEAD_SURROGATE_MAX_VALUE) ? getRawOffset(LEAD_INDEX_OFFSET_, ch) : getRawOffset(0, ch); // using a getRawOffset(ch) makes no diff }