Java Code Examples for com.ibm.icu.lang.UScript#JAPANESE

The following examples show how to use com.ibm.icu.lang.UScript#JAPANESE. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ScriptIterator.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Script lookup that avoids calling UScript.getScript() for Basic Latin: code points that
 * index into {@code basicLatin} are answered straight from that table.
 *
 * <p>When {@code combineCJ} is set, Han, Hiragana and Katakana are all reported as
 * {@link UScript#JAPANESE}, and the full-width digits U+FF10..U+FF19 are reported as
 * {@link UScript#LATIN}.
 */
private int getScript(int codepoint) {
  // Fast path: the code point is a valid index into the precomputed table.
  if (codepoint >= 0 && codepoint < basicLatin.length) {
    return basicLatin[codepoint];
  }

  final int detected = UScript.getScript(codepoint);
  if (!combineCJ) {
    return detected;
  }

  switch (detected) {
    case UScript.HAN:
    case UScript.HIRAGANA:
    case UScript.KATAKANA:
      return UScript.JAPANESE;
    default:
      break;
  }

  // With CJK dictionary breaking, full-width numbers must not be handed to it, otherwise
  // they are treated as punctuation. There is currently no cleaner way to fix this.
  final boolean fullWidthDigit = 0xFF10 <= codepoint && codepoint <= 0xFF19;
  return fullWidthDigit ? UScript.LATIN : detected;
}
 
Example 2
Source File: ScriptIterator.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Returns the script code for a code point. Code points that fall inside the
 * {@code basicLatin} table are read from it directly; everything else is resolved
 * through UScript.getScript().
 *
 * If {@code combineCJ} is enabled, Han/Hiragana/Katakana collapse to UScript.JAPANESE
 * and the full-width digits (U+FF10 to U+FF19) are mapped to UScript.LATIN.
 */
private int getScript(int codepoint) {
    final boolean inLookupTable = codepoint >= 0 && codepoint < basicLatin.length;
    if (inLookupTable) {
        return basicLatin[codepoint];
    }

    final int resolved = UScript.getScript(codepoint);
    if (!combineCJ) {
        return resolved;
    }

    final boolean isChineseOrJapanese = resolved == UScript.HAN
            || resolved == UScript.HIRAGANA
            || resolved == UScript.KATAKANA;
    if (isChineseOrJapanese) {
        return UScript.JAPANESE;
    }

    // Keep full-width numbers away from CJK dictionary breaking, which would otherwise
    // treat them as punctuation; no cleaner fix is currently available.
    if (codepoint >= 0xFF10 && codepoint <= 0xFF19) {
        return UScript.LATIN;
    }
    return resolved;
}
 
Example 3
Source File: ScriptAttributeImpl.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
@Override
public void reflectWith(AttributeReflector reflector) {
  // CJK word breaking marks runs of Chinese/Japanese with the 15924 code Japanese
  // (Han+Hiragana+Katakana). That usage is correct (for Chinese, Han is a subset), so the
  // combined label below exists purely to avoid confusing whoever reads the output.
  final String label;
  if (code == UScript.JAPANESE) {
    label = "Chinese/Japanese";
  } else {
    label = getName();
  }
  reflector.reflect(ScriptAttribute.class, "script", label);
}
 
Example 4
Source File: DefaultICUTokenizerConfig.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
@Override
public RuleBasedBreakIterator getBreakIterator(int script) {
  // Every path returns a clone of one of the configured iterators, never the field itself.
  if (script == UScript.JAPANESE) {
    return (RuleBasedBreakIterator) cjkBreakIterator.clone();
  }
  // Myanmar only gets the syllable iterator when it is not being treated as words.
  if (script == UScript.MYANMAR && !myanmarAsWords) {
    return (RuleBasedBreakIterator) myanmarSyllableIterator.clone();
  }
  return (RuleBasedBreakIterator) defaultBreakIterator.clone();
}
 
Example 5
Source File: DefaultIcuTokenizerConfig.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 5 votes vote down vote up
@Override
public BreakIterator getBreakIterator(int script) {
    // Each branch returns a clone of the selected iterator rather than the field itself.
    if (script == UScript.JAPANESE) {
        return (BreakIterator) cjkBreakIterator.clone();
    } else if (script != UScript.MYANMAR || myanmarAsWords) {
        // Any other script, or Myanmar handled as words, uses the default iterator.
        return (BreakIterator) defaultBreakIterator.clone();
    } else {
        // Myanmar with myanmarAsWords disabled.
        return (BreakIterator) myanmarSyllableIterator.clone();
    }
}
 
Example 6
Source File: ScriptAttributeImpl.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 4 votes vote down vote up
@Override
public void reflectWith(AttributeReflector reflector) {
    // The UScript.JAPANESE code is reported as "Chinese/Japanese"; any other code is
    // reported under the name returned by getName().
    final boolean combinedCjk = (code == UScript.JAPANESE);
    String displayName = "Chinese/Japanese";
    if (!combinedCjk) {
        displayName = getName();
    }
    reflector.reflect(ScriptAttribute.class, "script", displayName);
}