Java Code Examples for com.ibm.icu.lang.UScript#COMMON

The following examples show how to use com.ibm.icu.lang.UScript#COMMON. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ScriptIterator.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Advances to the next script run.
 * <p>
 * When {@code scriptLimit} has already reached {@code limit} nothing is
 * modified. Otherwise {@code scriptStart} is set to the previous
 * {@code scriptLimit}, {@code scriptCode} is reset to {@link UScript#COMMON},
 * and the text is scanned forward from the current {@code index};
 * {@code scriptLimit} is set to the position where the scan stopped.
 *
 * @return true if a script run was produced, false if there is none left.
 */
boolean next() {
  if (scriptLimit >= limit) {
    return false;
  }

  scriptStart = scriptLimit;
  scriptCode = UScript.COMMON;

  while (index < limit) {
    final int codePoint = UTF16.charAt(text, start, limit, index - start);
    final int script = getScript(codePoint);

    /*
     * From UTR #24: Implementations that determine the boundaries between
     * characters of given scripts should never break between a non-spacing
     * mark and its base character. Thus for boundary determinations and
     * similar sorts of processing, a non-spacing mark — whatever its script
     * value — should inherit the script value of its base character.
     */
    final boolean continuesRun = isSameScript(scriptCode, script)
        || UCharacter.getType(codePoint) == ECharacterCategory.NON_SPACING_MARK;
    if (!continuesRun) {
      break;
    }

    index += UTF16.getCharCount(codePoint);

    // While the run is still Common/Inherited, the first more specific
    // script encountered becomes the script of the whole run.
    if (scriptCode <= UScript.INHERITED && script > UScript.INHERITED) {
      scriptCode = script;
    }
  }

  scriptLimit = index;
  return true;
}
 
Example 2
Source File: ScriptIterator.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Moves on to the next script run, if any.
 * <p>
 * Returns false, leaving all state untouched, once {@code scriptLimit} has
 * reached {@code limit}. Otherwise the run starts at the previous
 * {@code scriptLimit}, begins as {@link UScript#COMMON}, and is extended from
 * the current {@code index} until a character that does not belong to the run
 * is found or {@code limit} is reached.
 *
 * @return true if there is another script run, false otherwise.
 */
boolean next() {
    if (scriptLimit >= limit) {
        return false;
    }

    scriptStart = scriptLimit;
    scriptCode = UScript.COMMON;

    while (index < limit) {
        final int current = UTF16.charAt(text, start, limit, index - start);
        final int currentScript = getScript(current);

        /*
         * From UTR #24: Implementations that determine the boundaries between
         * characters of given scripts should never break between a non-spacing
         * mark and its base character. Thus for boundary determinations and
         * similar sorts of processing, a non-spacing mark — whatever its script
         * value — should inherit the script value of its base character.
         */
        if (!isSameScript(scriptCode, currentScript)
                && UCharacter.getType(current) != ECharacterCategory.NON_SPACING_MARK) {
            // A different script that is not a combining mark ends the run.
            break;
        }

        index += UTF16.getCharCount(current);

        /*
         * Inherited or Common becomes the script code of the surrounding text.
         */
        final boolean runStillGeneric = scriptCode <= UScript.INHERITED;
        if (runStillGeneric && currentScript > UScript.INHERITED) {
            scriptCode = currentScript;
        }
    }

    scriptLimit = index;
    return true;
}
 
Example 3
Source File: UnicodeData.java    From es6draft with MIT License 5 votes vote down vote up
/**
 * Tests whether {@code codePoint} has the script {@code value}.
 * <p>
 * For the code points U+3000, U+3004, U+3012, U+3020 and U+3036 the answer is
 * true only when {@code value} is {@link UScript#COMMON}; every other code
 * point is delegated to the superclass implementation.
 *
 * @param codePoint the code point to test
 * @param value the script code to test for
 * @return true if the code point is considered to have the given script
 */
@Override
public boolean has(int codePoint, int value) {
    // See https://ssl.icu-project.org/trac/ticket/13462
    final boolean forcedToCommon = codePoint == 0x3000
            || codePoint == 0x3004
            || codePoint == 0x3012
            || codePoint == 0x3020
            || codePoint == 0x3036;
    if (forcedToCommon) {
        return value == UScript.COMMON;
    }
    return super.has(codePoint, value);
}
 
Example 4
Source File: UnicodeData.java    From es6draft with MIT License 5 votes vote down vote up
/**
 * Tests whether {@code codePoint} has the script {@code value}.
 * <p>
 * The code points U+3000, U+3004, U+3012, U+3020 and U+3036 are reported as
 * having only {@link UScript#COMMON}; all other code points are answered by
 * {@link UScript#hasScript(int, int)}.
 *
 * @param codePoint the code point to test
 * @param value the script code to test for
 * @return true if the code point is considered to have the given script
 */
@Override
public boolean has(int codePoint, int value) {
    // See https://ssl.icu-project.org/trac/ticket/13462
    final boolean treatedAsCommonOnly = codePoint == 0x3000
            || codePoint == 0x3004
            || codePoint == 0x3012
            || codePoint == 0x3020
            || codePoint == 0x3036;
    return treatedAsCommonOnly
            ? value == UScript.COMMON
            : UScript.hasScript(codePoint, value);
}
 
Example 5
Source File: AnyTransliterator.java    From fitnotifications with Apache License 2.0 4 votes vote down vote up
/**
 * Advances to the next run and reports whether one was found.
 * <p>
 * Returns false when the previous run already ended at {@code textLimit}.
 * Otherwise the new run begins at the previous {@code limit}, is widened
 * backwards over adjacent COMMON / INHERITED characters (never past
 * {@code textStart}), and is extended forwards over COMMON, INHERITED and
 * same-script characters. Upon return, the caller should examine
 * {@code scriptCode}, {@code start}, and {@code limit}.
 *
 * @return true if a run was produced, false if the text is exhausted
 */
public boolean next() {
    // The script is unknown until a character with a specific script is seen.
    scriptCode = UScript.INVALID_CODE;
    start = limit;

    // Nothing left to iterate over.
    if (start == textLimit) {
        return false;
    }

    // Pull start backwards across neighbouring COMMON or INHERITED
    // characters so that they are part of this run as well.
    while (start > textStart) {
        final int previousScript = UScript.getScript(text.char32At(start - 1));
        if (previousScript != UScript.COMMON && previousScript != UScript.INHERITED) {
            break;
        }
        --start;
    }

    // Push limit forwards across COMMON, INHERITED, and characters that
    // share the script of this run.
    while (limit < textLimit) {
        final int nextScript = UScript.getScript(text.char32At(limit));
        final boolean generic =
                nextScript == UScript.COMMON || nextScript == UScript.INHERITED;
        if (!generic) {
            if (scriptCode == UScript.INVALID_CODE) {
                // First specific script encountered defines the run.
                scriptCode = nextScript;
            } else if (nextScript != scriptCode) {
                break;
            }
        }
        ++limit;
    }

    // A run consisting solely of COMMON / INHERITED characters is still
    // reported; scriptCode stays UScript.INVALID_CODE in that case.
    return true;
}
 
Example 6
Source File: ScriptAttributeImpl.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Resets this attribute's script code to {@link UScript#COMMON}.
 */
@Override
public void clear() {
  this.code = UScript.COMMON;
}
 
Example 7
Source File: CharScriptsSet.java    From jasperreports with GNU Lesser General Public License v3.0 4 votes vote down vote up
/**
 * Decides whether a code point is covered by this set of scripts.
 * <p>
 * Checks are applied in this order: no included/excluded scripts configured
 * (included); primary script UNKNOWN (included); COMMON or INHERITED
 * (included unless the corresponding exclusion flag is set); primary script
 * explicitly included; primary script explicitly excluded; no include list
 * (included); finally, any included script that the code point has according
 * to {@link UScript#hasScript(int, int)}.
 *
 * @param codePoint the code point to check
 * @return true if the code point is included, false otherwise
 */
public boolean includesCharacter(int codePoint)
{
	if (includedScripts == null && excludedScripts == null)
	{
		return true;
	}
	
	final int primaryScript = UScript.getScript(codePoint);
	switch (primaryScript)
	{
		case UScript.UNKNOWN:
			//include by default
			return true;
		case UScript.COMMON:
			//COMMON is included unless explicitly excluded
			return !excludedCommon;
		case UScript.INHERITED:
			//INHERITED is included unless explicitly excluded
			return !excludedInherited;
		default:
			break;
	}
	
	final boolean hasIncludeList = includedScripts != null;
	if (hasIncludeList && includedScripts.contains(primaryScript))
	{
		//the codepoint script is explicitly included
		return true;
	}
	
	final boolean explicitlyExcluded =
		excludedScripts != null && excludedScripts.contains(primaryScript);
	if (explicitlyExcluded)
	{
		//the codepoint script is explicitly excluded
		return false;
	}
	
	if (!hasIncludeList)
	{
		//not excluded
		return true;
	}
	
	for (Integer candidate : includedScripts)
	{
		if (UScript.hasScript(codePoint, candidate))
		{
			//included as a secondary/extension script
			return true;
		}
	}
	
	//not included
	return false;
}
 
Example 8
Source File: ScriptAttributeImpl.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 4 votes vote down vote up
/**
 * Resets this attribute's script code to {@link UScript#COMMON}.
 */
@Override
public void clear() {
    this.code = UScript.COMMON;
}