java.lang.Character.UnicodeBlock Java Examples

The following examples show how to use java.lang.Character.UnicodeBlock. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: JRPdfExporter.java    From jasperreports with GNU Lesser General Public License v3.0 6 votes vote down vote up
/**
 * Rebuilds {@code glyphRendererBlocks} from the configuration of the current print.
 * <p>
 * Every property returned for
 * {@code PdfReportConfiguration.PROPERTY_PREFIX_GLYPH_RENDERER_BLOCKS} is read as a
 * comma-separated list of Unicode block names. Each name is passed to
 * {@code resolveUnicodeBlock}; names for which it returns {@code null} are skipped.
 */
protected void initGlyphRenderer() 
{
	glyphRendererBlocks = new HashSet<Character.UnicodeBlock>();

	List<PropertySuffix> blockProperties = propertiesUtil.getAllProperties(
			getCurrentJasperPrint(), 
			PdfReportConfiguration.PROPERTY_PREFIX_GLYPH_RENDERER_BLOCKS);
	for (PropertySuffix blockProperty : blockProperties)
	{
		String[] blockNames = blockProperty.getValue().split(",");
		for (String blockName : blockNames)
		{
			UnicodeBlock resolved = resolveUnicodeBlock(blockName);
			if (resolved == null)
			{
				// the name could not be resolved, nothing to register
				continue;
			}

			if (log.isDebugEnabled())
			{
				log.debug("glyph renderer block " + resolved);
			}
			glyphRendererBlocks.add(resolved);
		}
	}
}
 
Example #2
Source File: QueryHelper.java    From fess with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the query used to match {@code text} against field {@code f}.
 * <p>
 * A prefix query is returned only when {@code text} is exactly one {@code char} long,
 * {@code f} equals the configured title or content index field, and that character lies in
 * the CJK Unified Ideographs, Hiragana, Katakana or Hangul Syllables block. In every other
 * case a match-phrase query is returned.
 *
 * @param f    name of the field to query
 * @param text text to search for; may be {@code null}
 * @return the query builder for the given field and text
 */
protected QueryBuilder buildMatchPhraseQuery(final String f, final String text) {
    final FessConfig fessConfig = ComponentUtil.getFessConfig();

    final boolean singleChar = text != null && text.length() == 1;
    final boolean prefixCandidate = singleChar
            && (fessConfig.getIndexFieldTitle().equals(f) || fessConfig.getIndexFieldContent().equals(f));
    if (!prefixCandidate) {
        return QueryBuilders.matchPhraseQuery(f, text);
    }

    final UnicodeBlock charBlock = UnicodeBlock.of(text.codePointAt(0));
    final boolean cjkLike = charBlock == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
            || charBlock == UnicodeBlock.HIRAGANA
            || charBlock == UnicodeBlock.KATAKANA
            || charBlock == UnicodeBlock.HANGUL_SYLLABLES;
    if (cjkLike) {
        return QueryBuilders.prefixQuery(f, text);
    }
    return QueryBuilders.matchPhraseQuery(f, text);
}
 
Example #3
Source File: JRPdfExporter.java    From jasperreports with GNU Lesser General Public License v3.0 6 votes vote down vote up
/**
 * Looks up a Unicode block by name, ignoring surrounding whitespace.
 *
 * @param name the block name as accepted by {@link UnicodeBlock#forName(String)}
 * @return the matching block, or {@code null} when the trimmed name is empty or
 *         {@code forName} rejects it (the latter case is logged as a warning)
 */
protected UnicodeBlock resolveUnicodeBlock(String name)
{
	String trimmedName = name.trim();
	if (trimmedName.isEmpty())
	{
		return null;
	}

	UnicodeBlock resolved = null;
	try 
	{
		resolved = UnicodeBlock.forName(trimmedName);
	} 
	catch (IllegalArgumentException e) 
	{
		// the warning reports the name exactly as it was passed in
		log.warn("Could not resolve \"" + name + "\" to a Unicode block");
	}
	return resolved;
}
 
Example #4
Source File: DefaultTeXFont.java    From AndroidMathKeyboard with Apache License 2.0 6 votes vote down vote up
/**
 * Loads the TeX font description for {@code language} unless one of the given blocks has
 * already been recorded in {@code loadedAlphabets}.
 * <p>
 * {@code TeXParser.isLoading} is set while the description is being loaded and is always
 * reset afterwards, including when loading fails with an exception, so a failed load can
 * no longer leave the flag stuck at {@code true}.
 *
 * @param base     passed through to {@code addTeXFontDescription}
 * @param alphabet the Unicode blocks covered by the font description
 * @param language asset name that is opened and passed to {@code addTeXFontDescription}
 * @throws ResourceParseException propagated from {@code addTeXFontDescription}
 * @throws IOException            if the asset cannot be opened or read
 */
private static void addAlphabet(Object base,
		Character.UnicodeBlock[] alphabet, String language)
		throws ResourceParseException, IOException {
	for (Character.UnicodeBlock block : alphabet) {
		if (loadedAlphabets.contains(block)) {
			// at least one block is already known: nothing to load
			return;
		}
	}

	TeXParser.isLoading = true;
	try {
		addTeXFontDescription(base,
				AjLatexMath.getAssetManager().open(language), language);
		// blocks are only recorded once the description was loaded successfully
		for (Character.UnicodeBlock block : alphabet) {
			loadedAlphabets.add(block);
		}
	} finally {
		TeXParser.isLoading = false;
	}
}
 
Example #5
Source File: TTUnicodeRange.java    From jpexs-decompiler with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Finds the registered range whose Unicode block contains the given code point.
 *
 * @param a_unicode the code point; it is narrowed to {@code int} before the block lookup
 * @return the first entry of {@code s_list} whose {@code m_block} equals the block of the
 *         code point, or {@code null} when the code point has no block or no entry matches
 */
static public TTUnicodeRange of(long a_unicode) {
    initList();

    final UnicodeBlock wanted = UnicodeBlock.of((int) a_unicode);
    if (wanted != null) {
        int index = 0;
        while (index < s_list.size()) {
            final TTUnicodeRange candidate = s_list.get(index);
            if (candidate.m_block.equals(wanted)) {
                return candidate;
            }
            index++;
        }
    }

    return null;
}
 
Example #6
Source File: DefaultTeXFont.java    From FlexibleRichTextView with Apache License 2.0 6 votes vote down vote up
/**
 * Registers the font description stored in the asset {@code language}, unless any block of
 * {@code alphabet} is already present in {@code loadedAlphabets}.
 * <p>
 * The global {@code TeXParser.isLoading} flag is raised for the duration of the load and
 * cleared in a {@code finally} block, so an exception thrown while loading does not leave
 * it permanently set.
 *
 * @param base     forwarded to {@code addTeXFontDescription}
 * @param alphabet Unicode blocks that the font description provides
 * @param language name of the asset to open; also forwarded to {@code addTeXFontDescription}
 * @throws ResourceParseException propagated from {@code addTeXFontDescription}
 * @throws IOException            if opening or reading the asset fails
 */
private static void addAlphabet(Object base,
		Character.UnicodeBlock[] alphabet, String language)
		throws ResourceParseException, IOException {
	boolean alreadyLoaded = false;
	for (int i = 0; !alreadyLoaded && i < alphabet.length; i++) {
		alreadyLoaded = loadedAlphabets.contains(alphabet[i]);
	}
	if (alreadyLoaded) {
		return;
	}

	TeXParser.isLoading = true;
	try {
		addTeXFontDescription(base,
				AjLatexMath.getAssetManager().open(language), language);
		// only mark the blocks as loaded when the description was read without error
		for (int i = 0; i < alphabet.length; i++) {
			loadedAlphabets.add(alphabet[i]);
		}
	} finally {
		TeXParser.isLoading = false;
	}
}
 
Example #7
Source File: KoreanUnitParser.java    From KOMORAN with Apache License 2.0 6 votes vote down vote up
/**
 * Splits {@code str} into typed units, decomposing precomposed Hangul syllables.
 * <p>
 * For a syllable, its offset from U+AC00 is split into three indexes: offset / (21 * 28)
 * selects the {@code ChoSung} entry, (offset % (21 * 28)) / 28 the {@code JungSung} entry and
 * offset % 28 the {@code JongSung} entry, which is only emitted when that index is non-zero.
 * Every other {@code char} is emitted unchanged with {@code UnitType.OTHER}.
 *
 * @param str the text to decompose
 * @return the units in input order, each paired with its {@code UnitType}
 */
public List<Pair<Character, UnitType>> parseWithType(String str) {
    List<Pair<Character, UnitType>> result = new ArrayList<>();

    for (int i = 0; i < str.length(); i++) {
        char ch = str.charAt(i);
        // The HANGUL_SYLLABLES block spans U+AC00..U+D7AF, but precomposed syllables end at
        // U+D7A3. The unassigned tail would produce an initial-consonant index of 19, which
        // overruns a 19-entry ChoSung table (table size is not visible here -- confirm), so
        // those chars are passed through as OTHER.
        boolean isSyllable = UnicodeBlock.of(ch) == UnicodeBlock.HANGUL_SYLLABLES && ch <= 0xD7A3;
        if (!isSyllable) {
            result.add(new Pair<>(ch, UnitType.OTHER));
            continue;
        }

        int syllableIndex = ch - 0xAC00;
        int cho = syllableIndex / (21 * 28);
        int jung = (syllableIndex % (21 * 28)) / 28;
        int jong = syllableIndex % 28;
        result.add(new Pair<>(ChoSung[cho], UnitType.CHOSUNG));
        result.add(new Pair<>(JungSung[jung], UnitType.JUNGSUNG));
        if (jong != 0) {
            result.add(new Pair<>(JongSung[jong], UnitType.JONGSUNG));
        }
    }
    return result;
}
 
Example #8
Source File: ArrowKeyMovementMethod.java    From JotaTextEditor with Apache License 2.0 5 votes vote down vote up
/**
 * Walks backwards from {@code start} to the beginning of the surrounding word.
 * <p>
 * The character at {@code start} decides the rule. If it is in the Basic Latin block, the
 * walk continues over apostrophes and over characters whose general category is an
 * upper/lower/title-case letter, a modifier letter or a decimal digit. Otherwise the walk
 * continues while the preceding character is in the same Unicode block.
 *
 * @param text  the text to scan
 * @param start index of the reference character
 * @return the index of the first character of the word, or {@code start} unchanged when it
 *         is not smaller than the text length
 */
private static int findWordStart(CharSequence text, int start) {
    if (start >= text.length()) {
        return start;
    }

    final UnicodeBlock anchorBlock = UnicodeBlock.of(text.charAt(start));
    final boolean latinAnchor = anchorBlock == UnicodeBlock.BASIC_LATIN;

    int pos = start;
    while (pos > 0) {
        final char previous = text.charAt(pos - 1);
        final boolean sameWord;
        if (latinAnchor) {
            final int category = Character.getType(previous);
            sameWord = previous == '\''
                    || category == Character.UPPERCASE_LETTER
                    || category == Character.LOWERCASE_LETTER
                    || category == Character.TITLECASE_LETTER
                    || category == Character.MODIFIER_LETTER
                    || category == Character.DECIMAL_DIGIT_NUMBER;
        } else {
            sameWord = UnicodeBlock.of(previous) == anchorBlock;
        }
        if (!sameWord) {
            break;
        }
        pos--;
    }
    return pos;
}
 
Example #9
Source File: JRPdfExporter.java    From jasperreports with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Decides whether the glyph renderer is to be used for a text element.
 *
 * @param text the text element to inspect
 * @return {@code true} when the truncated text contains at least one {@code char} whose
 *         Unicode block is in {@code glyphRendererBlocks}; {@code false} when the truncated
 *         text is {@code null}, no blocks are configured, or no character matches
 */
protected boolean toUseGlyphRenderer(JRPrintText text)
{
	String truncated = styledTextUtil.getTruncatedText(text);
	if (truncated == null || glyphRendererBlocks.isEmpty())
	{
		return false;
	}

	for (int index = 0; index < truncated.length(); index++)
	{
		UnicodeBlock charBlock = UnicodeBlock.of(truncated.charAt(index));
		if (!glyphRendererBlocks.contains(charBlock))
		{
			continue;
		}

		if (log.isTraceEnabled())
		{
			log.trace("found character in block " + charBlock + ", using the glyph renderer");
		}
		return true;
	}

	return false;
}
 
Example #10
Source File: SimpleTextLineWrapper.java    From jasperreports with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Tells whether the characters may require complex text layout.
 * <p>
 * Only characters between {@code COMPEX_LAYOUT_START_CHAR} and
 * {@code COMPEX_LAYOUT_END_CHAR} (inclusive) are examined. Such a character makes the
 * layout complex when it has no Unicode block or when its block is not listed in
 * {@code simpleLayoutBlocks}.
 *
 * @param chars the characters to examine
 * @return {@code true} if at least one examined character calls for complex layout
 */
protected boolean hasComplexLayout(char[] chars)
{
	UnicodeBlock lastCheckedBlock = null;
	for (char current : chars)
	{
		if (current < COMPEX_LAYOUT_START_CHAR || current > COMPEX_LAYOUT_END_CHAR)
		{
			continue;
		}

		//FIXME use icu4j or CharPredicateCache
		UnicodeBlock currentBlock = Character.UnicodeBlock.of(current);
		if (currentBlock == null)
		{
			// no block information: err on the side of complex layout
			return true;
		}

		if (currentBlock == lastCheckedBlock)
		{
			// this block passed the set lookup on the previous character, skip the lookup
			continue;
		}

		lastCheckedBlock = currentBlock;
		if (!simpleLayoutBlocks.contains(currentBlock))
		{
			return true;
		}
	}
	return false;
}
 
Example #11
Source File: PhoneNumberMatcher.java    From libphonenumber-android with Apache License 2.0 5 votes vote down vote up
/**
 * Tells whether a character counts as a Latin-script letter. Combining marks from the
 * Combining Diacritical Marks block are accepted as well, on the assumption that they were
 * attached to a preceding Latin character.
 *
 * @param letter the character to classify
 * @return {@code true} for letters and non-spacing marks that belong to one of the Latin
 *         blocks checked below or to Combining Diacritical Marks
 */
// @VisibleForTesting
static boolean isLatinLetter(char letter) {
  // Combining marks are a subset of non-spacing-mark.
  final boolean letterOrMark =
      Character.isLetter(letter) || Character.getType(letter) == Character.NON_SPACING_MARK;
  if (!letterOrMark) {
    return false;
  }

  final UnicodeBlock block = UnicodeBlock.of(letter);
  if (block.equals(UnicodeBlock.BASIC_LATIN) || block.equals(UnicodeBlock.LATIN_1_SUPPLEMENT)) {
    return true;
  }
  if (block.equals(UnicodeBlock.LATIN_EXTENDED_A)
      || block.equals(UnicodeBlock.LATIN_EXTENDED_ADDITIONAL)
      || block.equals(UnicodeBlock.LATIN_EXTENDED_B)) {
    return true;
  }
  return block.equals(UnicodeBlock.COMBINING_DIACRITICAL_MARKS);
}
 
Example #12
Source File: StringTypeTest.java    From qpid-proton-j with Apache License 2.0 5 votes vote down vote up
/**
 * Loops over every Unicode code point and collects, as one-code-point {@link String}s, all
 * those whose {@link UnicodeBlock} is among the given blocks.
 * <p>
 * Supplementary code points are encoded as a surrogate pair, so their strings have a
 * length of two {@code char}s.
 *
 * @param blocks
 *        the {@link UnicodeBlock}s to loop over
 * @return a {@code Set<String>} containing one string per code point of the given blocks
 */
private static Set<String> getAllStringsFromUnicodeBlocks(final UnicodeBlock... blocks)
{
    final Set<UnicodeBlock> blockSet = new HashSet<>(Arrays.asList(blocks));
    final Set<String> strings = new HashSet<>();
    for (int codePoint = Character.MIN_CODE_POINT; codePoint <= Character.MAX_CODE_POINT; codePoint++)
    {
        if (blockSet.contains(UnicodeBlock.of(codePoint)))
        {
            // Character.toChars yields one char for BMP code points and the surrogate pair
            // for supplementary ones, so no manual case split is needed.
            strings.add(new String(Character.toChars(codePoint)));
        }
    }
    return strings;
}
 
Example #13
Source File: StringTypeTest.java    From qpid-proton-j with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the list of strings used as test input.
 * <p>
 * The list contains one string per code point of a selection of Unicode blocks, followed by
 * two-code-point strings that pair every Halfwidth and Fullwidth Forms character with a
 * Box Drawing character.
 *
 * @return a new, mutable list of test strings
 */
private static List<String> generateTestData()
{
    // A plain list is used rather than double-brace initialization, which would create an
    // anonymous LinkedList subclass just to run the population code.
    final List<String> testData = new LinkedList<String>();

    // blocks located entirely in the Basic Multilingual Plane (no surrogate pairs)
    testData.addAll(getAllStringsFromUnicodeBlocks(UnicodeBlock.BASIC_LATIN,
                                                   UnicodeBlock.LATIN_1_SUPPLEMENT,
                                                   UnicodeBlock.GREEK,
                                                   UnicodeBlock.LETTERLIKE_SYMBOLS));
    // supplementary-plane blocks (encoded as surrogate pairs) together with some further
    // BMP blocks: Box Drawing, Halfwidth and Fullwidth Forms and the Private Use Area
    testData.addAll(getAllStringsFromUnicodeBlocks(UnicodeBlock.LINEAR_B_SYLLABARY,
                                                   UnicodeBlock.MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
                                                   UnicodeBlock.MUSICAL_SYMBOLS,
                                                   UnicodeBlock.EMOTICONS,
                                                   UnicodeBlock.PLAYING_CARDS,
                                                   UnicodeBlock.BOX_DRAWING,
                                                   UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS,
                                                   UnicodeBlock.PRIVATE_USE_AREA,
                                                   UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_A,
                                                   UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_B));

    // some additional combinations of characters that could cause problems to the encoder
    final String[] boxDrawing =
            getAllStringsFromUnicodeBlocks(UnicodeBlock.BOX_DRAWING).toArray(new String[0]);
    final String[] halfFullWidthForms =
            getAllStringsFromUnicodeBlocks(UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS).toArray(new String[0]);
    for (int i = 0; i < halfFullWidthForms.length; i++)
    {
        // cycle through the box drawing characters when there are fewer of them
        testData.add(halfFullWidthForms[i] + boxDrawing[i % boxDrawing.length]);
    }
    return testData;
}
 
Example #14
Source File: KoreanUnitParser.java    From KOMORAN with Apache License 2.0 5 votes vote down vote up
/**
 * Returns {@code str} with every precomposed Hangul syllable replaced by its components.
 * <p>
 * For a syllable, its offset from U+AC00 is split into three indexes: offset / (21 * 28)
 * selects the {@code ChoSung} entry, (offset % (21 * 28)) / 28 the {@code JungSung} entry and
 * offset % 28 the {@code JongSung} entry, which is only appended when that index is non-zero.
 * All other characters are copied unchanged.
 *
 * @param str the text to decompose
 * @return the decomposed text
 */
@Override
public String parse(String str) {
    // local, single-threaded buffer: StringBuilder avoids StringBuffer's synchronization
    StringBuilder result = new StringBuilder();

    for (int i = 0; i < str.length(); i++) {
        char ch = str.charAt(i);
        // The HANGUL_SYLLABLES block spans U+AC00..U+D7AF, but precomposed syllables end at
        // U+D7A3. The unassigned tail would produce an initial-consonant index of 19, which
        // overruns a 19-entry ChoSung table (table size is not visible here -- confirm), so
        // those chars are copied through unchanged.
        boolean isSyllable = UnicodeBlock.of(ch) == UnicodeBlock.HANGUL_SYLLABLES && ch <= 0xD7A3;
        if (!isSyllable) {
            result.append(ch);
            continue;
        }

        int syllableIndex = ch - 0xAC00;
        int cho = syllableIndex / (21 * 28);
        int jung = (syllableIndex % (21 * 28)) / 28;
        int jong = syllableIndex % 28;
        result.append(ChoSung[cho]);
        result.append(JungSung[jung]);
        if (jong != 0) {
            result.append(JongSung[jong]);
        }
    }
    return result.toString();
}
 
Example #15
Source File: CorpusBuilder.java    From KOMORAN with Apache License 2.0 5 votes vote down vote up
/**
 * Adds the irregular rules derived from a problem/answer pair to the irregular dictionary.
 * <p>
 * Nothing happens unless {@code isIrregular} accepts the pair. Otherwise the problem and the
 * answer list are converted with {@code convertJaso}, handed to {@code irrParser}, and every
 * resulting rule that survives the filters below is appended to {@code irrDic} after being
 * recombined by {@code unitParser}.
 *
 * @param paPair the problem together with its answer list
 */
private void appendIrregularDictionary(ProblemAnswerPair paPair) {
    if (!this.isIrregular(paPair.getProblem(), paPair.getAnswerList())) {
        return;
    }

    // convert to jaso units, then extract the irregular patterns
    List<Pair<String, String>> irrRuleList = irrParser.parse(
            this.convertJaso(paPair.getProblem()),
            this.convertJaso(paPair.getAnswerList()));
    for (Pair<String, String> rule : irrRuleList) {
        // skipped because of errors in the training set (based on the Sejong corpus)
        if (rule.getSecond().trim().length() == 0) {
            continue;
        }

        // skip rules listed in the exclusion set; the key is the first part, a tab, and the
        // second part up to (not including) its last "/"
        if (this.irrExclusiveSet.contains(rule.getFirst() + "\t" + rule.getSecond().substring(0, rule.getSecond().lastIndexOf("/")))) {
            continue;
        }

        // skip rules whose recombined first part still contains a compatibility jamo
        String combinedProblem = this.unitParser.combine(rule.getFirst());
        boolean containsJamo = false;
        int index = 0;
        while (!containsJamo && index < combinedProblem.length()) {
            containsJamo = StringUtil.getUnicodeBlock(combinedProblem.charAt(index)) == UnicodeBlock.HANGUL_COMPATIBILITY_JAMO;
            index++;
        }
        if (containsJamo) {
            continue;
        }

        // skip rules whose first part ends with the two-jamo suffix below while the second
        // part ends with the matching four-jamo "/EP" suffix
        if (rule.getFirst().endsWith("ㅇㅡ") && rule.getSecond().endsWith("ㅇㅡㅅㅣ/EP")) {
            continue;
        }

        irrDic.append(this.unitParser.combine(rule.getFirst()), this.unitParser.combine(rule.getSecond()));
    }
}
 
Example #16
Source File: CorpusBuilder.java    From KOMORAN with Apache License 2.0 5 votes vote down vote up
/**
     * Adds morpheme / part-of-speech pairs to the word dictionary.
     * <p>
     * Two kinds of pairs are left out: a single-character morpheme from the Hangul
     * Compatibility Jamo block whose tag contains "NN", and any pair tagged exactly
     * "SH", "SN" or "SL".
     * <p>
     * Note carried over from the original: a further filter for the symbol tags SF, SP, SS
     * and SO was considered but is disabled; it depends on the analyzer, so the rule parser
     * would have to cover those tags, and it was unclear whether it would be any faster.
     *
     * @param answerList the (morpheme, tag) pairs to add
     */
    private void appendWordDictionary(List<Pair<String, String>> answerList) {
        for (Pair<String, String> entry : answerList) {
            boolean loneJamoNoun = entry.getFirst().trim().length() == 1
                    && StringUtil.getUnicodeBlock(entry.getFirst().trim().charAt(0)) == UnicodeBlock.HANGUL_COMPATIBILITY_JAMO
                    && entry.getSecond().contains("NN");
            if (loneJamoNoun) {
                continue;
            }

            boolean excludedTag = entry.getSecond().equals("SH")
                    || entry.getSecond().equals("SN")
                    || entry.getSecond().equals("SL");
            if (excludedTag) {
                continue;
            }

            wordDic.append(entry.getFirst(), entry.getSecond());
        }
    }
 
Example #17
Source File: NGram.java    From language-detection with Apache License 2.0 5 votes vote down vote up
/**
 * Normalizes a character depending on the Unicode block it belongs to.
 * <p>
 * Non-letters of Basic Latin, excluded Latin-1 characters and all General Punctuation
 * become a space; a few Romanian, Arabic and Latin Extended Additional characters are mapped
 * to a fixed counterpart; Hiragana, Katakana, Bopomofo and Hangul syllables collapse to one
 * representative character per script; CJK ideographs are mapped through {@code cjk_map}.
 *
 * @param ch the character to normalize
 * @return the normalized character, or {@code ch} itself when no rule applies
 */
static public char normalize(char ch) {
    final Character.UnicodeBlock block = Character.UnicodeBlock.of(ch);

    if (block == UnicodeBlock.BASIC_LATIN) {
        final boolean asciiLetter = (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
        return asciiLetter ? ch : ' ';
    }
    if (block == UnicodeBlock.LATIN_1_SUPPLEMENT) {
        return LATIN1_EXCLUDED.indexOf(ch) >= 0 ? ' ' : ch;
    }
    if (block == UnicodeBlock.LATIN_EXTENDED_B) {
        // normalization for Romanian
        if (ch == '\u0219') {
            return '\u015f';  // Small S with comma below => with cedilla
        }
        if (ch == '\u021b') {
            return '\u0163';  // Small T with comma below => with cedilla
        }
        return ch;
    }
    if (block == UnicodeBlock.GENERAL_PUNCTUATION) {
        return ' ';
    }
    if (block == UnicodeBlock.ARABIC) {
        return ch == '\u06cc' ? '\u064a' : ch;  // Farsi yeh => Arabic yeh
    }
    if (block == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) {
        return ch >= '\u1ea0' ? '\u1ec3' : ch;
    }
    if (block == UnicodeBlock.HIRAGANA) {
        return '\u3042';
    }
    if (block == UnicodeBlock.KATAKANA) {
        return '\u30a2';
    }
    if (block == UnicodeBlock.BOPOMOFO || block == UnicodeBlock.BOPOMOFO_EXTENDED) {
        return '\u3105';
    }
    if (block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS) {
        if (cjk_map.containsKey(ch)) {
            return cjk_map.get(ch);
        }
        return ch;
    }
    if (block == UnicodeBlock.HANGUL_SYLLABLES) {
        return '\uac00';
    }
    return ch;
}
 
Example #18
Source File: ArrowKeyMovementMethod.java    From JotaTextEditor with Apache License 2.0 5 votes vote down vote up
/**
 * Walks forwards from {@code end} to the position just after the surrounding word.
 * <p>
 * The character at {@code end} decides the rule. If it is in the Basic Latin block, the walk
 * continues over apostrophes and over characters whose general category is an
 * upper/lower/title-case letter, a modifier letter or a decimal digit. Otherwise the walk
 * continues while the characters are in the same Unicode block.
 *
 * @param text the text to scan
 * @param end  index of the reference character
 * @return the index of the first character that no longer belongs to the word (possibly the
 *         text length), or {@code end} unchanged when it is not smaller than the text length
 */
private static int findWordEnd(CharSequence text, int end) {
    final int length = text.length();
    if (end >= length) {
        return end;
    }

    final UnicodeBlock anchorBlock = UnicodeBlock.of(text.charAt(end));
    final boolean latinAnchor = anchorBlock == UnicodeBlock.BASIC_LATIN;

    int pos = end;
    while (pos < length) {
        final char current = text.charAt(pos);
        final boolean sameWord;
        if (latinAnchor) {
            final int category = Character.getType(current);
            sameWord = current == '\''
                    || category == Character.UPPERCASE_LETTER
                    || category == Character.LOWERCASE_LETTER
                    || category == Character.TITLECASE_LETTER
                    || category == Character.MODIFIER_LETTER
                    || category == Character.DECIMAL_DIGIT_NUMBER;
        } else {
            sameWord = UnicodeBlock.of(current) == anchorBlock;
        }
        if (!sameWord) {
            break;
        }
        pos++;
    }
    return pos;
}
 
Example #19
Source File: LangDetection.java    From fnlp with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Tells whether a character is treated as Chinese text.
 * <p>
 * Besides the CJK ideograph blocks this also accepts General Punctuation, CJK Symbols and
 * Punctuation, and Halfwidth and Fullwidth Forms.
 *
 * @param c the character to classify
 * @return {@code true} if the character's Unicode block is one of the accepted blocks
 */
private static boolean isChinese(char c) {
	final UnicodeBlock block = UnicodeBlock.of(c);

	final boolean ideograph = block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
			|| block == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
			|| block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A;
	if (ideograph) {
		return true;
	}
	return block == UnicodeBlock.GENERAL_PUNCTUATION
			|| block == UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
			|| block == UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS;
}
 
Example #20
Source File: TTUnicodeRange.java    From jpexs-decompiler with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Creates a range entry for a Unicode block without setting a code page flag.
 *
 * @param a_block     the Unicode block, stored in {@code m_block}
 * @param a_start     stored in {@code m_start}
 * @param a_end       stored in {@code m_end}
 * @param a_osTwoFlag stored in {@code m_osTwoFlag}
 */
public TTUnicodeRange(UnicodeBlock a_block,
        long a_start,
        long a_end,
        int a_osTwoFlag) {
    this.m_block = a_block;
    this.m_start = a_start;
    this.m_end = a_end;
    this.m_osTwoFlag = a_osTwoFlag;
}
 
Example #21
Source File: TTUnicodeRange.java    From jpexs-decompiler with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Creates a range entry for a Unicode block, including its code page flag.
 *
 * @param a_block        the Unicode block, stored in {@code m_block}
 * @param a_start        stored in {@code m_start}
 * @param a_end          stored in {@code m_end}
 * @param a_osTwoFlag    stored in {@code m_osTwoFlag}
 * @param a_codePageFlag stored in {@code m_codePageFlag}
 */
public TTUnicodeRange(UnicodeBlock a_block,
        long a_start,
        long a_end,
        int a_osTwoFlag,
        int a_codePageFlag) {
    this.m_block = a_block;
    this.m_start = a_start;
    this.m_end = a_end;
    this.m_osTwoFlag = a_osTwoFlag;
    this.m_codePageFlag = a_codePageFlag;
}
 
Example #22
Source File: NGram.java    From weslang with Apache License 2.0 5 votes vote down vote up
/**
 * Maps a character to its normalized form, chosen by the character's Unicode block.
 * <p>
 * Characters for which no rule applies are returned unchanged.
 *
 * @param ch the character to normalize
 * @return the normalized character
 */
static public char normalize(char ch) {
    final Character.UnicodeBlock script = Character.UnicodeBlock.of(ch);
    char normalized = ch;

    if (script == UnicodeBlock.BASIC_LATIN) {
        // everything except the ASCII letters becomes a space
        final boolean upper = ch >= 'A' && ch <= 'Z';
        final boolean lower = ch >= 'a' && ch <= 'z';
        if (!upper && !lower) {
            normalized = ' ';
        }
    } else if (script == UnicodeBlock.LATIN_1_SUPPLEMENT) {
        if (LATIN1_EXCLUDED.indexOf(ch) >= 0) {
            normalized = ' ';
        }
    } else if (script == UnicodeBlock.LATIN_EXTENDED_B) {
        // normalization for Romanian: comma-below letters => cedilla letters
        switch (ch) {
            case '\u0219':
                normalized = '\u015f';
                break;
            case '\u021b':
                normalized = '\u0163';
                break;
            default:
                break;
        }
    } else if (script == UnicodeBlock.GENERAL_PUNCTUATION) {
        normalized = ' ';
    } else if (script == UnicodeBlock.ARABIC) {
        // Farsi yeh => Arabic yeh
        if (ch == '\u06cc') {
            normalized = '\u064a';
        }
    } else if (script == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) {
        if (ch >= '\u1ea0') {
            normalized = '\u1ec3';
        }
    } else if (script == UnicodeBlock.HIRAGANA) {
        normalized = '\u3042';
    } else if (script == UnicodeBlock.KATAKANA) {
        normalized = '\u30a2';
    } else if (script == UnicodeBlock.BOPOMOFO || script == UnicodeBlock.BOPOMOFO_EXTENDED) {
        normalized = '\u3105';
    } else if (script == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS) {
        if (cjk_map.containsKey(ch)) {
            normalized = cjk_map.get(ch);
        }
    } else if (script == UnicodeBlock.HANGUL_SYLLABLES) {
        normalized = '\uac00';
    }
    return normalized;
}
 
Example #23
Source File: NGram.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Normalizes a character according to the Unicode block it belongs to.
 * <p>
 * Non-letters of Basic Latin, excluded Latin-1 characters and all General Punctuation become
 * a space; selected Arabic and Latin Extended Additional characters are mapped to a fixed
 * counterpart; Hiragana, Katakana, Bopomofo and Hangul syllables collapse to one
 * representative character each; CJK ideographs are mapped through {@code cjk_map}.
 *
 * @param c the character to normalize
 * @return the normalized character, or {@code c} itself when no rule applies
 */
public static char normalize(char c) {
    final Character.UnicodeBlock block = Character.UnicodeBlock.of(c);

    if (block == UnicodeBlock.BASIC_LATIN) {
        final boolean asciiLetter = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
        return asciiLetter ? c : ' ';
    }
    if (block == UnicodeBlock.LATIN_1_SUPPLEMENT) {
        return LATIN1_EXCLUDED.indexOf(c) >= 0 ? ' ' : c;
    }
    if (block == UnicodeBlock.GENERAL_PUNCTUATION) {
        return ' ';
    }
    if (block == UnicodeBlock.ARABIC) {
        return c == '\u06cc' ? '\u064a' : c;
    }
    if (block == UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) {
        return c >= '\u1ea0' ? '\u1ec3' : c;
    }
    if (block == UnicodeBlock.HIRAGANA) {
        return '\u3042';
    }
    if (block == UnicodeBlock.KATAKANA) {
        return '\u30a2';
    }
    if (block == UnicodeBlock.BOPOMOFO || block == UnicodeBlock.BOPOMOFO_EXTENDED) {
        return '\u3105';
    }
    if (block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS) {
        if (cjk_map.containsKey(c)) {
            return cjk_map.get(c);
        }
        return c;
    }
    if (block == UnicodeBlock.HANGUL_SYLLABLES) {
        return '\uac00';
    }
    return c;
}
 
Example #24
Source File: KoreanUnitParser.java    From komoran-2.0 with Apache License 2.0 5 votes vote down vote up
/**
 * Returns {@code str} with every precomposed Hangul syllable replaced by its components.
 * <p>
 * For a syllable, its offset from U+AC00 is split into three indexes: offset / (21*28)
 * selects the {@code ChoSung} entry, (offset % (21*28)) / 28 the {@code JungSung} entry and
 * offset % 28 the {@code JongSung} entry, which is only appended when that index is non-zero.
 * All other characters are copied unchanged.
 *
 * @param str the text to decompose
 * @return the decomposed text
 */
@Override
public String parse(String str) {
	// local, single-threaded buffer: StringBuilder avoids StringBuffer's synchronization
	StringBuilder result = new StringBuilder();

	int length = str.length();
	for (int i = 0; i < length; i++) {
		char ch = str.charAt(i);
		Character.UnicodeBlock block = Character.UnicodeBlock.of(ch);
		// The HANGUL_SYLLABLES block spans U+AC00..U+D7AF, but precomposed syllables end at
		// U+D7A3. The unassigned tail would produce an initial-consonant index of 19, which
		// overruns a 19-entry ChoSung table (table size is not visible here -- confirm), so
		// those chars are copied through unchanged.
		if (block == UnicodeBlock.HANGUL_SYLLABLES && ch <= 0xD7A3) {
			int syllableIndex = ch - 0xAC00;
			int cho = syllableIndex / (21 * 28);
			int jung = (syllableIndex % (21 * 28)) / 28;
			int jong = syllableIndex % 28;
			result.append(ChoSung[cho]);
			result.append(JungSung[jung]);
			if (jong != 0) {
				result.append(JongSung[jong]);
			}
		} else {
			result.append(ch);
		}
	}
	return result.toString();
}
 
Example #25
Source File: StringUtil.java    From SoloPi with Apache License 2.0 5 votes vote down vote up
/**
 * Tells whether a character belongs to one of the CJK blocks listed below.
 *
 * @param checkChar the character to classify
 * @return {@code true} if the character's Unicode block is one of the checked CJK blocks
 */
private static boolean checkCharContainChinese(char checkChar){
    final UnicodeBlock block = UnicodeBlock.of(checkChar);

    final boolean unifiedIdeograph = block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
            || block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
            || block == UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B;
    final boolean compatibilityOrRadical = block == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
            || block == UnicodeBlock.CJK_COMPATIBILITY_FORMS
            || block == UnicodeBlock.CJK_RADICALS_SUPPLEMENT;
    return unifiedIdeograph || compatibilityOrRadical;
}
 
Example #26
Source File: TeXFormula.java    From AndroidMathKeyboard with Apache License 2.0 5 votes vote down vote up
/**
 * Registers, replaces or removes the external fonts associated with a Unicode block.
 * <p>
 * Passing {@code null} for both font names removes the entry for the block. Otherwise the
 * entry is stored, and when the block is Basic Latin {@code predefinedTeXFormulas} is
 * cleared.
 *
 * @param block     the Unicode block the fonts apply to
 * @param sansserif name of the sans-serif font, may be {@code null}
 * @param serif     name of the serif font, may be {@code null}
 */
public static void registerExternalFont(Character.UnicodeBlock block,
		String sansserif, String serif) {
	final boolean unregister = sansserif == null && serif == null;
	if (unregister) {
		externalFontMap.remove(block);
	} else {
		externalFontMap.put(block, new FontInfos(sansserif, serif));
		if (block.equals(Character.UnicodeBlock.BASIC_LATIN)) {
			predefinedTeXFormulas.clear();
		}
	}
}
 
Example #27
Source File: TeXFormula.java    From AndroidMathKeyboard with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the external fonts registered for a Unicode block.
 * <p>
 * When nothing is registered yet, an entry using "SansSerif" and "Serif" is created, stored
 * in {@code externalFontMap} and returned.
 *
 * @param block the Unicode block to look up
 * @return the font information for the block, never {@code null}
 */
public static FontInfos getExternalFont(Character.UnicodeBlock block) {
	FontInfos registered = externalFontMap.get(block);
	if (registered != null) {
		return registered;
	}

	FontInfos fallback = new FontInfos("SansSerif", "Serif");
	externalFontMap.put(block, fallback);
	return fallback;
}
 
Example #28
Source File: ArrowKeyMovementMethod.java    From PowerFileExplorer with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Finds the start of the word that contains the character at {@code start}.
 * <p>
 * When that character is in the Basic Latin block, the word extends backwards over
 * apostrophes and over characters whose general category is an upper/lower/title-case
 * letter, a modifier letter or a decimal digit. For any other character the word extends
 * backwards over characters of the same Unicode block.
 *
 * @param text  the text to scan
 * @param start index of the reference character
 * @return the index of the word's first character, or {@code start} unchanged when it is
 *         not smaller than the text length
 */
private static int findWordStart(CharSequence text, int start) {
    if (start >= text.length()) {
        return start;
    }

    final UnicodeBlock referenceBlock = UnicodeBlock.of(text.charAt(start));

    int wordStart = start;
    while (wordStart > 0) {
        final char candidate = text.charAt(wordStart - 1);
        if (referenceBlock == UnicodeBlock.BASIC_LATIN) {
            final int category = Character.getType(candidate);
            final boolean wordChar = candidate == '\''
                    || category == Character.UPPERCASE_LETTER
                    || category == Character.LOWERCASE_LETTER
                    || category == Character.TITLECASE_LETTER
                    || category == Character.MODIFIER_LETTER
                    || category == Character.DECIMAL_DIGIT_NUMBER;
            if (!wordChar) {
                return wordStart;
            }
        } else if (UnicodeBlock.of(candidate) != referenceBlock) {
            return wordStart;
        }
        wordStart--;
    }
    return wordStart;
}
 
Example #29
Source File: TeXFormula.java    From FlexibleRichTextView with Apache License 2.0 5 votes vote down vote up
/**
 * Sets or clears the external fonts used for a Unicode block.
 * <p>
 * If both font names are {@code null} the block's entry is removed from
 * {@code externalFontMap}. Otherwise a new entry is stored and, for the Basic Latin block,
 * {@code predefinedTeXFormulas} is cleared.
 *
 * @param block     the Unicode block the fonts apply to
 * @param sansserif name of the sans-serif font, may be {@code null}
 * @param serif     name of the serif font, may be {@code null}
 */
public static void registerExternalFont(Character.UnicodeBlock block,
		String sansserif, String serif) {
	if (sansserif != null || serif != null) {
		externalFontMap.put(block, new FontInfos(sansserif, serif));
		if (block.equals(Character.UnicodeBlock.BASIC_LATIN)) {
			predefinedTeXFormulas.clear();
		}
		return;
	}

	// neither font given: drop whatever was registered for the block
	externalFontMap.remove(block);
}
 
Example #30
Source File: TeXFormula.java    From FlexibleRichTextView with Apache License 2.0 5 votes vote down vote up
/**
 * Looks up the external fonts for a Unicode block, creating a default entry on first use.
 * <p>
 * The default entry uses the font names "SansSerif" and "Serif" and is stored in
 * {@code externalFontMap} before it is returned.
 *
 * @param block the Unicode block to look up
 * @return the font information for the block, never {@code null}
 */
public static FontInfos getExternalFont(Character.UnicodeBlock block) {
	FontInfos fonts = externalFontMap.get(block);
	if (fonts != null) {
		return fonts;
	}

	fonts = new FontInfos("SansSerif", "Serif");
	externalFontMap.put(block, fonts);
	return fonts;
}