Java Code Examples for net.sourceforge.pinyin4j.PinyinHelper#toHanyuPinyinStringArray()

The following examples show how to use net.sourceforge.pinyin4j.PinyinHelper#toHanyuPinyinStringArray(). You can vote up the examples you like or vote down the ones you don't, and follow the links above each example to the original project or source file. Related API usage is listed in the sidebar.
Example 1
Source File: PinYinUtil.java    From MicroCommunity with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the pinyin initials of a string.
 *
 * <p>Each character above code point 128 is replaced by the first letter of its first pinyin
 * reading (lowercase, no tone marks); such characters with no reading are dropped. All other
 * characters are copied through. Finally every non-word character (regex {@code \W}) is
 * removed from the result.
 *
 * @param chinese the text to abbreviate
 * @return the pinyin initials mixed with the retained word characters
 */
public static String getFirstSpell(String chinese) {
    HanyuPinyinOutputFormat outputFormat = new HanyuPinyinOutputFormat();
    outputFormat.setCaseType(HanyuPinyinCaseType.LOWERCASE);
    outputFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE);

    StringBuilder initials = new StringBuilder();
    for (char current : chinese.toCharArray()) {
        if (current <= 128) {
            // Low code points are kept verbatim; the final regex decides whether they survive.
            initials.append(current);
            continue;
        }
        try {
            String[] readings = PinyinHelper.toHanyuPinyinStringArray(current, outputFormat);
            if (readings != null) {
                initials.append(readings[0].charAt(0));
            }
        } catch (BadHanyuPinyinOutputFormatCombination e) {
            e.printStackTrace();
        }
    }
    return initials.toString().replaceAll("\\W", "").trim();
}
 
Example 2
Source File: PinYinUtil.java    From code with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a character array to pinyin, replacing each Chinese character (U+4E00..U+9FA5) with
 * its first pinyin reading and copying every other character through unchanged.
 *
 * <p>A character inside that range for which the lookup yields no reading is copied through
 * unchanged as well, instead of failing with a {@link NullPointerException}.
 *
 * @param chars        the characters to convert
 * @param pinyinFormat the pinyin output format to apply
 * @return the converted text; if the format combination is rejected, whatever had been
 *         converted before the failure
 */
public static String words2Pinyin(char[] chars, HanyuPinyinOutputFormat pinyinFormat) {
    StringBuilder pinyinBuilder = new StringBuilder();
    try {
        for (char word : chars) {
            // A range comparison is equivalent to the former single-character regex match and
            // avoids compiling a pattern for every character.
            boolean isChinese = word >= '\u4E00' && word <= '\u9FA5';
            String[] py = isChinese
                    ? PinyinHelper.toHanyuPinyinStringArray(word, pinyinFormat)
                    : null;
            if (py != null && py.length > 0) {
                // Polyphonic characters have several readings; the first one is used.
                pinyinBuilder.append(py[0]);
            } else {
                pinyinBuilder.append(word);
            }
        }
    } catch (BadHanyuPinyinOutputFormatCombination e) {
        e.printStackTrace();
    }
    return pinyinBuilder.toString();
}
 
Example 3
Source File: PinyinUtils.java    From springboot-admin with Apache License 2.0 6 votes vote down vote up
/**
 * Converts text to Hanyu Pinyin without tone marks, writing "ü" as "v".
 *
 * <p>The input is trimmed first. Each Chinese character (U+4E00..U+9FA5) is replaced by its
 * first pinyin reading; every other character, and any Chinese character for which the lookup
 * yields no reading, is copied through unchanged.
 *
 * @param ChineseLanguage the text to convert
 * @param caseType        the letter case of the generated pinyin
 * @return the converted text; if the format combination is rejected, whatever had been
 *         converted before the failure
 */
private static String toHanyuPinyin(String ChineseLanguage, HanyuPinyinCaseType caseType){
    char[] cl_chars = ChineseLanguage.trim().toCharArray();
    // StringBuilder avoids re-copying the accumulated text on every appended character.
    StringBuilder hanyupinyin = new StringBuilder(cl_chars.length);
    HanyuPinyinOutputFormat defaultFormat = new HanyuPinyinOutputFormat();
    defaultFormat.setCaseType(caseType);
    defaultFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
    defaultFormat.setVCharType(HanyuPinyinVCharType.WITH_V);
    try {
        for (char current : cl_chars) {
            boolean isChinese = current >= '\u4e00' && current <= '\u9fa5';
            String[] readings = isChinese
                    ? PinyinHelper.toHanyuPinyinStringArray(current, defaultFormat)
                    : null;
            if (readings != null && readings.length > 0) {
                hanyupinyin.append(readings[0]);
            } else {
                // Not Chinese, or no reading available: keep the character as it is.
                hanyupinyin.append(current);
            }
        }
    } catch (BadHanyuPinyinOutputFormatCombination e) {
        e.printStackTrace();
    }
    return hanyupinyin.toString();
}
 
Example 4
Source File: ChinesUtil.java    From utils with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the pinyin initials of a string.
 *
 * <p>Characters above code point 128 contribute the first letter of their first pinyin reading
 * (lowercase, no tone marks) or nothing when no reading is returned. Remaining characters are
 * copied as they are, after which every non-word character (regex {@code \W}) is stripped.
 *
 * @param chinese the text to abbreviate
 * @return the pinyin initials mixed with the retained word characters
 */
public static String getFirstSpell(String chinese) {
    HanyuPinyinOutputFormat toneless = new HanyuPinyinOutputFormat();
    toneless.setCaseType(HanyuPinyinCaseType.LOWERCASE);
    toneless.setToneType(HanyuPinyinToneType.WITHOUT_TONE);

    StringBuilder collected = new StringBuilder();
    for (char ch : chinese.toCharArray()) {
        if (ch > 128) {
            try {
                String[] readings = PinyinHelper.toHanyuPinyinStringArray(ch, toneless);
                if (readings != null) {
                    collected.append(readings[0].charAt(0));
                }
            } catch (BadHanyuPinyinOutputFormatCombination e) {
                e.printStackTrace();
            }
            continue;
        }
        collected.append(ch);
    }
    String raw = collected.toString();
    return raw.replaceAll("\\W", "").trim();
}
 
Example 5
Source File: CharacterParser.java    From LoveTalkClient with Apache License 2.0 5 votes vote down vote up
/**
 * Replaces every character that has a pinyin reading with the first letter of its first
 * reading; characters without a reading are kept unchanged.
 *
 * @param str the text to abbreviate
 * @return the text with each convertible character reduced to its pinyin initial
 */
public static String getPinYinHeadChar(String str) {
	StringBuilder initials = new StringBuilder();
	for (char word : str.toCharArray()) {
		String[] readings = PinyinHelper.toHanyuPinyinStringArray(word);
		initials.append(readings == null ? word : readings[0].charAt(0));
	}
	return initials.toString();
}
 
Example 6
Source File: PinyinUtil.java    From jeecg with Apache License 2.0 5 votes vote down vote up
/**
 * Collects the pinyin spellings of a string into a set.
 *
 * <p>For every character the candidate strings are gathered: all pinyin readings for a Chinese
 * character (U+4E00..U+9FA5; lowercase, no tone marks, "ü" written as "v"), the letter itself
 * for A-Z and a-z, and an empty string for anything else. The per-character candidates are then
 * handed to {@code Exchange}, which is defined elsewhere (presumably it combines them into
 * whole-string spellings; confirm against its implementation).
 *
 * @author wyh
 * @param src the text to convert
 * @return the set of strings produced by {@code Exchange}, or {@code null} when {@code src} is
 *         {@code null} or blank
 */
public static Set<String> getPinyin(String src) {
	if (src == null || src.trim().isEmpty()) {
		return null;
	}

	char[] sourceChars = src.toCharArray();

	// Output settings: lowercase, no tone marks, "v" for "ü".
	HanyuPinyinOutputFormat outputFormat = new HanyuPinyinOutputFormat();
	outputFormat.setCaseType(HanyuPinyinCaseType.LOWERCASE);
	outputFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
	outputFormat.setVCharType(HanyuPinyinVCharType.WITH_V);

	String[][] candidates = new String[src.length()][];
	for (int index = 0; index < sourceChars.length; index++) {
		char c = sourceChars[index];
		boolean isChinese = c >= '\u4E00' && c <= '\u9FA5';
		boolean isAsciiLetter = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
		if (isChinese) {
			try {
				// The slot stays null if the lookup fails or yields nothing.
				candidates[index] = PinyinHelper.toHanyuPinyinStringArray(c, outputFormat);
			} catch (BadHanyuPinyinOutputFormatCombination e) {
				e.printStackTrace();
			}
		} else if (isAsciiLetter) {
			candidates[index] = new String[] { String.valueOf(c) };
		} else {
			// Only Chinese characters and ASCII letters are kept.
			candidates[index] = new String[] { "" };
		}
	}

	String[] combined = Exchange(candidates);
	Set<String> pinyinSet = new HashSet<String>();
	for (String spelling : combined) {
		pinyinSet.add(spelling);
	}
	return pinyinSet;
}
 
Example 7
Source File: PinyinUtil.java    From jeecg with Apache License 2.0 5 votes vote down vote up
/**
 * Extracts the pinyin initial of every character.
 *
 * <p>A character with a pinyin reading contributes the first letter of its first reading; any
 * other character is copied through unchanged.
 *
 * @param str the text to abbreviate
 * @return the text with each convertible character reduced to its pinyin initial
 */
public static String getPinYinHeadChar(String str) {
	StringBuilder headChars = new StringBuilder();
	for (char word : str.toCharArray()) {
		String[] readings = PinyinHelper.toHanyuPinyinStringArray(word);
		if (readings == null) {
			headChars.append(word);
			continue;
		}
		headChars.append(readings[0].charAt(0));
	}
	return headChars.toString();
}
 
Example 8
Source File: PinyinUtil.java    From jeecg with Apache License 2.0 5 votes vote down vote up
/**
 * Converts Chinese text to full pinyin (lowercase, no tone marks, "ü" written as "v").
 *
 * <p>Each Chinese character (U+4E00..U+9FA5) is replaced by its first pinyin reading. Every
 * other character, and any Chinese character for which the lookup yields no reading, is copied
 * through unchanged instead of failing with a {@link NullPointerException}.
 *
 * @param src the text to convert
 * @return the converted text; if the format combination is rejected, whatever had been
 *         converted before the failure
 */
public static String getPinYin(String src) {
	char[] chars = src.toCharArray();
	// Output settings for the generated pinyin.
	HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
	format.setCaseType(HanyuPinyinCaseType.LOWERCASE);
	format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
	format.setVCharType(HanyuPinyinVCharType.WITH_V);
	// StringBuilder avoids re-copying the accumulated text on every appended character.
	StringBuilder result = new StringBuilder(chars.length);
	try {
		for (char current : chars) {
			// A range comparison is equivalent to the former single-character regex match.
			boolean isChinese = current >= '\u4E00' && current <= '\u9FA5';
			String[] readings = isChinese
					? PinyinHelper.toHanyuPinyinStringArray(current, format)
					: null;
			if (readings != null && readings.length > 0) {
				// Use the first of the character's readings.
				result.append(readings[0]);
			} else {
				result.append(current);
			}
		}
	} catch (BadHanyuPinyinOutputFormatCombination e) {
		e.printStackTrace();
	}
	return result.toString();
}
 
Example 9
Source File: HanyuPinyin.java    From o2oa with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Looks up the pinyin of a single character using this object's {@code format} and stores the
 * lookup result in the {@code pinyin} field.
 *
 * @param c the character to convert
 * @return the first pinyin reading of {@code c}, or {@code null} when the lookup returns no
 *         reading (for example because {@code c} is not a Chinese character) or fails
 */
public String getHanyuPinYin(char c) {
    try {
        pinyin = PinyinHelper.toHanyuPinyinStringArray(c, format);
    }
    catch(BadHanyuPinyinOutputFormatCombination e) {
        e.printStackTrace();
        // Clear the field so a failed lookup cannot return the reading left by a previous call.
        pinyin = null;
    }

    // The lookup returns null when c is not a Chinese character.
    if(pinyin == null || pinyin.length == 0) return null;

    // TODO: polyphonic characters have several readings; only the first is returned.
    return pinyin[0];
}
 
Example 10
Source File: PinyinUtil.java    From FancyListIndexer with Apache License 2.0 5 votes vote down vote up
/**
 * Converts text to uppercase pinyin without tone marks, e.g. "黑马" becomes "HEIMA".
 *
 * <p>Whitespace is removed. Characters up to code point 128 are copied unchanged. A character
 * above 128 is replaced by its first pinyin reading; when the lookup yields no reading
 * (the character is not Chinese) it is copied unchanged instead of failing with a
 * {@link NullPointerException}.
 *
 * @param str the text to convert
 * @return the converted text
 */
public static String getPinyin(String str) {
	// Output settings: uppercase, no tone marks.
	HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
	format.setCaseType(HanyuPinyinCaseType.UPPERCASE);
	format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);

	StringBuilder sb = new StringBuilder();

	for (char c : str.toCharArray()) {
		// Whitespace never appears in the result.
		if (Character.isWhitespace(c)) {
			continue;
		}

		// char is unsigned, so the former "c < -127" test could never be true and is dropped.
		if (c <= 128) {
			// Nothing to convert (e.g. "*&$^*@654654LHKHJ"); append as is.
			sb.append(c);
			continue;
		}

		// Possibly a Chinese character.
		try {
			// A character may have several readings, e.g. "单" -> DAN, SHAN; use the first.
			String[] readings = PinyinHelper.toHanyuPinyinStringArray(c, format);
			if (readings != null && readings.length > 0) {
				sb.append(readings[0]);
			} else {
				sb.append(c);
			}
		} catch (BadHanyuPinyinOutputFormatCombination e) {
			e.printStackTrace();
		}
	}

	return sb.toString();
}
 
Example 11
Source File: PinyinUtil.java    From PinyinSearchLibrary with Apache License 2.0 5 votes vote down vote up
/**
 * Tells whether a character has a pinyin reading under the shared {@code format}.
 *
 * @param chr the character to test
 * @return {@code true} if the lookup returns a non-null result; {@code false} if it returns
 *         {@code null} or the format combination is rejected
 */
public static boolean isKanji(char chr){
	try {
		return PinyinHelper.toHanyuPinyinStringArray(chr, format) != null;
	} catch (BadHanyuPinyinOutputFormatCombination e) {
		e.printStackTrace();
		return false;
	}
}
 
Example 12
Source File: Utils.java    From WeSync with MIT License 5 votes vote down vote up
/**
 * Converts Chinese text to full pinyin (lowercase, no tone marks, "ü" written as "v").
 *
 * <p>Each Chinese character (U+4E00..U+9FA5) is replaced by its first pinyin reading. Every
 * other character, and any Chinese character for which the lookup yields no reading, is copied
 * through unchanged instead of failing with a {@link NullPointerException}.
 *
 * @param src the text to convert
 * @return the converted text; if the format combination is rejected, whatever had been
 *         converted before the failure
 */
public static String getPingYin(String src) {
    char[] chars = src.toCharArray();
    HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
    format.setCaseType(HanyuPinyinCaseType.LOWERCASE);
    format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
    format.setVCharType(HanyuPinyinVCharType.WITH_V);

    // StringBuilder avoids re-copying the accumulated text on every appended character.
    StringBuilder result = new StringBuilder(chars.length);
    try {
        for (char current : chars) {
            // A range comparison is equivalent to the former single-character regex match.
            boolean isChinese = current >= '\u4E00' && current <= '\u9FA5';
            String[] readings = isChinese
                    ? PinyinHelper.toHanyuPinyinStringArray(current, format)
                    : null;
            if (readings != null && readings.length > 0) {
                result.append(readings[0]);
            } else {
                result.append(current);
            }
        }
    } catch (BadHanyuPinyinOutputFormatCombination e1) {
        e1.printStackTrace();
    }
    return result.toString();
}
 
Example 13
Source File: PinyinUtil.java    From jeewx with Apache License 2.0 5 votes vote down vote up
/**
 * Reduces each character to its pinyin initial where one exists.
 *
 * <p>When a character has a pinyin reading, the first letter of the first reading is emitted;
 * otherwise the character itself is emitted.
 *
 * @param str the text to abbreviate
 * @return the abbreviated text
 */
public static String getPinYinHeadChar(String str) {
	StringBuilder abbreviated = new StringBuilder();
	int length = str.length();
	for (int pos = 0; pos < length; pos++) {
		char word = str.charAt(pos);
		String[] readings = PinyinHelper.toHanyuPinyinStringArray(word);
		char emitted = (readings != null) ? readings[0].charAt(0) : word;
		abbreviated.append(emitted);
	}
	return abbreviated.toString();
}
 
Example 14
Source File: LanguageComparator_CN.java    From weixin with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the first pinyin reading of a character.
 *
 * @param c the character to look up
 * @return the first reading, or {@code null} when the lookup returns {@code null}
 */
private String pinyin(char c) {
	String[] readings = PinyinHelper.toHanyuPinyinStringArray(c);
	return readings == null ? null : readings[0];
}
 
Example 15
Source File: PinYinHelper.java    From es-service-parent with Apache License 2.0 5 votes vote down vote up
/**
 * Produces the full-pinyin form of a search term, keeping only Chinese characters, ASCII
 * letters and digits.
 *
 * <p>When the trimmed input is longer than 40 characters, the untrimmed {@code word} is handed
 * to {@code PinyinHelper.toHanyuPinyinString} with a single space as separator and that result
 * is returned. Otherwise each Chinese character (U+4E00..U+9FA5) is replaced by its first
 * reading under the shared {@code format}, digits and ASCII letters are copied, and everything
 * else is discarded.
 *
 * @param word the term to convert
 * @return the converted term; if the format combination is rejected, whatever had been
 *         converted before the failure
 */
public static String getPinYin(String word) {
    char[] chars = word.trim().toCharArray();
    StringBuilder result = new StringBuilder();
    try {
        if (chars.length > 40) {
            return PinyinHelper.toHanyuPinyinString(word, format, " ");
        }
        for (char c : chars) {
            boolean isChinese = c >= '\u4e00' && c <= '\u9fa5';
            boolean isDigit = c >= '0' && c <= '9';
            boolean isAsciiLetter = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
            if (isChinese) {
                String[] readings = PinyinHelper.toHanyuPinyinStringArray(c, format);
                if (readings != null && readings.length > 0) {
                    result.append(readings[0]);
                }
            } else if (isDigit || isAsciiLetter) {
                result.append(c);
            }
        }
    } catch (BadHanyuPinyinOutputFormatCombination e) {
        e.printStackTrace();
    }
    return result.toString();
}
 
Example 16
Source File: PinyinSampleBenchmark.java    From TinyPinyin with Apache License 2.0 5 votes vote down vote up
/**
 * Reports whether pinyin4j returns at least one reading for the character.
 *
 * @param c the character to test
 * @return {@code true} when the lookup result is non-null and non-empty
 */
private boolean isChinesePinyin4j(char c) {
    String[] readings = PinyinHelper.toHanyuPinyinStringArray(c);
    return readings != null && readings.length > 0;
}
 
Example 17
Source File: PinyinUtil.java    From jeewx with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the set of pinyin spellings for a string.
 *
 * <p>Per character, the candidates are: every pinyin reading of a Chinese character
 * (U+4E00..U+9FA5; lowercase, no tone marks, "ü" written as "v"), the character itself for
 * A-Z and a-z, or an empty string for anything else. The candidate table is passed to
 * {@code Exchange}, which is defined elsewhere (presumably it combines the candidates into
 * whole-string spellings; confirm against its implementation).
 *
 * @author wyh
 * @param src the text to convert
 * @return the set of strings produced by {@code Exchange}, or {@code null} when {@code src} is
 *         {@code null} or blank
 */
public static Set<String> getPinyin(String src) {
	boolean blank = src == null || src.trim().isEmpty();
	if (blank) {
		return null;
	}

	// Output settings: lowercase, no tone marks, "v" for "ü".
	HanyuPinyinOutputFormat pinyinFormat = new HanyuPinyinOutputFormat();
	pinyinFormat.setCaseType(HanyuPinyinCaseType.LOWERCASE);
	pinyinFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
	pinyinFormat.setVCharType(HanyuPinyinVCharType.WITH_V);

	char[] letters = src.toCharArray();
	String[][] table = new String[src.length()][];
	for (int row = 0; row < letters.length; row++) {
		char c = letters[row];
		if (c >= '\u4E00' && c <= '\u9FA5') {
			try {
				// The row stays null if the lookup fails or yields nothing.
				table[row] = PinyinHelper.toHanyuPinyinStringArray(c, pinyinFormat);
			} catch (BadHanyuPinyinOutputFormatCombination e) {
				e.printStackTrace();
			}
			continue;
		}
		boolean upper = c >= 'A' && c <= 'Z';
		boolean lower = c >= 'a' && c <= 'z';
		// Only Chinese characters and ASCII letters are kept; the rest become empty strings.
		table[row] = new String[] { (upper || lower) ? String.valueOf(c) : "" };
	}

	String[] spellings = Exchange(table);
	Set<String> pinyinSet = new HashSet<String>();
	for (String spelling : spellings) {
		pinyinSet.add(spelling);
	}
	return pinyinSet;
}
 
Example 18
Source File: PinYinHelper.java    From es-service-parent with Apache License 2.0 5 votes vote down vote up
/**
 * Produces the pinyin-initials form of a term, keeping only Chinese characters, ASCII letters
 * and digits.
 *
 * <p>The input is trimmed. Each Chinese character (U+4E00..U+9FA5) contributes the first letter
 * of its first reading under the shared {@code format}; digits and ASCII letters are copied;
 * everything else is discarded.
 *
 * @param word the term to abbreviate
 * @return the abbreviated term; if the format combination is rejected, whatever had been
 *         converted before the failure
 */
public static String getPinYinPrefix(String word) {
    StringBuilder prefix = new StringBuilder();
    try {
        for (char c : word.trim().toCharArray()) {
            if (c >= '\u4e00' && c <= '\u9fa5') {
                String[] readings = PinyinHelper.toHanyuPinyinStringArray(c, format);
                if (readings != null && readings.length > 0) {
                    prefix.append(readings[0].charAt(0));
                }
                continue;
            }
            boolean isDigit = c >= '0' && c <= '9';
            boolean isAsciiLetter = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
            if (isDigit || isAsciiLetter) {
                prefix.append(c);
            }
        }
    } catch (BadHanyuPinyinOutputFormatCombination e) {
        e.printStackTrace();
    }
    return prefix.toString();
}
 
Example 19
Source File: PinyinTest.java    From TinyPinyin with Apache License 2.0 5 votes vote down vote up
/**
 * Checks {@code Pinyin.toPinyin(char)} against pinyin4j for every character returned by
 * {@code allChars()}: characters with a pinyin4j reading must map to the first reading,
 * all others must map to themselves, and the number of characters with a reading must
 * match the expected total.
 */
@Test
public void testToPinyin_char() throws BadHanyuPinyinOutputFormatCombination {
    char[] candidates = allChars();

    HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
    format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
    format.setCaseType(HanyuPinyinCaseType.UPPERCASE);
    format.setVCharType(HanyuPinyinVCharType.WITH_V);

    int chineseCount = 0;
    for (char targetChar : candidates) {
        String[] pinyins = PinyinHelper.toHanyuPinyinStringArray(targetChar, format);
        boolean isChinese = pinyins != null && pinyins.length > 0;
        if (isChinese) {
            chineseCount++;
        }
        // Non-Chinese characters are expected to pass through unchanged.
        String expected = isChinese ? pinyins[0] : String.valueOf(targetChar);
        assertThat(Pinyin.toPinyin(targetChar), equalTo(expected));
    }

    //CHECKSTYLE:OFF
    int expectedChineseCount = 20378;
    //CHECKSTYLE:ON

    assertThat(chineseCount, is(expectedChineseCount));
}
 
Example 20
Source File: PinyinUtil.java    From jeecg with Apache License 2.0 2 votes vote down vote up
/**
 * Returns the pinyin initial(s) of a single character.
 *
 * <p>A character up to code point 128, or a character for which the lookup yields no pinyin
 * reading, is returned unchanged as a one-element array. Otherwise the result holds the first
 * letter of every reading, in the order the readings are returned.
 *
 * @param src       the character to look up
 * @param isCapital whether the initials should be upper-cased; they are left as returned
 *                  otherwise
 * @return the initials of all readings, or {@code src} itself when it has no reading
 */
public static char[] getHeadByChar(char src, boolean isCapital) {
	// Low code points are never Chinese characters; return them directly.
	if (src <= 128) {
		return new char[] { src };
	}

	String[] pinyingStr = PinyinHelper.toHanyuPinyinStringArray(src);
	// The lookup yields null for characters without a reading; dereferencing it used to
	// throw a NullPointerException, so treat such characters like the non-Chinese case.
	if (pinyingStr == null || pinyingStr.length == 0) {
		return new char[] { src };
	}

	char[] headChars = new char[pinyingStr.length];
	for (int i = 0; i < pinyingStr.length; i++) {
		char headChar = pinyingStr[i].charAt(0);
		headChars[i] = isCapital ? Character.toUpperCase(headChar) : headChar;
	}
	return headChars;
}