您的位置:首页 > 编程语言 > Java开发

Java 汉字转拼音

2014-09-10 17:53 323 查看
最近在做搜索推荐,其中需要用到汉字转拼音,于是找到了 Pinyin4j,并自己封装了一个工具类,支持获取汉字的全拼和简拼(拼音首字母),先 Mark 一下!

PinyinUtils.java

package com.ricky.java.suggestion.util;

import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.Set;

import net.sourceforge.pinyin4j.PinyinHelper;
import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType;
import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
import net.sourceforge.pinyin4j.format.HanyuPinyinVCharType;
import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination;

/**
 * Helpers that turn Chinese text into pinyin using pinyin4j.
 *
 * <p>Two views of a string are offered: the full spelling
 * ({@link #chineseToPinYinF(String)}) and the abbreviated spelling made of the
 * first letter of every syllable ({@link #chineseToPinYinS(String)}). Because a
 * character may have several readings, both methods return every distinct
 * combination.
 */
public class PinyinUtils {

    /**
     * Returns the pinyin readings of a single character.
     *
     * <p>Characters in the range 32 (space) to 125 ('}') are returned
     * unchanged as a one-element array. Every other character is handed to
     * {@link PinyinHelper#toHanyuPinyinStringArray(char, HanyuPinyinOutputFormat)}
     * with a format requesting no tone marks, lowercase letters and the
     * {@code WITH_V} variant for "ü".
     *
     * @param chineseCharacter the character to convert
     * @return the readings reported by pinyin4j, possibly containing duplicates;
     *         may be {@code null} when pinyin4j has no reading for the character
     *         (callers in this class treat {@code null}/empty as "skip")
     * @throws BadHanyuPinyinOutputFormatCombination if pinyin4j rejects the output format
     */
    public static String[] chineseToPinYin(char chineseCharacter) throws BadHanyuPinyinOutputFormatCombination {
        // Pass-through range first, so no format object is built for plain ASCII.
        if (chineseCharacter >= 32 && chineseCharacter <= 125) {
            return new String[] {String.valueOf(chineseCharacter)};
        }

        HanyuPinyinOutputFormat outputFormat = new HanyuPinyinOutputFormat();
        outputFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
        outputFormat.setCaseType(HanyuPinyinCaseType.LOWERCASE);
        outputFormat.setVCharType(HanyuPinyinVCharType.WITH_V);

        return PinyinHelper.toHanyuPinyinStringArray(chineseCharacter, outputFormat);
    }

    /**
     * Returns every distinct full pinyin spelling of the given text.
     *
     * @param chineseCharacter the text to convert
     * @return {@code null} if the text is {@code null} or empty; otherwise the
     *         distinct spellings (an empty array when no character could be converted)
     * @throws BadHanyuPinyinOutputFormatCombination if pinyin4j rejects the output format
     */
    public static String[] chineseToPinYinF(String chineseCharacter) throws BadHanyuPinyinOutputFormatCombination {
        return convert(chineseCharacter, true);
    }

    /**
     * Returns every distinct abbreviated spelling of the given text, built from
     * the first letter of each character's reading.
     *
     * @param chineseCharacter the text to convert
     * @return {@code null} if the text is {@code null} or empty; otherwise the
     *         distinct abbreviations (an empty array when no character could be converted)
     * @throws BadHanyuPinyinOutputFormatCombination if pinyin4j rejects the output format
     */
    public static String[] chineseToPinYinS(String chineseCharacter) throws BadHanyuPinyinOutputFormatCombination {
        return convert(chineseCharacter, false);
    }

    /**
     * Builds the cartesian product of two spelling lists.
     *
     * @param t1   the spellings accumulated so far; must not be {@code null}
     * @param t2   the readings of the next character
     * @param full {@code true} to append each reading in full, {@code false} to
     *             append only its first letter
     * @return {@code t1} itself when {@code t2} is {@code null} or empty (the
     *         character is skipped); otherwise a new array of length
     *         {@code t1.length * t2.length}
     */
    protected static String[] combine(String[] t1, String[] t2, boolean full) {
        // A character without readings contributes nothing; keep what we have.
        if (t2 == null || t2.length == 0) {
            return t1;
        }

        String[] retVal = new String[t1.length * t2.length];
        int count = 0;

        for (String prefix : t1) {
            for (String reading : t2) {
                retVal[count++] = prefix + (full ? reading : initial(reading));
            }
        }

        return retVal;
    }

    /**
     * Shared implementation of the full and abbreviated conversions.
     *
     * <p>Duplicates are removed after every character rather than once at the
     * end; otherwise repeated readings of polyphonic characters multiply the
     * intermediate result with each further character.
     */
    private static String[] convert(String text, boolean full) throws BadHanyuPinyinOutputFormatCombination {
        // Null-or-empty input keeps the historical contract of returning null.
        if (text == null || text.isEmpty()) {
            return null;
        }

        String[] result = null;

        for (int i = 0; i < text.length(); i++) {
            String[] readings = chineseToPinYin(text.charAt(i));
            if (readings == null || readings.length == 0) {
                continue; // no reading for this character: skip it
            }

            String[] parts = distinct(full ? readings : initials(readings));
            result = (result == null) ? parts : distinct(combine(result, parts, full));
        }

        return result == null ? new String[0] : result;
    }

    /** Returns the distinct elements of {@code values} in first-seen order. */
    private static String[] distinct(String[] values) {
        Set<String> unique = new LinkedHashSet<String>();
        for (String value : values) {
            unique.add(value);
        }
        return unique.toArray(new String[0]);
    }

    /** Returns a new array holding the first letter of every reading. */
    private static String[] initials(String[] readings) {
        String[] firstLetters = new String[readings.length];
        for (int i = 0; i < readings.length; i++) {
            firstLetters[i] = initial(readings[i]);
        }
        return firstLetters;
    }

    /** Returns the first character of {@code reading}, or {@code reading} itself if it is empty. */
    private static String initial(String reading) {
        return reading.isEmpty() ? reading : reading.substring(0, 1);
    }

}


示例代码如下:

package com.ricky.java.suggestion.test;

import java.util.Arrays;

import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination;

import com.ricky.java.suggestion.util.PinyinUtils;

/**
 * Small command-line demo for {@link PinyinUtils}.
 */
public class PinyinTest {

    /**
     * Converts one sample string and prints its full and abbreviated pinyin
     * spellings to standard output.
     *
     * @param args unused
     */
    public static void main(String[] args) {

        // Other sample inputs that can be substituted here:
        //   "长沙绿", "KFC(重庆&旗舰店)", "陶然居", "361°",
        //   "中国·人民电器集团", "好邻居", "便利店"
        // Single characters can also be checked directly, e.g.
        //   PinyinUtils.chineseToPinYin('长')
        final String str = "京成138快捷酒店";

        try {
            // Full spellings are computed and printed before the abbreviations.
            String[] fullSpellings = PinyinUtils.chineseToPinYinF(str);
            System.out.println("str pyf=" + Arrays.toString(fullSpellings));

            String[] abbreviations = PinyinUtils.chineseToPinYinS(str);
            System.out.println("str pys=" + Arrays.toString(abbreviations));
        } catch (BadHanyuPinyinOutputFormatCombination e) {
            e.printStackTrace();
        }
    }

}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  pinyin4j