java用pinyin4j把中文文件转化为拼音
2014-12-09 17:37
302 查看
package testcase;
import java.util.HashSet; import java.util.Set; import net.sourceforge.pinyin4j.PinyinHelper; import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType; import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat; import net.sourceforge.pinyin4j.format.HanyuPinyinToneType; import net.sourceforge.pinyin4j.format.HanyuPinyinVCharType; import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination; public class ToPin { public static Set<String> getPinyin(String src){ if(src != null && !src.trim().equalsIgnoreCase("")){ char[] srcChar; srcChar = src.toCharArray(); //汉语拼音格式输出类 HanyuPinyinOutputFormat hanYuPinOutputFormat = new HanyuPinyinOutputFormat(); //输出设置,大小写,音标方式等 hanYuPinOutputFormat.setCaseType(HanyuPinyinCaseType.LOWERCASE); //小写 hanYuPinOutputFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE); //无音调 hanYuPinOutputFormat.setVCharType(HanyuPinyinVCharType.WITH_V); //'¨¹' is "v" String[][] temp = new String[src.length()][]; for(int i=0;i<srcChar.length;i++){ char c = srcChar[i]; //是中文或者a-z或者A-Z转换拼音(我的需求,是保留中文或者a-z或者A-Z) if(String.valueOf(c).matches("[\u4E00-\u9FA5]+")){ //中文字符 try{ temp[i] = PinyinHelper.toHanyuPinyinStringArray(srcChar[i],hanYuPinOutputFormat); }catch(BadHanyuPinyinOutputFormatCombination e){ e.printStackTrace(); } }else if(((int)c>=65&&(int)c<=90)||((int)c>=97&&(int)c<=122)){ //英文字母 temp[i] = new String[]{String.valueOf(srcChar[i])}; }else{ //其他字符 temp[i] = new String[]{""}; } } String[] pinyinArray = ExChange(temp); Set<String> pinyinSet = new HashSet<String>(); for(int i=0;i<pinyinArray.length;i++){ pinyinSet.add(pinyinArray[i]); } return pinyinSet; } return null; } /** * 字符串集合转换字符串(逗号分隔) * @param stringSet * @return */ public static String makeStringByStringSet(Set<String> stringSet,String separator){ StringBuilder str = new StringBuilder(); int i=0; for(String s :stringSet){ if(i == stringSet.size() - 1){ str.append(s); }else{ str.append(s+separator); } i++; } return str.toString().toLowerCase(); } private 
static String[] ExChange(String[][] strJaggedArray) { String[][] temp = DoExchange(strJaggedArray); return temp[0]; } private static String[][] DoExchange(String[][] strJaggedArray) { int len = strJaggedArray.length; if(len >= 2){ int len1 = strJaggedArray[0].length; int len2 = strJaggedArray[1].length; int newlen = len1*len2; String[] temp = new String[newlen]; int index = 0; for(int i=0;i<len1;i++){ for(int j=0;j<len2;j++){ temp[index] = strJaggedArray[0][i]+strJaggedArray[1][j]; index++; } } String[][] newArray = new String[len-1][]; for(int i=2;i<len;i++){ newArray[i-1] = strJaggedArray[i]; } newArray[0] = temp; return DoExchange(newArray); }else{ return strJaggedArray; } } /** * 只转换汉字为拼音,其他字符不变 * @param src * @return */ public static String getPinyinWithMark(String src){ if(src != null && !src.trim().equalsIgnoreCase("")){ char[] srcChar; srcChar = src.toCharArray(); //汉语拼音格式输出类 HanyuPinyinOutputFormat hanYuPinOutputFormat = new HanyuPinyinOutputFormat(); //输出设置,大小写,音标方式等 hanYuPinOutputFormat.setCaseType(HanyuPinyinCaseType.LOWERCASE); //小写 hanYuPinOutputFormat.setToneType(HanyuPinyinToneType.WITH_TONE_MARK); //无音调 hanYuPinOutputFormat.setVCharType(HanyuPinyinVCharType.WITH_U_UNICODE); //'¨¹' is "v" StringBuffer output = new StringBuffer(); //String[][] temp = new String[src.length()][]; for(int i=0;i<srcChar.length;i++){ char c = srcChar[i]; //是中文转换拼音(我的需求,是保留中文) if(String.valueOf(c).matches("[//u4E00-//u9FA5]+")){ //中文字符 try{ String[] temp = PinyinHelper.toHanyuPinyinStringArray(srcChar[i],hanYuPinOutputFormat); output.append(temp[0]); output.append(" "); }catch(BadHanyuPinyinOutputFormatCombination e){ e.printStackTrace(); } }else{ //其他字符 output.append(String.valueOf(srcChar[i])); } } return output.toString(); } return null; } /** * 只转换汉字为拼音,其他字符不变 * @param src * @return */ public static String getPinyinWithMark2(String inputString){ //汉语拼音格式输出类 HanyuPinyinOutputFormat hanYuPinOutputFormat = new HanyuPinyinOutputFormat(); //输出设置,大小写,音标方式等 
hanYuPinOutputFormat.setCaseType(HanyuPinyinCaseType.LOWERCASE); //小写 hanYuPinOutputFormat.setToneType(HanyuPinyinToneType.WITH_TONE_MARK); //有音调 hanYuPinOutputFormat.setVCharType(HanyuPinyinVCharType.WITH_U_UNICODE); //'¨¹' is "u:" char[] input = inputString.trim().toCharArray(); StringBuffer output = new StringBuffer(); for(int i=0;i<input.length;i++){ //是中文转换拼音(我的需求,是保留中文) if(Character.toString(input[i]).matches("[\u4E00-\u9FA5]+")){ //中文字符 try{ String[] temp = PinyinHelper.toHanyuPinyinStringArray(input[i],hanYuPinOutputFormat); output.append(temp[0]); output.append(" "); }catch(BadHanyuPinyinOutputFormatCombination e){ e.printStackTrace(); } }else{ //其他字符 output.append(Character.toString(input[i])); } } return output.toString(); } /** * @param args */ public static void main(String[] args) { String str = "我是中国人! I'm Chinese! 本文主要介绍在linux下sh批处理文件调用java的方法。"; //System.out.println(makeStringByStringSet(getPinyin(str)," ")); System.out.println(getPinyinWithMark2(str)); } }
package testcase;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
/**
 * Converts a UTF-8 text file to pinyin line by line, using
 * {@code ToPin.getPinyinWithMark2} for the conversion.
 */
public class ToPinYin {

    /** Output file used by {@link #readFileByLines(String)}. */
    private static final String DEFAULT_OUTPUT_FILE = "D:\\aaa2.txt";

    /** Charset used for both reading and writing so tone marks survive the round trip. */
    private static final String ENCODING = "UTF-8";

    /**
     * Converts every line of {@code fileName} and appends the result to the
     * default output file.
     *
     * @param fileName path of the UTF-8 input file
     */
    public static void readFileByLines(String fileName) {
        readFileByLines(fileName, DEFAULT_OUTPUT_FILE);
    }

    /**
     * Converts every line of {@code fileName} and appends the result to
     * {@code outputFileName}. Failures are printed to standard error and end
     * the conversion; they are not propagated.
     *
     * @param fileName path of the UTF-8 input file
     * @param outputFileName path of the file the converted lines are appended to
     */
    public static void readFileByLines(String fileName, String outputFileName) {
        BufferedReader reader = null;
        try {
            System.out.println("以行为单位读取文件内容,一次读一整行:");
            reader = new BufferedReader(
                    new InputStreamReader(new FileInputStream(fileName), ENCODING));
            String currentLine;
            // readLine() returns null at end of file.
            while ((currentLine = reader.readLine()) != null) {
                writeFile(ToPin.getPinyinWithMark2(currentLine), outputFileName);
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeAndReport(reader);
        }
    }

    /**
     * Writes {@code str} to the file {@code str2}, replacing any existing
     * content. I/O failures are printed to standard error.
     *
     * @param str text to write
     * @param str2 path of the target file; created when it does not exist
     */
    public static void writerFile(String str, String str2) {
        BufferedWriter writer = null;
        try {
            writer = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(str2, false), ENCODING));
            writer.write(str);
            // Flush here so a failed write is reported by the catch below.
            writer.flush();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeAndReport(writer);
        }
    }

    /**
     * Appends {@code str} followed by CR LF to the file {@code url}.
     * Failures are printed to standard error rather than thrown.
     *
     * @param str text to append
     * @param url path of the target file; created when it does not exist
     * @throws Exception declared for compatibility with existing callers
     */
    public static void writeFile(String str, String url) throws Exception {
        BufferedWriter writer = null;
        try {
            writer = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(url, true), ENCODING));
            writer.write(str + "\r\n");
            // Flush here so a failed write is reported by the catch below.
            writer.flush();
        } catch (Exception e) {
            // Deliberately broad: this helper has always been best effort.
            e.printStackTrace();
        } finally {
            closeAndReport(writer);
        }
    }

    /**
     * Demo entry point: converts a fixed input file.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        readFileByLines("D:\\bbbb.txt");
    }

    /** Closes {@code resource} when it is non-null, printing any failure. */
    private static void closeAndReport(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
相关文章推荐
- java中文转化成拼音
- java 实现中文转化为拼音代码 汉字转化为拼音源码分享
- java的中文转拼音组件---pinyin4j
- 组件pinyin4j中文转化拼音牛刀小试
- java 中文转拼音之pinyin4j
- java的中文转拼音组件---pinyin4j的使用
- JAVA 将中文转化为拼音工具类
- java 中文排序 中文拼音排序 pinyin4j (怡,阿等)
- java中文转换为拼音的pinyin4j学习笔记
- java 中文转拼音之pinyin4j
- java实现中文汉字转拼音 Pinyin4j的基本用法
- java 中文转化为拼音
- 【框架】pinyin4j中文汉字转化为拼音
- java实现中文转化为拼音与简称 .
- 使用Java将中文转化为拼音
- java实现中文转化为拼音与简称
- java实现中文汉字转拼音 Pinyin4j的基本用法
- Java下将汉字转换为拼音的包pinyin4j
- 用Java转化汉字为拼音全拼
- JAVA翻译给定中文字符串的拼音首字母