您的位置:首页 > 其它

中文、英文和中英文混合排序

2010-01-25 10:51 106 查看
对Search进行排序

SearchComparator.java中的实现方法compare已不能满足需要

其中涉及到中文、英文或者中英文混合排序,所以,这里使用开源的 pinyin4j 对其排序

SearchComparator.java调用sortListByType排序,其中调用了PinyinComparator

SearchComparator.java

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.clx.webmail.util.PinyinComparator;

/**
 * Orders {@link Search} entries by their search name.
 *
 * <p>{@link #compare(Search, Search)} is a plain case-insensitive string
 * comparison. {@link #sortListByType(List)} builds on it to sort lists whose
 * names may contain Chinese characters: names without Chinese characters are
 * sorted with this comparator, names with Chinese characters are sorted with
 * {@code PinyinComparator}, and the two groups are concatenated.
 */
public class SearchComparator implements Comparator<Search> {

    /**
     * Matches a run of characters in the range U+4E00..U+9FA5.
     *
     * <p>Compiled once and reused by {@link #isContainsHanyu(String)}. The
     * backslashes are regex escapes; the earlier forward-slash form was not a
     * valid character class and made {@code Pattern.compile} throw.
     */
    private static final Pattern HANYU_PATTERN =
            Pattern.compile("[\\u4E00-\\u9FA5]+", Pattern.CANON_EQ);

    /**
     * Compares two entries by name, ignoring case.
     *
     * <p>This is a simple string comparison with no pinyin awareness, so on
     * its own it is not suitable for ordering names that contain Chinese
     * characters; use {@link #sortListByType(List)} for those.
     *
     * @param s1 first entry; its search name must not be null
     * @param s2 second entry; its search name must not be null
     * @return a negative, zero or positive value as defined by
     *         {@link String#compareToIgnoreCase(String)}
     */
    @Override
    public int compare(Search s1, Search s2) {
        return s1.getSearchName().compareToIgnoreCase(s2.getSearchName());
    }

    /**
     * Returns a new list with the non-Chinese names first (sorted with this
     * comparator) followed by the names containing Chinese characters (sorted
     * with {@code PinyinComparator}).
     *
     * <p>The input list is not modified. Search names are expected to be
     * non-null because both comparators dereference them while sorting.
     *
     * @param list entries to sort; may be null or empty
     * @return a new sorted list, empty when {@code list} is null or empty
     */
    public List<Search> sortListByType(List<Search> list) {
        List<Search> engList = new ArrayList<Search>();
        List<Search> chaList = new ArrayList<Search>();

        if (list != null) {
            for (Search search : list) {
                String name = search.getSearchName();
                // If English should take priority instead, the condition here would be:
                // isContainsHanyu(name.substring(0,1))&&isContainsHanyu(name)
                if (isContainsHanyu(name)) {
                    chaList.add(search);
                } else {
                    engList.add(search);
                }
            }
        }

        Collections.sort(chaList, new PinyinComparator());
        Collections.sort(engList, this);

        List<Search> sorted = new ArrayList<Search>(engList.size() + chaList.size());
        sorted.addAll(engList);
        sorted.addAll(chaList);
        return sorted;
    }

    /**
     * Appends every element of {@code copyList} to {@code sourceList}.
     *
     * <p>The raw signature is kept so that existing callers continue to
     * compile unchanged.
     *
     * @param sourceList list that receives the elements
     * @param copyList   list whose elements are appended; null is treated as empty
     * @return {@code sourceList}, for chaining
     */
    @SuppressWarnings({"rawtypes", "unchecked"})
    public List copy(List sourceList, List copyList) {
        // The emptiness check keeps a null sourceList harmless when there is nothing to add.
        if (copyList != null && !copyList.isEmpty()) {
            sourceList.addAll(copyList);
        }
        return sourceList;
    }

    /**
     * Checks whether a string contains at least one character in the range
     * U+4E00..U+9FA5.
     *
     * @param str string to inspect; may be null
     * @return {@code true} if such a character is present, {@code false}
     *         otherwise (including for null input)
     */
    public boolean isContainsHanyu(String str) {
        return str != null && HANYU_PATTERN.matcher(str).find();
    }

    /**
     * Small manual check: prints whether a mixed English/Chinese sample is
     * detected as containing Chinese characters.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String s = "test测试";
        SearchComparator comparator = new SearchComparator();
        System.out.println(comparator.isContainsHanyu(s));
    }

}

PinyinComparator.java

import java.util.Arrays;
import java.util.Comparator;

import com.clx.webmail.models.Search;

import net.sourceforge.pinyin4j.PinyinHelper;
import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType;
import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
import net.sourceforge.pinyin4j.format.HanyuPinyinVCharType;
import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination;

/**
 * Compares {@link Search} entries by name, ordering Chinese characters by
 * their pinyin spelling instead of by their code point.
 *
 * <p>The two names are walked in parallel. At the first position where they
 * differ, each character is turned into a sort key (its pinyin for characters
 * in U+4E00..U+9FA5, the character itself otherwise) and the keys are
 * compared. Characters whose keys are equal (for example homophones) do not
 * decide the order and the walk continues. If one name is a prefix of the
 * other, the shorter one sorts first.
 */
public class PinyinComparator implements Comparator<Search> {

    /** First character of the range treated as Chinese (U+4E00). */
    private static final char HANZI_FIRST = 0x4E00;

    /** Last character of the range treated as Chinese (U+9FA5). */
    private static final char HANZI_LAST = 0x9FA5;

    /** Output format for pinyin lookups: lower case, no tone marks, "v" for u-umlaut. */
    private final HanyuPinyinOutputFormat format = createFormat();

    /**
     * Compares two entries by name using pinyin for Chinese characters.
     *
     * @param s1 first entry; its search name must not be null
     * @param s2 second entry; its search name must not be null
     * @return a negative, zero or positive value when {@code s1} sorts
     *         before, equal to, or after {@code s2}
     */
    @Override
    public int compare(Search s1, Search s2) {
        String o1 = s1.getSearchName();
        String o2 = s2.getSearchName();

        int i = 0;
        while (i < o1.length() && i < o2.length()) {
            // codePointAt (not charAt) so that surrogate pairs are seen as one code point.
            int codePoint1 = o1.codePointAt(i);
            int codePoint2 = o2.codePointAt(i);

            if (codePoint1 == codePoint2) {
                // Identical code points occupy the same number of chars in both strings.
                i += Character.charCount(codePoint1);
                continue;
            }

            if (Character.isSupplementaryCodePoint(codePoint1)
                    || Character.isSupplementaryCodePoint(codePoint2)) {
                // No pinyin lookup for supplementary characters; order by code point.
                // Code points are at most 0x10FFFF, so the subtraction cannot overflow.
                return codePoint1 - codePoint2;
            }

            int byPinyin = pinyin((char) codePoint1).compareTo(pinyin((char) codePoint2));
            if (byPinyin != 0) {
                return byPinyin;
            }
            // Different characters with the same sort key: keep looking.
            i++;
        }

        return o1.length() - o2.length();
    }

    /**
     * Returns the sort key for a single character.
     *
     * @param c character to convert
     * @return the first pinyin reading when {@code c} is in U+4E00..U+9FA5
     *         and a reading is available; otherwise the character itself as
     *         a one-character string. Never null.
     */
    private String pinyin(char c) {
        if (c >= HANZI_FIRST && c <= HANZI_LAST) {
            try {
                String[] readings = PinyinHelper.toHanyuPinyinStringArray(c, format);
                if (readings != null && readings.length > 0 && readings[0] != null) {
                    return readings[0];
                }
            } catch (BadHanyuPinyinOutputFormatCombination e) {
                // Fall through to the raw character so sorting can still proceed.
                e.printStackTrace();
            }
        }
        return Character.toString(c);
    }

    /** Builds the pinyin output format used by this comparator. */
    private static HanyuPinyinOutputFormat createFormat() {
        HanyuPinyinOutputFormat outputFormat = new HanyuPinyinOutputFormat();
        outputFormat.setCaseType(HanyuPinyinCaseType.LOWERCASE);
        outputFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
        outputFormat.setVCharType(HanyuPinyinVCharType.WITH_V);
        return outputFormat;
    }

}

此外,增加一个工具类 PinyinToolkit.java,用来得到汉语拼音,在排序中也可能用得到

import net.sourceforge.pinyin4j.PinyinHelper;
import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType;
import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
import net.sourceforge.pinyin4j.format.HanyuPinyinVCharType;
import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination;

import java.io.UnsupportedEncodingException;

/**
 * Pinyin utilities: converts strings containing Chinese characters into
 * their pinyin spelling or into pinyin initials.
 *
 * @author zhouhang 2010-01-25
 */
public class PinyinToolkit {

    /**
     * Returns the first pinyin letter of every Chinese character in the
     * input, keeping ASCII characters, and then removes every character that
     * is not a regex word character ({@code \W}), such as spaces and
     * punctuation.
     *
     * <p>Non-ASCII characters for which no pinyin reading is available are
     * dropped.
     *
     * @param chinese text to convert
     * @return the pinyin initials combined with the retained ASCII word characters
     * @throws NullPointerException if {@code chinese} is null
     */
    public static String cn2FirstSpell(String chinese) {
        StringBuilder initials = new StringBuilder();
        HanyuPinyinOutputFormat format = newOutputFormat();
        for (char c : chinese.toCharArray()) {
            if (c > 128) {
                String reading = firstReading(c, format);
                if (reading != null) {
                    initials.append(reading.charAt(0));
                }
            } else {
                initials.append(c);
            }
        }
        // Whitespace is also matched by \W, so no separate trim is needed afterwards.
        return initials.toString().replaceAll("\\W", "");
    }

    /**
     * Returns the input with every Chinese character replaced by its first
     * pinyin reading. ASCII characters are kept unchanged, and so are
     * non-ASCII characters for which no pinyin reading is available.
     *
     * @param chinese text to convert
     * @return the pinyin spelling of the input
     * @throws NullPointerException if {@code chinese} is null
     */
    public static String cn2Spell(String chinese) {
        StringBuilder spelling = new StringBuilder();
        HanyuPinyinOutputFormat format = newOutputFormat();
        for (char c : chinese.toCharArray()) {
            String reading = (c > 128) ? firstReading(c, format) : null;
            if (reading != null) {
                spelling.append(reading);
            } else {
                spelling.append(c);
            }
        }
        return spelling.toString();
    }

    /** Builds the output format shared by both conversions: lower case, no tones, "v" for u-umlaut. */
    private static HanyuPinyinOutputFormat newOutputFormat() {
        HanyuPinyinOutputFormat format = new HanyuPinyinOutputFormat();
        format.setCaseType(HanyuPinyinCaseType.LOWERCASE);
        format.setToneType(HanyuPinyinToneType.WITHOUT_TONE);
        format.setVCharType(HanyuPinyinVCharType.WITH_V);
        return format;
    }

    /**
     * Looks up the first pinyin reading of a character.
     *
     * @return the first non-empty reading, or null when the lookup yields no
     *         usable reading or fails
     */
    private static String firstReading(char c, HanyuPinyinOutputFormat format) {
        try {
            String[] readings = PinyinHelper.toHanyuPinyinStringArray(c, format);
            if (readings != null && readings.length > 0
                    && readings[0] != null && readings[0].length() > 0) {
                return readings[0];
            }
        } catch (BadHanyuPinyinOutputFormatCombination e) {
            // Treated the same as "no reading"; the caller decides how to fall back.
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Prints the pinyin initials and the full pinyin spelling of a sample string.
     *
     * @param args unused
     */
    public static void main(String[] args) throws UnsupportedEncodingException {
        String x = "嘅囧誰說壞學生來勼髮視頻裆児";
        System.out.println(cn2FirstSpell(x));
        System.out.println(cn2Spell(x));
    }
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: 
相关文章推荐