您的位置:首页 > 其它

统计文本英文单词总个数,并列出每个单词的个数

2014-02-26 13:48 661 查看
package test;
/*
 * Task: count the total number of English words in a text file and list
 *       how many times each word occurs.
 *
 * Date: 2014-02-26
 *
 * Author: 璀若星辰
 */
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Counts the English words in a text file and prints a frequency report
 * to standard output.
 */
public class IO_Word {

    /** Character encoding used to decode the input file. */
    private static final String FILE_ENCODING = "gb2312";

    /** A word is a maximal run of ASCII letters; compiled once and reused. */
    private static final Pattern WORD = Pattern.compile("[a-zA-Z]+");

    /** Input file used by {@link #main(String[])} when no argument is given. */
    private static final String DEFAULT_PATH = "D:/abc.txt";

    /**
     * Reads the file at {@code str}, counts every English word in it
     * case-insensitively, and prints the total number of words, the number of
     * distinct words, and one {@code word-count} line per distinct word,
     * ordered by descending count (ties stay in alphabetical order).
     *
     * @param str path of the text file to analyse
     * @return the lines of the file, in order, without line terminators
     * @throws Exception if the file cannot be opened, decoded or read
     */
    public static List<String> Io_word(String str) throws Exception {
        List<String> all = readLines(str);

        // Sorted map so that words with equal counts come out alphabetically.
        TreeMap<String, Integer> counts = new TreeMap<String, Integer>();
        int n = 0; // total number of words in the text

        // Words cannot span lines (a line break is not a letter), so matching
        // line by line yields the same words as matching the whole text.
        for (String line : all) {
            // Locale.ROOT keeps the lower-casing independent of the default
            // locale; e.g. under a Turkish locale "I" would otherwise become a
            // dotless i, which the ASCII-only pattern does not match.
            Matcher matcher = WORD.matcher(line.toLowerCase(Locale.ROOT));
            while (matcher.find()) {
                String word = matcher.group();
                n++;
                Integer seen = counts.get(word);
                counts.put(word, Integer.valueOf(seen == null ? 1 : seen.intValue() + 1));
            }
        }

        System.out.println("统计分析如下:");
        System.out.println("txt文章中单词总数"+ n +"个");

        List<Map.Entry<String, Integer>> list =
                new ArrayList<Map.Entry<String, Integer>>(counts.entrySet());
        System.out.println("list="+list.size());

        // Most frequent first. Collections.sort is stable, so equal counts
        // keep the alphabetical order inherited from the TreeMap.
        Collections.sort(list, new Comparator<Map.Entry<String, Integer>>() {
            public int compare(Map.Entry<String, Integer> first, Map.Entry<String, Integer> second) {
                return second.getValue().compareTo(first.getValue());
            }
        });
        for (Entry<String, Integer> entry : list) {
            System.out.println(entry.getKey() + "-" + entry.getValue());
        }
        return all;
    }

    /**
     * Reads every line of the file at {@code path}, decoding it with
     * {@link #FILE_ENCODING}.
     *
     * @param path path of the file to read
     * @return the lines of the file, in order
     * @throws Exception if the file cannot be opened, decoded or read
     */
    private static List<String> readLines(String path) throws Exception {
        FileInputStream fis = new FileInputStream(new File(path));
        try {
            BufferedReader br = new BufferedReader(new InputStreamReader(fis, FILE_ENCODING));
            List<String> lines = new ArrayList<String>();
            for (String line = br.readLine(); line != null; line = br.readLine()) {
                lines.add(line);
            }
            return lines;
        } finally {
            // The stream is the only object holding the file open; closing it
            // here also covers a failure while constructing the readers.
            fis.close();
        }
    }

    /**
     * Runs the word count on the file named by the first command-line
     * argument, or on {@code D:/abc.txt} when no argument is given.
     *
     * @param args optional: path of the text file to analyse
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
        try {
            IO_Word.Io_word(path);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

运行结果如下

内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: