您的位置:首页 > 其它

老师,这绝对是我一行一行敲出来的

2014-03-24 22:50 176 查看
要求:写一个程序,分析一个文本文件中各个人名出现的频率,并且把频率最高的10个词打印出来。文本文件大约是30KB~300KB大小。
输入文本:jidushan.txt(《基督山伯爵》英文小说节选,约300KB),输出文本:result.txt。
实现思想:
1、载入文本
2、处理字符流,用空格分离单词,用正则表达式去除字母以外的符号。
3、检测单词是否需要屏蔽,从单词长度和设置屏蔽单词入手。
4、输出结果文本。
主类代码如下:
package wzh;

/**
 * Command-line entry point for the word-frequency exercise.
 */
public class computeWork {

	/**
	 * Feeds {@code jidushan.txt} to a {@link Work} instance and then asks it to
	 * write its result to {@code result.txt}. Both paths are relative to the
	 * current working directory.
	 *
	 * @param args command-line arguments; not used
	 */
	public static void main(String[] args) {
		final String inputFile = "jidushan.txt";
		final String outputFile = "result.txt";

		Work counter = new Work();
		counter.addFile( inputFile );
		counter.createResultFlie( outputFile );
	}

}


功能类代码如下:
package wzh;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;

/**
 * Counts how often capitalised words occur in text files and writes the most
 * frequent ones to a result file.
 *
 * <p>A token is counted when, after every character other than {@code a-z} and
 * {@code A-Z} has been stripped, it is longer than three letters, starts with an
 * upper-case letter and is not one of the masked words listed in
 * {@link #MASKED_WORDS}.
 *
 * <p>Counts accumulate across calls to {@link #addFile(String)}.
 */
public class Work {

	/** Number of entries written by {@link #createResultFlie(String)}. */
	private static final int DEFAULT_RESULT_LIMIT = 100;

	/** Words with this many letters or fewer are never counted. */
	private static final int MAX_IGNORED_LENGTH = 3;

	/** Capitalised words that must not be counted (compared case-sensitively). */
	private static final List<String> MASKED_WORDS = Collections.unmodifiableList(Arrays.asList(
			"None", "Have", "You", "And", "When", "Well", "It", "Yes", "What",
			"Nothing", "Then", "They", "Sometimes", "This", "Why", "After", "There", "XVIII", "With", "True", "Come",
			"Because", "How", "Never", "These", "Will", "Father", "Your", "Here", "Shall", "That", "However", "Wait", "Where", "Speak",
			"Hallo", "Really", "Thats", "Very", "Suddenly", "Tell", "Thanks", "Take", "Island", "Count", "Captain"));

	/** Occurrence count per accepted word. */
	private final Map<String, Integer> map = new HashMap<String, Integer>();

	/** Creates a counter with no words counted yet. */
	public Work()
	{
	}

	/**
	 * Reads the given text file and adds its words to the running counts.
	 *
	 * <p>The file is closed before this method returns. If it cannot be opened or
	 * read, the error is reported on standard error and any words counted before
	 * the failure are kept.
	 *
	 * @param fileName path of the text file to read
	 */
	public void addFile(String fileName)	{
		try (BufferedReader reader = new BufferedReader(new FileReader(fileName))) {
			readWord(reader);
		} catch (IOException e) {
			// FileNotFoundException is an IOException, so a missing file lands here too.
			System.err.println("Could not read " + fileName);
			e.printStackTrace();
		}
	}

	/**
	 * Writes the most frequent words to {@code fileName}, one {@code word=count}
	 * line per word, at most 100 lines.
	 *
	 * @param fileName path of the result file; an existing file is overwritten
	 */
	public void createResultFlie( String fileName) {
		createResultFlie(fileName, DEFAULT_RESULT_LIMIT);
	}

	/**
	 * Writes the {@code limit} most frequent words to {@code fileName}, one
	 * {@code word=count} line per word, highest count first.
	 *
	 * <p>Fewer lines are written when fewer distinct words were counted; an empty
	 * file is produced when nothing was counted. I/O errors are reported on
	 * standard error.
	 *
	 * @param fileName path of the result file; an existing file is overwritten
	 * @param limit maximum number of lines to write; values below zero are treated as zero
	 */
	public void createResultFlie( String fileName, int limit) {
		List<Map.Entry<String, Integer>> ranked = sortWord();
		// Never index past the end of the ranking: small inputs yield short lists.
		int lineCount = Math.min(Math.max(limit, 0), ranked.size());

		try (BufferedWriter writer = new BufferedWriter(new FileWriter(fileName))) {
			for (int i = 0; i < lineCount; i++) {
				Map.Entry<String, Integer> entry = ranked.get(i);
				writer.write(entry.getKey() + "=" + entry.getValue());
				writer.newLine();
			}
		} catch (IOException e) {
			System.err.println("Could not write " + fileName);
			e.printStackTrace();
		}
	}

	/**
	 * Splits every line of {@code br} on whitespace and offers each token to
	 * {@link #checkWordToAdd(String)}.
	 *
	 * @param br source of the text; the caller is responsible for closing it
	 * @throws IOException if reading a line fails
	 */
	private void readWord( BufferedReader br ) throws IOException {
		String lineStr;
		while ((lineStr = br.readLine()) != null) {
			StringTokenizer tokens = new StringTokenizer(lineStr);
			while (tokens.hasMoreTokens()) {
				checkWordToAdd(tokens.nextToken());
			}
		}
	}

	/**
	 * Normalises one raw token and increments its count if it qualifies.
	 *
	 * @param str raw whitespace-delimited token, possibly with punctuation attached
	 */
	private void checkWordToAdd( String str ) {
		String word = str.replaceAll("[^a-zA-Z]", "");
		if (word.length() <= MAX_IGNORED_LENGTH || MASKED_WORDS.contains(word)) {
			return;
		}
		// Only ASCII letters remain, so a range check identifies an upper-case initial.
		char first = word.charAt(0);
		if (first < 'A' || first > 'Z') {
			return;
		}
		Integer count = map.get(word);
		map.put(word, count == null ? 1 : count + 1);
	}

	/**
	 * Returns the counted words ordered by descending count; words with equal
	 * counts are ordered alphabetically so the output is reproducible.
	 *
	 * @return a new list of the current entries, most frequent first
	 */
	private List<Map.Entry<String, Integer>> sortWord() {
		List<Map.Entry<String, Integer>> ranked =
				new ArrayList<Map.Entry<String, Integer>>(map.entrySet());

		Collections.sort(ranked, new Comparator<Map.Entry<String, Integer>>() {
			@Override
			public int compare(Map.Entry<String, Integer> o1,
					Map.Entry<String, Integer> o2) {
				int byCount = o2.getValue().compareTo(o1.getValue());
				return byCount != 0 ? byCount : o1.getKey().compareTo(o2.getKey());
			}
		});
		return ranked;
	}

}



结果截图


内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: 
相关文章推荐