您的位置:首页 > 其它

MapReduce之WordCount

2016-08-23 18:58 253 查看
这是我在学习《深入理解Hadoop》时整理的代码示例,仅作示例用。

package edu.hfut.wls.study.hadoop;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.io.IOException;

/**
 * MapReduce driver that counts how many times each whitespace-separated word
 * occurs in the input.
 *
 * <p>Usage: {@code WordCount [generic options] <input path> <output path>}
 *
 * <p>Created by lianbin.zlb.
 */
public class WordCount extends Configured implements Tool {

    /** Exit code returned when the job completed successfully. */
    private static final int EXIT_OK = 0;
    /** Exit code returned when the job failed or could not be run. */
    private static final int EXIT_JOB_FAILED = 1;
    /** Exit code returned when the command line is missing required paths. */
    private static final int EXIT_USAGE = 2;

    /**
     * Configures the word-count job, runs it and blocks until it finishes.
     *
     * @param strings command-line arguments; after generic Hadoop options are
     *                removed, the first remaining argument is the input path
     *                and the second is the output path
     * @return {@code 0} if the job succeeded, {@code 1} if it failed or an
     *         error occurred while setting it up, {@code 2} if the input or
     *         output path is missing
     */
    @Override
    public int run(String[] strings) {
        try {
            if (getConf() == null) {
                // run() was invoked without ToolRunner; fall back to defaults.
                setConf(new Configuration());
            }

            // Generic options must be folded into the configuration BEFORE the
            // Job is created, because Job.getInstance() takes a copy of it.
            String[] args = new GenericOptionsParser(getConf(), strings).getRemainingArgs();
            if (args.length < 2) {
                System.err.println("Usage: WordCount [generic options] <input path> <output path>");
                return EXIT_USAGE;
            }

            Job job = Job.getInstance(getConf());
            job.setJarByClass(WordCount.class);

            job.setMapperClass(WcMapper.class);
            job.setReducerClass(WcReducer.class);

            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(LongWritable.class);

            job.setInputFormatClass(TextInputFormat.class);
            job.setOutputFormatClass(TextOutputFormat.class);

            FileInputFormat.setInputPaths(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1]));

            // Wait for the job so that its outcome can be reported to the caller;
            // submit() alone returns immediately and hides failures.
            return job.waitForCompletion(true) ? EXIT_OK : EXIT_JOB_FAILED;
        } catch (InterruptedException e) {
            // Preserve the interrupt status for whoever is running this tool.
            Thread.currentThread().interrupt();
            e.printStackTrace();
            return EXIT_JOB_FAILED;
        } catch (Exception e) {
            // Tool boundary: report the problem and signal failure via the exit code.
            e.printStackTrace();
            return EXIT_JOB_FAILED;
        }
    }

    /**
     * Splits every input line on whitespace and emits {@code (word, 1)} for
     * each non-empty token.
     */
    public static class WcMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

        /** Constant count emitted for every word occurrence; never mutated. */
        private static final LongWritable ONE = new LongWritable(1);

        /** Reused output key; safe because context.write() serializes it immediately. */
        private final Text word = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            for (String token : value.toString().split("\\s+")) {
                if (token.isEmpty()) {
                    // Leading whitespace or a blank line yields an empty token.
                    continue;
                }
                word.set(token);
                context.write(word, ONE);
            }
        }
    }

    /**
     * Sums all counts received for a word and emits {@code (word, total)}.
     */
    public static class WcReducer extends Reducer<Text, LongWritable, Text, LongWritable> {

        /** Reused output value; safe because context.write() serializes it immediately. */
        private final LongWritable total = new LongWritable();

        @Override
        protected void reduce(Text key, Iterable<LongWritable> values, Context context)
                throws IOException, InterruptedException {
            long sum = 0;
            for (LongWritable count : values) {
                sum += count.get();
            }
            total.set(sum);
            context.write(key, total);
        }
    }

    /**
     * Command-line entry point; the process exit code mirrors the result of
     * {@link #run(String[])}.
     *
     * @param args generic Hadoop options followed by the input and output paths
     * @throws Exception if {@link ToolRunner} fails to launch the tool
     */
    public static void main(String[] args) throws Exception {
        int exitCode = ToolRunner.run(new Configuration(), new WordCount(), args);
        System.exit(exitCode);
    }
}
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签:  wordcount