您的位置:首页 > 运维架构

hadoop-mapreduce-(1)-统计单词数量

2017-11-22 11:35 417 查看
编写map程序

package com.cvicse.ump.hadoop.mapreduce.map;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Mapper of the word-count job.
 *
 * <p>For each input line it emits one {@code (word, 1)} pair per
 * whitespace-separated token. The input key is not used.
 */
public class WordCountMap extends Mapper<LongWritable, Text, Text, IntWritable> {

    /** The constant count emitted with every word. */
    private static final IntWritable ONE = new IntWritable(1);

    /**
     * Output key reused across calls instead of allocating a new Text per
     * token (same pattern as the official Hadoop WordCount example).
     */
    private final Text outKey = new Text();

    /**
     * Tokenizes one input line and writes {@code (token, 1)} for each token.
     *
     * @param key     input key supplied by the framework; ignored here
     * @param value   the line of text to tokenize
     * @param context sink for the emitted {@code (word, 1)} pairs
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {

        // Split on runs of whitespace so tabs and repeated spaces act as a
        // single separator.
        for (String word : value.toString().split("\\s+")) {
            // A blank line or leading whitespace still yields one empty
            // token; it must not be counted as a word.
            if (word.isEmpty()) {
                continue;
            }
            outKey.set(word);
            context.write(outKey, ONE);
        }
    }

}


编写reduce程序

package com.cvicse.ump.hadoop.mapreduce.reduce;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Reducer of the word-count job.
 *
 * <p>Sums all counts received for a word and emits {@code (word, total)}.
 */
public class WordCountReduce extends
        Reducer<Text, IntWritable, Text, IntWritable> {

    /** Output value reused across calls instead of allocating one per key. */
    private final IntWritable total = new IntWritable();

    /**
     * Adds up every value for {@code key} and writes the sum.
     *
     * @param key     the word being counted
     * @param values  the counts emitted for this word
     * @param context sink for the resulting {@code (word, total)} pair
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {

        // Primitive accumulator: a boxed Integer would be unboxed and
        // re-boxed on every iteration.
        int count = 0;
        for (IntWritable value : values) {
            count += value.get();
        }

        total.set(count);
        context.write(key, total);
    }

}


编写main函数

package com.cvicse.ump.hadoop.mapreduce;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import com.cvicse.ump.hadoop.mapreduce.map.WordCountMap;
import com.cvicse.ump.hadoop.mapreduce.reduce.WordCountReduce;

/**
 * Driver that configures and submits the word-count MapReduce job.
 *
 * <p>Usage: {@code WordCount <input path> <output path>}
 */
public class WordCount {

    /**
     * Builds the job from {@link WordCountMap} and {@link WordCountReduce},
     * runs it, and reports the outcome through a console message and the
     * process exit status (0 = success, 1 = job failed, 2 = bad arguments).
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the
     *             output path; further arguments are ignored
     * @throws Exception if the job cannot be configured or submitted
     */
    public static void main(String[] args) throws Exception {

        // Fail with a readable message instead of an
        // ArrayIndexOutOfBoundsException when the paths are missing.
        if (args.length < 2) {
            System.err.println("Usage: WordCount <input path> <output path>");
            System.exit(2);
        }

        Configuration conf = new Configuration();

        Job job = Job.getInstance(conf, "wordCount");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(WordCountMap.class);
        // Summation is associative and commutative, so the reducer can also
        // serve as the combiner to pre-aggregate map output.
        job.setCombinerClass(WordCountReduce.class);
        job.setReducerClass(WordCountReduce.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        boolean succeeded = job.waitForCompletion(true);
        if (succeeded) {
            System.out.println("wordcount task success!");
        } else {
            System.out.println("wordcount task fail!");
        }

        // Propagate the result so calling scripts can detect a failed job.
        System.exit(succeeded ? 0 : 1);
    }

}


把wordcount.txt放在hdfs的/dyh/data/input/目录下,例如:hdfs dfs -mkdir -p /dyh/data/input && hdfs dfs -put wordcount.txt /dyh/data/input/

执行:hadoop jar hdfs.jar com.cvicse.ump.hadoop.mapreduce.WordCount /dyh/data/input/wordcount.txt /dyh/data/output/1
注意:输出目录(/dyh/data/output/1)在运行前不能已经存在,否则作业会因输出目录已存在而失败。
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: