您的位置:首页 > 运维架构 > 网站架构

一个典型的MapReduce实例------webcount(网站统计访客信息)

2016-12-05 01:17 585 查看
统计某一特定网站的某个时辰访客人数

所用版本:hadoop2.6.5

数据样式如下:

111.111.111.111 - - [16/Dec/2012:05:32:50 -0500] "GET / HTTP/1.1" 200 14791 "-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)"
111.111.111.111 - - [16/Dec/2012:05:33:50 -0500] "GET / HTTP/1.1" 200 14791 "-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)"
111.111.111.111 - - [16/Dec/2012:05:34:45 -0500] "GET / HTTP/1.1" 200 14791 "-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)"
111.111.111.111 - - [16/Dec/2012:05:34:50 -0500] "GET / HTTP/1.1" 200 14791 "-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)"
111.111.111.111 - - [16/Dec/2012:09:34:55 -0500] "GET / HTTP/1.1" 200 14791 "-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)"
111.111.111.111 - - [16/Dec/2012:10:23:30 -0500] "GET / HTTP/1.1" 200 14791 "-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)"
111.111.111.111 - - [16/Dec/2012:10:32:50 -0500] "GET / HTTP/1.1" 200 14791 "-" "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)"


辅助类

1 package com.trendwise.software;
2
3 import java.text.SimpleDateFormat;
4 import java.util.Date;
5 import java.io.DataInput; import java.io.DataOutput;
6 import java.io.IOException;
7 import org.apache.hadoop.io.WritableComparable;
8
9 public class DateWritable implements WritableComparable<DateWritable>{
10     private final static SimpleDateFormat formatter = new SimpleDateFormat( "yyyy-MM-dd' T 'HH:mm:ss.SSS" );
11     private Date date;
12     public Date getDate() {
13         return date;
14     }
15     public void setDate( Date date ) {
16         this.date = date;
17     }
18
19     @Override
20     public void readFields(DataInput in) throws IOException {
21         date = new Date( in.readLong() );
22     }
23
24     @Override
25     public void write(DataOutput out) throws IOException {
26         out.writeLong( date.getTime() );
27     }
28
29     @Override
30     public int compareTo(DateWritable o) {
31         return date.compareTo( o.getDate() );
32     }
33
34     public String toString() {
35         return formatter.format( date);
36     }
37 }


mapper 映射特定年份中每月每天每个时辰的访客数

1 package com.trendwise.software;
2
3 import java.io.IOException;
4 import java.util.Calendar;
5 import org.apache.hadoop.io.IntWritable;
6 import org.apache.hadoop.io.LongWritable;
7 import org.apache.hadoop.io.Text;
8 import org.apache.hadoop.mapreduce.Mapper;
9
10 public class LogMapper extends Mapper<LongWritable, Text, DateWritable, IntWritable> {
11     public static DateWritable dates = new DateWritable();
12     public final static IntWritable two = new IntWritable(1);
13     public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
14         String text = value.toString();
15         // Get the date and time
16         int openBracket = text.indexOf( '[' );
17         int closeBracket = text.indexOf( ']' );
18         if( openBracket != -1 && closeBracket != -1 ) {
19             // Read the date
20             String dateString = text.substring( text.indexOf( '[' ) + 1, text. indexOf( ']' ) );
21             // Build a date object from a string of the form: 16/Dec/2012:05:32:50 -0500
22             int index = 0;
23             int nextIndex = dateString.indexOf( '/' );
24             int day = Integer.parseInt( dateString.substring(index, nextIndex) );
25
26             index = nextIndex; nextIndex = dateString.indexOf( '/', index+1 );
27             String month = dateString.substring( index+1, nextIndex );
28             index = nextIndex;
29             nextIndex = dateString.indexOf( ':', index );
30             int year = Integer.parseInt(dateString.substring(index + 1, nextIndex));
31             index = nextIndex; nextIndex = dateString.indexOf( ':', index+1 );
32             int hour = Integer.parseInt(dateString.substring(index + 1, nextIndex));
33             // Build a calendar object for this date
34             Calendar calendar = Calendar.getInstance();
35             calendar.set( Calendar.DATE, day );
36             calendar.set( Calendar.YEAR, year );
37             calendar.set( Calendar.HOUR, hour );
38             calendar.set( Calendar.MINUTE, 0 );
39             calendar.set( Calendar.SECOND, 0 );
40             calendar.set( Calendar.MILLISECOND, 0 );
41             if( month.equalsIgnoreCase( "dec" ) ) {
42                 calendar.set( Calendar.MONTH, Calendar.DECEMBER );
43             }
44             else if( month.equalsIgnoreCase( "nov" ) ) {
45                 calendar.set( Calendar.MONTH, Calendar.NOVEMBER );
46             }
47             else if( month.equalsIgnoreCase( "oct" ) ) {
48                 calendar.set( Calendar.MONTH, Calendar.OCTOBER );
49             }
50             else if( month.equalsIgnoreCase( "sep" ) ) {
51                 calendar.set( Calendar.MONTH, Calendar.SEPTEMBER );
52             }
53             else if( month.equalsIgnoreCase( "aug" ) ) {
54                 calendar.set( Calendar.MONTH, Calendar.AUGUST );
55             }
56             else if( month.equalsIgnoreCase( "jul" ) ) {
57                 calendar.set( Calendar.MONTH, Calendar.JULY );
58             }
59             else if( month.equalsIgnoreCase( "jun" ) ) {
60                 calendar.set( Calendar.MONTH, Calendar.JUNE );
61             }
62             else if( month.equalsIgnoreCase( "may" ) ) {
63                 calendar.set( Calendar.MONTH, Calendar.MAY );
64             }
65             else if( month.equalsIgnoreCase( "apr" ) ) {
66                 calendar.set( Calendar.MONTH, Calendar.APRIL );
67             }
68             else if( month.equalsIgnoreCase( "mar" ) ) {
69                 calendar.set( Calendar.MONTH, Calendar.MARCH );
70             }
71             else if( month.equalsIgnoreCase( "feb" ) ) {
72                 calendar.set( Calendar.MONTH, Calendar.FEBRUARY );
73             }
74             else if( month.equalsIgnoreCase( "jan" ) ) {
75                 calendar.set( Calendar.MONTH, Calendar.JANUARY );
76             }
77
78             dates.setDate( calendar.getTime() );
79             context.write(dates, two);
80
81         }
82     }
83 }


reducer 汇总一个时辰内访客人数

1 package com.trendwise.software;
2
3 import java.io.IOException;
4 import org.apache.hadoop.io.IntWritable;
5 import org.apache.hadoop.mapreduce.Reducer;
6
7 public class  LogReducer extends Reducer<DateWritable, IntWritable, DateWritable, IntWritable> {
8     @Override
9     public void reduce( DateWritable key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
10
11         int countn = 0;
12         for(IntWritable v :values){
13             countn += v.get();
14         }
15         context.write(key, new IntWritable( countn) );
16     }
17 }


driver 配置信息,程序入口

1 package com.trendwise.software;
2
3 import java.io.IOException;
4 import org.apache.hadoop.conf.Configuration;
5 import org.apache.hadoop.fs.Path;
6 import org.apache.hadoop.io.IntWritable;
7 import org.apache.hadoop.mapreduce.Job;
8 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
9 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
10
11 public class Driver {
12
13     public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
14
15         String in = args[0];
16         String out = args[1];
17         int unitmb =Integer.valueOf(args[2]);
18         int nreducer = Integer.valueOf(args[3]);
19
20         Configuration conf = new Configuration();
21         conf.set("mapreduce.input.fileinputformat.split.maxsize", String.valueOf(unitmb * 1024 * 1024));
22         conf.set("mapred.min.split.size", String.valueOf(unitmb * 1024 * 1024));
23         conf.set("mapreduce.input.fileinputformat.split.minsize.per.node", String.valueOf(unitmb * 1024 * 1024));
24         conf.set("mapreduce.input.fileinputformat.split.minsize.per.rack", String.valueOf(unitmb * 1024 * 1024));
25
26         Job job = new Job(conf);
27         FileInputFormat.addInputPath(job, new Path(in));
28         FileOutputFormat.setOutputPath(job, new Path(out));
29         job.setMapperClass(LogMapper.class);
30         job.setReducerClass(LogReducer.class);
31         job.setCombinerClass(LogReducer.class);
32         job.setNumReduceTasks(nreducer);
33         job.setMapOutputKeyClass(DateWritable.class);
34         job.setMapOutputValueClass(IntWritable.class);
35         job.setOutputKeyClass(DateWritable.class);
36         job.setOutputValueClass(IntWritable.class);
37         job.setJarByClass(Driver.class);
38         job.waitForCompletion(true);
39
40     }
41 }


command



result



内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: