Hadoop分析tomcat访问日志
2016-03-10 16:05
936 查看
今天照着《Hadoop实战》写了个小例子：解析Tomcat访问日志，统计各个浏览器的访问次数。
Java代码
package com.ice.stat;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class TomcatLog{
static class TomcatMapper extends Mapper<Object, Text, Text, IntWritable> {
private static final IntWritable one = new IntWritable(1);
private static Pattern pattern = Pattern.compile("([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),(.*)");
protected void map(Object key, Text value,
Context context) throws IOException ,InterruptedException {
String line = value.toString();
System.out.println(line);
Matcher m = pattern.matcher(line);
if(m.matches()){
String agent = m.group(9).toLowerCase();
if(agent.contains("chrome")){
agent = "chrome";
}else if(agent.contains("safari")){
agent = "safari";
}else if(agent.contains("firefox")){
agent = "firefox";
}else{
agent = "other";
}
Text t = new Text(agent);
context.write(t, one);
}
};
}
static class TomcatReducer extends Reducer<Text, IntWritable, Text, IntWritable>{
protected void reduce(Text key, java.lang.Iterable<IntWritable> value, org.apache.hadoop.mapreduce.Reducer<Text,IntWritable,Text,IntWritable>.Context context) throws IOException ,InterruptedException {
int count = 0;
for(IntWritable v : value){
count = count + v.get();
}
context.write(key, new IntWritable(count));
};
}
public static void main(String[] args) throws Exception {
if(args.length != 2){
System.err.println("参数个数不对");
System.exit(-1);
}
Job job = new Job();
job.setJarByClass(TomcatLog.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.setMapperClass(TomcatMapper.class);
job.setReducerClass(TomcatReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
日志样例:
Tomcat日志代码
172.16.2.12,-,-,[06/Sep/2011:10:03:13 +0800],GET /icestat/jpivot/toolbar/sort-asc-up.png HTTP/1.1,200,336,-,Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.2)
172.16.2.12,-,-,[06/Sep/2011:09:48:17 +0800],GET /icestat/ HTTP/1.1,200,171,http://10.65.11.241:8080/icestat/,Mozilla/5.0 (Windows NT 6.1) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.43 Safari/534.24
把日志放入hdfs
命令行代码
hadoop fs -put icestat_access_log.2011-09-06.txt icestat_access_log.2011-09-06.txt
分析日志
命令行代码
hadoop jar tomcatLog.jar icestat_access_log.2011-09-06.txt output6
查看输出
命令行代码
[root@xxx hadoop-0.20.2]# hadoop fs -cat output6/part-r-00000
11/09/06 00:18:54 WARN conf.Configuration: DEPRECATED: hadoop-site.xml found in the classpath. Usage of hadoop-site.xml is deprecated. Instead use core-site.xml, mapred-site.xml and hdfs-site.xml to override properties of core-default.xml, mapred-default.xml and hdfs-default.xml respectively
11/09/06 00:18:54 WARN fs.FileSystem: "xxx :9000" is a deprecated filesystem name. Use "hdfs://xxx :9000/" instead.
11/09/06 00:18:54 WARN fs.FileSystem: "xxx :9000" is a deprecated filesystem name. Use "hdfs://xxx :9000/" instead.
11/09/06 00:18:54 WARN fs.FileSystem: "xxx :9000" is a deprecated filesystem name. Use "hdfs://xxx :9000/" instead.
chrome 58
firefox 23
other 49
safari 5
http://dsbjoe.iteye.com/blog/1166698
Java代码
package com.ice.stat;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class TomcatLog{
static class TomcatMapper extends Mapper<Object, Text, Text, IntWritable> {
private static final IntWritable one = new IntWritable(1);
private static Pattern pattern = Pattern.compile("([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),([^,]*),(.*)");
protected void map(Object key, Text value,
Context context) throws IOException ,InterruptedException {
String line = value.toString();
System.out.println(line);
Matcher m = pattern.matcher(line);
if(m.matches()){
String agent = m.group(9).toLowerCase();
if(agent.contains("chrome")){
agent = "chrome";
}else if(agent.contains("safari")){
agent = "safari";
}else if(agent.contains("firefox")){
agent = "firefox";
}else{
agent = "other";
}
Text t = new Text(agent);
context.write(t, one);
}
};
}
static class TomcatReducer extends Reducer<Text, IntWritable, Text, IntWritable>{
protected void reduce(Text key, java.lang.Iterable<IntWritable> value, org.apache.hadoop.mapreduce.Reducer<Text,IntWritable,Text,IntWritable>.Context context) throws IOException ,InterruptedException {
int count = 0;
for(IntWritable v : value){
count = count + v.get();
}
context.write(key, new IntWritable(count));
};
}
public static void main(String[] args) throws Exception {
if(args.length != 2){
System.err.println("参数个数不对");
System.exit(-1);
}
Job job = new Job();
job.setJarByClass(TomcatLog.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
job.setMapperClass(TomcatMapper.class);
job.setReducerClass(TomcatReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
日志样例:
Tomcat日志代码
172.16.2.12,-,-,[06/Sep/2011:10:03:13 +0800],GET /icestat/jpivot/toolbar/sort-asc-up.png HTTP/1.1,200,336,-,Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.2)
172.16.2.12,-,-,[06/Sep/2011:09:48:17 +0800],GET /icestat/ HTTP/1.1,200,171,http://10.65.11.241:8080/icestat/,Mozilla/5.0 (Windows NT 6.1) AppleWebKit/534.24 (KHTML, like Gecko) Chrome/11.0.696.43 Safari/534.24
把日志放入hdfs
命令行代码
hadoop fs -put icestat_access_log.2011-09-06.txt icestat_access_log.2011-09-06.txt
分析日志
命令行代码
hadoop jar tomcatLog.jar icestat_access_log.2011-09-06.txt output6
查看输出
命令行代码
[root@xxx hadoop-0.20.2]# hadoop fs -cat output6/part-r-00000
11/09/06 00:18:54 WARN conf.Configuration: DEPRECATED: hadoop-site.xml found in the classpath. Usage of hadoop-site.xml is deprecated. Instead use core-site.xml, mapred-site.xml and hdfs-site.xml to override properties of core-default.xml, mapred-default.xml and hdfs-default.xml respectively
11/09/06 00:18:54 WARN fs.FileSystem: "xxx :9000" is a deprecated filesystem name. Use "hdfs://xxx :9000/" instead.
11/09/06 00:18:54 WARN fs.FileSystem: "xxx :9000" is a deprecated filesystem name. Use "hdfs://xxx :9000/" instead.
11/09/06 00:18:54 WARN fs.FileSystem: "xxx :9000" is a deprecated filesystem name. Use "hdfs://xxx :9000/" instead.
chrome 58
firefox 23
other 49
safari 5
http://dsbjoe.iteye.com/blog/1166698
相关文章推荐
- linux 安装jdk tomcat
- 并行类加载——让tomcat玩转双十一
- 图文解说:Nginx+tomcat配置集群负载均衡(转)
- 使用 CAS 在 Tomcat 中实现单点登录
- jvm 类加载器 和 tomcat类加载器
- Apache/Tomcat/JBOSS/Nginx区别
- Tomcat访问日志详细配置
- tomcat、servlet、JSP、JSTL版本支持对应表
- maven中tomcat7-maven-plugin插件的使用
- centos中apache-tomcat的配置
- Myeclipse中tomcat内存溢出的解决方法
- tomcat登录需要身份验证的问题
- Tomcat服务绑定域名的方法
- linux+tomcat下如何发布web程序
- jvm 内存溢出 在myeclipse中加大tomcat的jvm内存 java.lang.OutOfMemoryError: PermGen space
- javaweb学习(1):win10配置javaee开发环境eclipse+tomcat+mysql,web小项目
- tomcat的一些常用的优化技巧
- Weblogic和Tomcat对比
- tomcat启动过程报the JDBC Driver has been forcibly unregistered问题的修复过程
- 利用Tomcat内置的servlet实现文件下载功能