ggplot2画图工作中的代码总结
2015-10-13 21:39
519 查看
工作中经常要用ggplot2画各种图形,下面是这几个月用ggplot2画图时积累的一些代码。最常碰到的需求是:统计各个类别下的频数,在柱状图的每个柱子上显示百分比,让各类别对应的坐标轴文字倾斜,以及调整各图的颜色等等。下面就是ggplot2包中针对这些技巧的代码,数据用的是ggplot2包自带的mpg数据集,以字段class(汽车的类型)为例,具体代码如下:
library(ggplot2)
library(reshape)

# ---- Bar charts of car-class frequencies (ggplot2::mpg) ----

# One row per car class with its frequency.
test <- data.frame(table(mpg$class))
names(test) <- c("cars", "freq")

# Keep each percentage label in the same row as its class so geom_text()
# pairs every label with the right bar. A separately sorted label vector is
# matched to the rows by position and ends up on the wrong bars.
test$pct <- paste0(round(test$freq / sum(test$freq) * 100, 2), "%")

# reorder() sorts the bars by frequency, fill sets the bar colour, width sets
# the bar width, and geom_text() draws the percentage above each bar.
p <- ggplot(test, aes(x = reorder(cars, freq), y = freq)) +
  geom_bar(stat = "identity", fill = "green", width = 0.5) +
  geom_text(aes(label = pct), colour = "blue", vjust = -1)

# Tilt the x-axis tick labels by 30 degrees and colour them red.
# (No fill aesthetic is mapped, so a manual fill scale would have no effect.)
p + theme(axis.text.x = element_text(angle = 30, colour = "red"))

# coord_flip() swaps the horizontal and vertical axes.
p +
  labs(
    x = "车型号",
    y = "频率",
    title = "生产、各流通级别、终端流通量和纯销比\n"
  ) +
  coord_flip()

# Nightingale rose chart: the same bars drawn in polar coordinates.
ggplot(test, aes(cars, freq)) +
  geom_bar(stat = "identity", fill = "green") +
  coord_polar() +
  theme(legend.position = "none")

# ---- Crowdfunding user profile: daily UV / PV trends ----

library(RMySQL)
library(ggplot2)
library(reshape)

# File housekeeping (kept for reference)
# list.files(pattern=("\\.pdf$"))
# file.remove( list.files(pattern=("\\.pdf$")))
# file.exists("uv走势图total.pdf")

# Credentials come from the environment instead of being committed with the
# script; set PMS_DB_USER and PMS_DB_PASSWORD before running.
db_user <- Sys.getenv("PMS_DB_USER")
db_password <- Sys.getenv("PMS_DB_PASSWORD")
stopifnot(nzchar(db_user), nzchar(db_password))

conn <- dbConnect(
  MySQL(),
  dbname = "pms",
  username = db_user,
  password = db_password,
  host = "10.10.109.62",
  port = 1333
)

# The same inner SELECT over tracker.hbase_visit feeds all three branches of
# the UNION ('pc' rows, 'm' rows, and every row relabelled as site 't'), so it
# is written once and pasted in. The assembled text is identical to the
# original single-string query.
visit_rows <- paste0(
  "SELECT date_format(left(insert_time,8),'%Y-%m-%d') as insert_date,site, ",
  "label, concat(label,sessionid,visit_time) as pindex ",
  "FROM tracker.hbase_visit where page_url not like '%zhongchouban%' ",
  "and insert_time is NOT NULL"
)
uv_pv_counts <-
  "count(distinct(label)) as UV,count(distinct(pindex)) as PV from( "

sql <- paste0(
  "select * from ((select insert_date,site,", uv_pv_counts, visit_rows,
  " and site ='pc') pc group by insert_date,site order by insert_date,site ) ",
  "union (select insert_date,site,", uv_pv_counts, visit_rows,
  " and site ='m' ) m group by insert_date,site order by insert_date,site) ",
  "union ( select insert_date,'t' as \nsite,", uv_pv_counts, visit_rows,
  " ) t group by insert_date order by insert_date)) c ",
  "order by insert_date , site desc;"
)

# dbGetQuery() sends the query, fetches every row and clears the result set;
# the finally clause closes the connection even when the query fails.
result <- tryCatch(
  dbGetQuery(conn, sql),
  finally = dbDisconnect(conn)
)
head(result)

# Daily UV, one facet row per site.
# pdf("D:\\Users\\zhoumeixu204\\Desktop\\uv走势图.pdf",width=23,height=10,family="GB1",onefile=FALSE)
p_uv <- ggplot(result, aes(x = insert_date, y = UV, color = site)) +
  geom_point(alpha = 1) +
  facet_grid(site ~ .) +
  geom_line(aes(group = site))
p_uv +
  xlab("日期") +
  ylab("uv量") +
  ggtitle("8月18至9月20日每日uv走势图") +
  theme(panel.background = element_rect(fill = "transparent", color = "gray")) +
  scale_colour_hue("渠道", labels = c("m站", "pc", "全部")) +
  theme(axis.text.x = element_text(angle = 30, color = "black"))
# dev.off()

# Daily UV, all sites in a single panel.
# pdf("D:\\Users\\zhoumeixu204\\Desktop\\uv走势图total.pdf",width=23,height=10,family="GB1",onefile=FALSE)
p_uv <- ggplot(result, aes(x = insert_date, y = UV, color = site)) +
  geom_point(alpha = 1) +
  geom_line(aes(group = site))
p_uv +
  xlab("日期") +
  ylab("uv量") +
  ggtitle("8月18至9月20日每日uv走势图") +
  theme(panel.background = element_rect(fill = "transparent", color = "gray")) +
  scale_colour_hue("渠道", labels = c("m站", "pc", "全部")) +
  theme(axis.text.x = element_text(angle = 30))
# dev.off()

# Daily PV, one facet row per site.
# pdf("D:\\Users\\zhoumeixu204\\Desktop\\pv走势图.pdf",width=23,height=10,family="GB1",onefile=FALSE)
p_pv <- ggplot(result, aes(x = insert_date, y = PV, color = site)) +
  geom_point(alpha = 1) +
  facet_grid(site ~ .) +
  geom_line(aes(group = site))
p_pv +
  xlab("日期") +
  ylab("pv量") +
  ggtitle("8月18至9月20日每日pv走势图") +
  theme(panel.background = element_rect(fill = "transparent", color = "gray")) +
  scale_colour_hue("渠道", labels = c("m站", "pc", "全部")) +
  theme(axis.text.x = element_text(angle = 30))
# dev.off()

# Daily PV, all sites in a single panel (the plot itself follows).
# pdf("D:\\Users\\zhoumeixu204\\Desktop\\pv走势图total.pdf",width=23,height=10,family="GB1",onefile=FALSE)
p_pv <- ggplot(result, aes(x = insert_date, y = PV, color = site)) +
  geom_point(alpha = 1) +
  geom_line(aes(group = site))
p_pv +
  xlab("日期") +
  ylab("pv量") +
  ggtitle("8月18至9月20日每日pv走势图") +
  theme(panel.background = element_rect(fill = "transparent", color = "gray")) +
  scale_colour_hue("渠道", labels = c("m站", "pc", "全部")) +
  theme(axis.text.x = element_text(angle = 30))

# ---- Site PV / UV trend analysis ----

library(RMySQL)
library(ggplot2)
library(reshape)

# File housekeeping (kept for reference)
# list.files(pattern=("\\.pdf$"))
# file.remove( list.files(pattern=("\\.pdf$")))
# file.exists("uv走势图total.pdf")

# Credentials come from the environment instead of being committed with the
# script; set PMS_DB_USER and PMS_DB_PASSWORD before running.
trend_db_user <- Sys.getenv("PMS_DB_USER")
trend_db_password <- Sys.getenv("PMS_DB_PASSWORD")
stopifnot(nzchar(trend_db_user), nzchar(trend_db_password))

conn <- dbConnect(
  MySQL(),
  dbname = "pms",
  username = trend_db_user,
  password = trend_db_password,
  host = "10.10.109.62",
  port = 1333
)

# Daily distinct-label (UV) and distinct-page-view (PV) counts for site 'pc',
# site 'm', and all rows together relabelled as site 't'. The three UNION
# branches share one inner SELECT, so it is written once and pasted in; the
# assembled text is identical to the original single-string query.
trend_visit_rows <- paste0(
  "SELECT date_format(left(insert_time,8),'%Y-%m-%d') as insert_date,site, ",
  "label, concat(label,sessionid,visit_time) as pindex ",
  "FROM tracker.hbase_visit where page_url not like '%zhongchouban%' ",
  "and insert_time is NOT NULL"
)
trend_counts <-
  "count(distinct(label)) as UV,count(distinct(pindex)) as PV from( "

trend_sql <- paste0(
  "select * from ((select insert_date,site,", trend_counts, trend_visit_rows,
  " and site ='pc') pc group by insert_date,site order by insert_date,site ) ",
  "union (select insert_date,site,", trend_counts, trend_visit_rows,
  " and site ='m' ) m group by insert_date,site order by insert_date,site) ",
  "union ( select insert_date,'t' as site,", trend_counts, trend_visit_rows,
  " ) t group by insert_date order by insert_date)) c ",
  "order by insert_date , site desc;"
)

# dbGetQuery() sends the query, fetches every row and clears the result set;
# the finally clause closes the connection even when the query fails.
result <- tryCatch(
  dbGetQuery(conn, trend_sql),
  finally = dbDisconnect(conn)
)
head(result)

# Daily UV, one facet row per site.
# pdf("D:\\Users\\zhoumeixu204\\Desktop\\uv走势图.pdf",width=23,height=10,family="GB1",onefile=FALSE)
p_uv <- ggplot(result, aes(x = insert_date, y = UV, color = site)) +
  geom_point(alpha = 1) +
  facet_grid(site ~ .) +
  geom_line(aes(group = site))
p_uv +
  xlab("日期") +
  ylab("uv量") +
  ggtitle("8月18至9月20日每日uv走势图") +
  theme(panel.background = element_rect(fill = "transparent", color = "gray")) +
  scale_colour_hue("渠道", labels = c("m站", "pc", "全部")) +
  theme(axis.text.x = element_text(angle = 30, color = "black"))
# dev.off()

# Daily UV, all sites in a single panel.
# pdf("D:\\Users\\zhoumeixu204\\Desktop\\uv走势图total.pdf",width=23,height=10,family="GB1",onefile=FALSE)
p_uv <- ggplot(result, aes(x = insert_date, y = UV, color = site)) +
  geom_point(alpha = 1) +
  geom_line(aes(group = site))
p_uv +
  xlab("日期") +
  ylab("uv量") +
  ggtitle("8月18至9月20日每日uv走势图") +
  theme(panel.background = element_rect(fill = "transparent", color = "gray")) +
  scale_colour_hue("渠道", labels = c("m站", "pc", "全部")) +
  theme(axis.text.x = element_text(angle = 30))
# dev.off()

# Daily PV, one facet row per site.
# pdf("D:\\Users\\zhoumeixu204\\Desktop\\pv走势图.pdf",width=23,height=10,family="GB1",onefile=FALSE)
p_pv <- ggplot(result, aes(x = insert_date, y = PV, color = site)) +
  geom_point(alpha = 1) +
  facet_grid(site ~ .) +
  geom_line(aes(group = site))
p_pv +
  xlab("日期") +
  ylab("pv量") +
  ggtitle("8月18至9月20日每日pv走势图") +
  theme(panel.background = element_rect(fill = "transparent", color = "gray")) +
  scale_colour_hue("渠道", labels = c("m站", "pc", "全部")) +
  theme(axis.text.x = element_text(angle = 30))
# dev.off()

# Daily PV, all sites in a single panel.
# pdf("D:\\Users\\zhoumeixu204\\Desktop\\pv走势图total.pdf",width=23,height=10,family="GB1",onefile=FALSE)
p_pv <- ggplot(result, aes(x = insert_date, y = PV, color = site)) +
  geom_point(alpha = 1) +
  geom_line(aes(group = site))
p_pv +
  xlab("日期") +
  ylab("pv量") +
  ggtitle("8月18至9月20日每日pv走势图") +
  theme(panel.background = element_rect(fill = "transparent", color = "gray")) +
  scale_colour_hue("渠道", labels = c("m站", "pc", "全部")) +
  theme(axis.text.x = element_text(angle = 30))

# ---- ggplot figures collected from the book "Machine Learning for Hackers" ----

library(reshape)
library(ggplot2)

# TRUE/FALSE and the full argument name `header` are spelled out: T and F can
# be reassigned, and `head =` only works through partial argument matching.
data.file <- read.csv(
  "D:\\ML_for_Hackers-master\\02-Exploration\\data\\01_heights_weights_genders.csv",
  stringsAsFactors = FALSE,
  sep = ",",
  header = TRUE
)
# ---- Summaries of the heights / weights data ----

heights <- with(data.file, Height)

# Mean height per gender, then the mean of columns 2 and 3 per gender
# (presumably Height and Weight; confirm against the CSV header).
tapply(data.file$Height, data.file$Gender, mean)
aggregate(data.file[, 2:3], list(data.file$Gender), mean)

# Long format, then per-gender means with a grand-total row: first for every
# measured variable, then restricted to Height.
data.file.melt <- melt(data.file, id = "Gender")
result <- cast(
  Gender ~ variable,
  data = data.file.melt,
  mean,
  margins = "grand_row"
)
result <- cast(
  Gender ~ variable,
  data = data.file.melt,
  mean,
  subset = variable %in% c("Height"),
  margins = "grand_row"
)

# Row count per gender. The original called plyr::ddply(), but plyr is never
# attached in this script; aggregate() yields the same Gender / V1 table.
result <- aggregate(
  list(V1 = data.file$Gender),
  list(Gender = data.file$Gender),
  length
)

quantile(heights, probs = seq(0, 1, 0.2))
quantile(data.file$Height, probs = seq(0, 1, 0.2))
quantile(data.file$Height, probs = c(0.975, 0.225))

# ---- Histograms and scatter plots ----

ggplot(data.file, aes(x = Height)) + geom_histogram(binwidth = 1)
ggplot(data.file, aes(x = Height)) + geom_histogram(binwidth = 5)

# Scatter plot coloured by gender, one facet row per gender. Default points
# have no fill, so the colour aesthetic is used.
ggplot(data.file, aes(x = Height, y = Weight, color = Gender)) +
  geom_point() +
  facet_grid(Gender ~ .)

# Scatter plot of the first 800 rows with a smoothed trend line.
ggplot(data.file[1:800, ], aes(x = Height, y = Weight)) +
  geom_point() +
  geom_smooth()

# Alpha is a fixed setting, so it goes outside aes(). A bare `alpha` inside
# aes() is taken as a positional aesthetic and fails.
ggplot(data.file, aes(x = Height, y = Weight)) +
  geom_point(aes(color = Gender), alpha = 0.25) +
  geom_smooth()

# Point colour varies with Gender.
ggplot(data.file, aes(x = Height, y = Weight, color = factor(Gender))) +
  geom_point()

# ---- Logistic regression decision boundary ----

heights.weights <- transform(data.file, Male = ifelse(Gender == "Male", 1, 0))
logit.model <- glm(
  Male ~ Weight + Height,
  data = heights.weights,
  family = binomial(link = "logit")
)

# The boundary is where b0 + b1 * Weight + b2 * Height = 0, i.e.
# Weight = -b0 / b1 - (b2 / b1) * Height. geom_abline() replaces
# stat_abline(), which no longer exists in ggplot2.
ggplot(data.file, aes(x = Height, y = Weight)) +
  geom_point(aes(color = Gender), alpha = 0.25) +
  scale_color_manual(values = c("Male" = "black", "Female" = "gray")) +
  theme_bw() +
  geom_abline(
    intercept = -coef(logit.model)[1] / coef(logit.model)[2],
    slope = -coef(logit.model)[3] / coef(logit.model)[2],
    color = "black"
  )

# ---- Scatter plots on mpg ----

head(mpg)

# Colour by class, size by displacement, with an explicit loess smoother.
p <- ggplot(mpg, aes(x = cty, y = hwy)) +
  geom_point(aes(color = class, size = displ)) +
  stat_smooth(method = "loess")

# Add a title and axis labels.
p + labs(x = "城市", y = "耗油量", title = "汽车油耗与型号")

# One panel per model year, laid out in two columns. labs() takes x = / y =;
# xlab = / ylab = do not set the axis labels.
p +
  facet_wrap(~year, ncol = 2) +
  labs(x = "城市", y = "耗油量", title = "汽车油耗与型号")

# ---- Bar charts ----

# Order the class factor by how often each class occurs.
class2 <- mpg$class
class2 <- reorder(class2, class2, length)
mpg$class2 <- class2
# One bar per class; fill gives every class its own colour.
ggplot(mpg, aes(x = class2)) + geom_bar(aes(fill = class2))

# Order the classes by frequency so the bars are drawn from low to high.
class2 <- mpg$class
class2 <- reorder(class2, class2, length)

# Split each bar by model year, stacked vertically.
ggplot(mpg, aes(class2, fill = factor(year))) + geom_bar(position = "stack")

# Split each bar by model year, placed side by side.
ggplot(mpg, aes(class2, fill = factor(year))) + geom_bar(position = "dodge")

# NOTE(review): the original comment says this one is shown in two panels,
# but the code repeats the stacked chart; facet_wrap(~year) may have been
# intended. Left unchanged pending confirmation.
ggplot(mpg, aes(class2, fill = factor(year))) + geom_bar(position = "stack")

# ---- Pie chart ----

# A single stacked bar bent into polar coordinates.
ggplot(mpg, aes(x = factor(1), fill = factor(class))) +
  geom_bar(width = 1) +
  coord_polar(theta = "y")

# ---- Box plot ----

ggplot(mpg, aes(class, hwy, fill = class)) + geom_boxplot()

# ---- Scatter plot with the points joined by a line ----

# NOTE(review): guss.accuracy is not defined anywhere in this script; it has
# to exist in the session before this line is run.
ggplot(guss.accuracy, aes(x = Guss, y = Error)) + geom_point() + geom_line()

# ggsave(plot = , filename = ): plot is the plot object to save and filename
# is the path it is written to.

# ---- Training vs. test RMSE for polynomial fits, in one chart ----

set.seed(1)
x <- seq(0, 1, by = 0.01)
y <- sin(2 * pi * x) + rnorm(length(x), 0, 1)
n <- length(x)

# Random half of the observations for training, the rest for testing.
indices <- sort(sample(seq_len(n), round(0.5 * n)))
training.x <- x[indices]
training.y <- y[indices]
test.x <- x[-indices]
test.y <- y[-indices]
training.df <- data.frame(X = training.x, Y = training.y)
test.df <- data.frame(X = test.x, Y = test.y)

# Root-mean-square error between observations y and predictions h.
rmse <- function(y, h) {
  sqrt(mean((y - h)^2))
}

# For each polynomial degree 1..12, fit on the training half and record the
# RMSE on both halves. The rows are built per degree and bound once instead
# of growing a data frame with rbind() inside a loop; the row order
# (Training, Test for each degree) is unchanged.
performance <- do.call(rbind, lapply(1:12, function(d) {
  poly.fit <- lm(Y ~ poly(X, degree = d), data = training.df)
  data.frame(
    Degree = d,
    Data = c("Training", "Test"),
    RMSE = c(
      rmse(training.y, predict(poly.fit)),
      rmse(test.y, predict(poly.fit, newdata = test.df))
    )
  )
}))

ggplot(performance, aes(x = Degree, y = RMSE, linetype = Data)) +
  geom_point(aes(color = Data)) +
  geom_line()

# ---- One line per security in the same chart ----

# Fixes to the original call: `aes = (...)` was an assignment rather than a
# call to aes(); `colours` is not an aesthetic (it is `colour`); and seconds
# are parsed with %S, since %s is output-only in R.
# NOTE(review): bidpx1 is not defined in this script. The column was spelled
# `tradaetime` in the original; `tradetime` is assumed from the axis label,
# so confirm it against the data.
g <- ggplot(
  data = bidpx1,
  aes(x = as.POSIXct(tradetime, format = "%Y%m%d%H%M%S"), y = bidpx1)
) +
  geom_line(aes(group = securityid, colour = securityid)) +
  xlab("tradetime") +
  ylab("bidpx1")

# p=ggplot(data.frame(x,y),aes(x,y,label = colnames(watervoles)))
# p+geom_point(shape=16,size=3,colour='red')+
# geom_text(hjust=-0.1,vjust=0.5,alpha=0.5)
相关文章推荐
- 第116讲:Hadoop集群之安装Java、创建Hadoop用户、配置SSH等实战学习笔记
- Python装饰器
- python中的支持向量机实现
- 使用 Python 在 Caché 和 Sql Server 之间同步数据
- .NET环境下的DPAPI加密编程
- python中实现logistics回归
- 反射(java基础)
- javaSE 学习笔记
- python中朴素贝叶斯代码的实现
- Django Singal
- jdk配置
- java23中设计模式之工厂模式
- [JAVA基础]JAVA中为什么引入范型?
- python中sklearn机器学习实现的博客
- Python 3 读取不同编码的文本文件
- 【进阶——最小费用最大流】hdu 1533 Going Home (费用流)Pacific Northwest 2004
- Python 3 读取不同编码的文本文件
- java将文本内容读入到List中
- 利用python手动写最小二乘估计
- C++ string简介及常见用法