R系列之一——逻辑回归
2016-03-18 19:57
375 查看
# Logistic regression for the home-address flag.
#
# Pipeline: load the tab-separated sample, clean column names, impute missing
# values, run stepwise selection on the full model, fit the final model, then
# evaluate it (ROC, AUC, lift, confusion matrix, per-row predictions).
#
# The predictors are the columns suffixed `_woe` (presumably weight-of-evidence
# encodings -- confirm against the data dictionary).
#
# The whole pipeline used to be pasted twice in a row, verbatim; it now runs
# once, which leaves the same objects in the workspace.

library(data.table)
library(ROCR)
library(gplots)

# ---- Load data ----
path <- 'D:/当前地址模型/addr_sample.txt'

# Single read: "NA", "NULL" and "NONE" are all treated as missing so that
# numeric columns are inferred as numeric. `encoding` must be spelled "UTF-8"
# (fread accepts only "unknown", "UTF-8" or "Latin-1").
tb <- fread(
  path,
  sep = "\t",
  verbose = TRUE,
  na.strings = c("NA", "NULL", "NONE"),
  header = TRUE,
  encoding = "UTF-8"
)

# Strip the table-name prefix from the column names.
setnames(tb, gsub("curr_homeaddr_sample.", "", colnames(tb), fixed = TRUE))

# Drop the first column. Negative indexing stays correct even when the table
# has a single column (2:length(x) would count backwards in that case).
colname <- colnames(tb)[-1L]
tb <- tb[, colname, with = FALSE]
class(tb)

# ---- Missing values ----
# Impute before any model is fitted: step() fails when dropping a term changes
# the number of complete rows, and variable selection must see the same data
# as the final fit.
tb[is.na(tb)] <- 0

# ---- Full model and stepwise selection ----
# Bare column names (not tb$col) so that step() and predict(newdata = ...)
# resolve variables through `data`.
resultfull1 <- glm(
  homeaddr_flag ~ addr_cnt_woe + addr_wt_woe + addr_v_woe +
    ziying_work_cnt_woe + ziying_rest_cnt_woe +
    pop_work_cnt_woe + pop_rest_cnt_woe + addr_last_woe +
    addr_type_woe + addr_max_wt_woe + addr_max_v_woe +
    beyod_cnt_woe + spe_cate_cnt_woe,
  family = binomial,
  data = tb
)

# Backward elimination starts from the full model.
step(resultfull1, direction = "backward")

# Forward selection has to start from the intercept-only model and be told
# which terms it may add; starting from the full model adds nothing.
null_model <- glm(homeaddr_flag ~ 1, family = binomial, data = tb)
step(
  null_model,
  scope = list(lower = ~ 1, upper = formula(resultfull1)),
  direction = "forward"
)

# ---- Final model ----
# Hard-coded reduced variable set (addr_wt_woe, ziying_work_cnt_woe and
# addr_max_wt_woe are left out).
result <- glm(
  homeaddr_flag ~ addr_cnt_woe + addr_v_woe +
    ziying_rest_cnt_woe +
    pop_work_cnt_woe + pop_rest_cnt_woe + addr_last_woe +
    addr_type_woe + addr_max_v_woe +
    beyod_cnt_woe + spe_cate_cnt_woe,
  family = binomial,
  data = tb
)
summary(result)

# ---- Evaluation ----
real <- tb$homeaddr_flag

# Predicted probabilities on the training sample. Named `pred_prob` so the
# vector does not shadow stats::predict().
pred_prob <- predict(result, type = "response", newdata = tb)

pred <- prediction(fitted(result), real)

# ROC curve.
perf <- performance(pred, "tpr", "fpr")
plot(perf)

# Area under the ROC curve (an ROCR performance object).
x <- performance(pred, "auc")

# Confusion matrix at the 0.5 cut-off. Tabulating the raw probabilities would
# give one row per distinct fitted value.
x1 <- table(predicted = ifelse(pred_prob > 0.5, 1, 0), actual = real)

# Lift chart against the rate of positive predictions.
perf <- performance(pred, "lift", "rpp")
plot(perf)

# Per-row results: actual flag with probability, and with the 0.5-threshold
# class. The probability column in `res` is named `prob` to avoid two columns
# both called `predict`.
x2 <- data.frame(real, predict = pred_prob)
res <- data.frame(
  real,
  predict = ifelse(pred_prob > 0.5, 1, 0),
  prob = pred_prob
)
相关文章推荐
- 深入理解Java回收机制
- linux 从一台机器复制文件到另一台linux机器上去
- 【机房重构】——报表
- iOSDay10之OC类和对象
- IT十八掌作业_java基础第十天_集合
- [数学] BZOJ3142 [Hnoi2013]数列
- bzoj3620 似乎在梦中见过的样子 kmp暴力
- RHEL 7.0 yum源
- 89.数字三角形
- Codeforces Beta Round #4 (Div. 2 Only) D. Mysterious Present 记忆化搜索
- winform下的简易播放器
- IOS控件学习之UI ScrollView(7)
- 浅谈ios异步加载
- 【初级C语言】简单的程序设计案例
- GRID-颜色移上去的变化
- JAVA GUI界面组件学习
- poj2438 哈密顿图
- CentOS7.2共享文件夹_安装配置SAMBA服务器
- Eclipse 乱码 解决方案总结(UTF8 -- GBK)
- Improved SLIC 算法学习笔记