您的位置:首页 > 其它

R系列之一——逻辑回归

2016-03-18 19:57 375 查看
# ---- Load the address-model sample ----
# library() stops with an error when the package is missing, whereas
# require() only returns FALSE and lets the script fail later on.
library(data.table)

path <- 'D:/当前地址模型/addr_sample.txt'

# Read the tab-separated file once and let fread infer the column types.
# * fread() only accepts "unknown", "UTF-8" or "Latin-1" for `encoding`,
#   so the value must be spelled "UTF-8".
# * The same na.strings are applied for the whole load; the former second
#   read dropped 'NONE' while forcing the column classes that had been
#   inferred with 'NONE' treated as missing.
tb <- fread(
  path,
  sep = "\t",
  header = TRUE,
  na.strings = c("NA", "NULL", "NONE"),
  encoding = "UTF-8",
  verbose = TRUE
)

# Strip the literal table prefix from every column name (fixed = TRUE means
# the pattern is matched as plain text, not as a regular expression).
setnames(tb, gsub("curr_homeaddr_sample.", "", colnames(tb), fixed = TRUE))

# Drop the first column and keep all remaining ones. Negative indexing
# stays correct even if the table has a single column, unlike 2:length(x).
colname <- colnames(tb)[-1L]
tb <- tb[, colname, with = FALSE]

class(tb)

# ---- Full logistic regression and stepwise selection ----
# The formula uses bare column names that are resolved through `data`.
# Writing tb$col inside the formula bypasses `data`, produces coefficient
# names such as "tb$addr_cnt_woe" and makes predict(newdata = ...) ignore
# the new data.
full_formula <- homeaddr_flag ~ addr_cnt_woe + addr_wt_woe + addr_v_woe +
  ziying_work_cnt_woe + ziying_rest_cnt_woe +
  pop_work_cnt_woe + pop_rest_cnt_woe + addr_last_woe +
  addr_type_woe + addr_max_wt_woe + addr_max_v_woe +
  beyod_cnt_woe + spe_cate_cnt_woe

# glm() drops rows with a missing value in any model variable by default.
# Doing that once up front keeps the same rows for the full model while
# guaranteeing that every candidate model visited by step() is fitted on an
# identical set of rows; step() aborts when the row count changes.
model_data <- na.omit(
  as.data.frame(tb)[, all.vars(full_formula), drop = FALSE]
)

resultfull1 <- glm(full_formula, family = binomial, data = model_data)

# Backward elimination starting from the full model.
step_backward <- step(resultfull1, direction = "backward")
print(step_backward)

# Forward selection has to start from the intercept-only model and be given
# the full formula as its upper scope. Started from the full model without
# a scope there is nothing left to add, so the search does nothing.
null_model <- glm(homeaddr_flag ~ 1, family = binomial, data = model_data)
step_forward <- step(
  null_model,
  scope = list(lower = ~1, upper = full_formula),
  direction = "forward"
)
print(step_forward)

# ---- Reduced logistic regression ----
# Replace every missing value in the table with 0 before fitting, so that
# no row is dropped from the reduced model.
# NOTE(review): this treats a missing WOE value as WOE = 0 -- confirm that
# this is the intended imputation.
tb[is.na(tb)] <- 0

# Reduced model: the full predictor set without addr_wt_woe,
# ziying_work_cnt_woe and addr_max_wt_woe.
# Bare column names are resolved through `data`, so the coefficients get
# clean names and predict(result, newdata = ...) really uses the new data
# (with tb$col terms it would silently re-use the training columns).
result <- glm(
  homeaddr_flag ~ addr_cnt_woe + addr_v_woe +
    ziying_rest_cnt_woe +
    pop_work_cnt_woe + pop_rest_cnt_woe + addr_last_woe +
    addr_type_woe + addr_max_v_woe +
    beyod_cnt_woe + spe_cate_cnt_woe,
  family = binomial,
  data = tb
)

summary(result)

# ---- Model evaluation: ROC, AUC, lift and a 0.5-threshold classification ----
library(ROCR)

library(gplots)

# fitted() is the documented accessor for the in-sample probabilities;
# result$fit only worked through partial matching of `fitted.values`.
fitted_prob <- fitted(result)

pred <- prediction(fitted_prob, tb$homeaddr_flag)

# ROC curve: true positive rate against false positive rate.
perf <- performance(pred, "tpr", "fpr")
plot(perf)

# Area under the ROC curve. The performance object stores the number in
# its y.values slot, so extract and print it instead of leaving it unused.
x <- performance(pred, "auc")
auc_value <- x@y.values[[1]]
print(auc_value)

# Cross-tabulation of the fitted probabilities against the observed flag.
# NOTE(review): this has one row per distinct probability; a confusion
# matrix would be table(res$real, res$predict) -- confirm which was meant.
x1 <- table(fitted_prob, tb$homeaddr_flag)

# Lift chart: lift against the rate of positive predictions.
perf <- performance(pred, "lift", "rpp")
plot(perf)

# Predicted probabilities for the rows of tb. The value is stored under
# its own name so that it does not mask the predict() generic.
pred_prob <- predict(result, type = "response", newdata = tb)

real <- tb$homeaddr_flag

# Observed flag next to the predicted probability.
x2 <- data.frame(real, predict = pred_prob)

# Observed flag, predicted class at a 0.5 cut-off, and the probability.
# The third column is named explicitly; it is the name data.frame()
# generates anyway when two columns are both called `predict`.
res <- data.frame(
  real,
  predict = ifelse(pred_prob > 0.5, 1, 0),
  predict.1 = pred_prob
)
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: