Bagging算法在SAS中的实现
2015-10-13 21:14
399 查看
原文地址:《Bagging算法在SAS中的实现》 作者:文穗
%macro bagging(data = , y = , numx = , catx = , ntrees = 10);
/*-------------------------------------------------------------------------
  BAGGING (bootstrap aggregating, Leo Breiman 1996) of decision trees.

  PARAMETERS:
    DATA   : input SAS data table
    Y      : response variable with 0/1 value
    NUMX   : space-separated list of numeric attributes
    CATX   : space-separated list of categorical attributes
    NTREES : number of trees to grow in the bag (default 10)

  OUTPUTS:
    1. A SAS catalog named "TREEFILES" in the library _PATH holding one
       scoring source entry (TREE1, TREE2, ...) per tree.
    2. A printed listing of the KS statistic of the bagging classifier
       (TREE_ID = 0) and of every individual tree (TREE_ID = 1..NTREES).

  WORK DATA SETS CREATED:
    _TMP1 (input + _ID_), SAMPLEi, DB_SAMPLEi, _SCOREi, _TMP2 (stacked
    scores), _TMP3 (per-_ID_ averages), _KSTMP, _KS.

  REQUIREMENTS:
    PROC DMDB and PROC SPLIT are SAS Enterprise Miner procedures.

  CONTACT:
    WENSUI.LIU@53.COM, LOSS FORECASTING AND RISK MODELING
-------------------------------------------------------------------------*/

  options mprint mlogic nocenter nodate nonumber;

  /* master random seed; change it to obtain a different bag */
  %let seed = 20110613;

  /* assign a library to the working folder */
  libname _path '';

  /* derive one seed per tree from the master seed and store them in the
     macro variables RANDOM1 .. RANDOM<ntrees> */
  data _null_;
    do i = 1 to &ntrees;
      random = put(ranuni(&seed) * (10 ** 8), 8.);
      name   = compress("random" || put(i, 3.), ' ');
      call symput(name, random);
    end;
  run;

  /* clean up catalog files left in the library by a previous run */
  proc datasets library = _path nolist;
    delete TreeFiles tmp / memtype = catalog;
  run;
  quit;

  /* number of observations with a valid 0/1 response; used as the size
     of every bootstrap sample */
  proc sql noprint;
    select count(*) into :nobs from &data where &y in (1, 0);
  quit;

  /* keep only the modelling columns and add a row identifier so that the
     per-tree scores can be averaged by observation later on */
  data _tmp1 (keep = &y &numx &catx _id_);
    set &data;
    _id_ + 1;
  run;

  %do i = 1 %to &ntrees;
    %put &&random&i;

    /* bootstrap sample: unrestricted random sampling (with replacement);
       NumberHits holds how many times each row was drawn */
    proc surveyselect data = _tmp1 method = urs n = &nobs seed = &&random&i
      out = sample&i(rename = (NumberHits = _hits)) noprint;
    run;

    /* build the data mining database required by PROC SPLIT */
    proc dmdb data = sample&i out = db_sample&i dmdbcat = cl_sample&i;
      class &y &catx;
      var &numx;
      target &y;
      freq _hits;
    run;

    /* temporary catalog entry that receives the generated scoring code */
    filename out_tree catalog "_path.tmp.out_tree.source";

    /* decision tree mimicking CART: gini criterion, binary splits */
    proc split data = db_sample&i dmdbcat = cl_sample&i
      criterion  = gini
      assess     = impurity
      maxbranch  = 2
      splitsize  = 100
      subtree    = assessment
      exhaustive = 0
      nsurrs     = 0;
      code file = out_tree;
      input &numx / level = interval;
      input &catx / level = nominal;
      target &y / level = binary;
      freq _hits;
    run;

    /* copy the scoring code into the permanent TREEFILES catalog,
       dropping the first three lines of the generated file
       (NOTE(review): presumably header lines emitted by PROC SPLIT -
       confirm against an actual generated file) */
    filename in_tree catalog "_path.TreeFiles.tree&i..source";

    data _null_;
      infile out_tree;
      input;
      file in_tree;
      if _n_ > 3 then put _infile_;
    run;

    /* score the original (non-resampled) data with this tree */
    data _score&i (keep = p_&y.1 p_&y.0 &y _id_);
      set _tmp1;
      %include in_tree;
    run;

    /* KS statistic of this tree; the printed output is routed away from
       the listing while the ODS table is captured
       (NOTE(review): LST_OUT is not assigned as a fileref in this macro -
       confirm how the host resolves it) */
    proc printto new print = lst_out;
    run;

    ods output kolsmir2stats = _kstmp(where = (label1 = 'KS'));

    proc npar1way wilcoxon edf data = _score&i;
      class &y.;
      var p_&y.1;
    run;

    proc printto;
    run;

    /* first tree creates the accumulators, later trees append to them */
    %if &i = 1 %then %do;
      data _tmp2;
        set _score&i;
      run;

      data _ks;
        set _kstmp (keep = nvalue2);
        tree_id = &i;
        seed    = &&random&i;
        ks      = round(nvalue2 * 100, 0.0001);
      run;
    %end;
    %else %do;
      data _tmp2;
        set _tmp2 _score&i;
      run;

      data _ks;
        set _ks _kstmp(in = a keep = nvalue2);
        if a then do;
          tree_id = &i;
          seed    = &&random&i;
          ks      = round(nvalue2 * 100, 0.0001);
        end;
      run;
    %end;
  %end;

  /* aggregate predictions from all trees in the bag: average the
     predicted probabilities (and the response) per observation */
  proc summary data = _tmp2 nway;
    class _id_;
    output out = _tmp3(drop = _type_ rename = (_freq_ = freq))
      mean(p_&y.1) = mean(p_&y.0) = mean(&y) = ;
  run;

  /* KS statistic of the bagging classifier, stored with TREE_ID = 0 */
  proc printto new print = lst_out;
  run;

  ods output kolsmir2stats = _kstmp(where = (label1 = 'KS'));

  proc npar1way wilcoxon edf data = _tmp3;
    class &y;
    var p_&y.1;
  run;

  proc printto;
  run;

  data _ks;
    set _ks _kstmp (in = a keep = nvalue2);
    if a then do;
      tree_id = 0;
      seed    = &seed;
      ks      = round(nvalue2 * 100, 0.0001);
    end;
  run;

  proc sort data = _ks;
    by tree_id;
  run;

  proc sql noprint;
    select max(ks) into :max_ks from _ks where tree_id > 0;
    select min(ks) into :min_ks from _ks where tree_id > 0;
    select ks into :bag_ks from _ks where tree_id = 0;
  quit;

  /* summarize the performance of the bagging classifier and of each
     tree in the bag */
  title "MAX KS = &max_ks, MIN KS = &min_ks, BAGGING KS = &bag_ks";

  proc print data = _ks noobs;
    var tree_id seed ks;
  run;

  title;

  /* remove the temporary catalog; TREEFILES is kept */
  proc datasets library = _path nolist;
    delete tmp / memtype = catalog;
  run;
  quit;
%mend bagging;

/* example call: numeric and categorical predictor lists, input library */
%let x1 = tot_derog tot_tr age_oldest_tr tot_open_tr tot_rev_tr tot_rev_debt
          tot_rev_line rev_util bureau_score ltv tot_income;

%let x2 = purpose;

/* NOTE(review): the path separators were lost in the published copy;
   D:\SAS_CODE\bagging is the reconstructed location - adjust as needed */
libname data 'D:\SAS_CODE\bagging';

%bagging(data = data.accepts, y = bad, numx = &x1, catx = &x2, ntrees = 10);
相关文章推荐
- 第6章深入使用Hibernate 6.2 继承映射
- 泡泡带你看java script
- iOS数据存取
- 不一样的ViewPager
- oracle安装完初次连接问题
- Mybatis基础框架学习(五)
- Spring容器的工具类
- 【PAT】1088. Rational Arithmetic (20)
- 1002. A+B for Polynomials (25)
- Javascript中的几种继承方式
- POI读写excel实例 (1)
- 解决:子元素设置margin-top,父元素也受影响的问题
- SetThreadAffinityMask的用法
- 【队列项目2 - 建立链队算法库——第7周】
- hdu 3727 Jewel(主席树学习第四弹)
- Swift 播放视频
- python中的pandas包的数据清洗能力
- Cpp--字符串快速查找运用
- 【bzoj3174】【TJOI2013】【拯救小矮人】【贪心+dp】
- ZOJ 1136 Multiply