/*
 * Jackknife (leave-one-out) resampling with logistic regression.
 *
 * Warning: this routine runs one regression per observation in the input
 * dataset, so the run time grows with the number of observations.
 */

/* Example input: one observation per line, read with list input. */
data mydata;
    input BAD VAR1 VAR2 VAR3 GOOD;
    cards;
1 20 67 1 0
1 20 67 1 0
1 20 67 1 0
1 20 66 1 0
1 20 66 1 0
1 20 66 1 0
1 30 77 1 0
1 30 77 1 0
1 30 77 1 0
0 30 76 2 1
1 30 76 1 0
1 30 76 1 0
1 30 76 1 0
1 10 49 2 0
1 10 49 2 0
1 10 49 2 0
1 10 49 2 0
1 20 59 2 0
1 20 49 2 0
1 20 49 2 0
1 20 49 2 0
1 10 33 4 0
1 30 59 2 0
1 30 59 2 0
1 30 59 2 0
1 30 59 2 0
0 20 45 5 1
0 20 45 5 1
0 20 45 5 1
0 10 30 3 1
0 10 30 3 1
0 10 30 3 1
0 10 30 3 1
0 10 30 3 1
0 10 30 3 1
0 30 56 2 1
0 30 56 2 1
0 30 56 2 1
1 30 55 2 0
1 30 55 2 0
1 30 55 2 0
1 10 28 2 0
0 10 24 5 1
0 10 24 5 1
0 10 24 5 1
0 10 23 4 1
0 30 49 2 1
0 30 49 2 1
0 30 49 2 1
0 30 49 2 1
0 20 33 4 1
0 20 33 4 1
0 20 33 4 1
0 20 32 5 1
0 20 32 5 1
0 20 32 5 1
0 30 45 5 1
0 30 45 2 1
0 30 45 2 1
0 30 45 2 1
0 20 29 3 1
0 20 29 3 1
0 20 29 3 1
0 20 29 3 1
0 20 29 3 1
0 20 29 3 1
0 20 28 3 1
0 20 28 3 1
0 20 28 3 1
0 20 26 2 1
0 20 26 2 1
0 20 26 2 1
0 20 24 3 1
0 20 24 3 1
0 20 24 3 1
0 20 23 4 1
0 20 23 4 1
0 20 23 4 1
0 20 22 2 1
0 20 21 3 1
0 30 31 2 1
0 30 31 2 1
0 30 31 2 1
0 30 29 3 1
0 30 29 3 1
0 30 29 3 1
0 30 25 4 1
0 30 25 4 1
0 30 25 4 1
0 30 24 3 1
0 30 23 4 1
0 30 23 4 1
0 30 23 4 1
0 30 22 3 1
1 30 22 1 0
0 30 22 3 1
1 30 22 1 0
0 30 22 3 1
1 30 22 1 0
0 30 21 4 1
;
run;

/*
 * jackknife(DATASET)
 *
 * For every observation i of DATASET:
 *   1. splits DATASET into HOLD_1_OBS (observation i) and JACKKNIFE (the rest);
 *   2. fits BAD = VAR1 VAR2 VAR3 with PROC LOGISTIC on JACKKNIFE and stores the
 *      estimates in MYCOEFF;
 *   3. appends MYCOEFF to PARMS;
 *   4. calls the score macro, which writes the scoring code for this fit to
 *      c:\hardcode_xc8f4.txt as a macro named myhardcode;
 *   5. includes that file, runs myhardcode against HOLD_1_OBS, and appends the
 *      scored observation to SCORE_ALL.
 *
 * Parameter:
 *   DATASET - input dataset; it must contain BAD, VAR1, VAR2 and VAR3.
 *
 * Output datasets: PARMS (one set of estimates per fit) and SCORE_ALL (one
 * scored hold-out observation per fit). Both are rebuilt on every call.
 */
%macro jackknife(DATASET);
    /* Keep the loop counter and the observation count out of outer scopes. */
    %local i totobs;

    /* Default of 0 means an empty input simply skips the loop. */
    %let totobs = 0;

    /* PROC APPEND only ever adds rows, so results left over from an earlier
       call in the same session must be removed first. NOWARN keeps the step
       quiet when the datasets do not exist yet. */
    proc datasets nolist nowarn;
        delete parms score_all;
    quit;

    /* Count the observations. SYMPUTX stores the number without padding and
       without an implicit numeric-to-character conversion. */
    data _null_;
        set &dataset end=last;
        if last then call symputx('totobs', _n_);
    run;

    %do i = 1 %to &totobs;
        /* Leave observation number i out of the training sample. */
        data JACKKNIFE HOLD_1_OBS;
            set &DATASET;
            if _n_ = &i then output HOLD_1_OBS;
            else output JACKKNIFE;
        run;

        /* DESCENDING reverses the response ordering of BAD. */
        proc logistic data=JACKKNIFE noprint descending outest=mycoeff;
            model bad = var1 var2 var3;
        run;

        proc append base=parms data=mycoeff force;
        run;

        /* Turn the estimates in MYCOEFF into scoring code on disk. */
        %score;

        /* Apply the freshly generated scoring code to the held-out row. */
        data score_1_obs;
            set HOLD_1_OBS;
            %include 'c:\hardcode_xc8f4.txt';
            %myhardcode;
        run;

        proc append base=score_all data=score_1_obs force;
        run;
    %end;
%mend jackknife;

/*This program scores the holdout sample. 
You probably could do this quicker other ways, but this dynamically uses the
results of your regression and saves you from hardcoding your scoring solution.

What it does:
  - reads the PARMS row of MYCOEFF (the OUTEST= dataset of the last fit);
  - builds COEFF with one row per estimated parameter (MYVAR = parameter
    name, COEF = estimate), skipping _LNLIKE_ and _RMSE_;
  - writes c:\vars_1x3r5.txt, a macro named sel_var that lists the predictor
    names (the intercept is not a predictor and is left out);
  - writes c:\hardcode_xc8f4.txt, a macro named myhardcode that computes
    HSCORE as the linear predictor and then converts it to a probability;
  - stores the number of rows of COEFF in the macro variable NVARS.
*/
%macro score;
    data coeff (keep=myvar coef);
        length myvar $32;
        set mycoeff end=last;
        where _type_ = 'PARMS';
        file 'c:\vars_1x3r5.txt';

        if _n_ = 1 then put '%macro sel_var;';

        /* Declared before COEF and J exist, so the array holds only the
           numeric variables that come from MYCOEFF. */
        array myarray{*} _numeric_;

        do j = 1 to dim(myarray);
            if myarray{j} ^= . then do;
                call vname(myarray{j}, myvar);
                coef = myarray{j};
                if not (myvar in ("_LNLIKE_", "_RMSE_")) then do;
                    /* The intercept name is compared without regard to case.
                       It is kept out of the variable list only; its row must
                       still reach COEFF because the scoring formula needs
                       the constant term. */
                    if upcase(myvar) ^= "INTERCEPT" then put myvar;
                    output;
                end;
            end;
        end;

        if last then put '%mend sel_var;';
    run;

    /* Write the scoring macro. Each term is chosen by parameter name rather
       than by row position, so the intercept is always emitted as a plain
       constant. BEST32. keeps more digits of each estimate than the default
       list-output format. */
    data _null_;
        set coeff end=last;
        file 'c:\hardcode_xc8f4.txt';

        if _n_ = 1 then do;
            put '%macro myhardcode;';
            put 'hscore = ';
        end;

        /* The trailing @ holds the line so the separator lands on it. */
        if upcase(myvar) = "INTERCEPT" then put ' ' coef best32. @;
        else put ' ' myvar ' * ' coef best32. @;

        if last then do;
            put ';';
            /* Now this turns it into the logistic probability rather than
               log odds. */
            put "hscore = 1 /(1+exp(-(hscore)));";
            put '%mend myhardcode;';
            /* Now we have number of parameters in a macro var */
            call symputx('nvars', _n_);
        end;
        else put ' + ';
    run;
%mend score;

%jackknife(MYDATA);