/* Split Validation Routine using Jackknife Estimation. */

/* Sample data: one observation per line, five numeric fields per observation
   (BAD VAR1 VAR2 VAR3 GOOD). The lone semicolon line terminates the data. */
data mydata;
    input BAD VAR1 VAR2 VAR3 GOOD;
    cards;
1 20 67 1 0
1 20 67 1 0
1 20 67 1 0
1 20 66 1 0
1 20 66 1 0
1 20 66 1 0
1 30 77 1 0
1 30 77 1 0
1 30 77 1 0
0 30 76 2 1
1 30 76 1 0
1 30 76 1 0
1 30 76 1 0
1 10 49 2 0
1 10 49 2 0
1 10 49 2 0
1 10 49 2 0
1 20 59 2 0
1 20 49 2 0
1 20 49 2 0
1 20 49 2 0
1 10 33 4 0
1 30 59 2 0
1 30 59 2 0
1 30 59 2 0
1 30 59 2 0
0 20 45 5 1
0 20 45 5 1
0 20 45 5 1
0 10 30 3 1
0 10 30 3 1
0 10 30 3 1
0 10 30 3 1
0 10 30 3 1
0 10 30 3 1
0 30 56 2 1
0 30 56 2 1
0 30 56 2 1
1 30 55 2 0
1 30 55 2 0
1 30 55 2 0
1 10 28 2 0
0 10 24 5 1
0 10 24 5 1
0 10 24 5 1
0 10 23 4 1
0 30 49 2 1
0 30 49 2 1
0 30 49 2 1
0 30 49 2 1
0 20 33 4 1
0 20 33 4 1
0 20 33 4 1
0 20 32 5 1
0 20 32 5 1
0 20 32 5 1
0 30 45 5 1
0 30 45 2 1
0 30 45 2 1
0 30 45 2 1
0 20 29 3 1
0 20 29 3 1
0 20 29 3 1
0 20 29 3 1
0 20 29 3 1
0 20 29 3 1
0 20 28 3 1
0 20 28 3 1
0 20 28 3 1
0 20 26 2 1
0 20 26 2 1
0 20 26 2 1
0 20 24 3 1
0 20 24 3 1
0 20 24 3 1
0 20 23 4 1
0 20 23 4 1
0 20 23 4 1
0 20 22 2 1
0 20 21 3 1
0 30 31 2 1
0 30 31 2 1
0 30 31 2 1
0 30 29 3 1
0 30 29 3 1
0 30 29 3 1
0 30 25 4 1
0 30 25 4 1
0 30 25 4 1
0 30 24 3 1
0 30 23 4 1
0 30 23 4 1
0 30 23 4 1
0 30 22 3 1
1 30 22 1 0
0 30 22 3 1
1 30 22 1 0
0 30 22 3 1
1 30 22 1 0
0 30 21 4 1
;
run;

/*
   %jackknife_split(DATASET, NSAMPLES, PERCENT, DEPVAR=, INDEPVARS=)

   DATASET   = name of your dataset
   NSAMPLES  = number of times you want to estimate a Jackknife regression
   PERCENT   = percent (0-100) of the original sample you want placed in the
               holdout sample; the holdout size is round(N * PERCENT / 100)
   DEPVAR    = response variable of the logistic model (default: bad)
   INDEPVARS = space-separated numeric predictors (default: var1 var2)

   Output tables:
   PARMS     will contain the results from the regression, one OUTEST= row
             set per iteration
   SCORE_ALL will contain the holdout samples along with their predicted
             values (HSCORE) and the iteration number (ITER)
   JACKKNIFE is the estimation dataset, which gets overwritten on each
             iteration.

   PARMS and SCORE_ALL are deleted at the start of every call so that a rerun
   does not mix in rows from an earlier run.

   Scratch files c:\vars_1x3r5.txt and c:\hardcode_xc8f4.txt are rewritten on
   every iteration; that location must be writable.
*/
%macro jackknife_split(DATASET, NSAMPLES, PERCENT, DEPVAR=bad, INDEPVARS=var1 var2);
    /* Keep the loop counter and work variables out of the caller scope. */
    %local i totobs nhold;
    %let totobs = 0;
    %let nhold = 0;

    /* Count the observations and turn PERCENT into a holdout row count.
       SYMPUTX stores the numbers without leading blanks or conversion notes. */
    data _null_;
        set &DATASET end=last;
        if last then do;
            call symputx('totobs', _n_);
            call symputx('nhold', round(_n_ * &PERCENT / 100));
        end;
    run;

    %if &totobs = 0 %then %do;
        %put ERROR: jackknife_split: dataset &DATASET is missing or has no observations.;
        %return;
    %end;
    %if &nhold < 1 or &nhold >= &totobs %then %do;
        %put ERROR: jackknife_split: PERCENT=&PERCENT gives a holdout of &nhold out of &totobs observations.;
        %return;
    %end;

    /* Start from empty accumulators; NOWARN keeps the first run quiet when
       the tables do not exist yet. */
    proc datasets lib=work nolist nowarn;
        delete parms score_all;
    quit;

    %do i = 1 %to &NSAMPLES;

        /* Sequential selection sampling: each row is taken with probability
           (rows still needed) / (rows still unseen), which yields exactly
           &nhold holdout rows. The seed changes with the iteration. */
        data JACKKNIFE HOLD_OBS;
            retain G &nhold N &totobs;
            set &DATASET;
            if ranuni(357163 + &i) <= G / N then do;
                output HOLD_OBS;
                G = G - 1;
            end;
            else output JACKKNIFE;
            N = N - 1;
        run;

        /* Fit on the estimation sample; DESCENDING models the higher level
           of the response. */
        proc logistic data=JACKKNIFE descending outest=mycoeff;
            model &DEPVAR = &INDEPVARS;
        run;

        proc append base=parms data=mycoeff force;
        run;

        /* Regenerate the scoring code from this iteration's coefficients. */
        %score;

        /* Score the holdout rows with the freshly generated formula. */
        data score_obs;
            retain iter;
            set HOLD_OBS;
            iter = &i;
            %include 'c:\hardcode_xc8f4.txt';
            %myhardcode;
        run;

        proc append base=score_all data=score_obs force;
        run;

    %end;
%mend jackknife_split;

/* This program scores the holdout sample. You probably could do this quicker
   other ways, but this dynamically uses the results of your regression and
   saves you from hardcoding your scoring solution.

   Input : MYCOEFF, the OUTEST= dataset of the latest PROC LOGISTIC run.
   Output: COEFF (one row per estimated parameter: MYVAR, COEF),
           c:\vars_1x3r5.txt     - macro SEL_VAR listing the predictor names,
           c:\hardcode_xc8f4.txt - macro MYHARDCODE computing HSCORE. */
%macro score;

    /* Transpose the PARMS row into one (name, value) row per parameter. */
    data coeff (keep=myvar coef);
        length myvar $32;
        set mycoeff end=last;
        where _type_ = 'PARMS';
        file 'c:\vars_1x3r5.txt';
        if _n_ = 1 then put '%macro sel_var;';
        /* The array must be declared before J and COEF exist so that it
           holds only the numeric columns read from MYCOEFF. */
        array myarray{*} _numeric_;
        do j = 1 to dim(myarray);
            if myarray{j} ^= . then do;
                call vname(myarray{j}, myvar);
                coef = myarray{j};
                /* Skip fit statistics; compare in upper case because the
                   spelling of OUTEST= names depends on the SAS release. */
                if not (upcase(myvar) in ("_LNLIKE_", "_RMSE_")) then do;
                    /* The intercept is a parameter, not a predictor: keep it
                       in COEFF but leave it out of the variable list. */
                    if upcase(myvar) ^= "INTERCEPT" then put myvar;
                    output;
                end;
            end;
        end;
        if last then put '%mend sel_var;';
    run;

    /* Write the scoring macro. Every term is followed by " + " except the
       last one, which closes the statement, so any number of parameters and
       any position of the intercept produce valid code. BEST32. keeps the
       full precision of each coefficient. */
    data _null_;
        set coeff end=last;
        file 'c:\hardcode_xc8f4.txt';
        if _n_ = 1 then do;
            put '%macro myhardcode;';
            put 'hscore = ';
        end;
        if upcase(myvar) = "INTERCEPT" then put '    ' coef best32. -l @;
        else put '    ' myvar ' * ' coef best32. -l @;
        if last then do;
            put ';';
            /* Now this turns it into the logistic probability rather than log odds */
            put "hscore = 1 /(1+exp(-(hscore)));";
            put '%mend myhardcode;';
        end;
        else put ' + ';
    run;

%mend score;

%jackknife_split(MYDATA,5,10);