*Lift chart table and power curve graph; OPTIONS NOTES SOURCE SOURCE2 MPRINT MLOGIC MERROR SYMBOLGEN; /* Here is your data. EVENT is the event condition - purchase of a product or a risk event as in credit scoring or marketing response to a credit card mailing. The calculations below assume that a high value of the SCORE variable means that there is a high chance that the event condition will be met. If the opposite is true, a slight modification of the code will have to be done. */ data lift_chart; input event score; cards; 1 0.996394248 1 0.993153049 0 0.99264846 1 0.991390846 1 0.990078239 1 0.985069718 1 0.985069718 1 0.985069718 1 0.985069718 1 0.983979034 1 0.983979034 1 0.983979034 1 0.982810063 1 0.982810063 1 0.982810063 1 0.981266345 1 0.981266345 0 0.979903485 1 0.969914804 1 0.969914804 1 0.969914804 1 0.969914804 1 0.969914804 1 0.967753267 1 0.967753267 1 0.965441962 1 0.965441962 1 0.965441962 1 0.965441962 1 0.965441962 1 0.965441962 1 0.962971333 1 0.96033133 1 0.96033133 1 0.959719044 1 0.959719044 0 0.956857553 1 0.956857553 1 0.936160482 0 0.931744456 1 0.927046761 1 0.927046761 1 0.927046761 1 0.927046761 1 0.927046761 1 0.915521847 1 0.915521847 1 0.915521847 1 0.909815699 1 0.909815699 1 0.909815699 1 0.909815699 0 0.869625777 1 0.852510035 0 0.843276297 0 0.843276297 0 0.84115611 1 0.84115611 1 0.84115611 1 0.831352167 1 0.831352167 1 0.831352167 1 0.831352167 1 0.831352167 1 0.831352167 1 0.831352167 1 0.762340303 1 0.762340303 1 0.73851779 0 0.73851779 1 0.73851779 1 0.735564531 1 0.735564531 1 0.735564531 1 0.735564531 1 0.735564531 1 0.735564531 1 0.735564531 1 0.735564531 1 0.735564531 1 0.735564531 1 0.735564531 1 0.735564531 1 0.735564531 1 0.735564531 1 0.724454133 1 0.724454133 1 0.676248128 1 0.676248128 1 0.581741884 1 0.576127216 0 0.576127216 1 0.576127216 0 0.576127216 1 0.576127216 1 0.576127216 1 0.576127216 0 0.576127216 1 0.576127216 0 0.576127216 1 0.576127216 0 0.576127216 1 0.576127216 0 0.576127216 1 0.544607051 1 0.544607051 1 0.544607051 0 0.505109013 0 0.505109013 0 0.505109013 1 0.465190337 1 0.465190337 1 0.455327168 1 0.447425344 0 0.381878549 1 0.381878549 1 0.345235366 1 0.345235366 0 0.345235366 1 0.345235366 0 0.316903095 0 0.301758433 0 0.30160739 0 0.286886708 0 0.286886708 0 0.286886708 0 0.283633745 0 0.283633745 0 0.283633745 0 0.283633745 0 0.283633745 0 0.283633745 0 0.272461916 1 0.232007321 1 0.232007321 1 0.232007321 0 0.232007321 0 0.232007321 0 0.232007321 0 0.232007321 0 0.232007321 0 0.223463965 0 0.223463965 1 0.182404424 0 0.171966194 0 0.171966194 0 0.164283093 0 0.164283093 0 0.164283093 0 0.164283093 0 0.164283093 0 0.164283093 0 0.162006938 0 0.152518223 0 0.152518223 0 0.143490099 0 0.143490099 0 0.143490099 0 0.143490099 0 0.141540365 1 0.133060035 0 0.133060035 0 0.11912665 0 0.11912665 0 0.11912665 0 0.11912665 0 0.11912665 0 0.11912665 0 0.11912665 0 0.11912665 0 0.11912665 0 0.11912665 0 0.11912665 0 0.11912665 0 0.11912665 0 0.11912665 0 0.11912665 0 0.11912665 0 0.11912665 0 0.11912665 0 0.11912665 0 0.111814315 0 0.111814315 0 0.111814315 0 0.111814315 0 0.111814315 0 0.111814315 0 0.104897381 0 0.104897381 1 0.104897381 0 0.104897381 1 0.104897381 1 0.075716078 0 0.075716078 0 0.070854072 0 0.070854072 0 0.070854072 0 0.070854072 0 0.070854072 0 0.065257519 0 0.065257519 0 0.061063851 0 0.061063851 0 0.061063851 0 0.061063851 0 0.061022756 0 0.056231867 0 0.056231867 0 0.049096442 0 0.046562171 0 0.046562171 0 0.046562171 0 0.046562171 0 0.046562171 0 0.046562171 0 0.046562171 0 0.035374656 0 0.035374656 0 0.035374656 0 0.033010602 0 0.033010602 0 0.030799492 0 0.030799492 0 0.030799492 0 0.030799492 0 0.030799492 0 0.030305696 1 0.026799614 0 0.026386616 0 0.026386616 0 0.026386616 0 0.024607901 0 0.024607901 0 0.024607901 0 0.022946263 0 0.022946263 0 0.022946263 0 0.022946263 0 0.022930194 0 0.022930194 0 0.019945283 0 0.014026395 0 0.013069694 0 0.012177442 0 0.011345402 0 0.011345402 0 0.011345402 0 0.011345402 0 0.010569604 0 0.008403544 ; run; *****************************************************************************; * GAINS CHART AND DATA FOR GRAPHICS; *****************************************************************************; %MACRO gui_gains_chart(deciles,data,column1,score); OPTIONS linesize=80 number center date ; data &data; set &data; n_event=1-&column1; obs_=_N_; run; %let column2=n_event; %let mytitle=Validation Results; *At this point, column1=EVENTS, column2=Non-Events; * First sort dataset by whatever the SCORE variable represents with respect to the event; * In this case, the score represents the prob of the event happening; * If we developed this score from a logit model in SAS, we would have used the; * DESCENDING option on the regression statement; proc sort data=&data out=&data; by descending &score ; * Proc means is run to get the totals of some columns - Events & Non-Events; PROC MEANS DATA=&data(KEEP=&column1 &column2) NOPRINT MAXDEC=8; VAR &column1 &column2 ; OUTPUT OUT=SUMS SUM=EVENT_T NEVENT_T; *Get column sums; DATA SUMS; SET SUMS; CALL SYMPUT('EVENT_T',trim(left(EVENT_T))); CALL SYMPUT('NEVENT_T',trim(left(NEVENT_T))); *Now for the main procedure that does all the work; PROC RANK DATA=&data(KEEP= obs_ &SCORE &column1 &column2)OUT=RANKSCOR TIES=LOW GROUPS=&deciles ; VAR obs_; RANKS RANK ; DATA RANKSCOR; SET RANKSCOR; Rank=Rank+1; run; PROC SUMMARY DATA=RANKSCOR; CLASS RANK; VAR &column1 &column2 &SCORE ; OUTPUT OUT=FINAL_TBL SUM=COL1 COL2 MIN(&SCORE)=LOW_SCORE MAX(&SCORE)=HIGH_SCORE ; PROC SORT DATA= FINAL_TBL; BY RANK; DATA FINAL_TBL; SET FINAL_TBL; BY RANK; IF _TYPE_= 1; IF COL1 = . THEN COL1= 0; IF COL1 ^= 0 THEN E_1= COL1/&EVENT_T*100; ELSE E_1= 0; Cumulative_percent_event + E_1; IF COL2 =. THEN COL2= 0; IF COL2 ^= 0 THEN NE_1= COL2/&NEVENT_T*100; ELSE NE_1= 0; Cumulative_percent_nevent + NE_1; RATIO= 0; IF NE_1 > 0 THEN RATIO = ROUND((E_1/NE_1)*100,1) ; KS= 0 ; KS= ABS(Cumulative_percent_event - Cumulative_percent_nevent); KS = ROUND(KS,.1); Percentage = COL2/(COL2+COL1)*100; E_1= ROUND(E_1,.01); NE_1= ROUND(NE_1,.01); data FINAL_TBL; set FINAL_TBL; by rank; proc means data=FINAL_TBL noprint; var ks; output out=ks_1 max=ks_stat; data ks_1; set ks_1; call symput('KS_STAT',trim(left(KS_STAT))); data FINAL_TBL; set FINAL_TBL; call symput('low_score',trim(left(low_score))); call symput('high_score',trim(left(high_score))); Decile+5; run; * This sorts FINAL_TBL from low to high based upon its RANK; PROC SORT DATA= FINAL_TBL; BY RANK; run; title1 "&mytitle"; OPTIONS LS=145 ps=45 center ERROR=1 nodate nonumber; PROC PRINT DATA=FINAL_TBL NOOBS SPLIT='/'; LABEL RANK = ' /PERCENTILE/==========' LOW_SCORE= 'MIN SCORE/ IN/PERCENTILE/==========' HIGH_SCORE= 'MAX SCORE/ IN/PERCENTILE/==========' COL1 = "NUMBER/ OF /EVENTS/========" E_1 = "PERCENT/ OF /EVENTS/========" Cumulative_percent_event = "CUMULATIVE/PERCENT OF/EVENTS/========" COL2 = "NUMBER/ OF / Non-EVENTS/========" NE_1 = "PERCENT/ OF /Non-EVENTS/========" Cumulative_percent_nevent = "CUMULATIVE/PERCENT OF/Non-EVENTS/========" KS = ' DIFFERENCE/IN/CUMULATIVES/==========='; VAR RANK LOW_SCORE HIGH_SCORE COL1 E_1 Cumulative_percent_event COL2 NE_1 Cumulative_percent_nevent KS ; RUN; data _null_; file print noprint notitles ; put #2 @10 "====================================================="; put #3 @10 " *** MODEL SUMMARY INFORMATION ***"; put #4 @10 "====================================================="; put #5 @10 "--> Total number of EVENTS =&EVENT_T"; put #6 @10 "--> Total number of Non-EVENTS=&NEVENT_T"; put #8 @10 "--> The Kolmovorov-Smirnov Value is &KS_STAT"; run; %MEND gui_gains_chart; %gui_gains_chart(20,lift_chart,event,score); %Macro power(mydata, vr1, vr2,good,mytitle,footnote,pdf); %if %upcase(&pdf)='N' %then %do; goptions reset=all device=WIN gunit=pct nodisplay cback=white colors=(blue green red black) ctitle=blue htitle=5 htext=4 ftext=swissb border; %end; %if %upcase(&pdf)='Y' %then %do; goptions reset=all device=sasprtc gunit=pct cback=white colors=(blue green red black) ctitle=blue htitle=5 htext=4 ftext="helvetica" border; %end; data _null_; set SUMS end=last; if last then call symput('random_bads',EVENT_T); if last then call symput('random_int',int(EVENT_T/20)); run; data _null_; set ks_1 end=last; if last then call symput('KS',ks_stat); run; data FINAL_TBL(drop=tot_random random_bads cum_bads tot_freq xyx t_perfect tfreq lagxyx); set FINAL_TBL; *if star='<== KS STAT' then call symput('KS_p',_n_); retain tot_random cum_bads tot_freq t_perfect; random_bads=&random_int/&random_bads; if _n_=1 then tot_random=0; if _n_=1 then cum_bads=0; if _n_=1 then tot_freq=0; if _n_=1 then t_perfect=0; tot_random=(random_bads+tot_random); r=tot_random*100; tot_freq=(_freq_+tot_freq); tfreq=tot_freq; perfect=_freq_; xyx=&random_bads-tfreq; lagxyx=lag(xyx); if xyx<0 then perfect=lagxyx; if lagxyx<0 then perfect=0; if _n_=1 then perfect=_freq_; t_perfect=perfect+t_perfect; if t_perfect<&random_bads then p_per=(t_perfect/&random_bads)*100; if t_perfect>=&random_bads then p_per=100; ks_graph=&ks; run; footnote1 j=r c=black &footnote; footnote2 angle=90; axis1 offset=(5,10) width=5; symbol1 c=red i=j v=dot ; symbol2 c=blue i=j v=dot; symbol3 c=green i=j v=dot; symbol4 c=orange i=j v=dot; symbol5 c=black; symbol6 c=black i=j ; /* Add titles and footnotes */ footnote1 h=3.3 c=black f=simplex ' PERFECT MODEL-EVENT ' c=ORANGE f=marker 'U' c=black f=simplex ' EVENT ' c=RED f=marker 'U' c=black f=simplex ' RANDOM ' c=GREEN f=marker 'U' c=black f=simplex ' NON-EVENT ' c=BLUE f=marker 'U'; %if %upcase(&pdf)='N' %then %do; axis1 label=('% Identified'); axis2 label=(" High Risk ---- Percentiles ---- Low Risk "); proc gplot data=&mydata gout=AGraph3; plot &vr2 * &vr1 &good * &vr1 r * &vr1 p_per * &vr1 / overlay frame name="MYCHART" vaxis=axis1 haxis=axis2; title &mytitle ; title2 'Maximum value KS =' &ks; run; quit; %end; %if %upcase(&pdf)='Y' %then %do; axis1 label=('% Identified'); axis2 label=(" High Risk ---- Percentiles ---- Low Risk "); proc gplot data=&mydata ; plot &vr2 * &vr1 &good * &vr1 r * &vr1 p_per * &vr1 / overlay frame name="MYCHART" vaxis=axis1 haxis=axis2; title &mytitle ; title2 'Maximum KS Value =' &ks; run; quit; %end; footnote1 blank=yes; footnote2 blank=yes; %mend power; %power(FINAL_TBL, decile, Cumulative_percent_event,Cumulative_percent_nevent,'Validation Chart','SunTrust Banks','N'); /* Power Curve */