/*
   This program deletes variables in your dataset whose proportion of
   missing values is greater than some threshold. In regression modeling,
   you might want to eliminate variables that are highly missing.

   Only numeric variables are examined (the missing counts come from
   PROC MEANS); character variables are always kept.

   Macro parameters:
     1st position = input dataset name
     2nd position = output dataset name
     3rd position = missing threshold expressed as a fraction of the
                    number of observations (for example .35 = 35%).
                    A variable is dropped when
                      (number of missing values) > (observations * threshold)

   Example Macro Call:
     %delete_missing(mydata,mydata2,.35);

   Note - nothing is printed. Check your output dataset to see if it worked.
*/

/* Small demonstration dataset: X1 is never missing, X2 is missing in 3 of
   10 rows, X3 is missing in 7 of 10 rows. */
data mydata;
  input X1 X2 X3;
  cards;
1 2 3
4 . .
7 . .
1 2 .
4 8 .
7 8 .
1 2 3
4 8 6
7 8 .
5 . .
;

%macro delete_missing(mydata,myout,percent);
  /* Keep the working macro variables out of any enclosing scope, and give
     them defined values even when the input has no observations. */
  %local i nobs num;
  %let nobs = 0;
  %let num  = 0;

  /* Verbose log / macro debugging options (left on for the session). */
  OPTIONS NOTES SOURCE SOURCE2 MPRINT MLOGIC MERROR SYMBOLGEN;

  /* Use PROC MEANS to count the missing values for each numeric variable.
     The result is one row with one column per numeric input variable.
     A prefixed scratch name is used so that an input (or unrelated user
     table) called NEW is not overwritten. */
  proc means data=&mydata nmiss noprint;
    output out=_dm_nmiss(drop=_type_ _freq_) nmiss=;
  run;

  /* Count the observations in the input dataset. */
  data _null_;
    set &mydata end=_dm_last;
    if _dm_last then call symputx('nobs', _n_, 'L');
  run;

  /* Collect the names of the variables whose missing count exceeds the
     threshold into macro variables var1..varN, and store N in num.
     All bookkeeping variables carry a _dm_ prefix and are first referenced
     AFTER the ARRAY statement, so they neither collide with input columns
     named i / n / name nor become members of the _numeric_ array. */
  data _null_;
    length _dm_name $32;
    set _dm_nmiss;
    array _dm_cnt(*) _numeric_;
    do _dm_i = 1 to dim(_dm_cnt);
      if _dm_cnt(_dm_i) > &nobs*&percent then do;
        _dm_n + 1;
        call vname(_dm_cnt(_dm_i), _dm_name);
        call symputx(cats('var', _dm_n), _dm_name, 'L');
      end;
    end;
    call symputx('num', _dm_n, 'L');
  run;  /* explicit step boundary: the macro variables exist before use */

  /* Copy the input, dropping the flagged variables. The DROP= option is
     only generated when at least one variable was flagged. */
  data &myout;
    set &mydata
    %if &num > 0 %then %do;
      (drop= %do i = 1 %to &num; &&var&i %end; )
    %end;
    ;
  run;

  /* Remove the scratch dataset. */
  proc datasets library=work nolist;
    delete _dm_nmiss;
  quit;
%mend delete_missing;

%delete_missing(mydata,mydata2,.35);