/* 
  Code: two_stage example.sas
  Author: Paul von Hippel
  Version: 1.0
  Date: July 19, 2017
*/
/* This code replicates the first simulation in my article "How many imputations do you need?" */

/* Before anything else, copy the provided data, bmi_observed, into the work directory,
    and %include the macro %mi_combine.
   To do this, run the code below.
    You will need to change supplement_dir (on the %let line only) to point to the
    folder on your system that contains the data and mi_combine.sas.
    Do not add a trailing backslash or quotes to the value. */
%let supplement_dir = C:\Users\pvonhippel\Box Sync\More imputations\Supplement;
libname repdata "&supplement_dir\";
data bmi_observed;
 set repdata.bmi_observed;
run;
%include "&supplement_dir\mi_combine.sas";

/* Now run the simulation by running all the code below this line. */
%macro mi (imputations=5, indata=, outdata=);
 /* Multiply impute the four BMI variables with PROC MI.
      imputations = number of imputed datasets to create (NIMPUTE=)
      indata      = input dataset containing missing values
      outdata     = output dataset holding the stacked imputations */
 proc mi data=&indata nimpute=&imputations out=&outdata;
  /* 4 rounds of BMIs. Missing value codes (e.g., -9) were previously set to missing (.) */
  var c1bmi c2bmi c3bmi c4bmi;
  /* Author's note: these short MCMC settings speed the runtime about 20fold
     relative to the defaults of niter=100 nbiter=200, and results using the
     default settings were practically identical but much slower. */
  mcmc nbiter=10 niter=5 /*  initial=em (xconv=.01) */;
 run;
%mend;

%macro analyze (indata=, outests=, imputed=N);
/* Run PROC SURVEYMEANS on c3bmi and save its Statistics table.
     indata  = dataset to analyze
     outests = dataset that receives the ODS Statistics table
     imputed = Y to analyze each imputed dataset separately (BY _imputation_);
               any other value analyzes indata as a single dataset */
proc surveymeans data=&indata plots=none;
 %if %upcase(&imputed)=Y %then %do;
  /* One set of estimates per imputed dataset. */
  by _imputation_;
 %end;
 var c3bmi;
 ods output Statistics=&outests;
run;
%mend;

%macro mi_stages (imputations=, target_sd_se=.01, indata=);
 /* Run one complete stage: impute, analyze each imputed dataset, combine the
    estimates with %mi_combine, and record the elapsed time.
      imputations  = number of imputations to draw in this stage
      target_sd_se = passed through to %mi_combine as target_sd_se=
      indata       = incomplete input dataset
    Output: dataset mi_ests (written by %mi_combine) with run_time, the
    elapsed time of the stage in seconds, added to it.
    Intermediate datasets mi_data and ests are overwritten on every call. */

 /* Keep the timers local so they cannot overwrite same-named global macro variables. */
 %local start_time end_time;

 /* DATETIME() rather than TIME(): TIME() restarts at 0 at midnight, which
    would make run_time negative for a stage that runs across midnight.
    The explicit 20.3 format keeps millisecond resolution; the default
    %SYSFUNC formatting would round a datetime value to about 0.1 second. */
 %let start_time = %sysfunc(datetime(), 20.3);
 %mi (imputations=&imputations, indata=&indata, outdata=mi_data);
 %analyze (indata=mi_data, outests=ests, imputed=Y);
 %mi_combine (inests=ests, outests=mi_ests, print=0,
   est=mean, se=stderr, label=VarName VarLabel, target_sd_se=&target_sd_se);
 %let end_time = %sysfunc(datetime(), 20.3);
 data mi_ests;
  set mi_ests;
  run_time = &end_time - &start_time ;
 run;
%mend;

%macro annotate_stage (pilot_imputations=, stage=);
  /* Tag the current mi_ests with identifying columns and append it to the
     cumulative results dataset two_stages.
       pilot_imputations = number of pilot imputations used in this run
       stage             = text label stored in the stage column
     Note: &replication is not a parameter of this macro. It is resolved
     from the calling macro's scope (two_stages has a replication= parameter),
     so this macro must be invoked from a context where it is defined. */
  data mi_ests;
   set mi_ests;
   stage             = "&stage";
   pilot_imputations = &pilot_imputations;
   replication       = &replication;
  run;

  /* FORCE lets the append proceed even if the variables of the two datasets differ. */
  proc append base=two_stages data=mi_ests force;
  run;
%mend;

%macro two_stages (pilot_imputations=, target_sd_se=, replication=, indata=);
  /* One replication of the two-stage procedure: a pilot run followed by a
     final run, each appended to two_stages by %annotate_stage.
       pilot_imputations = number of imputations in the pilot stage
       target_sd_se      = passed through to %mi_stages
       replication       = replication number; read by %annotate_stage
                           through macro scope rather than passed explicitly
       indata            = incomplete input dataset
     NOTE(review): &recommended_imputations is not defined in this file;
     presumably %mi_combine sets it during the pilot stage -- confirm. */

  /* Stage 1: pilot. */
  %mi_stages (indata=&indata, imputations=&pilot_imputations, target_sd_se=&target_sd_se);
  %annotate_stage (stage=Pilot, pilot_imputations=&pilot_imputations);

  /* Stage 2: final, using the recommended number of imputations. */
  %mi_stages (indata=&indata, imputations=&recommended_imputations, target_sd_se=&target_sd_se);
  %annotate_stage (stage=Final, pilot_imputations=&pilot_imputations);
%mend;

%macro output_off;
 /* Silence ODS output and most log messages. Per the author, this speeds
    runtime and keeps the log and output windows from filling up and
    prompting the user to clear them. */
 ods exclude all;
 ods noresults;
 ods graphics off;
 options nonotes
         nosource nosource2
         nomprint nosymbolgen
         errors=0;
%mend;

%macro output_on;
 /* Re-enable ODS output and log messages so users can see final output.
    This sets fixed values (notes, source, source2, errors=20); it does not
    restore whatever settings were in effect before %output_off was called. */
 ods exclude none;
 ods results;
 ods graphics on;
 options notes
         source source2
         errors=20;
%mend;

%macro replicate_two_stages (target_sd_se=.001, replications=2, indata=, suppress_output=Y);
/* Repeat the two-stage procedure for each pilot setting and replication,
   accumulate the results in dataset two_stages, and print them.
     target_sd_se    = passed through to %two_stages
     replications    = number of replications per pilot setting
     indata          = incomplete input dataset
     suppress_output = Y to silence log/output during each replication and
                       write a one-line progress message to the log instead
   The pilot settings are fixed by the outer loop: 5 and 20 imputations. */

/* Keep the loop counters out of any enclosing or global macro scope. */
%local pilot_imputations replication;

/* Start from an empty results dataset.
   NOLIST suppresses the library directory listing, NOWARN avoids a complaint
   when two_stages does not exist yet (first run), and QUIT ends the
   procedure instead of leaving it running until the next step begins. */
proc datasets nolist nowarn;
 delete two_stages;
quit;

%do pilot_imputations = 5 %to 20 %by 15;
 %do replication = 1 %to &replications;
  %if %upcase(&suppress_output)=Y %then %do;
   %output_off;
  %end;
  %two_stages (indata=&indata, pilot_imputations=&pilot_imputations, target_sd_se=&target_sd_se, replication=&replication);
  %if %upcase(&suppress_output)=Y %then %do;
   %output_on;
   /* Progress message, since notes were suppressed during the run. */
   %put Pilot imputations &pilot_imputations Replication &replication; 
  %end;
 %end;
%end;

/* RETAIN before SET is used here to put the identifying columns first;
   the interval and test columns are dropped. */
data two_stages;
 retain replication pilot_imputations stage target: Var: imputations run_time;
 set two_stages;
 drop lcl ucl t p;
run;
proc print data=two_stages;
run;
%mend;

%macro summarize_results;
/* Summarize the Final-stage rows of two_stages by number of pilot
   imputations, then print the summary in long and transposed layouts.
   Creates datasets two_stage_summary and two_stage_transposed. */

/* Compute the summary statistics quietly; only the prints below are shown. */
%output_off;
proc means data=two_stages stackods mean /* median */ stddev min max;
 where stage="Final";
 class pilot_imputations stage;
 var imputations run_time est se df frac_missing frac_missing_lcl frac_missing_ucl;
 ods output Summary=two_stage_summary (drop=label: nobs);
run;

proc sort data=two_stage_summary;
 by pilot_imputations /* descending stage */;
run;
%output_on;

/* Long layout: one row per pilot setting and analysis variable. */
proc print data=two_stage_summary;
run;

/* Transposed layout: the values of the summary's variable column become columns. */
proc transpose data=two_stage_summary out=two_stage_transposed (drop=_label_);
 id variable;
 by pilot_imputations /* descending stage */;
run;

proc print data=two_stage_transposed;
 format imputations run_time 4.0 est se 6.3 df 4.0 frac_missing: 4.2;
run;
%mend;

/******************* Run macro calls below after defining macros above. ******************/

ods trace off;
%analyze (indata=bmi_observed, outests=listwise_ests, imputed=N);
 /* The above command returns listwise deleted estimates. 
    These are not necessary but serve as a comparison. */
%replicate_two_stages (target_sd_se=.001, replications=3, suppress_output=Y, 
 indata=bmi_observed);
 /* The above command runs the two-stage procedure replications=3 times for
    each of the two pilot settings (5 and 20 pilot imputations), i.e. 6 runs.
    NOTE(review): this comment previously said "100 runs"; presumably a larger
    replications= value was used for the published simulation -- confirm and
    increase replications= to reproduce it. */
%summarize_results;
