footnote '~/tacrine/mi18.sas';
libname p '~/tacrine';
*------------------------------------------------------------------------------;
* Macro to perform multiple imputation for data with monotone missing pattern ;
* Analysis up to Week 18 ;
*------------------------------------------------------------------------------;
%macro loop;
* compute parameter estimates from conditional regressions based on available data;
proc iml;
use dat1;
read all into xx;
close dat1;
n=nrow(xx); print "Number of observations in monotone pattern="n;
n4=316; * number of observations with observed Week 6, 12, 16 adas_Cog;
n3=340; * number of observations with observed Week 6, 12 adas_Cog;
n2=398; * number of observations with observed Week 6 adas_Cog;
n1=511; * number of observations with observed baseline adas_Cog;
p=4; * number of dependent variables: adas_Cog at Weeks 6, 12, 16, 18;
sumsq=repeat(0,1,p);
int=repeat(1,n1,1);
basy=xx[1:n1,2];
site=xx[1:n1,8:39];
y1=xx[1:n1,42];
y2=xx[1:n1,43];
y3=xx[1:n1,44];
y4=xx[1:n1,45];
dose1=xx[1:n1,46];
dose2=xx[1:n1,47];
dose3=xx[1:n1,48]||xx[1:n1,50];
dose4=xx[1:n1,49]||xx[1:n1,51];
y=y4[1:n4,];
x=int[1:n4,]||basy[1:n4,]||dose4[1:n4,]||y3[1:n4,]||y2[1:n4,]||y1[1:n4,];
fbeta4=inv(t(x)*x)*t(x)*y;
sumsq[1,4]=t(y-x*fbeta4)*(y-x*fbeta4);
sg4=inv(t(x)*x);
print "Coefficients for conditional dist of diff at Wk 18 "fbeta4;
print "(t(X)X)^-1="sg4;
y=y3[1:n3,];
x=int[1:n3,]||basy[1:n3,]||dose3[1:n3,]||y2[1:n3,]||y1[1:n3,];
fbeta3=inv(t(x)*x)*t(x)*y;
sumsq[1,3]=t(y-x*fbeta3)*(y-x*fbeta3);
sg3=inv(t(x)*x);
print "Coefficients for conditional dist of diff at Wk 16 "fbeta3;
print "(t(X)X)^-1="sg3;
y=y2[1:n2,];
x=int[1:n2,]||basy[1:n2,]||dose2[1:n2,]||y1[1:n2,];
fbeta2=inv(t(x)*x)*t(x)*y;
sumsq[1,2]=t(y-x*fbeta2)*(y-x*fbeta2);
sg2=inv(t(x)*x);
print "Coefficients for conditional dist of diff at Wk 12 "fbeta2;
print "(t(X)X)^-1="sg2;
y=y1;
x=int||basy||site||dose1;
fbeta1=inv(t(x)*x)*t(x)*y;
sumsq[1,1]=t(y-x*fbeta1)*(y-x*fbeta1);
sg1=inv(t(x)*x);
print "Coefficients for conditional dist of diff at Wk 6 "fbeta1;
print "(t(X)X)^-1="sg1;
print "Residual sum of squares for the conditional distns="sumsq;
est4=fbeta4||sg4;
est3=fbeta3||sg3;
est2=fbeta2||sg2;
est1=fbeta1||sg1;
* store regression coefficient estimates and residual sum of squares in files;
create rssq from sumsq; append from sumsq; close rssq;
create par1 from est1; append from est1; close par1;
create par2 from est2; append from est2; close par2;
create par3 from est3; append from est3; close par3;
create par4 from est4; append from est4; close par4;
quit;
* missg=dataset containing cases with missing values;
data missg; set dat1;
if miss18=0 then delete;
run;
* invoke macro MIMPUTE to draw values for missing data;
%mimpute;
* rename imputed values with y for adas_cog at Week 6, yy for adas_Cog at Week 12;
* yyy for adas_Cog at Week 16, yyyy for adas_Cog at Week 18;
data imputedz; set imputedz;
array col{40} col1-col40;
array y{40} y1 yy1 yyy1 yyyy1 y2 yy2 yyy2 yyyy2 y3 yy3 yyy3 yyyy3
y4 yy4 yyy4 yyyy4 y5 yy5 yyy5 yyyy5 y6 yy6 yyy6 yyyy6
y7 yy7 yyy7 yyyy7 y8 yy8 yyy8 yyyy8 y9 yy9 yyy9 yyyy9
y10 yy10 yyy10 yyyy10;
do i=1 to 40;
y{i}=col{i};
end;
drop col1-col40 i;
data imputedi; set imputedi;
array col{40} col1-col40;
array y{40} y1 yy1 yyy1 yyyy1 y2 yy2 yyy2 yyyy2 y3 yy3 yyy3 yyyy3
y4 yy4 yyy4 yyyy4 y5 yy5 yyy5 yyyy5 y6 yy6 yyy6 yyyy6
y7 yy7 yyy7 yyyy7 y8 yy8 yyy8 yyyy8 y9 yy9 yyy9 yyyy9
y10 yy10 yyy10 yyyy10;
do i=1 to 40;
y{i}=col{i};
end;
drop col1-col40 i;
data imputedd; set imputedd;
array col{40} col1-col40;
array y{40} y1 yy1 yyy1 yyyy1 y2 yy2 yyy2 yyyy2 y3 yy3 yyy3 yyyy3
y4 yy4 yyy4 yyyy4 y5 yy5 yyy5 yyyy5 y6 yy6 yyy6 yyyy6
y7 yy7 yyy7 yyyy7 y8 yy8 yyy8 yyyy8 y9 yy9 yyy9 yyyy9
y10 yy10 yyy10 yyyy10;
do i=1 to 40;
y{i}=col{i};
end;
drop col1-col40 i;
* merge imputed values with other variables for observations with missing data;
data imissz; merge missg imputedz;
drop d1-d4 ds40_1 ds80_2 ds80_3 ds80_4 ds120_3 ds120_4;
data imissi; merge missg imputedi;
drop d1-d4 ds40_1 ds80_2 ds80_3 ds80_4 ds120_3 ds120_4;
data imissd; merge missg imputedd;
drop d1-d4 ds40_1 ds80_2 ds80_3 ds80_4 ds120_3 ds120_4;
run;
* append imputed data to complete data and store in dataset FINAL;
proc append base=finalz data=imissz;
proc append base=finali data=imissi;
proc append base=finald data=imissd;
run;
* print imputed data with observed data;
proc print data=finald;
title "Complete and multiply imputed data under nearest-dose model";
run;
%mend loop;
*----------------------------------------------------------------------------;
* Macro to draw values for missing data from posterior distributions ;
*----------------------------------------------------------------------------;
%macro mimpute;
proc iml;
title "Multiple imputation for montone missing data up to Week 18";
use missg;
read all into ms;
close missg;
use rssq;
read all into rsumsq;
close rssq;
use par1;
read all into fest1;
use par2;
read all into fest2;
use par3;
read all into fest3;
use par4;
read all into fest4;
close par1;
close par2;
close par3;
close par4;
* p=number of dependent variables;
p=4;
nmiss=nrow(ms); print "Number of observations with missing data="nmiss;
df=repeat(0,p,1); * df=degrees of freedom of chi-squared distribution;
p1=nrow(fest1);
n1=511;
df[1,1]=n1-p1;
p11=p1+1;
fbeta1=fest1[,1];
sg1=fest1[,2:p11];
p2=nrow(fest2);
n2=398;
df[2,1]=n2-p2;
p21=p2+1;
fbeta2=fest2[,1];
sg2=fest2[,2:p21];
p3=nrow(fest3);
n3=340;
df[3,1]=n3-p3;
p31=p3+1;
fbeta3=fest3[,1];
sg3=fest3[,2:p31];
p4=nrow(fest4);
n4=316;
df[4,1]=n4-p4;
p41=p4+1;
fbeta4=fest4[,1];
sg4=fest4[,2:p41];
* m=number of multiple imputes;
m=10;
pm=p*m;
* ymiss stores the m sets of imputed data;
ymissz=repeat(0,nmiss,pm);
ymissi=repeat(0,nmiss,pm);
ymissd=repeat(0,nmiss,pm);
* start multiple imputation;
do ii=1 to m;
seed=1234567;
* draw variance of conditional distributions from inverse chi-squared distribution;
* drawn values stored in sg;
print "--------------------------- Iteration "ii" ---------------------------";
print "Degrees of Freedom of chi-squared distribution ";
print df;
sg=repeat(0,p,1);
do iii=1 to p;
chi2=0;
do ll=1 to df[iii,1];
z=normal(seed);
chi2=chi2+z*z;
end;
sg[iii,1]=rsumsq[1,iii]/chi2;
end;
print "Drawn variances of conditional distributions";
print sg;
* draw regression coefficients from posterior distributions;
* drawn values stored in rbeta1, rbeta2, rbeta3, rbeta4;
z=repeat(0,p1,1);
do iii=1 to p1; z[iii,1]=normal(seed);
end;
rbeta1=t(root(sg[1,1]@sg1))*z+fbeta1;
z=repeat(0,p2,1);
do iii=1 to p2; z[iii,1]=normal(seed); end;
rbeta2=t(root(sg[2,1]@sg2))*z+fbeta2;
z=repeat(0,p3,1);
do iii=1 to p3; z[iii,1]=normal(seed); end;
rbeta3=t(root(sg[3,1]@sg3))*z+fbeta3;
z=repeat(0,p4,1);
do iii=1 to p4; z[iii,1]=normal(seed); end;
rbeta4=t(root(sg[4,1]@sg4))*z+fbeta4;
print "Drawn regression coefficients";
print rbeta1 rbeta2 rbeta3 rbeta4;
* store baseline value and differences in yi, yz and yd;
yi=ms[,2]||ms[,42:45];
yz=ms[,2]||ms[,42:45];
yd=ms[,2]||ms[,42:45];
* take draws from the conditional distribution of missing values;
* on observed variables;
do i=1 to nmiss;
nms=0;
do j=3 to 6;
if ms[i,j]=. then nms=nms+1;
end;
* print "Number of missing values="nms;
k=repeat(0,nms,1);
jj=0;
do j=3 to 6;
if ms[i,j]=. then do; jj=jj+1; k[jj,1]=j-2; end;
end;
* k = contains the positions of the missing values for observation i;
* nms = number of missing values for observation i;
do l=1 to nms;
kk=k[l,1];
kkp1=kk+1;
* compute conditional mean;
* for continuing-dose model;
if kk=1 then conmeani=(1||ms[i,2]||ms[i,8:39]||ms[i,46])*rbeta1;
if kk=2 then conmeani=(1||ms[i,2]||ms[i,47]||yi[i,2])*rbeta2;
if kk=3 then conmeani=(1||ms[i,2]||ms[i,48]||ms[i,50]||yi[i,3]||yi[i,2])*rbeta3;
if kk=4 then
conmeani=(1||ms[i,2]||ms[i,49]||ms[i,51]||yi[i,4]||yi[i,3]||yi[i,2])*rbeta4;
* for zero-dose model;
if kk=1 then conmeanz=(1||ms[i,2]||ms[i,8:39]||0)*rbeta1;
if kk=2 then conmeanz=(1||ms[i,2]||0||yz[i,2])*rbeta2;
if kk=3 then conmeanz=(1||ms[i,2]||0||0||yz[i,3]||yz[i,2])*rbeta3;
if kk=4 then conmeanz=(1||ms[i,2]||0||0||yz[i,4]||yz[i,3]||yz[i,2])*rbeta4;
* for nearest-dose model;
if kk=1 then conmeand=(1||ms[i,2]||ms[i,8:39]||ms[i,52])*rbeta1;
if kk=2 then conmeand=(1||ms[i,2]||ms[i,53]||yd[i,2])*rbeta2;
if kk=3 then conmeand=(1||ms[i,2]||ms[i,54]||ms[i,56]||yd[i,3]||yd[i,2])*rbeta3;
if kk=4 then
conmeand=(1||ms[i,2]||ms[i,55]||ms[i,57]||yd[i,4]||yd[i,3]||yd[i,2])*rbeta4;
sigma2=sg[kk,1];
* print kk"th variable -";
caseno=ms[i,40];
* print "Case No="caseno;
* print "Conditional Mean (zero-dose)="conmeanz;
* print "Conditional Mean (continuing-dose)="conmeani;
* print "Conditional Mean (nearest-dose)="conmeand;
* print "Variance="sigma2;
ym=sqrt(sigma2)*normal(seed);
ymz=ym+conmeanz;
ymi=ym+conmeani;
ymd=ym+conmeand;
* print "Imputed Difference (zero-dose)="ymz;
* print "Imputed Difference (continuing-dose)="ymi;
* print "Imputed Difference (nearest-dose)="ymd;
yz[i,kkp1]=ymz;
yi[i,kkp1]=ymi;
yd[i,kkp1]=ymd;
ymcol=p*(ii-1)+kk;
ymissz[i,ymcol]=ymz+yz[i,1];
ymissi[i,ymcol]=ymi+yi[i,1];
ymissd[i,ymcol]=ymd+yd[i,1];
end; * (l loop);
end; * (i loop);
end; * (ii loop);
* stores imputed values in file: imputedz - imputed data under zero-dose assumption;
* imputedi - imputed data under continuing-dose assumption;
* imputedd - imputed data under nearest-dose assumption;
create imputedz from ymissz; append from ymissz; close imputedz;
create imputedi from ymissi; append from ymissi; close imputedi;
create imputedd from ymissd; append from ymissd; close imputedd;
quit;
%mend mimpute;
/*----------------------------------------------------------------------------*/
/* Main Program */
/*----------------------------------------------------------------------------*/
* read in data and sort by missing patterns;
data dat0; set p.little;
array adas{5} adas_bas adas_6 adas_12 adas_16 adas_18;
array ind{5} indi1-indi5;
array st{32} site1-site12 site14-site22 site24-site34;
do i=1 to 5;
if adas{i}=. then ind{i}=1; else ind{i}=0;
end;
case_no=trial_no*1000+pat_no;
miss18=indi1*10000+indi2*1000+indi3*100+indi4*10+indi5;
d1=adas_6-adas_bas;
d2=adas_12-adas_bas;
d3=adas_16-adas_bas;
d4=adas_18-adas_bas;
ds40_1=0; ds80_2=0; ds80_3=0; ds80_4=0; ds120_3=0; ds120_4=0;
if treatmt=2 then do; ds40_1=1; ds80_2=1; ds80_3=1; ds80_4=1; end;
if treatmt in (3 4) then do; ds40_1=1; ds80_2=1; ds120_3=1; ds120_4=1; end;
do i=1 to 32;
st{i}=0;
if i ge 1 and i le 12 then do; if trial_no=i then st{i}=1; end;
if i ge 13 and i le 21 then do; j=i+1; if trial_no=j then st{i}=1; end;
if i ge 22 then do; j=i+2; if trial_no=j then st{i}=1; end;
end;
keep trial_no treatmt adas_bas adas_6 adas_12 adas_16 adas_18 miss18 case_no
d1-d4 ds40_1 ds80_2 ds80_3 ds80_4 ds120_3 ds120_4
site1-site12 site14-site22 site24-site34;
proc sort; by case_no;
* define nearest-dose indicators for observed doses after drop-out;
data dat2; set p.tacrine;
rds40_1=0; rds80_2=0; rds80_3=0; rds80_4=0; rds120_3=0; rds120_4=0;
if rds_w6 in (40 80) then rds40_1=1;
if rds_w12 in (40 80 120) then rds80_2=1;
if rds_w16 in (40 80) then rds80_3=1;
if rds_w16=120 then rds120_3=1;
if rds_w18 in (40 80) then rds80_4=1;
if rds_w18 in (120 160) then rds120_4=1;
keep rds40_1 rds80_2 rds80_3 rds80_4 rds120_3 rds120_4 case_no;
proc sort; by case_no;
data dat1; merge dat0 dat2; by case_no;
proc sort; by miss18;
run;
proc freq;
title "Missing data pattern for Tacrine data";
title2 "(all patients)";
tables miss18;
proc sort data=dat1; by miss18 treatmt;
proc freq data=dat1; by miss18;
title "Number of patients in each treatment by missing data pattern";
title2 "(all patients)";
tables treatmt;
run;
* retain only cases in monotone missing pattern;
data dat1; set dat1;
if miss18 in (10 100 101 110 1000 1001 1010 1011 1100 10000 10001 10010
10100 11000 11111) then delete;
proc sort; by miss18;
proc freq;
title "Missing data pattern for patients in monotone pattern";
tables miss18;
proc sort data=dat1; by miss18 treatmt;
proc freq data=dat1; by miss18;
title "Number of patients in each treatment by missing data pattern";
title2 "(patients in monotone pattern only)";
tables treatmt;
run;
* compl=dataset containing ONLY complete cases;
data compl; set dat1;
if miss18 ne 0 then delete;
run;
* y{.} = array for 10 sets of imputed data;
* y =imputed value for adas_Cog at Week 6;
* yy =imputed value for adas_Cog at Week 12;
* yyy =imputed value for adas_Cog at Week 16;
* yyyy=imputed value for adas_Cog at Week 18;
* finalz=final dataset containing complete and imputed data under zero-dose assumption;
data finalz; set compl;
array y{40} y1 yy1 yyy1 yyyy1 y2 yy2 yyy2 yyyy2 y3 yy3 yyy3 yyyy3
y4 yy4 yyy4 yyyy4 y5 yy5 yyy5 yyyy5 y6 yy6 yyy6 yyyy6
y7 yy7 yyy7 yyyy7 y8 yy8 yyy8 yyyy8 y9 yy9 yyy9 yyyy9
y10 yy10 yyy10 yyyy10;
do i=1 to 40;
y{i}=.;
end;
drop i d1-d4 ds40_1 ds80_2 ds80_3 ds80_4 ds120_3 ds120_4;
* finali=final dataset containing complete and imputed data under continuing-dose assumption;
data finali; set compl;
array y{40} y1 yy1 yyy1 yyyy1 y2 yy2 yyy2 yyyy2 y3 yy3 yyy3 yyyy3
y4 yy4 yyy4 yyyy4 y5 yy5 yyy5 yyyy5 y6 yy6 yyy6 yyyy6
y7 yy7 yyy7 yyyy7 y8 yy8 yyy8 yyyy8 y9 yy9 yyy9 yyyy9
y10 yy10 yyy10 yyyy10;
do i=1 to 40;
y{i}=.;
end;
drop i d1-d4 ds40_1 ds80_2 ds80_3 ds80_4 ds120_3 ds120_4;
* finald=final dataset containing complete and imputed data under nearest-dose assumption;
data finald; set compl;
array y{40} y1 yy1 yyy1 yyyy1 y2 yy2 yyy2 yyyy2 y3 yy3 yyy3 yyyy3
y4 yy4 yyy4 yyyy4 y5 yy5 yyy5 yyyy5 y6 yy6 yyy6 yyyy6
y7 yy7 yyy7 yyyy7 y8 yy8 yyy8 yyyy8 y9 yy9 yyy9 yyyy9
y10 yy10 yyy10 yyyy10;
do i=1 to 40;
y{i}=.;
end;
drop i d1-d4 ds40_1 ds80_2 ds80_3 ds80_4 ds120_3 ds120_4;
run;
* start iterations;
%loop;
* create SAS dataset containing both complete and;
* imputed data with adjustment for previous observations;
data p.miw18z; set finalz; * imputed data under zero-dose assumption;
data p.miw18i; set finali; * imputed data under continuing-dose assumption;
data p.miw18d; set finald; * imputed data under nearest-dose assumption;
run;