/********************************************************************/ /********************************************************************/ /* */ /* THE DISTRIB MACRO */ /* */ /********************************************************************/ /* VERSION 1.1 02/15/2008 */ /* */ /* */ /* The DISTRIB macro uses results from the MIXTRAN macro and */ /* estimates the distribution of usual intake for episodically */ /* consumed foods, foods consumed every day, and nutrients (Tooze */ /* et al., 2006, Journal of the American Dietetic Association, 106, */ /* 1575-1587). */ /* */ /* First, the DISTRIB macro reads data sets of parameter estimates */ /* and predicted values output by the MIXTRAN macro. Then, Monte */ /* Carlo simulation of the random effect(s) is used to estimate the */ /* distribution of usual intake. */ /* */ /* The syntax for calling the DISTRIB macro is: */ /* */ /* %macro DISTRIB (seed=, nsim_mc=, modeltype=, pred=, param=, */ /* outlib=, cutpoints=, ncutpnt=, byvar=, */ /* subgroup=, subject=, titles=, food=); */ /* */ /* where: */ /* */ /* "seed" * Specifies the seed for the random number */ /* generator used for the Monte Carlo simulation of */ /* the random effects u1 and u2. */ /* */ /* "nsim_mc" * Specifies the number of repetitions to be used in */ /* the Monte Carlo simulation. For each subject, */ /* one record will be output for each repetition. */ /* */ /* "modeltype" * Specifies the model that was used by the MIXTRAN */ /* macro to prepare the data for the DISTRIB macro. */ /* The value must be the same as the model declared */ /* for the MIXTRAN macro. The possible values are: */ /* "null string" = fit correlated model, */ /* "corr" = fit correlated model, */ /* "nocorr" = fit uncorrelated model, */ /* "amount" = fit amount-only model. */ /* */ /* "pred" * Specifies the name of the data set containing */ /* predicted values calculated in the MIXTRAN macro. */ /* */ /* "param" * Specifies the name of the data set containing the */ /* parameter estimates calculated in the MIXTRAN */ /* macro. */ /* */ /* "outlib" * Specifies the library reference to which the */ /* output data set of distributions will be written. */ /* */ /* "cutpoints" Specifies a list of cutoff points separated by a */ /* space. */ /* */ /* "ncutpnt" Specifies the number of cutoff points. If cutoff */ /* points are given, ncutpnt must also be given. */ /* */ /* "byvar" Specifies a list of by-variables that are in the */ /* data sets "pred" and "param" to indicate that the */ /* MIXTRAN model was fit separately for each */ /* by-group. The DISTRIB macro will produce */ /* estimates of the entire population (not */ /* distributions within each by-group). To obtain */ /* distributions for subpopulations, use the */ /* "subgroup" parameter. */ /* */ /* "subgroup" Specifies one categorical variable used for the */ /* calculation of a separate usual intake */ /* distribution for each subgroup. The distribution */ /* of usual intake will also be calculated for the */ /* overall data set. The subgroup variable must */ /* also be in the call to the MIXTRAN macro because */ /* preparatory calculations are performed in the */ /* macro. */ /* */ /* "subject" (Required when "weekend" is used in MIXTRAN). */ /* Specifies the variable that uniquely identifies */ /* each subject. */ /* */ /* "titles" Specifies the number of title lines (0-4) to be */ /* reserved for the user's titles. The remaining */ /* title lines are used by the macro. The default */ /* value is 0. */ /* */ /* "food" Specifies a name for the analysis, used to label */ /* the output data set. */ /* */ /* Note: * Parameters marked with an asterisk are mandatory, so a */ /* value must be supplied in the macro call. */ /* */ /********************************************************************/ ; %macro DISTRIB (seed=, nsim_mc=, modeltype=, pred=, param=, outlib=, cutpoints=, ncutpnt=, byvar=, subgroup=, subject=, titles=, food=); %let modeltype=%upcase(&modeltype); /* If no Title lines reserved set titles to 0 */ %if &titles = %str() %then %let titles=0; /* set up a macro variable to keep the cut point probabilities if declared */ %if &ncutpnt ne %str() %then %let cutprobs=%str(cutprob1-cutprob&ncutpnt) ; %else %let cutprobs=%str() ; %if &ncutpnt = %str() %then %do ; %if &cutpoints ne %str() %then %put ## user error - # of cutpoints must be given ; %end ; /* set up the subgroup code for use in the percentile calculations */ /* -- if a subgroup is being called -- */ %if &subgroup ne %str( ) %then %do ; %let first_subgroup = %str(if first.&subgroup) ; %let last_subgroup = %str(or last.&subgroup) ; %end; /* of %if &subgroup ne %str( ) %then */ %else %do ; %let first_subgroup = %str( ) ; %let last_subgroup = %str( ) ; %end; /* of else for subgroup */ /* read in the parameters and merge with the predicted data set */ data _predicted2; %if (&byvar = %str()) %then %do; /* allow different estimates by by-group. */ set &pred; if (_n_ = 1) then set ¶m; %end; %else %do; merge &pred ¶m; by &byvar; %end; if _n_ = 1 then do; /* get the weight variable, the number of variance /* groups and the flag indicating a weekend variable /* if any of these were used in the MIXTRAN macro */ call symput('freq_var',FreqName); call symput('Numvargrps',left(Numvargrps)); call symput('wkendflag',weekendflag); end; %if &modeltype ne AMOUNT %then %do; /* prob part available */ stddev_u1 = sqrt(p_var_u1); stddev_u2 = sqrt(a_var_u2); corr_u1u2 = COV_U1U2 / (stddev_u1 * stddev_u2); %end; /* of prob part available */ %else %do; /* no prob part */ stddev_u2 = sqrt(a_var_u2); %end; /* of no prob part*/ run; /* notes to the log */ %put ## In the DISTRIB macro: ; %put ## the model type is &modeltype ; %put ## the food variable is &food ; %if &freq_var ne %str(dummywt) %then %put ## the weight variable name is &freq_var ; %if &byvar ne %str() %then %put ## the by-variable is &byvar ; %if &subgroup ne %str() %then %put ## the subgroup is &subgroup ; %if &wkendflag = %str(1) %then %put ## coded for a weekend variable ; %if &numvargrps ne %str(0) %then %put ## the number of variance groups is &numvargrps ; %put ## predicted data set = &pred ; %put ## parameter data set = ¶m ; %if &ncutpnt ne %str() %then %do; %put ## the number of cutpoints is &ncutpnt; %put ## the cutpoints are &cutpoints ; %end ; /* calculate group weights and total weights. */ proc means data=_predicted2 noprint; var indwts ; output out=_outmeans(keep=totwts totsubjects ) sum=totwts n=totsubjects; run; data _predicted2; set _predicted2 (drop=totwts totsubjects); if (_n_ = 1) then set _outmeans; %if (&subgroup = %str()) %then %do; grpwts = totwts; numsubjects = totsubjects; %end; run; %if (&subgroup ^= %str()) %then %do; proc sort data=_predicted2; by &subgroup; run; proc means data=_predicted2 noprint; by &subgroup; var indwts; output out=_outmeans(keep=&subgroup grpwts numsubjects) sum=grpwts n=numsubjects; run; data _predicted2; merge _predicted2 (drop=grpwts numsubjects) _outmeans; by &subgroup; run; %end; *****************************************; /* ** if a subgroup variable is in use, this macro will create an ** overall category. This will be coded either _overall or ** -255 depending on the type of the variable in the subgroup. ** the intake is calculated in the monte carlo simulation below. ** If subgroup has been identified a second record will be output ** each time with the overall subgroup code and an adjusted weight ** and number of subjects. ** */ /* find the variable type of the subgroup, if any, for the overall category */ %if &subgroup ne %str() %then %do ; %let dsid=%sysfunc(open(work._predicted2,is)); %if &dsid = 1 %then %do; %let subnum=%qsysfunc(varnum(&dsid,&subgroup)); %let subtype=%qsysfunc(vartype(&dsid,&subnum)); %let rc=%sysfunc(close(&dsid)); %end; /* of reading in variable type */ %else put " ERROR: unable to open _predicted2 to check subgroup length"; proc sort data=_predicted2 ; by &subgroup ; run; %end ; /* end of code to create overall category of the subgroup */ ***********************************************************************; /* monte carlo simulations of intake. */ /* if subgroups repeat the monte carlo simulations for overall group */ %if &subgroup ne %str() %then %let _mcsimrep = 2 ; %else %let _mcsimrep = 1 ; %do mc = 1 %to &_mcsimrep ; data _mcsim&mc (drop=grpwts numsubjects) groupinfo&mc(keep=&subgroup grpwts numsubjects ); retain seed &seed; set _predicted2 end=eof; /* Keep only variables needed. Which ones depend whether this is a weekend run */ %if &wkendflag eq %str(1) %then %do ; %if &modeltype ne %str(AMOUNT) %then %let mcp = %str(mc_p mc_p_0 mc_p_1); %else %let mcp = ; keep &subject &subgroup mcsim adjwt weekend mc_t mc_t_0 mc_t_1 &mcp mc_a mc_a_0 mc_a_1 grpwts numsubjects ; %end ; /* of keep for weekends */ %else %do ; %if &modeltype ne %str(AMOUNT) %then %let mcp = %str( mc_p); %else %let mcp = ; keep &subject &subgroup mcsim adjwt mc_t &mcp mc_a grpwts numsubjects ; %end ; /* of keep for not weekends */ /* for the overall data when subgroups are used: */ %if &subgroup ne %str() and &mc = 2 %then %do ; /* code the subgroup to an overall level */ %if &subtype = %str(N) %then %do ; /* numeric variable */ &subgroup = -255 ; %end; %else %if &subtype=%str(C) %then %do ; /* character variable */ &subgroup = %str(_Overall) ; %end; %else %do ; %put Error in recode of subgroups, vartype = &subtype ; %end; /* use the total sum of the weights to get the adjusted weight */ /* and output the new record nsim_mc times */ grpwts=totwts ; numsubjects=totsubjects ; %end ; /* of assigning overall values in mc sims if a subgroup used */ /* output data such as sum of weights and number of subjects */ %if (&subgroup ne %str() and &mc=1) %then %do ; if lag(&subgroup) ne &subgroup then output groupinfo&mc; %end; %else %do ; if _n_ = 1 then output groupinfo&mc ; %end; /* asssign variance of e here */ %if &Numvargrps =%str(2) and &wkendflag = %str(1) %then %do; a_var_e_0 = a_vargrp2; a_var_e_1 = a_vargrp1; %end; %else %if &numvargrps gt %str(2) %then %do; put "User ERROR: The DISTRIB macro does not process more than two variance groups at this time. "; %end; /* calculate an adjusted weight per person based on weight supplied in MIXTRAN */ /* number of repetitions is taken into account */ adjwt=(indwts/&nsim_mc)/grpwts ; /* generate monte carlo random effects. */ do mcsim = 1 to &nsim_mc; /* calculate u1 and u2 */ %if &modeltype ne %str(AMOUNT) %then %do; /* prob part available */ call rannor(seed,u1); call rannor(seed,u2); u2 = corr_u1u2 * u1 + sqrt(1 - corr_u1u2**2) * u2; u1 = stddev_u1 * u1; u2 = stddev_u2 * u2; %end; /* prob part available*/ %else %do; /* no prob part */ call rannor(seed,u2); u2 = stddev_u2 * u2; %end; /* no prob part */ /* for weekend runs calculate intake from x1b1_0, x1b1_1 x2b2_0 & x2b2_1 */ /* for runs with no weekend use x1b1 and x2b2 */ %if &wkendflag eq %str(1) %then %do; %let wk = _0 ; %let wkgr = 2 ; %if &numvargrps ne %str(0) %then %let ve = _0 ; %else %let ve = ; /* for variance of e */ %end; %else %do ; %let wk = ; %let wkgr=1; %let ve = ; %end; %do i = 1 %to &wkgr; %if &modeltype ne %str(AMOUNT) %then %do; /* prob part available */ mc_logit_p&wk = x1b1&wk + u1; mc_p&wk = 1 / (1 + exp(-mc_logit_p&wk)); %end; /* prob part available*/ mc_bca&wk = x2b2&wk + u2; min_bca&wk=((.5*min_amt)**a_lambda-1)/a_lambda ; /* back-transform A (amount on consumption day) to original scale, with Taylor linearization. */ if (a_lambda = 0) then do; ginv_a&wk = exp(mc_bca&wk); d2ginv_a&wk = exp(mc_bca&wk); end; else do; temp&wk = max(a_lambda*mc_bca&wk+1,a_lambda*min_bca&wk+1); ginv_a&wk = temp&wk**(1/a_lambda) ; d2ginv_a&wk = max(0,(1-a_lambda)*temp&wk**(1/a_lambda-2)) ; end; mc_a&wk = ginv_a&wk + d2ginv_a&wk * a_var_e&ve / 2; %if &modeltype ne %str(AMOUNT) %then %do ; /* Probability available */ mc_t&wk = mc_p&wk * mc_a&wk; %end ; %else %do; /* No Probability */ mc_t&wk = mc_a&wk ; %end; %let wk=_1; %if &numvargrps ne %str(0) %then %let ve = _1 ; %else %let ve = ; /* for variance of e */ %end; /* of i = 1 to wkgr */ %if &wkendflag = %str(1) %then %do ; mc_t=((4/7*mc_t_0)+(3/7*mc_t_1)); mc_a=((4/7*mc_a_0)+(3/7*mc_a_1)); %if &modeltype ne %str(AMOUNT) %then %do; mc_p=((4/7*mc_p_0)+(3/7*mc_p_1)); %end; %end; /* output the adjusted weight and the intake */ totsum+adjwt ; /* sum of adjwts = 1 if no subgroups, or # of subgroups*/ output _mcsim&mc; end; /* of monte carlo simulations through _mcsim */ if (eof) then call symput("seed",trim(left(put(seed,12.)))); if (eof) then put "the sum of the adjusted weights = " totsum ; run; %end ; /* of do mc = 1 to _mcsimrep */ /* if subgroups concatenate the subgroups and the overall group */ %if &subgroup ne %str() %then %do ; data _mcsim1; set _mcsim1 _mcsim2; /* add the overall information to the data set groupinfo */ data groupinfo1 ; set groupinfo1 groupinfo2; %end ; /* of concatenation if subgroup */ **************************************************************************; ********* sort by mc_t for calculation of percentiles *********; proc sort data= _mcsim1 ; by &subgroup mc_t; run; *************************************************************************** ** calculate the percentiles by linear interpolation. ***************************************************************************; data outpercentiles; set _mcsim1 end=finalrec; by &subgroup mc_t; retain jperc 1 adjwt_ties wsum calcflag 0 last_wsumplush last_mc_t tpercentile0-tpercentile100 horizline0-horizline100 ; ; %if &ncutpnt ne %str() %then %do ; retain kcut 1 cutcalcflag 0 cutprob1-cutprob&ncutpnt cutvertline1-cutvertline&ncutpnt ; %end ; /* of retain if cutpoints used */ array horizline(101) horizline0-horizline100; array tpercentile(101) tpercentile0-tpercentile100; %if &ncutpnt ne %str() %then %do ; array cutvertline(&ncutpnt) cutvertline1-cutvertline&ncutpnt; array cutprob(&ncutpnt) cutprob1-cutprob&ncutpnt; %end ; ***** read in cutpoints and generate values for the horizontal lines ********** ; if _n_ = 1 then do ; do j = 0 to 100 ; horizline(j+1) = j/100 ; end; %if &ncutpnt ne %str() %then %do ; %do c = 1 %to &ncutpnt ; cutvertline&c = %scan(&cutpoints,&c,%str( )) ; %end ; %end ; /* of reading in the cutpoints */ end ; /* of _n_ = 1 on percentiles calculation data */ %if &subgroup ne %str() %then %do ; /* initialise if subgroup used */ if first.&subgroup then do; do j = 0 to 100; tpercentile(j+1) = . ; end; jperc = 1; adjwt_ties = 0; wsum = 0 ; calcflag = 0 ; last_wsumplush = . ; last_mc_t = . ; sortorder=0; %if &ncutpnt ne %str() %then %do ; kcut = 1 ; cutcalcflag = 0; %do c = 1 %to &ncutpnt ; cutprob&c = . ; %end; %end ; /* of resetting cutprob values & flags to missing if subgroups */ end; %end ; /* initialising if subgroup */ adjwt_ties = adjwt_ties + adjwt; **** Calculate adjusted weights for tied t values; if last.mc_t then do; halfw = adjwt_ties/2; wsumplush = wsum + halfw; *** Need 2 points to calculate slope and intercept. Therefore, skip calculations for first t value ****; do while ( calcflag=1 and ((wsumplush >= horizline(jperc)) or (finalrec=1) &last_subgroup) ); **** use linear interpolation to obtain percentiles; slope = ( wsumplush - last_wsumplush ) / ( mc_t - last_mc_t ); interceptb = wsumplush - slope * mc_t; tpercentile(jperc) = max(0, ( horizline(jperc) - interceptb ) / slope); if jperc = 101 then calcflag = 0; **** All percentiles calculated. Set flag to end loop; else jperc = jperc + 1; end; /* of do while for calculating percentiles*/ ***********************************************************************; *** do while for cut points if requested */ %if &ncutpnt ne %str() %then %do ; *** Need 2 points to calculate slope and intercept. Therefore, skip calculations for first t value ; do while ( cutcalcflag=1 and ((mc_t >= cutvertline(kcut)) or (finalrec=1) &last_subgroup) ); **** use linear interpolation to obtain cutpoint probabilities; slope = ( wsumplush - last_wsumplush ) / ( mc_t - last_mc_t ); interceptb = wsumplush - slope * mc_t; tempcutprob = max(0, ( slope * cutvertline(kcut) + interceptb )); cutprob(kcut) = min(1,tempcutprob); if kcut = &ncutpnt then cutcalcflag = 0; **** All cutpoint probabilities calculated. Set flag to end loop; else kcut = kcut + 1; end; /* of do while for cutpoint probabilities */ %end ; /* of making cutpoint probabilities if requested */ ******************************************************************; if finalrec &last_subgroup then output outpercentiles; else do; *** prepare variable values for calculations involving next t value; adjwt_ties = 0; wsum = wsumplush + halfw; last_wsumplush = wsumplush; last_mc_t = mc_t; calcflag=1; %if &ncutpnt ne %str() %then %do ; if kcut < &ncutpnt then cutcalcflag=1; %end ; end; end; /* of last.mc_t */ run; *************************************************************************** ** end of percentiles and cutpoints code ****************************************************************************; /* combine percentiles, cutpoint probabilities, weighted mean of mc_t and number of subjects /* into one data file and save it. The data set will be saved to "&outlib.". /* the name will be 'descript' followed by the food name in &food and if a /* weight variable was used then also by the name of the weight variable */ proc means data=_mcsim1 noprint; %if &subgroup ne %str() %then %do ; class &subgroup ; %end; var mc_t ; weight adjwt ; output out=_mean mean=mean_mc_t ; /* sort groupinfo if subgroup, and subset means to subgroup level data */ %if &subgroup ne %str() %then %do ; proc sort data=groupinfo1 ; by &subgroup ; data _mean ; set _mean (where=(_type_=1)); %end; %if &freq_var = %str(dummywt) %then %let freq_var= %str(); data &outlib..descript_&food._&freq_var (keep= &subgroup numsubjects mean_mc_t tpercentile0-tpercentile100 &cutprobs); merge _mean (keep=&subgroup mean_mc_t _type_ ) outpercentiles groupinfo1 ; %if &subgroup ne %str() %then %do ; by &subgroup ; if first.&subgroup ; %end ; %else %do ; if _n_ =1 ; /* no subgroups, only need the number of subjects overall */ %end ; run; ** CHECK; Proc print data=&outlib..descript_&food._&freq_var noobs uniform; %if (&subgroup ^= %str()) %then id &subgroup%str(;); var numsubjects mean_mc_t tpercentile1 tpercentile5 tpercentile10 tpercentile15 tpercentile25 tpercentile40 tpercentile50 tpercentile75 tpercentile85 tpercentile90 tpercentile95 tpercentile99 &cutprobs ; title%eval(&titles+1) 'Selected percentiles and cutpoint probabilities from the distribution'; run; %put ## the data set output by DISTRIB will be &outlib..descript_&food._&freq_var ; proc datasets lib=work; delete _mcsim1 /* to keep _mcsim1 for further analysis in this program comment it out */ _predicted2 outpercentiles _mean groupinfo1 ; %if &subgroup ne %str() %then %do ; delete _mcsim2 groupinfo2 ; %end ; /* of deleting work data sets if subgroups */ quit; run; %mend DISTRIB; /* END OF THE DISTRIB MACRO */ /*******************************************************************/