1 OPTIONS NONOTES NOSTIMER NOSOURCE NOSYNTAXCHECK;
72
73 /* recursion.sas */
74 title "Binary repeated measures on Ana's recursion data";
75
76 data binary;
77 infile '/folders/myfolders/441s18/Lecture/LittleRecursion.data.txt' firstobs=2;
78 input line Participant AgeinMonths Agegroup $ Condition $ Item $ AllTargets;
79 /* Indicator dummy variables. */
80 /* Agegroup: age4 age5 age6*/
81 if AgeGroup = '4yos' then age4 = 1; else age4=0;
82 if AgeGroup = '5yos' then age5 = 1; else age5=0;
83 if AgeGroup = '6yos' then age6 = 1; else age6=0;
84 /* Condition: condA condB condC condE */
85 if Condition = 'A' then condA = 1; else condA = 0;
86 if Condition = 'B' then condB = 1; else condB = 0;
87 if Condition = 'C' then condC = 1; else condC = 0;
88 if Condition = 'E' then condE = 1; else condE = 0;
89 /* Effect coding dummy variables */
90 /* Agegroup: a4 a5 */
91 if AgeGroup = '6yos' then a4 = -1; else a4 = age4;
92 if AgeGroup = '6yos' then a5 = -1; else a5 = age5;
93 /* Condition: cA cB cC */
94 if Condition = 'E' then cA = -1; else cA = condA;
95 if Condition = 'E' then cB = -1; else cB = condB;
96 if Condition = 'E' then cC = -1; else cC = condC;
97 /* Interaction terms */
98 a4cA = a4*cA; a4cB = a4*cB; a4cC = a4*cC;
99 a5cA = a5*cA; a5cB = a5*cB; a5cC = a5*cC;
100
101 /*
102 proc freq;
103 title2 'Checking dummy variables';
104 tables (age4-age6 a4 a5) * AgeGroup / norow nocol nopercent missing;
105 tables (condA -- condE cA cB cC) * Condition
106 / norow nocol nopercent missing;
107 */
108
NOTE: The infile '/folders/myfolders/441s18/Lecture/LittleRecursion.data.txt' is:
Filename=/folders/myfolders/441s18/Lecture/LittleRecursion.data.txt,
Owner Name=root,Group Name=vboxsf,
Access Permission=-rwxrwx---,
Last Modified=17Mar2018:15:31:01,
File Size (bytes)=110443
NOTE: 1702 records were read from the infile '/folders/myfolders/441s18/Lecture/LittleRecursion.data.txt'.
The minimum record length was 0.
The maximum record length was 63.
NOTE: SAS went to a new line when INPUT statement reached past the end of a line.
NOTE: The data set WORK.BINARY has 1698 observations and 25 variables.
NOTE: DATA statement used (Total process time):
real time 0.01 seconds
cpu time 0.00 seconds
109 proc print data=binary (obs=10);
110 var Participant AgeinMonths Agegroup Condition Item AllTargets;
111 run;
NOTE: There were 10 observations read from the data set WORK.BINARY.
NOTE: PROCEDURE PRINT used (Total process time):
real time 0.05 seconds
cpu time 0.05 seconds
112
113 proc freq data=binary;
114 tables Condition * (Agegroup Item) / norow nocol nopercent;
115 run;
NOTE: There were 1698 observations read from the data set WORK.BINARY.
NOTE: PROCEDURE FREQ used (Total process time):
real time 0.13 seconds
cpu time 0.12 seconds
116
117 proc univariate data=binary plot;
118 title2 'Age in months';
119 var AgeinMonths;
120
NOTE: PROCEDURE UNIVARIATE used (Total process time):
real time 3.72 seconds
cpu time 0.29 seconds
121 proc freq data=binary;
122 title2 'Check relationships with AllTargets (no tests)';
123 tables (Condition Agegroup) * AllTargets / nocol nopercent;
124 run;
NOTE: There were 1698 observations read from the data set WORK.BINARY.
NOTE: PROCEDURE FREQ used (Total process time):
real time 0.06 seconds
cpu time 0.07 seconds
125
126 proc logistic;
127 title2 'Logistic regression to get starting values for simple regression';
128 model AllTargets (event=last) = AgeinMonths;
129 run;
NOTE: PROC LOGISTIC is modeling the probability that AllTargets=1.
NOTE: Convergence criterion (GCONV=1E-8) satisfied.
NOTE: There were 1698 observations read from the data set WORK.BINARY.
NOTE: PROCEDURE LOGISTIC used (Total process time):
real time 0.08 seconds
cpu time 0.07 seconds
130
131 proc nlmixed ;
132 title2 "Simple regression";
133 title3 "Compare R's beta0hat = -6.21336, beta1hat = 0.06973, sigmasqhat = 0.6957";
134 parms beta0 = -5.7369 beta1 = 0.0653 sigmasq = 0.1;
135 /* Starting values from ordinary logistic regression, start sigmasq low. */
136 L = beta0 + beta1*AgeinMonths + delta; /* Delta is a person shock */
137 expL = exp(L);
138 pi = expL/(1+expL);
139 model AllTargets ~ binomial(1,pi);
140 random delta ~ normal(0,sigmasq) subject=Participant;
141 run;
NOTE: Convergence criterion (GCONV=1E-8) satisfied.
NOTE: PROCEDURE NLMIXED used (Total process time):
real time 0.32 seconds
cpu time 0.32 seconds
142
143 proc logistic;
144 title2 'Logistic regression to get starting values for AgeGroup by Condition';
145 class Agegroup Condition;
146 model AllTargets (event=last) = Agegroup|Condition;
147 run;
NOTE: PROC LOGISTIC is modeling the probability that AllTargets=1.
NOTE: Convergence criterion (GCONV=1E-8) satisfied.
NOTE: Under full-rank parameterizations, Type 3 effect tests are replaced by joint tests. The joint test for an effect is a test
that all the parameters associated with that effect are zero. Such joint tests might not be equivalent to Type 3 effect tests
under GLM parameterization.
NOTE: There were 1698 observations read from the data set WORK.BINARY.
NOTE: PROCEDURE LOGISTIC used (Total process time):
real time 0.12 seconds
cpu time 0.12 seconds
148
149 proc nlmixed;
150 title2 "AgeGroup by Condition with proc nlmixed";
151 /* Starting values from ordinary logistic regression, start sigmasq low. */
152 parms beta0 = -1.6958 beta1 = -0.5607 beta2 = -0.4127
153 beta3 = 0.6441 beta4 = 0.8237 beta5 = -0.4319
154 beta6 = 0.0977 beta7 = -0.0390 beta8 = 0.0493
155 beta9 = 0.0564 beta10 = 0.2039 beta11 = 0.00385 sigmasq = 0.1;
156 L = beta0 + beta1*a4 + beta2*a5
157 + beta3*cA + beta4*cB + beta5*cC
158 + beta6*a4cA + beta7*a4cB + beta8*a4cC
159 + beta9*a5cA + beta10*a5cB + beta11*a5cC
160 + delta; /* Delta is a person shock */
161 expL = exp(L);
162 pi = expL/(1+expL);
163 model AllTargets ~ binomial(1,pi);
164 random delta ~ normal(0,sigmasq) subject=Participant;
165 /* The contrast statement specifies a set of expressions equal to zero under H0.
166 They do not have to be linear! */
167 contrast 'Age group' beta1, beta2;
168 contrast 'Condition' beta3, beta4, beta5;
169 contrast 'AgeGroup by Condition' beta6, beta7, beta8, beta9, beta10, beta11;
170 /* Contrast produces approximate "F" statistics. Multiply by numerator df
171 to get a Wald chi-squared test statistic as in proc logistic. */
172 run;
NOTE: Convergence criterion (GCONV=1E-8) satisfied.
NOTE: At least one element of the gradient is greater than 1e-3.
NOTE: PROCEDURE NLMIXED used (Total process time):
real time 5.28 seconds
cpu time 5.22 seconds
173
174
175 proc logistic;
176 title2 'Logistic regression to get starting values for AgeinMonths, Condition';
177 model AllTargets (event=last) = condA condB condC condE AgeinMonths / noint;
178 run;
NOTE: PROC LOGISTIC is modeling the probability that AllTargets=1.
NOTE: Convergence criterion (GCONV=1E-8) satisfied.
NOTE: There were 1698 observations read from the data set WORK.BINARY.
NOTE: PROCEDURE LOGISTIC used (Total process time):
real time 0.10 seconds
cpu time 0.09 seconds
179
180 proc nlmixed ;
181 title2 "Age in months and experimental Condition";
182 parms beta1 = -5.6202 beta2 = -5.4298 beta3 = -6.6776
183 beta4 = -7.0959 beta5 = 0.0695 sigmasq = 0.1;
184 /* Starting values from ordinary logistic regression, start sigmasq low. */
185 L = beta1*condA + beta2*condB + beta3*condC + beta4*condE
186 + beta5*AgeinMonths + delta; /* Delta is a person shock */
187 expL = exp(L);
188 pi = expL/(1+expL);
189 model AllTargets ~ binomial(1,pi);
190 random delta ~ normal(0,sigmasq) subject=Participant;
191 /* Estimate probabilities, with confidence interval, at mean age
192 of 64.66 months. */
193 estimate 'P(Y=1|A,x=xbar)' exp(beta1+beta5*64.66)/(1+exp(beta1+beta5*64.66));
194 estimate 'P(Y=1|B,x=xbar)' exp(beta2+beta5*64.66)/(1+exp(beta2+beta5*64.66));
195 estimate 'P(Y=1|C,x=xbar)' exp(beta3+beta5*64.66)/(1+exp(beta3+beta5*64.66));
196 estimate 'P(Y=1|E,x=xbar)' exp(beta4+beta5*64.66)/(1+exp(beta4+beta5*64.66));
197 /* Note P(Y=1|A,x=xbar) = P(Y=1|B,x=xbar) iff beta1=beta2 */
198 contrast 'A vs B' beta1-beta2; contrast 'A vs C' beta1-beta3;
199 contrast 'A vs E' beta1-beta4; contrast 'B vs C' beta2-beta3;
200 contrast 'B vs E' beta2-beta4; contrast 'C vs E' beta3-beta4;
201 run;
NOTE: Convergence criterion (GCONV=1E-8) satisfied.
NOTE: At least one element of the gradient is greater than 1e-3.
NOTE: PROCEDURE NLMIXED used (Total process time):
real time 1.31 seconds
cpu time 1.27 seconds
202
203 /* R code for comparison (without estimation of probabilities)
204
205 rdata = read.table("LittleRecursion.data.txt")
206 attach(rdata)
207 library(lme4) # For lmer function
208 library(car) # For F-tests with p-values, Wald chi-squared tests
209 # Effect coding
210 contrasts(Agegroup) = contr.sum; contrasts(Condition) = contr.sum
211 model = glmer(AllTargets ~ Condition*Agegroup + (1 | Participant), family=binomial, nAGQ=1)
212 summary(model)
213 Anova(model, type="III")
214
215
216 Comments:
217
218 The gmler function in R is good because you don't have to make your own
219 dummy variables, and it has excellent automatic starting values. For big
220 models, this is important. On the other hand, testing and estimation is
221 much nicer with proc nlmixed -- especially for non-linear functions of the
222 parameters. */
223
224
225
226 OPTIONS NONOTES NOSTIMER NOSOURCE NOSYNTAXCHECK;
239