/* recursion.sas */ title "Binary repeated measures on Ana's recursion data"; data binary; infile '/home/brunner0/441s20/LittleRecursion.data.txt' firstobs=2; input line Participant AgeinMonths Agegroup $ Condition $ Item $ AllTargets; /* Indicator dummy variables. */ /* Agegroup: age4 age5 age6*/ if AgeGroup = '4yos' then age4 = 1; else age4=0; if AgeGroup = '5yos' then age5 = 1; else age5=0; if AgeGroup = '6yos' then age6 = 1; else age6=0; /* Condition: condA condB condC condE */ if Condition = 'A' then condA = 1; else condA = 0; if Condition = 'B' then condB = 1; else condB = 0; if Condition = 'C' then condC = 1; else condC = 0; if Condition = 'E' then condE = 1; else condE = 0; /* Effect coding dummy variables */ /* Agegroup: a4 a5 */ if AgeGroup = '6yos' then a4 = -1; else a4 = age4; if AgeGroup = '6yos' then a5 = -1; else a5 = age5; /* Condition: cA cB cC */ if Condition = 'E' then cA = -1; else cA = condA; if Condition = 'E' then cB = -1; else cB = condB; if Condition = 'E' then cC = -1; else cC = condC; /* Interaction terms */ a4cA = a4*cA; a4cB = a4*cB; a4cC = a4*cC; a5cA = a5*cA; a5cB = a5*cB; a5cC = a5*cC; /* proc freq; title2 'Checking dummy variables'; tables (age4-age6 a4 a5) * AgeGroup / norow nocol nopercent missing; tables (condA -- condE cA cB cC) * Condition / norow nocol nopercent missing; */ proc print data=binary (obs=10); var Participant AgeinMonths Agegroup Condition Item AllTargets; run; proc freq data=binary; tables Condition * (Agegroup Item) / norow nocol nopercent; run; proc univariate data=binary plot; title2 'Age in months'; var AgeinMonths; proc freq data=binary; title2 'Check relationships with AllTargets (no tests)'; tables (Condition Agegroup) * AllTargets / nocol nopercent; run; proc logistic; title2 'Logistic regression to get starting values for simple regression'; model AllTargets (event=last) = AgeinMonths; run; proc nlmixed ; title2 "Simple regression"; title3 "Compare R's beta0hat = -6.21336, beta1hat = 0.06973, sigmasqhat = 0.6957"; parms beta0 = -5.7369 beta1 = 0.0653 sigmasq = 0.1; /* Starting values from ordinary logistic regression, start sigmasq low. */ L = beta0 + beta1*AgeinMonths + delta; /* Delta is a person shock */ expL = exp(L); pi = expL/(1+expL); model AllTargets ~ binomial(1,pi); random delta ~ normal(0,sigmasq) subject=Participant; run; proc logistic; title2 'Logistic regression to get starting values for AgeGroup by Condition'; class Agegroup Condition; model AllTargets (event=last) = Agegroup|Condition; run; proc nlmixed; title2 "AgeGroup by Condition with proc nlmixed"; /* Starting values from ordinary logistic regression, start sigmasq low. */ parms beta0 = -1.6958 beta1 = -0.5607 beta2 = -0.4127 beta3 = 0.6441 beta4 = 0.8237 beta5 = -0.4319 beta6 = 0.0977 beta7 = -0.0390 beta8 = 0.0493 beta9 = 0.0564 beta10 = 0.2039 beta11 = 0.00385 sigmasq = 0.1; L = beta0 + beta1*a4 + beta2*a5 + beta3*cA + beta4*cB + beta5*cC + beta6*a4cA + beta7*a4cB + beta8*a4cC + beta9*a5cA + beta10*a5cB + beta11*a5cC + delta; /* Delta is a person shock */ expL = exp(L); pi = expL/(1+expL); model AllTargets ~ binomial(1,pi); random delta ~ normal(0,sigmasq) subject=Participant; /* The contrast statement specifies a set of expressions equal to zero under H0. They do not have to be linear! */ contrast 'Age group' beta1, beta2; contrast 'Condition' beta3, beta4, beta5; contrast 'AgeGroup by Condition' beta6, beta7, beta8, beta9, beta10, beta11; /* Contrast produces approximate "F" statistics. Multiply by numerator df to get a Wald chi-squared test statistic as in proc logistic. */ run; proc logistic; title2 'Logistic regression to get starting values for AgeinMonths, Condition'; model AllTargets (event=last) = condA condB condC condE AgeinMonths / noint; run; proc nlmixed ; title2 "Age in months and experimental Condition"; parms beta1 = -5.6202 beta2 = -5.4298 beta3 = -6.6776 beta4 = -7.0959 beta5 = 0.0695 sigmasq = 0.1; /* Starting values from ordinary logistic regression, start sigmasq low. */ L = beta1*condA + beta2*condB + beta3*condC + beta4*condE + beta5*AgeinMonths + delta; /* Delta is a person shock */ expL = exp(L); pi = expL/(1+expL); model AllTargets ~ binomial(1,pi); random delta ~ normal(0,sigmasq) subject=Participant; /* Estimate probabilities, with confidence interval, at mean age of 64.66 months. */ estimate 'P(Y=1|A,x=xbar)' exp(beta1+beta5*64.66)/(1+exp(beta1+beta5*64.66)); estimate 'P(Y=1|B,x=xbar)' exp(beta2+beta5*64.66)/(1+exp(beta2+beta5*64.66)); estimate 'P(Y=1|C,x=xbar)' exp(beta3+beta5*64.66)/(1+exp(beta3+beta5*64.66)); estimate 'P(Y=1|E,x=xbar)' exp(beta4+beta5*64.66)/(1+exp(beta4+beta5*64.66)); /* Note P(Y=1|A,x=xbar) = P(Y=1|B,x=xbar) iff beta1=beta2 */ contrast 'A vs B' beta1-beta2; contrast 'A vs C' beta1-beta3; contrast 'A vs E' beta1-beta4; contrast 'B vs C' beta2-beta3; contrast 'B vs E' beta2-beta4; contrast 'C vs E' beta3-beta4; run; /* R code for comparison (without estimation of probabilities) rdata = read.table("LittleRecursion.data.txt") attach(rdata) library(lme4) # For lmer function library(car) # For F-tests with p-values, Wald chi-squared tests # Effect coding contrasts(Agegroup) = contr.sum; contrasts(Condition) = contr.sum model = glmer(AllTargets ~ Condition*Agegroup + (1 | Participant), family=binomial, nAGQ=1) summary(model) Anova(model, type="III") Comments: The gmler function in R is good because you don't have to make your own dummy variables, and it has excellent automatic starting values. For big models, this is important. On the other hand, testing and estimation is much nicer with proc nlmixed -- especially for non-linear functions of the parameters. */