1 OPTIONS NONOTES NOSTIMER NOSOURCE NOSYNTAXCHECK;
NOTE: ODS statements in the SAS Studio environment may disable some output features.
73
74 /* potato2018.sas */
75 title 'Rotten Potatoes: Factorial ANOVA several different ways';
76
77 proc format;
78 value tfmt 1 = '10 Degrees' 2 = '16 Degrees';
NOTE: Format TFMT has been output.
79 value ofmt 1 = '2%' 2 = '6%' 3 = '10%';
NOTE: Format OFMT has been output.
80
NOTE: PROCEDURE FORMAT used (Total process time):
real time 0.03 seconds
cpu time 0.00 seconds
81 data fries;
82 infile '/folders/myfolders/441s18/Lecture/potato.data.txt' firstobs=2;
83 /* Skip the header that R uses. */
84 input id bact temp oxygen rot;
85 label bact = 'Bacteria Type'
86 temp = 'Storage Temperature'
87 oxygen = 'Oxygen level';
88 format temp tfmt.; format oxygen ofmt.;
89
NOTE: The infile '/folders/myfolders/441s18/Lecture/potato.data.txt' is:
Filename=/folders/myfolders/441s18/Lecture/potato.data.txt,
Owner Name=root,Group Name=vboxsf,
Access Permission=-rwxrwx---,
Last Modified=March 01, 2018 13:19:01,
File Size (bytes)=2037
NOTE: 54 records were read from the infile '/folders/myfolders/441s18/Lecture/potato.data.txt'.
The minimum record length was 35.
The maximum record length was 35.
NOTE: The data set WORK.FRIES has 54 observations and 5 variables.
NOTE: DATA statement used (Total process time):
real time 0.02 seconds
cpu time 0.02 seconds
90 proc freq data=fries;
91 tables oxygen*temp*bact / norow nocol nopercent;
92 run;
NOTE: There were 54 observations read from the data set WORK.FRIES.
NOTE: PROCEDURE FREQ used (Total process time):
real time 0.15 seconds
cpu time 0.14 seconds
93
94 proc glm data=fries;
95 title2 'Three-way ANOVA with proc glm';
96 class bact temp oxygen;
97 model rot=temp|bact|oxygen; /* All main effects and interactions */
98 means temp|bact; /* Based on the tests */
99 run;
NOTE: Means from the MEANS statement are not adjusted for other terms in the model. For adjusted means, use the LSMEANS statement.
100
101 /* Want to test everything involving oxygen, all at once.
102 Null hypothesis is that oxygen level has no effect within ANY
103 combination of temperature and bacteria type. This is
104 sometimes called "conditional independence." Do it with
105 effect coding and proc reg. */
106
NOTE: PROCEDURE GLM used (Total process time):
real time 5.20 seconds
cpu time 0.62 seconds
107 data mashed;
108 set fries;
109 /* Effect coding, with interactions */
110 if bact = 1 then b1 = 1;
111 else if bact = 2 then b1 = 0;
112 else if bact = 3 then b1 = -1;
113 if bact = 1 then b2 = 0;
114 else if bact = 2 then b2 = 1;
115 else if bact = 3 then b2 = -1;
116
117 if temp = 1 then t = 1;
118 else if temp = 2 then t = -1;
119
120 if oxygen = 1 then ox1 = 1;
121 else if oxygen = 2 then ox1 = 0;
122 else if oxygen = 3 then ox1 = -1;
123 if oxygen = 1 then ox2 = 0;
124 else if oxygen = 2 then ox2 = 1;
125 else if oxygen = 3 then ox2 = -1;
126
127 /******* Interaction terms *******/
128 /**** temp by bact ****/
129 tb1 = t*b1; tb2 = t*b2;
130 /**** temp by oxygen ****/
131 tox1 = t*ox1; tox2 = t*ox2;
132 /**** bact by oxygen ****/
133 b1ox1 = b1*ox1; b1ox2 = b1*ox2;
134 b2ox1 = b2*ox1; b2ox2 = b2*ox2;
135 /**** temp by bact by oxygen ****/
136 tb1ox1 = t*b1ox1; tb1ox2 = t*b1ox2;
137 tb2ox1 = t*b2ox1; tb2ox2 = t*b2ox2;
138
NOTE: There were 54 observations read from the data set WORK.FRIES.
NOTE: The data set WORK.MASHED has 54 observations and 22 variables.
NOTE: DATA statement used (Total process time):
real time 0.00 seconds
cpu time 0.00 seconds
139 proc reg data=mashed plots=none;
140 title2 'Three-way ANOVA with proc reg and effect coding';
141 model rot = b1 -- tb2ox2;
142 /* Test main effects and interactions in the same order
143 as proc glm */
144 Temperature: test t=0;
145 Bacteria: test b1=b2=0;
146 Bact_by_Temp: test tb1=tb2=0;
147 Oxygen: test ox1=ox2=0;
148 Temp_by_Oxygen: test tox1=tox2=0;
149 Bact_by_Oxygen: test b1ox1=b1ox2=b2ox1=b2ox2=0;
150 Bact_by_Temp_by_Oxygen: test tb1ox1=tb1ox2=tb2ox1=tb2ox2=0;
151 Conditional_Indep: test ox1=ox2 = tox1=tox2 =
152 b1ox1=b1ox2=b2ox1=b2ox2 =
153 tb1ox1=tb1ox2=tb2ox1=tb2ox2 = 0;
154 run;
155
156 /* Drop oxygen. Implicitly we are accepting H0. */
157
NOTE: PROCEDURE REG used (Total process time):
real time 0.31 seconds
cpu time 0.30 seconds
158 proc tabulate data=mashed;
159 title2 'Two-way table of means from proc tabulate';
160 class bact temp;
161 var rot;
162 table (temp all), (bact all) * (mean*rot);
163 run;
NOTE: There were 54 observations read from the data set WORK.MASHED.
NOTE: PROCEDURE TABULATE used (Total process time):
real time 0.07 seconds
cpu time 0.06 seconds
164
165 /* Suppressing the lsmeans plots */
166 ods exclude MeanPlot (persist) DiffPlot (persist);
167 proc glm data=mashed;
168 title2 'Two-way ANOVA, ignoring oxygen level';
169 class bact temp;
170 model rot=temp|bact;
171 lsmeans bact / pdiff tdiff adjust = tukey;
172 lsmeans bact / pdiff tdiff adjust = bon;
173 /* ODS output table name = My data table name (Backwards)*/
174 ods output OverallANOVA = Overall
175 ModelANOVA = TwoWay;
176 run;
NOTE: The data set WORK.TWOWAY has 6 observations and 8 variables.
NOTE: The data set WORK.OVERALL has 3 observations and 7 variables.
177
NOTE: PROCEDURE GLM used (Total process time):
real time 1.18 seconds
cpu time 0.42 seconds
178 proc print data=Overall;
179 title2 'Look at the ODS output tables';
NOTE: There were 3 observations read from the data set WORK.OVERALL.
NOTE: PROCEDURE PRINT used (Total process time):
real time 0.04 seconds
cpu time 0.04 seconds
180 proc print data=TwoWay;
181
NOTE: There were 6 observations read from the data set WORK.TWOWAY.
NOTE: PROCEDURE PRINT used (Total process time):
real time 0.05 seconds
cpu time 0.05 seconds
182 proc iml;
NOTE: IML Ready
183 title2 'Proportions of remaining variation';
184 use overall;
184 ! read all into anova1;
184 ! /* "all" means all cases */
185 use TwoWay;
185 ! read all into anova2;
186 print "Take a look at the matrices";
187 print anova1;
187 ! print anova2;
188 print "Proportion of remaining variation";
189 NminusP = anova1[2,1];
190 s1 = anova2[4,2];
190 ! F1 = anova2[4,5];
190 ! /* Main effect for Temperature */
191 s2 = anova2[5,2];
191 ! F2 = anova2[5,5];
191 ! /* Main effect for Bacteria */
192 s3 = anova2[6,2];
192 ! F3 = anova2[6,5];
192 ! /* Interaction */
193 Temperature = s1*F1/(s1*F1 + NminusP);
194 Bacteria = s2*F2/(s2*F2 + NminusP);
195 Temp_by_Bact = s3*F3/(s3*F3 + NminusP);
196 print Temperature Bacteria Temp_by_Bact;
197
198
199 /* Regression with cell means coding */
200
NOTE: Exiting IML.
NOTE: PROCEDURE IML used (Total process time):
real time 0.07 seconds
cpu time 0.07 seconds
201 data baked;
202 set mashed;
203 /* Cell means coding for all 6 treatment combinations
204 of temperature and bacteria type -- use with proc reg*/
205 if temp=1 and bact=1 then mu11=1; else mu11=0;
206 if temp=1 and bact=2 then mu12=1; else mu12=0;
207 if temp=1 and bact=3 then mu13=1; else mu13=0;
208 if temp=2 and bact=1 then mu21=1; else mu21=0;
209 if temp=2 and bact=2 then mu22=1; else mu22=0;
210 if temp=2 and bact=3 then mu23=1; else mu23=0;
211 /* Combination variable for temperature and bacteria
212 type -- use for contrasts with proc glm */
213 combo = 10*temp+bact; /* 11 12 13 21 22 23 */
214
215 /*
216 BACTERIA TYPE
217 TEMP 1 2 3
218 Cool mu11 mu12 mu13
219 Warm mu21 mu22 mu23
220
221 The test statement of proc reg uses variable names to stand for the
222 corresponding regression coefficients. By naming the effect cell mean
223 coding dummy variables the same as the population cell means, I can
224 just state the null hypothesis. Isn't this a cute SAS trick? */
225
NOTE: There were 54 observations read from the data set WORK.MASHED.
NOTE: The data set WORK.BAKED has 54 observations and 29 variables.
NOTE: DATA statement used (Total process time):
real time 0.01 seconds
cpu time 0.02 seconds
226 proc reg data = baked plots = none;
227 title2 'Using the proc reg test statement and cell means coding';
228 model rot = mu11--mu23 / noint;
229 Overall: test mu11=mu12=mu13=mu21=mu22=mu23;
230 Temperature: test mu11+mu12+mu13 = mu21+mu22+mu23;
231 Bacteria: test mu11+mu21 = mu12+mu22 = mu13+mu23;
232 Bact_by_Temp1: test mu11-mu21 = mu12-mu22 = mu13-mu23;
233 Bact_by_Temp2: test mu12-mu11 = mu22-mu21,
234 mu13-mu12 = mu23-mu22;
235 /* Bact_by_Temp1 checks equality of temperature effects.
236 Bact_by_Temp2 checks parallel line segments. They are equivalent. */
237 run;
238
NOTE: PROCEDURE REG used (Total process time):
real time 0.21 seconds
cpu time 0.20 seconds
239 proc glm data = baked;
240 title2 'Proc glm: Using contrasts on the combination variable';
241 class combo; /* 11 12 13 21 22 23 */
242 model rot=combo;
243 contrast 'Main Effect for Temperature'
244 combo 1 1 1 -1 -1 -1;
245 contrast 'Main Effect for Bacteria'
246 combo 1 -1 0 1 -1 0,
247 combo 0 1 -1 0 1 -1;
248 contrast 'Temperature by Bacteria Interaction'
249 combo 1 -1 0 -1 1 0,
250 combo 0 1 -1 0 -1 1;
251 run;
252
253
254 /* Follow up the interaction and do some more exploration. The regression
255 with cell means coding is easiest. The final product of several runs
256 is shown below. For reference, here is the table of population means again.
257
258 BACTERIA TYPE
259 TEMP 1 2 3
260 Cool mu11 mu12 mu13
261 Warm mu21 mu22 mu23 */
262
NOTE: PROCEDURE GLM used (Total process time):
real time 1.13 seconds
cpu time 0.29 seconds
263 proc reg plots = none;
264 title3 'Further exploration using cell means coding';
265 model rot = mu11--mu23 / noint;
266 /* Pairwise comparisons of marginal means for Bacteria Type */
267 Bact1vs2: test mu11+mu21=mu12+mu22;
268 Bact1vs3: test mu11+mu21=mu13+mu23;
269 Bact2vs3: test mu12+mu22=mu13+mu23;
270 /* Effect of temperature for each bacteria type */
271 Temp_for_Bac1: test mu11=mu21;
272 Temp_for_Bac2: test mu12=mu22;
273 Temp_for_Bac3: test mu13=mu23;
274 /* Effect of bacteria type for each temperature */
275 Bact_for_CoolTemp: test mu11=mu12=mu13;
276 Bact_for_WarmTemp: test mu21=mu22=mu23;
277 /* Pairwise comparisons of bacteria types at warm temperature */
278 Bact1vs2_for_WarmTemp: test mu21=mu22;
279 Bact1vs3_for_WarmTemp: test mu21=mu23;
280 Bact2vs3_for_WarmTemp: test mu22=mu23;
281 /* Follow up the interaction with pairwise
282 comparisons of temperature effects. */
283 TempEffectBact1_vs_2: test mu11-mu21 = mu12-mu22;
284 TempEffectBact1_vs_3: test mu11-mu21 = mu13-mu23;
285 TempEffectBact2_vs_3: test mu12-mu22 = mu13-mu23;
286
287 /* We have done a lot of tests. Concerned about buildup of Type I
288 error? We can make ALL the tests into Scheffe follow-ups of the
289 initial test for equality of the 6 cell means. The Scheffe family
290 includes all COLLECTIONS of contrasts, not just all contrasts. */
291
NOTE: PROCEDURE REG used (Total process time):
real time 0.44 seconds
cpu time 0.39 seconds
292 proc iml;
NOTE: IML Ready
293 title3 'Table of critical values for all possible Scheffe tests';
294 numdf = 5;
294 ! /* Numerator degrees of freedom for initial test */
295 dendf =48;
295 ! /* Denominator degrees of freedom for initial test */
296 alpha = 0.05;
297 critval = finv(1-alpha,numdf,dendf);
298 zero = {0 0};
298 ! S_table = repeat(zero,numdf,1);
298 ! /* Make empty matrix */
299 /* Label the columns */
300 namz = {"Number of Contrasts in followup test"
301 " Scheffe Critical Value"};
301 ! mattrib S_table colname=namz;
302 do i = 1 to numdf;
303 s_table(|i,1|) = i;
304 s_table(|i,2|) = numdf/i * critval;
305 end;
306 reset noname;
306 ! /* Makes output look nicer in this case */
307 print "Initial test has" numdf " and " dendf "degrees of freedom."
308 "Using significance level alpha = " alpha;
309 print s_table;
310
311
312 /*****************************************************************/
313
314
315
316
317
318
319
320 OPTIONS NONOTES NOSTIMER NOSOURCE NOSYNTAXCHECK;
333