1 OPTIONS NONOTES NOSTIMER NOSOURCE NOSYNTAXCHECK;
NOTE: ODS statements in the SAS Studio environment may disable some output features.
56
57 /* potato.sas */
58 title 'Rotten Potatoes: Factorial ANOVA several different ways';
59 title2 'First, illustrate a 2-factor ANOVA';
60
61 proc format;
62 value tfmt 1 = '10 Degrees' 2 = '16 Degrees';
NOTE: Format TFMT is already on the library WORK.FORMATS.
NOTE: Format TFMT has been output.
63 value ofmt 1 = '2 percent' 2 = '6 percent' 3 = '10%';
NOTE: Format OFMT is already on the library WORK.FORMATS.
NOTE: Format OFMT has been output.
64
NOTE: PROCEDURE FORMAT used (Total process time):
real time 0.00 seconds
cpu time 0.00 seconds
65 data spud;
66 infile '/folders/myfolders/potato.data.txt' firstobs=2;
67 /* Skip the header */
68 input id bact temp oxygen rot;
69 label bact = 'Bacteria Type'
70 temp = 'Storage Temperature'
71 oxygen = 'Oxygen level';
72 /* Cell means coding for all 6 treatment combinations
73 of temperature and bacteria type */
74 if temp=1 and bact=1 then mu11=1; else mu11=0;
75 if temp=1 and bact=2 then mu12=1; else mu12=0;
76 if temp=1 and bact=3 then mu13=1; else mu13=0;
77 if temp=2 and bact=1 then mu21=1; else mu21=0;
78 if temp=2 and bact=2 then mu22=1; else mu22=0;
79 if temp=2 and bact=3 then mu23=1; else mu23=0;
80 combo = 10*temp+bact; /* 11 12 13 21 22 23 */
81 format temp tfmt.; format oxygen ofmt.;
82 run;
NOTE: The infile '/folders/myfolders/potato.data.txt' is:
Filename=/folders/myfolders/potato.data.txt,
Owner Name=root,Group Name=vboxsf,
Access Permission=-rwxrwx---,
Last Modified=16Feb2016:10:28:39,
File Size (bytes)=2037
NOTE: 54 records were read from the infile '/folders/myfolders/potato.data.txt'.
The minimum record length was 35.
The maximum record length was 35.
NOTE: The data set WORK.SPUD has 54 observations and 12 variables.
NOTE: DATA statement used (Total process time):
real time 0.01 seconds
cpu time 0.01 seconds
83
84 proc freq;
85 tables temp*bact*oxygen / norow nocol nopercent;
86
NOTE: There were 54 observations read from the data set WORK.SPUD.
NOTE: PROCEDURE FREQ used (Total process time):
real time 0.08 seconds
cpu time 0.08 seconds
87 proc means;
88 class bact temp;
89 var rot;
90
91 /* Better looking output from proc tabulate */
92
NOTE: There were 54 observations read from the data set WORK.SPUD.
NOTE: PROCEDURE MEANS used (Total process time):
real time 0.03 seconds
cpu time 0.03 seconds
93 proc tabulate;
94 class bact temp;
95 var rot;
96 table (temp all),(bact all) * (mean*rot);
97
98 ods trace on; /* Writes names of output tables on the log file. */
Output Added:
-------------
Name: Table
Label: Table 1
Data Name: Report
Path: Tabulate.Report.Table
-------------
NOTE: There were 54 observations read from the data set WORK.SPUD.
NOTE: PROCEDURE TABULATE used (Total process time):
real time 0.03 seconds
cpu time 0.04 seconds
99 proc glm;
100 title3 'Standard 2-way ANOVA with proc glm';
101 class bact temp;
102 model rot=temp|bact; /* Could have said bact temp bact*temp */
103 means temp|bact;
104 run;
Output Added:
-------------
Name: ClassLevels
Label: Class Levels
Template: STAT.GLM.ClassLevels
Path: GLM.Data.ClassLevels
-------------
Output Added:
-------------
Name: NObs
Label: Number of Observations
Template: STAT.GLM.NObsNotitle
Path: GLM.Data.NObs
-------------
Output Added:
-------------
Name: OverallANOVA
Label: Overall ANOVA
Template: stat.GLM.OverallANOVA
Path: GLM.ANOVA.rot.OverallANOVA
-------------
Output Added:
-------------
Name: FitStatistics
Label: Fit Statistics
Template: stat.GLM.FitStatistics
Path: GLM.ANOVA.rot.FitStatistics
-------------
Output Added:
-------------
Name: ModelANOVA
Label: Type I Model ANOVA
Template: stat.GLM.Tests
Path: GLM.ANOVA.rot.ModelANOVA
-------------
Output Added:
-------------
Name: ModelANOVA
Label: Type III Model ANOVA
Template: stat.GLM.Tests
Path: GLM.ANOVA.rot.ModelANOVA
-------------
Output Added:
-------------
Name: IntPlot
Label: Interaction Plot
Template: Stat.GLM.Graphics.IntPlot
Path: GLM.ANOVA.rot.IntPlot
-------------
NOTE: Means from the MEANS statement are not adjusted for other terms in the model. For adjusted means, use the LSMEANS statement.
Output Added:
-------------
Name: BoxPlot
Label: Distribution of rot by temp
Template: Stat.GLM.Graphics.MeansBoxPlot
Path: GLM.Means.temp.rot.BoxPlot
-------------
Output Added:
-------------
Name: Means
Label: Means
Template: stat.GLM.Means
Path: GLM.Means.temp.Means
-------------
Output Added:
-------------
Name: BoxPlot
Label: Distribution of rot by bact
Template: Stat.GLM.Graphics.MeansBoxPlot
Path: GLM.Means.bact.rot.BoxPlot
-------------
Output Added:
-------------
Name: Means
Label: Means
Template: stat.GLM.Means
Path: GLM.Means.bact.Means
-------------
Output Added:
-------------
Name: BoxPlot
Label: Distribution of rot by bact*temp
Template: Stat.GLM.Graphics.MeansBoxPlot
Path: GLM.Means.bact_temp.rot.BoxPlot
-------------
Output Added:
-------------
Name: Means
Label: Means
Template: stat.GLM.Means
Path: GLM.Means.bact_temp.Means
-------------
104 ! /* Need run with ODS trace */
105 ods trace off;
106
107 /* Proportion of remaining variation for the default tests
108 a = sF/(sF + n-p) */
109
NOTE: PROCEDURE GLM used (Total process time):
real time 0.93 seconds
cpu time 0.42 seconds
110 proc iml;
NOTE: IML Ready
111 title3 'Proportion of remaining variation';
112 NminusP = 53;
112 ! /* From the overall F test */
113 s1 = 1;
113 ! F1 = 38.61;
113 ! /* Main effect for Temperature */
114 s2 = 2;
114 ! F2 = 14.84;
114 ! /* Main effect for Bacteria */
115 s3 = 2;
115 ! F3 = 3.84;
115 ! /* Interaction */
116 Temperature = s1*F1/(s1*F1 + NminusP);
117 Bacteria = s2*F2/(s2*F2 + NminusP);
118 Temp_by_Bact = s3*F3/(s3*F3 + NminusP);
119 print Temperature Bacteria Temp_by_Bact;
120
121 /* Automate the procedure with ODS. */
122
NOTE: Exiting IML.
NOTE: PROCEDURE IML used (Total process time):
real time 0.02 seconds
cpu time 0.02 seconds
123 proc glm data = spud;
124 title3 'Standard 2-way ANOVA with proc glm AGAIN';
125 class bact temp;
126 model rot=temp|bact; /* Could have said bact temp bact*temp */
127 means temp|bact;
128 /* ODS output table name = My data table name (Backwards)*/
129 ods output OverallANOVA = Overall
130 ModelANOVA = TwoWay;
131 run;
NOTE: Means from the MEANS statement are not adjusted for other terms in the model. For adjusted means, use the LSMEANS statement.
NOTE: The data set WORK.TWOWAY has 6 observations and 8 variables.
NOTE: The data set WORK.OVERALL has 3 observations and 7 variables.
132
133 /* Warning: The most recently created data dable is now TwoWay, not spud. */
134
NOTE: PROCEDURE GLM used (Total process time):
real time 0.69 seconds
cpu time 0.37 seconds
135 proc print data=TwoWay;
136 title3 'Look at an ODS output table';
137
NOTE: There were 6 observations read from the data set WORK.TWOWAY.
NOTE: PROCEDURE PRINT used (Total process time):
real time 0.03 seconds
cpu time 0.04 seconds
138 proc iml;
NOTE: IML Ready
139 title3 'Automated proportion of remaining variation';
140 use overall;
140 ! read all into anova1;
140 ! /* "all" means all cases */
141 use TwoWay;
141 ! read all into anova2;
142 print "Take a look at the matrices";
143 print anova1;
143 ! print anova2;
144 print "Proportion of remaining variation";
145 NminusP = anova1[3,1];
146 s1 = anova2[4,2];
146 ! F1 = anova2[4,5];
146 ! /* Main effect for Temperature */
147 s2 = anova2[5,2];
147 ! F2 = anova2[5,5];
147 ! /* Main effect for Bacteria */
148 s3 = anova2[6,2];
148 ! F3 = anova2[6,5];
148 ! /* Interaction */
149 Temperature = s1*F1/(s1*F1 + NminusP);
150 Bacteria = s2*F2/(s2*F2 + NminusP);
151 Temp_by_Bact = s3*F3/(s3*F3 + NminusP);
152 print Temperature Bacteria Temp_by_Bact;
153
154 /* Now generate the tests for main effects and interaction using cell means
155 coding.
156
157 BACTERIA TYPE
158 TEMP 1 2 3
159 Cool mu11 mu12 mu13
160 Warm mu21 mu22 mu23 */
161
162
163 /* The test statement of proc reg uses variable names to stand for the
164 corresponding regression coefficients. By naming the effect cell mean
165 coding dummy variables the same as the population cell means, I can
166 just state the null hypothesis. Isn't this a cute SAS trick? */
167
NOTE: Exiting IML.
NOTE: PROCEDURE IML used (Total process time):
real time 0.04 seconds
cpu time 0.04 seconds
168 proc reg data = spud plots = none;
169 title3 'Using the proc reg test statement and cell means coding';
170 model rot = mu11--mu23 / noint;
171 Overall: test mu11=mu12=mu13=mu21=mu22=mu23;
172 Temperature: test mu11+mu12+mu13 = mu21+mu22+mu23;
173 Bacteria: test mu11+mu21 = mu12+mu22 = mu13+mu23;
174 Bact_by_Temp1: test mu11-mu21 = mu12-mu22 = mu13-mu23;
175 Bact_by_Temp2: test mu12-mu11 = mu22-mu21,
176 mu13-mu12 = mu23-mu22;
177
178 /* Bact_by_Temp1 checks equality of temperature effects.
179 Bact_by_Temp2 checks parallel line segments. They are equivalent. */
180
181
NOTE: PROCEDURE REG used (Total process time):
real time 0.12 seconds
cpu time 0.12 seconds
182 proc glm data = spud;
183 title3 'Proc glm: Using contrasts on the combination variable';
184 class combo; /* 11 12 13 21 22 23 */
185 model rot=combo;
186 contrast 'Main Effect for Temperature'
187 combo 1 1 1 -1 -1 -1;
188 contrast 'Main Effect for Bacteria'
189 combo 1 -1 0 1 -1 0,
190 combo 0 1 -1 0 1 -1;
191 contrast 'Temperature by Bacteria Interaction'
192 combo 1 -1 0 -1 1 0,
193 combo 0 1 -1 0 -1 1;
194
195 /* Illustrate effect coding */
196
NOTE: PROCEDURE GLM used (Total process time):
real time 0.33 seconds
cpu time 0.16 seconds
197 data mashed;
198 set spud;
199 /* Effect coding, with interactions */
200 if bact = 1 then b1 = 1;
201 else if bact = 2 then b1 = 0;
202 else if bact = 3 then b1 = -1;
203 if bact = 1 then b2 = 0;
204 else if bact = 2 then b2 = 1;
205 else if bact = 3 then b2 = -1;
206 if temp = 1 then t = 1;
207 else if temp = 2 then t = -1;
208 /* Interaction terms */
209 tb1 = t*b1; tb2 = t*b2;
210
NOTE: There were 54 observations read from the data set WORK.SPUD.
NOTE: The data set WORK.MASHED has 54 observations and 17 variables.
NOTE: DATA statement used (Total process time):
real time 0.01 seconds
cpu time 0.01 seconds
211 proc reg plots = none;
212 title3 'Effect coding';
213 model rot = b1 b2 t tb1 tb2;
214 Temperature: test t=0;
215 Bacteria: test b1=b2=0;
216 Bact_by_Temp: test tb1=tb2=0;
217
218 /* Do some exploration to follow up the interaction. The regression
219 with cell means coding is easiest. The final product of several runs
220 is shown below. For reference, here is the table of population means again.
221
222 BACTERIA TYPE
223 TEMP 1 2 3
224 Cool mu11 mu12 mu13
225 Warm mu21 mu22 mu23 */
226
NOTE: PROCEDURE REG used (Total process time):
real time 0.13 seconds
cpu time 0.09 seconds
227 proc reg plots = none;
228 title3 'Further exploration using cell means coding';
229 model rot = mu11--mu23 / noint;
230 /* Pairwise comparisons of marginal means for Bacteria Type */
231 Bact1vs2: test mu11+mu21=mu12+mu22;
232 Bact1vs3: test mu11+mu21=mu13+mu23;
233 Bact2vs3: test mu12+mu22=mu13+mu23;
234 /* Effect of temperature for each bacteria type */
235 Temp_for_Bac1: test mu11=mu21;
236 Temp_for_Bac2: test mu12=mu22;
237 Temp_for_Bac3: test mu13=mu23;
238 /* Effect of bacteria type for each temperature */
239 Bact_for_CoolTemp: test mu11=mu12=mu13;
240 Bact_for_WarmTemp: test mu21=mu22=mu23;
241 /* Pairwise comparisons of bacteria types at warm temperature */
242 Bact1vs2_for_WarmTemp: test mu21=mu22;
243 Bact1vs3_for_WarmTemp: test mu21=mu23;
244 Bact2vs3_for_WarmTemp: test mu22=mu23;
245
246 /* We have done a lot of tests. Concerned about buildup of Type I
247 error? We can make ALL the tests into Scheffe follow-ups of the
248 initial test for equality of the 6 cell means. The Scheffe family
249 includes all COLLECTIONS of contrasts, not just all contrasts. */
250
NOTE: PROCEDURE REG used (Total process time):
real time 0.20 seconds
cpu time 0.20 seconds
251 proc iml;
NOTE: IML Ready
252 title3 'Table of critical values for all possible Scheffe tests';
253 numdf = 5;
253 ! /* Numerator degrees of freedom for initial test */
254 dendf =48;
254 ! /* Denominator degrees of freedom for initial test */
255 alpha = 0.05;
256 critval = finv(1-alpha,numdf,dendf);
257 zero = {0 0};
257 ! S_table = repeat(zero,numdf,1);
257 ! /* Make empty matrix */
258 /* Label the columns */
259 namz = {"Number of Contrasts in followup test"
260 " Scheffe Critical Value"};
260 ! mattrib S_table colname=namz;
261 do i = 1 to numdf;
262 s_table(|i,1|) = i;
263 s_table(|i,2|) = numdf/i * critval;
264 end;
265 reset noname;
265 ! /* Makes output look nicer in this case */
266 print "Initial test has" numdf " and " dendf "degrees of freedom."
267 "Using significance level alpha = " alpha;
268 print s_table;
269
270 /* The potato data are balanced; they have equal sample sizes. This yields
271 the same F-tessts for Type I and Type III sums of squares in proc glm.
272 Look at an unbalanced example. */
273
274 %include '/folders/myfolders/441s16/Lecture/readmath2b.sas';
NOTE: Exiting IML.
NOTE: PROCEDURE IML used (Total process time):
real time 0.04 seconds
cpu time 0.04 seconds
NOTE: Format YNFMT is already on the library WORK.FORMATS.
NOTE: Format YNFMT has been output.
NOTE: Format CRSFMT is already on the library WORK.FORMATS.
NOTE: Format CRSFMT has been output.
NOTE: Format NFMT is already on the library WORK.FORMATS.
NOTE: Format NFMT has been output.
NOTE: PROCEDURE FORMAT used (Total process time):
real time 0.00 seconds
cpu time 0.00 seconds
383 /* Creates the data set mathex */
384
NOTE: The infile '/folders/myfolders/exploremath.data.txt' is:
Filename=/folders/myfolders/exploremath.data.txt,
Owner Name=root,Group Name=vboxsf,
Access Permission=-rwxrwx---,
Last Modified=18Jan2016:17:34:49,
File Size (bytes)=44583
NOTE: 579 records were read from the infile '/folders/myfolders/exploremath.data.txt'.
The minimum record length was 75.
The maximum record length was 75.
NOTE: Missing values were generated as a result of performing an operation on missing values.
Each place is given by: (Number of times) at (Line):(Column).
99 at 297:24 99 at 334:13
NOTE: The data set WORK.MATHEX has 579 observations and 34 variables.
NOTE: DATA statement used (Total process time):
real time 0.01 seconds
cpu time 0.01 seconds
385 proc freq;
386 title2 'An unbalanced example: The math data';
387 tables sex*course2 / nocol nopercent chisq;
388
NOTE: There were 579 observations read from the data set WORK.MATHEX.
NOTE: PROCEDURE FREQ used (Total process time):
real time 0.04 seconds
cpu time 0.04 seconds
389 proc glm;
390 title2 'An unbalanced example: The math data';
391 class course2 sex ;
392 model hscalc = sex|course2;
393
394 /* In Type III sums of squares, each effect is tested controlling for
395 all the other effects. This is what we get from the standard
396 regresssion tests.
397
398 In Type I sums of squares, the numerator SS is improvement in
399 explained sum of squares after allowing for all the preceding
400 effects. But in every case the denominator is MSE from the
401 full model. */
402
403 /* Now back to the potato data. t's actually a 3-factor design.
404 First look at the cell means.
405 */
406
NOTE: PROCEDURE GLM used (Total process time):
real time 0.52 seconds
cpu time 0.29 seconds
407 proc tabulate data=spud;
408 title2 'Oxygen by temperature by bactieria type';
409 class bact temp oxygen;
410 var rot;
411 table (oxygen all),(temp all),(bact all) * (mean*rot);
412
NOTE: There were 54 observations read from the data set WORK.SPUD.
NOTE: PROCEDURE TABULATE used (Total process time):
real time 0.07 seconds
cpu time 0.08 seconds
413 proc glm data=spud;
414 title2 'Three-way ANOVA with proc glm';
415 class oxygen bact temp;
416 model rot=temp|bact|oxygen;
417 means temp|bact; /* Based on the tests */
418
419
420
421
422
423
424
425
426
427
428 OPTIONS NONOTES NOSTIMER NOSOURCE NOSYNTAXCHECK;
440