/* potato.sas */ title 'Rotten Potatoes: Factorial ANOVA several different ways'; proc format; value tfmt 1 = '10 Degrees' 2 = '16 Degrees'; value ofmt 1 = '2%' 2 = '6%' 3 = '10%'; data fries; infile '/home/u1407221/441s24/data/potato.data.txt' firstobs=2; /* Skip the header that R uses. */ input id bact temp oxygen rot; label bact = 'Bacteria Type' temp = 'Storage Temperature' oxygen = 'Oxygen level'; format temp tfmt.; format oxygen ofmt.; proc freq data=fries; tables oxygen*temp*bact / norow nocol nopercent; proc glm data=fries plots=none; title2 'Three-way ANOVA with proc glm'; class bact temp oxygen; model rot=temp|bact|oxygen; /* All main effects and interactions */ means temp|bact; /* Based on the tests */ /* Want to test everything involving oxygen, all at once. Null hypothesis is that oxygen level has no effect within ANY combination of temperature and bacteria type. This is sometimes called "conditional independence." Do it with effect coding and proc reg. */ data mashed; set fries; /* Effect coding, with interactions */ if bact = 1 then b1 = 1; else if bact = 2 then b1 = 0; else if bact = 3 then b1 = -1; if bact = 1 then b2 = 0; else if bact = 2 then b2 = 1; else if bact = 3 then b2 = -1; if temp = 1 then t = 1; else if temp = 2 then t = -1; if oxygen = 1 then ox1 = 1; else if oxygen = 2 then ox1 = 0; else if oxygen = 3 then ox1 = -1; if oxygen = 1 then ox2 = 0; else if oxygen = 2 then ox2 = 1; else if oxygen = 3 then ox2 = -1; /******* Interaction terms *******/ /**** temp by bact ****/ tb1 = t*b1; tb2 = t*b2; /**** temp by oxygen ****/ tox1 = t*ox1; tox2 = t*ox2; /**** bact by oxygen ****/ b1ox1 = b1*ox1; b1ox2 = b1*ox2; b2ox1 = b2*ox1; b2ox2 = b2*ox2; /**** temp by bact by oxygen ****/ tb1ox1 = t*b1ox1; tb1ox2 = t*b1ox2; tb2ox1 = t*b2ox1; tb2ox2 = t*b2ox2; proc reg data=mashed plots=none; title2 'Three-way ANOVA with proc reg and effect coding'; model rot = b1 -- tb2ox2; /* Test main effects and interactions in the same order as proc glm */ Temperature: test t=0; Bacteria: test b1=b2=0; Bact_by_Temp: test tb1=tb2=0; Oxygen: test ox1=ox2=0; Temp_by_Oxygen: test tox1=tox2=0; Bact_by_Oxygen: test b1ox1=b1ox2=b2ox1=b2ox2=0; Bact_by_Temp_by_Oxygen: test tb1ox1=tb1ox2=tb2ox1=tb2ox2=0; Conditional_Indep: test ox1=ox2 = tox1=tox2 = b1ox1=b1ox2=b2ox1=b2ox2 = tb1ox1=tb1ox2=tb2ox1=tb2ox2 = 0; /* Drop oxygen. Implicitly we are accepting H0. */ proc tabulate data=mashed; title2 'Two-way table of means from proc tabulate'; class bact temp; var rot; table (temp all), (bact all) * (mean*rot); /* Two-way with proc glm, suppressing the lsmeans plots */ ods exclude MeanPlot (persist) DiffPlot (persist); proc glm data=mashed; title2 'Two-way ANOVA, ignoring oxygen level'; class bact temp; model rot=temp|bact; lsmeans bact / pdiff tdiff adjust = tukey; lsmeans bact / pdiff tdiff adjust = bon; /* ODS output table name = My data table name (Backwards)*/ ods output OverallANOVA = Overall ModelANOVA = TwoWay; run; proc print data=Overall; title2 'Look at the ODS output tables'; proc print data=TwoWay; proc iml; title2 'Proportions of remaining variation'; use overall; read all into anova1; /* "all" means all cases */ use TwoWay; read all into anova2; print "Take a look at the matrices"; print anova1; print anova2; print "Proportion of remaining variation"; NminusP = anova1[2,1]; s1 = anova2[4,2]; F1 = anova2[4,5]; /* Main effect for Temperature */ s2 = anova2[5,2]; F2 = anova2[5,5]; /* Main effect for Bacteria */ s3 = anova2[6,2]; F3 = anova2[6,5]; /* Interaction */ Temperature = s1*F1/(s1*F1 + NminusP); Bacteria = s2*F2/(s2*F2 + NminusP); Temp_by_Bact = s3*F3/(s3*F3 + NminusP); print Temperature Bacteria Temp_by_Bact; /* Regression with cell means coding */ data baked; set mashed; /* Cell means coding for all 6 treatment combinations of temperature and bacteria type -- use with proc reg*/ if temp=1 and bact=1 then mu11=1; else mu11=0; if temp=1 and bact=2 then mu12=1; else mu12=0; if temp=1 and bact=3 then mu13=1; else mu13=0; if temp=2 and bact=1 then mu21=1; else mu21=0; if temp=2 and bact=2 then mu22=1; else mu22=0; if temp=2 and bact=3 then mu23=1; else mu23=0; /* Combination variable for temperature and bacteria type -- use for contrasts with proc glm */ combo = 10*temp+bact; /* 11 12 13 21 22 23 */ /* BACTERIA TYPE TEMP 1 2 3 Cool mu11 mu12 mu13 Warm mu21 mu22 mu23 The test statement of proc reg uses variable names to stand for the corresponding regression coefficients. By naming the effect cell mean coding dummy variables the same as the population cell means, I can just state the null hypothesis. Isn't this a cute SAS trick? */ proc reg data = baked plots = none; title2 'Using the proc reg test statement and cell means coding'; model rot = mu11--mu23 / noint; Overall: test mu11=mu12=mu13=mu21=mu22=mu23; Temperature: test mu11+mu12+mu13 = mu21+mu22+mu23; Bacteria: test mu11+mu21 = mu12+mu22 = mu13+mu23; Bact_by_Temp1: test mu11-mu21 = mu12-mu22 = mu13-mu23; Bact_by_Temp2: test mu12-mu11 = mu22-mu21, mu13-mu12 = mu23-mu22; /* Bact_by_Temp1 checks equality of temperature effects. Bact_by_Temp2 checks parallel line segments. They are equivalent. */ proc glm data = baked; title2 'Proc glm: Using contrasts on the combination variable'; class combo; /* 11 12 13 21 22 23 */ model rot=combo; contrast 'Main Effect for Temperature' combo 1 1 1 -1 -1 -1; contrast 'Main Effect for Bacteria' combo 1 -1 0 1 -1 0, combo 0 1 -1 0 1 -1; contrast 'Temperature by Bacteria Interaction' combo 1 -1 0 -1 1 0, combo 0 1 -1 0 -1 1; /* Follow up the interaction and do some more exploration. The regression with cell means coding is easiest. The final product of several runs is shown below. For reference, here is the table of population means again. BACTERIA TYPE TEMP 1 2 3 Cool mu11 mu12 mu13 Warm mu21 mu22 mu23 */ proc reg plots = none data=baked; title3 'Further exploration using cell means coding'; model rot = mu11--mu23 / noint; /* Pairwise comparisons of marginal means for Bacteria Type */ Bact1vs2: test mu11+mu21=mu12+mu22; Bact1vs3: test mu11+mu21=mu13+mu23; Bact2vs3: test mu12+mu22=mu13+mu23; /* Effect of temperature for each bacteria type */ Temp_for_Bac1: test mu11=mu21; Temp_for_Bac2: test mu12=mu22; Temp_for_Bac3: test mu13=mu23; /* Effect of bacteria type for each temperature */ Bact_for_CoolTemp: test mu11=mu12=mu13; Bact_for_WarmTemp: test mu21=mu22=mu23; /* Pairwise comparisons of bacteria types at warm temperature */ Bact1vs2_for_WarmTemp: test mu21=mu22; Bact1vs3_for_WarmTemp: test mu21=mu23; Bact2vs3_for_WarmTemp: test mu22=mu23; /* Follow up the interaction with pairwise comparisons of temperature effects. */ TempEffectBact1_vs_2: test mu11-mu21 = mu12-mu22; TempEffectBact1_vs_3: test mu11-mu21 = mu13-mu23; TempEffectBact2_vs_3: test mu12-mu22 = mu13-mu23; run; /* We have done a lot of tests. Concerned about buildup of Type I error? We can make ALL the tests into Scheffe follow-ups of the initial test for equality of the 6 cell means. The Scheffe family includes all COLLECTIONS of contrasts, not just all contrasts. */ proc iml; title3 'Table of critical values for all possible Scheffe tests'; numdf = 5; /* Numerator degrees of freedom for initial test */ dendf =48; /* Denominator degrees of freedom for initial test */ alpha = 0.05; critval = finv(1-alpha,numdf,dendf); zero = {0 0}; S_table = repeat(zero,numdf,1); /* Make empty matrix */ /* Syntax is repeat(x,nrow,ncol) to get a matrix of repeated x matrices. */ /* Label the columns */ namz = {"Number of Contrasts in followup test" " Scheffe Critical Value"}; mattrib S_table colname=namz; do i = 1 to numdf; s_table[i,1] = i; s_table[i,2] = numdf/i * critval; end; print "Initial test has" numdf " and " dendf "degrees of freedom." "Using significance level alpha = " alpha; print s_table; quit; /*****************************************************************/