STA 402S 1998: SAS Lesson 4

Castle Bakery Example from Chapter 19

The file castle.dat looks like this.

1   1   47
1   1   43
2   1   62
2   1   68
3   1   41
3   1   39
1   2   46
1   2   40
2   2   67
2   2   71
3   2   42
3   2   46

Here is the command file.


/*************  castle1.sas *********************
 Two-way ANOVA SAS example with dummy var coding:
         See Section 19.4; data on P. 818
***************************************************/

options linesize=79;

/* Value labels for the two factors; the data step below attaches them
   to height and width with FORMAT statements. */
proc format;
     value htfmt
           1 = 'Bottom'
           2 = 'Middle'
           3 = 'Top';
     value widfmt
           1 = 'Regular'
           2 = 'Wide';
run;

/* Read castle.dat (height, width, sales) and build, for every observation,
   the effect-coded dummy variables, the cell means indicators and a
   single combination variable identifying the height-by-width cell. */
data bake;
     infile 'castle.dat';
     input height width sales;
     label height = 'Display Height'
           width  = 'Display Width'
           sales  = 'Sales in cases';
     /* Associate variables with print formats */
     format height htfmt. width widfmt.;

     /* Effect dummy variable coding.  A SAS comparison evaluates to 1 when
        true and 0 when false, so each dummy variable is written as
        arithmetic on comparisons rather than as an if/else chain. */
     w1 = 2*(width = 1) - 1;            /*  1 if width = 1, otherwise -1   */
     h1 = (height = 1) - (height = 3);  /*  1, 0, -1 for height = 1, 2, 3  */
     h2 = (height = 2) - (height = 3);  /*  0, 1, -1 for height = 1, 2, 3  */
     /* Interactions */
     h1w1 = h1*w1;
     h2w1 = h2*w1;

     /* Cell means coding: indicators are named after their parameters.
        Each is 1 in its own height-by-width cell and 0 elsewhere. */
     mu11 = (height = 1 and width = 1);
     mu12 = (height = 1 and width = 2);
     mu21 = (height = 2 and width = 1);
     mu22 = (height = 2 and width = 2);
     mu31 = (height = 3 and width = 1);
     mu32 = (height = 3 and width = 2);

     /* Make combination variable: tens digit is height, units digit is width */
     hwcombo = 10*height + width;
run;

/* Check the layout of the design and the coding of the combination
   variable. */
proc freq data=bake;
     /* Cell counts for the 3 x 2 height-by-width layout */
     tables height * width / nopercent norow nocol;
     /* Cross hwcombo with BOTH factors.  The parentheses are required:
        "hwcombo * height width" would request hwcombo*height plus a
        one-way frequency table of width, not hwcombo*width. */
     tables hwcombo * (height width) / nopercent norow nocol;
run;

/* Two-way ANOVA: glm does the dummy vars for us */
proc glm data=bake;
     class height width;
     /* Bar notation expands to: height width height*width */
     model sales = height | width;
     /* Marginal means and cell means */
     means height width height*width;
     /* Multiple comparisons among the display heights, reported as
        confidence intervals for the pairwise differences (cldiff) */
     means height / alpha=.05 tukey bon scheffe cldiff;
     /* Why didn't I bother to do it for shelf width? */
run;

/* Now do it with dummy var regression using effect coding */
proc reg data=bake;
     model sales = h1 h2 w1 h1w1 h2w1;
     /* Each test lists one equation per restriction, separated by commas;
        the statement labels name the tests in the output. */
     height:   test h1 = 0, h2 = 0;
     width:    test w1 = 0;
     h_by_w:   test h1w1 = 0, h2w1 = 0;
run;

/* Now with cell means coding: no intercept, one coefficient per cell */
proc reg data=bake;
     model sales = mu11 mu12 mu21 mu22 mu31 mu32 / noint;
     /* Main effect of height: the three height totals are equal */
     height:     test mu11 + mu12 = mu21 + mu22,
                      mu21 + mu22 = mu31 + mu32;
     /* Main effect of width: the two width totals are equal */
     width:      test mu11 + mu21 + mu31 = mu12 + mu22 + mu32;
     /* Interaction: the width difference is the same at every height */
     h_by_w:     test mu11 - mu12 = mu21 - mu22,
                      mu21 - mu22 = mu31 - mu32;
run;

/* Combination variables are good for comparing cell means */
proc glm data=bake;
     class hwcombo;
     model sales = hwcombo;
     /* Tukey comparisons among the six cell means.
        Could have used cldiff option */
     means hwcombo / tukey;
run;
 

Part of the lst file:


                         TABLE OF HEIGHT BY WIDTH

                      HEIGHT(Display Height)
                                WIDTH(Display Width)
                      Frequency|Regular |Wide    |  Total
                      ---------+--------+--------+
                      Bottom   |      2 |      2 |      4
                      ---------+--------+--------+
                      Middle   |      2 |      2 |      4
                      ---------+--------+--------+
                      Top      |      2 |      2 |      4
                      ---------+--------+--------+
                      Total           6        6       12

                          TABLE OF HWCOMBO BY HEIGHT

                 HWCOMBO     HEIGHT(Display Height)

                 Frequency|Bottom  |Middle  |Top     |  Total
                 ---------+--------+--------+--------+
                       11 |      2 |      0 |      0 |      2
                 ---------+--------+--------+--------+
                       12 |      2 |      0 |      0 |      2
                 ---------+--------+--------+--------+
                       21 |      0 |      2 |      0 |      2
                 ---------+--------+--------+--------+
                       22 |      0 |      2 |      0 |      2
                 ---------+--------+--------+--------+
                       31 |      0 |      0 |      2 |      2
                 ---------+--------+--------+--------+
                       32 |      0 |      0 |      2 |      2
                 ---------+--------+--------+--------+
                 Total           4        4        4       12


                        General Linear Models Procedure
                            Class Level Information

                     Class    Levels    Values

                     HEIGHT        3    Bottom Middle Top

                     WIDTH         2    Regular Wide

                    Number of observations in data set = 12


                        General Linear Models Procedure

Dependent Variable: SALES   Sales in cases
                                     Sum of            Mean
Source                  DF          Squares          Square  F Value    Pr > F

Model                    5     1580.0000000     316.0000000    30.58    0.0003
Error                    6       62.0000000      10.3333333
Corrected Total         11     1642.0000000

                  R-Square             C.V.        Root MSE         SALES Mean

                  0.962241         6.303040       3.2145503          51.000000

Source                  DF        Type I SS     Mean Square  F Value    Pr > F

HEIGHT                   2     1544.0000000     772.0000000    74.71    0.0001
WIDTH                    1       12.0000000      12.0000000     1.16    0.3226
HEIGHT*WIDTH             2       24.0000000      12.0000000     1.16    0.3747

Source                  DF      Type III SS     Mean Square  F Value    Pr > F

HEIGHT                   2     1544.0000000     772.0000000    74.71    0.0001
WIDTH                    1       12.0000000      12.0000000     1.16    0.3226
HEIGHT*WIDTH             2       24.0000000      12.0000000     1.16    0.3747


                        General Linear Models Procedure

                 Level of       ------------SALES------------
                 HEIGHT     N       Mean              SD

                 Bottom     4     44.0000000       3.16227766
                 Middle     4     67.0000000       3.74165739
                 Top        4     42.0000000       2.94392029

                 Level of       ------------SALES------------
                 WIDTH      N       Mean              SD

                 Regular    6     50.0000000       12.0664825
                 Wide       6     52.0000000       13.4313067

            Level of   Level of       ------------SALES------------
            HEIGHT     WIDTH      N       Mean              SD

            Bottom     Regular    2     45.0000000       2.82842712
            Bottom     Wide       2     43.0000000       4.24264069
            Middle     Regular    2     65.0000000       4.24264069
            Middle     Wide       2     69.0000000       2.82842712
            Top        Regular    2     40.0000000       1.41421356
            Top        Wide       2     44.0000000       2.82842712
            

                         General Linear Models Procedure

           Tukey's Studentized Range (HSD) Test for variable: SALES

          NOTE: This test controls the type I experimentwise error rate.

              Alpha= 0.05  Confidence= 0.95  df= 6  MSE= 10.33333
                  Critical Value of Studentized Range= 4.339
                    Minimum Significant Difference= 6.9743

       Comparisons significant at the 0.05 level are indicated by '***'.

                             Simultaneous            Simultaneous
                                 Lower    Difference     Upper
               HEIGHT         Confidence    Between   Confidence
             Comparison          Limit       Means       Limit

          Middle - Bottom       16.026      23.000      29.974   ***
          Middle - Top          18.026      25.000      31.974   ***

          Bottom - Middle      -29.974     -23.000     -16.026   ***
          Bottom - Top          -4.974       2.000       8.974

          Top    - Middle      -31.974     -25.000     -18.026   ***
          Top    - Bottom       -8.974      -2.000       4.974


Compare Tukey confidence intervals on P. 858.

Now regression: First effect coding


Model: MODEL1
Dependent Variable: SALES      Sales in cases

                             Analysis of Variance

                                Sum of         Mean
       Source          DF      Squares       Square      F Value       Prob>F

       Model            5   1580.00000    316.00000       30.581       0.0003
       Error            6     62.00000     10.33333
       C Total         11   1642.00000

           Root MSE       3.21455     R-square       0.9622
           Dep Mean      51.00000     Adj R-sq       0.9308
           C.V.           6.30304

                              Parameter Estimates

                      Parameter      Standard    T for H0:
     Variable  DF      Estimate         Error   Parameter=0    Prob > |T|

     INTERCEP   1     51.000000    0.92796073        54.959        0.0001
     H1         1     -7.000000    1.31233465        -5.334        0.0018
     H2         1     16.000000    1.31233465        12.192        0.0001
     W1         1     -1.000000    0.92796073        -1.078        0.3226
     H1W1       1      2.000000    1.31233465         1.524        0.1783
     H2W1       1     -1.000000    1.31233465        -0.762        0.4749

Dependent Variable: SALES
Test: HEIGHT   Numerator:    772.0000  DF:    2   F value:  74.7097
               Denominator:  10.33333  DF:    6   Prob>F:    0.0001

Dependent Variable: SALES
Test: WIDTH    Numerator:     12.0000  DF:    1   F value:   1.1613
               Denominator:  10.33333  DF:    6   Prob>F:    0.3226

Dependent Variable: SALES
Test: H_BY_W   Numerator:     12.0000  DF:    2   F value:   1.1613
               Denominator:  10.33333  DF:    6   Prob>F:    0.3747


Compare, from GLM (earlier)

Source                  DF        Type I SS     Mean Square  F Value    Pr > F

HEIGHT                   2     1544.0000000     772.0000000    74.71    0.0001
WIDTH                    1       12.0000000      12.0000000     1.16    0.3226
HEIGHT*WIDTH             2       24.0000000      12.0000000     1.16    0.3747

And now cell means coding

Model: MODEL1
NOTE: No intercept in model. R-square is redefined.
Dependent Variable: SALES      Sales in cases

                             Analysis of Variance

                                Sum of         Mean
       Source          DF      Squares       Square      F Value       Prob>F

       Model            6  32792.00000   5465.33333      528.903       0.0001
       Error            6     62.00000     10.33333
       U Total         12  32854.00000

           Root MSE       3.21455     R-square       0.9981
           Dep Mean      51.00000     Adj R-sq       0.9962
           C.V.           6.30304

                              Parameter Estimates

                      Parameter      Standard    T for H0:
     Variable  DF      Estimate         Error   Parameter=0    Prob > |T|

     MU11       1     45.000000    2.27303028        19.797        0.0001
     MU12       1     43.000000    2.27303028        18.917        0.0001
     MU21       1     65.000000    2.27303028        28.596        0.0001
     MU22       1     69.000000    2.27303028        30.356        0.0001
     MU31       1     40.000000    2.27303028        17.598        0.0001
     MU32       1     44.000000    2.27303028        19.357        0.0001

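Note that the overall ANOVA is different: because the model has no intercept, the null hypothesis here is that all six cell means equal zero, not just that they are equal to one another. Also, compare the parameter estimates to the sample cell means from earlier; the estimates are exactly the cell means.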

            Level of   Level of       ------------SALES------------
            HEIGHT     WIDTH      N       Mean              SD

            Bottom     Regular    2     45.0000000       2.82842712
            Bottom     Wide       2     43.0000000       4.24264069
            Middle     Regular    2     65.0000000       4.24264069
            Middle     Wide       2     69.0000000       2.82842712
            Top        Regular    2     40.0000000       1.41421356
            Top        Wide       2     44.0000000       2.82842712

But the tests for main effects and interactions are the same.


Dependent Variable: SALES
Test: HEIGHT   Numerator:    772.0000  DF:    2   F value:  74.7097
               Denominator:  10.33333  DF:    6   Prob>F:    0.0001

Dependent Variable: SALES
Test: WIDTH    Numerator:     12.0000  DF:    1   F value:   1.1613
               Denominator:  10.33333  DF:    6   Prob>F:    0.3226

Dependent Variable: SALES
Test: H_BY_W   Numerator:     12.0000  DF:    2   F value:   1.1613
               Denominator:  10.33333  DF:    6   Prob>F:    0.3747

Now the combination variable. Really, this is most useful when the interaction is significant.

                        General Linear Models Procedure
                            Class Level Information

                     Class    Levels    Values

                     HWCOMBO       6    11 12 21 22 31 32

                    Number of observations in data set = 12

Dependent Variable: SALES   Sales in cases
                                     Sum of            Mean
Source                  DF          Squares          Square  F Value    Pr > F
Model                    5     1580.0000000     316.0000000    30.58    0.0003
Error                    6       62.0000000      10.3333333
Corrected Total         11     1642.0000000

                  R-Square             C.V.        Root MSE         SALES Mean

                  0.962241         6.303040       3.2145503          51.000000

Source                  DF        Type I SS     Mean Square  F Value    Pr > F
HWCOMBO                  5     1580.0000000     316.0000000    30.58    0.0003

Source                  DF      Type III SS     Mean Square  F Value    Pr > F
HWCOMBO                  5     1580.0000000     316.0000000    30.58    0.0003

                        General Linear Models Procedure

           Tukey's Studentized Range (HSD) Test for variable: SALES

          NOTE: This test controls the type I experimentwise error rate, but
                generally has a higher type II error rate than REGWQ.

                       Alpha= 0.05  df= 6  MSE= 10.33333
                  Critical Value of Studentized Range= 5.628
                    Minimum Significant Difference= 12.793

          Means with the same letter are not significantly different.

               Tukey Grouping              Mean      N  HWCOMBO

                            A            69.000      2  22
                            A
                            A            65.000      2  21

                            B            45.000      2  11
                            B
                            B            44.000      2  32
                            B
                            B            43.000      2  12
                            B
                            B            40.000      2  31