STA429/1007 F 2004 Handout 13

Multivariate Regression and Analysis of Variance (Salmon data)


/* salmon1.sas */
title 'Multivariate Analysis of Salmon data';

/* Listing layout: 79-column lines, no overprinting, and a row of
   underscores between pages in place of a page eject. */
options linesize=79 noovp formdlim='_';

/* Display labels for the numeric sex and country codes read from the data. */
proc format;
     value sexfmt
          1 = 'Female'
          2 = 'Male';
     value cfmt
          1 = 'Alaskan'
          2 = 'Canadian';
run;
/* Build the analysis data set: one record per fish with its country and sex
   codes, the two growth measurements, their sum, and four 0/1 indicators
   (one per sex-by-country cell) for the cell-means regression. */
data fish;
     infile 'salmon.dat';
     input country sex fresh marine;
     growth = fresh+marine;
     /* combo packs the two codes into one number: tens digit = sex,
        units digit = country, so 11=FA, 12=FC, 21=MA, 22=MC. */
     combo = 10*sex+country;
     /* Assign the indicators only for the four valid cells.  A record whose
        sex or country is missing (or not coded 1/2) keeps missing indicators,
        so proc reg excludes it instead of treating it as a fifth, all-zero
        group whose fitted value is forced to 0 by the NOINT model. */
     if combo in (11, 12, 21, 22) then do;
          FA = (combo = 11);   /* Female, Alaskan  */
          FC = (combo = 12);   /* Female, Canadian */
          MA = (combo = 21);   /* Male,   Alaskan  */
          MC = (combo = 22);   /* Male,   Canadian */
     end;
     label  fresh = 'Freshwater growth'
            marine = 'Marine growth'
            growth = 'Total growth (fresh+marine)';
     format country cfmt.;
     format sex sexfmt.;
run;

/* Cell counts for the country-by-sex layout; row, column and overall
   percentages are suppressed so only the frequencies are printed. */
proc freq data=fish;
     tables country*sex / nopercent norow nocol;
run;

/* Two-factor multivariate analysis of variance on (fresh, marine):
   a univariate ANOVA table for each response, followed by multivariate
   tests for every effect in the model. */
proc glm data=fish;
     class country sex;
     /* Main effects plus interaction, i.e. the expansion of country|sex */
     model fresh marine = country sex country*sex;
     manova h = _all_;
run;
quit;

/* Same analysis as a multivariate regression on the four cell indicators.
   With no intercept, each coefficient is the mean of its sex-by-country
   cell, so every hypothesis below is a contrast among cell means. */
proc reg data=fish;
     model fresh marine = FA FC MA MC / noint;

     /* Overall and factorial hypotheses */
     anydiff: mtest FA=FC, FC=MA, MA=MC;   /* all four cells equal: significant */
     country: mtest FA+MA=FC+MC;           /* significant */
     gender:  mtest FA+FC=MA+MC;
     inter:   mtest FA-FC=MA-MC;

     /* Pairwise multivariate comparisons of cells */
     FAvsFC:  mtest FA=FC;                 /* significant */
     FAvsMA:  mtest FA=MA;
     FAvsMC:  mtest FA=MC;                 /* significant */
     FCvsMA:  mtest FC=MA;                 /* significant */
     FCvsMC:  mtest FC=MC;
     MAvsMC:  mtest MA=MC;                 /* significant */
run;
quit;

_______________________________________________________________________________

                     Multivariate Analysis of Salmon data                     1
                                                13:52 Friday, November 26, 2004

                              The FREQ Procedure

                            Table of country by sex

                      country     sex

                      Frequency|Female  |Male    |  Total
                      ---------+--------+--------+
                      Alaskan  |     26 |     24 |     50
                      ---------+--------+--------+
                      Canadian |     26 |     24 |     50
                      ---------+--------+--------+
                      Total          52       48      100

_______________________________________________________________________________

                     Multivariate Analysis of Salmon data                     2
                                                13:52 Friday, November 26, 2004

                               The GLM Procedure

                            Class Level Information

                   Class         Levels    Values

                   country            2    Alaskan Canadian

                   sex                2    Female Male


                         Number of observations    100

_______________________________________________________________________________

                     Multivariate Analysis of Salmon data                     3
                                                13:52 Friday, November 26, 2004

                               The GLM Procedure

Dependent Variable: fresh   Freshwater growth

                                      Sum of
Source                     DF        Squares    Mean Square   F Value   Pr > F

Model                       3    38591.26064    12863.75355     43.58   <.0001

Error                      96    28338.09936      295.18853

Corrected Total            99    66929.36000


              R-Square     Coeff Var      Root MSE    fresh Mean

              0.576597      14.57009      17.18105      117.9200


Source                     DF      Type I SS    Mean Square   F Value   Pr > F

country                     1    38181.16000    38181.16000    129.34   <.0001
sex                         1        2.05391        2.05391      0.01   0.9337
country*sex                 1      408.04673      408.04673      1.38   0.2426


Source                     DF    Type III SS    Mean Square   F Value   Pr > F

country                     1    37805.20673    37805.20673    128.07   <.0001
sex                         1        2.05391        2.05391      0.01   0.9337
country*sex                 1      408.04673      408.04673      1.38   0.2426

_______________________________________________________________________________

                     Multivariate Analysis of Salmon data                     4
                                                13:52 Friday, November 26, 2004

                               The GLM Procedure

Dependent Variable: marine   Marine growth

                                      Sum of
Source                     DF        Squares    Mean Square   F Value   Pr > F

Model                       3    101611.8637     33870.6212     29.54   <.0001

Error                      96    110064.1763      1146.5018

Corrected Total            99    211676.0400


              R-Square     Coeff Var      Root MSE    marine Mean

              0.480035      8.504554      33.86003       398.1400


Source                     DF      Type I SS    Mean Square   F Value   Pr > F

country                     1    99351.04000    99351.04000     86.66   <.0001
sex                         1      356.11853      356.11853      0.31   0.5786
country*sex                 1     1904.70519     1904.70519      1.66   0.2005


Source                     DF    Type III SS    Mean Square   F Value   Pr > F

country                     1    100294.7452    100294.7452     87.48   <.0001
sex                         1       356.1185       356.1185      0.31   0.5786
country*sex                 1      1904.7052      1904.7052      1.66   0.2005

_______________________________________________________________________________

                     Multivariate Analysis of Salmon data                     5
                                                13:52 Friday, November 26, 2004

                               The GLM Procedure
                       Multivariate Analysis of Variance

           Characteristic Roots and Vectors of: E Inverse * H, where
                      H = Type III SSCP Matrix for country
                             E = Error SSCP Matrix

           Characteristic               Characteristic Vector  V'EV=1
                     Root    Percent           fresh          marine

               2.11440493     100.00      0.00449006     -0.00183481
               0.00000000       0.00      0.00390756      0.00239907


                MANOVA Test Criteria and Exact F Statistics for
                  the Hypothesis of No Overall country Effect
                     H = Type III SSCP Matrix for country
                             E = Error SSCP Matrix

                             S=1    M=0    N=46.5

Statistic                        Value    F Value    Num DF    Den DF    Pr > F

Wilks' Lambda               0.32108863     100.43         2        95    <.0001
Pillai's Trace              0.67891137     100.43         2        95    <.0001
Hotelling-Lawley Trace      2.11440493     100.43         2        95    <.0001
Roy's Greatest Root         2.11440493     100.43         2        95    <.0001


           Characteristic Roots and Vectors of: E Inverse * H, where
                        H = Type III SSCP Matrix for sex
                             E = Error SSCP Matrix

           Characteristic               Characteristic Vector  V'EV=1
                     Root    Percent           fresh          marine

               0.00325984     100.00     -0.00051382      0.00298651
               0.00000000       0.00      0.00593006      0.00045035


                  MANOVA Test Criteria and Exact F Statistics
                  for the Hypothesis of No Overall sex Effect
                       H = Type III SSCP Matrix for sex
                             E = Error SSCP Matrix

                             S=1    M=0    N=46.5

Statistic                        Value    F Value    Num DF    Den DF    Pr > F

Wilks' Lambda               0.99675075       0.15         2        95    0.8568
Pillai's Trace              0.00324925       0.15         2        95    0.8568
Hotelling-Lawley Trace      0.00325984       0.15         2        95    0.8568
Roy's Greatest Root         0.00325984       0.15         2        95    0.8568

_______________________________________________________________________________

                     Multivariate Analysis of Salmon data                     6
                                                13:52 Friday, November 26, 2004

                               The GLM Procedure
                       Multivariate Analysis of Variance

           Characteristic Roots and Vectors of: E Inverse * H, where
                    H = Type III SSCP Matrix for country*sex
                             E = Error SSCP Matrix

           Characteristic               Characteristic Vector  V'EV=1
                     Root    Percent           fresh          marine

               0.03383491     100.00      0.00416036      0.00228909
               0.00000000       0.00      0.00425689     -0.00197030


                MANOVA Test Criteria and Exact F Statistics for
                the Hypothesis of No Overall country*sex Effect
                   H = Type III SSCP Matrix for country*sex
                             E = Error SSCP Matrix

                             S=1    M=0    N=46.5

Statistic                        Value    F Value    Num DF    Den DF    Pr > F

Wilks' Lambda               0.96727242       1.61         2        95    0.2059
Pillai's Trace              0.03272758       1.61         2        95    0.2059
Hotelling-Lawley Trace      0.03383491       1.61         2        95    0.2059
Roy's Greatest Root         0.03383491       1.61         2        95    0.2059

_______________________________________________________________________________

                     Multivariate Analysis of Salmon data                     7
                                                13:52 Friday, November 26, 2004

                               The REG Procedure
                                 Model: MODEL1
                  Dependent Variable: fresh Freshwater growth

              NOTE: No intercept in model. R-Square is redefined.

                             Analysis of Variance

                                    Sum of           Mean
Source                   DF        Squares         Square    F Value    Pr > F

Model                     4        1429104         357276    1210.33    <.0001
Error                    96          28338      295.18853
Uncorrected Total       100        1457442


             Root MSE             17.18105    R-Square     0.9806
             Dependent Mean      117.92000    Adj R-Sq     0.9797
             Coeff Var            14.57009


                              Parameter Estimates

                                     Parameter     Standard
 Variable   Label              DF     Estimate        Error  t Value  Pr > |t|

 FA                             1     96.57692      3.36948    28.66    <.0001
 FC                             1    139.53846      3.36948    41.41    <.0001
 MA                             1    100.33333      3.50707    28.61    <.0001
 MC                             1    135.20833      3.50707    38.55    <.0001

_______________________________________________________________________________

                     Multivariate Analysis of Salmon data                     8
                                                13:52 Friday, November 26, 2004

                               The REG Procedure
                                 Model: MODEL1
                   Dependent Variable: marine Marine growth

              NOTE: No intercept in model. R-Square is redefined.

                             Analysis of Variance

                                    Sum of           Mean
Source                   DF        Squares         Square    F Value    Pr > F

Model                     4       15953158        3988289    3478.66    <.0001
Error                    96         110064     1146.50184
Uncorrected Total       100       16063222


             Root MSE             33.86003    R-Square     0.9931
             Dependent Mean      398.14000    Adj R-Sq     0.9929
             Coeff Var             8.50455


                              Parameter Estimates

                                     Parameter     Standard
 Variable   Label              DF     Estimate        Error  t Value  Pr > |t|

 FA                             1    423.65385      6.64050    63.80    <.0001
 FC                             1    369.00000      6.64050    55.57    <.0001
 MA                             1    436.16667      6.91165    63.11    <.0001
 MC                             1    364.04167      6.91165    52.67    <.0001

_______________________________________________________________________________

                     Multivariate Analysis of Salmon data                     9
                                                13:52 Friday, November 26, 2004

                               The REG Procedure
                                 Model: MODEL1
                          Multivariate Test: anydiff

                 Multivariate Statistics and F Approximations

                             S=2    M=0    N=46.5

Statistic                        Value    F Value    Num DF    Den DF    Pr > F

Wilks' Lambda               0.30949882      25.25         6       190    <.0001
Pillai's Trace              0.71366714      17.75         6       192    <.0001
Hotelling-Lawley Trace      2.15618019      33.96         6     124.9    <.0001
Roy's Greatest Root         2.12088842      67.87         3        96    <.0001

         NOTE: F Statistic for Roy's Greatest Root is an upper bound.
                 NOTE: F Statistic for Wilks' Lambda is exact.

_______________________________________________________________________________

                     Multivariate Analysis of Salmon data                    10
                                                13:52 Friday, November 26, 2004

                               The REG Procedure
                                 Model: MODEL1
                          Multivariate Test: country

                Multivariate Statistics and Exact F Statistics

                             S=1    M=0    N=46.5

Statistic                        Value    F Value    Num DF    Den DF    Pr > F

Wilks' Lambda               0.32108863     100.43         2        95    <.0001
Pillai's Trace              0.67891137     100.43         2        95    <.0001
Hotelling-Lawley Trace      2.11440493     100.43         2        95    <.0001
Roy's Greatest Root         2.11440493     100.43         2        95    <.0001

_______________________________________________________________________________

                     Multivariate Analysis of Salmon data                    11
                                                13:52 Friday, November 26, 2004

                               The REG Procedure
                                 Model: MODEL1
                           Multivariate Test: gender

                Multivariate Statistics and Exact F Statistics

                             S=1    M=0    N=46.5

Statistic                        Value    F Value    Num DF    Den DF    Pr > F

Wilks' Lambda               0.99675075       0.15         2        95    0.8568
Pillai's Trace              0.00324925       0.15         2        95    0.8568
Hotelling-Lawley Trace      0.00325984       0.15         2        95    0.8568
Roy's Greatest Root         0.00325984       0.15         2        95    0.8568

_______________________________________________________________________________

                     Multivariate Analysis of Salmon data                    12
                                                13:52 Friday, November 26, 2004

                               The REG Procedure
                                 Model: MODEL1
                           Multivariate Test: inter

                Multivariate Statistics and Exact F Statistics

                             S=1    M=0    N=46.5

Statistic                        Value    F Value    Num DF    Den DF    Pr > F

Wilks' Lambda               0.96727242       1.61         2        95    0.2059
Pillai's Trace              0.03272758       1.61         2        95    0.2059
Hotelling-Lawley Trace      0.03383491       1.61         2        95    0.2059
Roy's Greatest Root         0.03383491       1.61         2        95    0.2059

_______________________________________________________________________________

                     Multivariate Analysis of Salmon data                    13
                                                13:52 Friday, November 26, 2004

                               The REG Procedure
                                 Model: MODEL1
                           Multivariate Test: FAvsFC

                Multivariate Statistics and Exact F Statistics

                             S=1    M=0    N=46.5

Statistic                        Value    F Value    Num DF    Den DF    Pr > F

Wilks' Lambda               0.46839145      53.91         2        95    <.0001
Pillai's Trace              0.53160855      53.91         2        95    <.0001
Hotelling-Lawley Trace      1.13496640      53.91         2        95    <.0001
Roy's Greatest Root         1.13496640      53.91         2        95    <.0001

_______________________________________________________________________________

                     Multivariate Analysis of Salmon data                    14
                                                13:52 Friday, November 26, 2004

                               The REG Procedure
                                 Model: MODEL1
                           Multivariate Test: FAvsMA

                Multivariate Statistics and Exact F Statistics

                             S=1    M=0    N=46.5

Statistic                        Value    F Value    Num DF    Den DF    Pr > F

Wilks' Lambda               0.97523247       1.21         2        95    0.3038
Pillai's Trace              0.02476753       1.21         2        95    0.3038
Hotelling-Lawley Trace      0.02539654       1.21         2        95    0.3038
Roy's Greatest Root         0.02539654       1.21         2        95    0.3038

_______________________________________________________________________________

                     Multivariate Analysis of Salmon data                    15
                                                13:52 Friday, November 26, 2004

                               The REG Procedure
                                 Model: MODEL1
                           Multivariate Test: FAvsMC

                Multivariate Statistics and Exact F Statistics

                             S=1    M=0    N=46.5

Statistic                        Value    F Value    Num DF    Den DF    Pr > F

Wilks' Lambda               0.50021824      47.46         2        95    <.0001
Pillai's Trace              0.49978176      47.46         2        95    <.0001
Hotelling-Lawley Trace      0.99912743      47.46         2        95    <.0001
Roy's Greatest Root         0.99912743      47.46         2        95    <.0001

_______________________________________________________________________________

                     Multivariate Analysis of Salmon data                    16
                                                13:52 Friday, November 26, 2004

                               The REG Procedure
                                 Model: MODEL1
                           Multivariate Test: FCvsMA

                Multivariate Statistics and Exact F Statistics

                             S=1    M=0    N=46.5

Statistic                        Value    F Value    Num DF    Den DF    Pr > F

Wilks' Lambda               0.47202378      53.13         2        95    <.0001
Pillai's Trace              0.52797622      53.13         2        95    <.0001
Hotelling-Lawley Trace      1.11853735      53.13         2        95    <.0001
Roy's Greatest Root         1.11853735      53.13         2        95    <.0001

_______________________________________________________________________________

                     Multivariate Analysis of Salmon data                    17
                                                13:52 Friday, November 26, 2004

                               The REG Procedure
                                 Model: MODEL1
                           Multivariate Test: FCvsMC

                Multivariate Statistics and Exact F Statistics

                             S=1    M=0    N=46.5

Statistic                        Value    F Value    Num DF    Den DF    Pr > F

Wilks' Lambda               0.98843705       0.56         2        95    0.5755
Pillai's Trace              0.01156295       0.56         2        95    0.5755
Hotelling-Lawley Trace      0.01169822       0.56         2        95    0.5755
Roy's Greatest Root         0.01169822       0.56         2        95    0.5755

_______________________________________________________________________________

                     Multivariate Analysis of Salmon data                    18
                                                13:52 Friday, November 26, 2004

                               The REG Procedure
                                 Model: MODEL1
                           Multivariate Test: MAvsMC

                Multivariate Statistics and Exact F Statistics

                             S=1    M=0    N=46.5

Statistic                        Value    F Value    Num DF    Den DF    Pr > F

Wilks' Lambda               0.49555145      48.35         2        95    <.0001
Pillai's Trace              0.50444855      48.35         2        95    <.0001
Hotelling-Lawley Trace      1.01795395      48.35         2        95    <.0001
Roy's Greatest Root         1.01795395      48.35         2        95    <.0001