STA 402S 1998: SAS Lesson 2

More regression, including dummy variable coding in a one-factor design



/********************** cars98a.sas **************************/
/* Listing layout: 79 columns per line, 35 lines per page
   (ls= and ps= are the short forms of linesize= and pagesize=). */
options ls=79 ps=35;
title 'STA 320F98 SAS Lesson 2:  Prediction, Diagnosis & Testing';

/* carfmt. supplies printable labels for the numeric codes of a
   categorical variable; the data step attaches it to country. */
proc format;
     value carfmt
           1 = 'US'
           2 = 'Japanese'
           3 = 'Other'
     ;
run;
/* Read the raw car data and build every derived predictor used by the
   regression steps that follow. */
data auto;
     infile 'cars.dat';
     input country mpg weight length;

     /* Squares and cross-product of length and weight */
     len2    = length**2;
     weight2 = weight**2;
     lenxwt  = length*weight;

     /* A SAS comparison evaluates to 1 when true and 0 when false, so
        each dummy variable below is written as a comparison. */

     /* Indicator dummy vars: country 2 has c1 = c2 = 0 */
     c1 = (country = 1);
     c2 = (country = 3);

     /* Cell means dummy vars: one 0/1 variable per country */
     cm1 = (country = 1);
     cm2 = (country = 2);
     cm3 = (country = 3);

     /* Effect coding dummy vars: country 3 is scored -1 on both */
     ec1 = (country = 1) - (country = 3);
     ec2 = (country = 2) - (country = 3);

     /* mileage duplicates mpg except that it is missing on the 71st
        record read.  No step visible in this file uses mileage --
        presumably it is meant as a response for predicting that
        case; confirm before relying on it. */
     if _n_ = 71 then mileage = .;
     else mileage = mpg;

     label country = 'Country of Origin'
           mpg     = 'Miles per Gallon';
     format country carfmt.;
run;

/* The same model -- mpg on weight, length and country -- fitted with
   three different codings of country.  Each TEST statement asks
   whether the country terms can be dropped; the label before the
   colon identifies the test in the listing. */

/* (1) Indicator coding: c1 flags country 1, c2 flags country 3 */
proc reg data=auto;
     model mpg = weight length c1 c2;
     dvar:  test c1 = 0, c2 = 0;
run;

/* (2) Cell means coding: one dummy per country, so the intercept is
       suppressed with NOINT */
proc reg data=auto;
     model mpg = weight length cm1-cm3 / noint; /* No intercept */
     cellmean:  test cm1=cm2=cm3;
run;

/* (3) Effect coding: country 3 carries -1 on both ec1 and ec2 */
proc reg data=auto;
     model mpg = weight length ec1 ec2;
     effcode:  test ec1=ec2=0;
run;

/* BY-group processing needs the data ordered by the BY variable, so
   sort auto in place first. */
proc sort data=auto;
     by country;
run;

/* PLOT combined with a BY statement gives side-by-side boxplots */
proc univariate data=auto plot;
     var mpg;
     by country;
run;

/* Summary statistics for mpg within each country */
proc means data=auto;
     class country;
     var mpg;
run;

/* Summary statistics for mpg, length and weight over all cars */
proc means data=auto;
     var mpg length weight;
run;

/* Indicator-coded model again, this time requesting the prediction
   and diagnostic output listed below.  The input data set is named
   explicitly rather than taken from the most recently created one. */
proc reg data=auto;
     model mpg = weight length c1 c2
                 / i ss1 clm cli r influence partial;
/*         i         prints (X'X)-inverse
           ss1       prints sequential sums of squares
           clm       prints confidence interval for E(Yh)
           cli       prints prediction interval for new observation
           r         prints residual analysis
           influence prints influence statistics
           partial   prints partial regression plots
*/
/* This is the last step of the program, so nothing after it marks a
   step boundary.  PROC REG is interactive: RUN executes the MODEL
   statement and QUIT ends the procedure, which keeps the step from
   being left pending in an interactive session. */
run;
quit;
                

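Part of the list file looks like this. The three coding schemes express the same hypothesis (no difference among countries of origin once weight and length are in the model), so the three F tests are identical.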

 
Dependent Variable: MPG

Test: DVAR     Numerator:     24.5673  DF:    2   F value:   2.3009
               Denominator:  10.67714  DF:   69   Prob>F:    0.1078

Test: CELLMEAN Numerator:     24.5673  DF:    2   F value:   2.3009
               Denominator:  10.67714  DF:   69   Prob>F:    0.1078

Test: EFFCODE  Numerator:     24.5673  DF:    2   F value:   2.3009
               Denominator:  10.67714  DF:   69   Prob>F:    0.1078