STA429/1007 F 2004 Handout 4: The Math Data

Regression Example One



/********************** mathreg1.sas **********************/
/* Permanent SAS data sets for the math study live in 'mathlib'. */
libname math 'mathlib';
/* The same directory is also assigned the libref "library": SAS will
   search for permanently stored formats ONLY in a place called
   "library". */
libname library 'mathlib';

/* Listing layout and the second title line shared by every page. */
options linesize=79 pagesize=2000 noovp formdlim='_';
title2 'Illustrate multiple regression';

/* Regress final mark on HS GPA, HS calculus mark and total diagnostic
   test score; SIMPLE and CORR request descriptive statistics and the
   correlation matrix in addition to the fit. */
proc reg data=math.explore simple corr;
     model grade = gpa hscalc totscore;
     /* Joint test that both high school variables have zero coefficients
        (2 numerator df). Written with the documented comma-separated
        equation list rather than the chained form gpa=hscalc=0. */
     hschool: test gpa=0, hscalc=0;
run;

/* Calculate predicted Y and explained remaining variation with proc
iml. Could use a calculator instead. */

proc iml;
     title3 'Predicted Y and explained remaining variation with proc iml';

     /* Predictor values for the student whose Final Mark is predicted:
        HSGPA, HSCALC and Diagnostic test score. */
     hs_gpa     = 80;
     hs_calc    = 75;
     diag_score = 15;

     /* Coefficients are copied by hand from the Parameter Estimates
        table of the first regression. */
     Yhat = -73.12627 + 1.20851*hs_gpa + 0.34197*hs_calc + 1.03078*diag_score;
     print "gpa=80     hscalc=75   totscore=15";
     print "Yhat = " yhat;
     print "  ";

     /* Proportion of remaining variation explained:
            a = s*F / (n-p + s*F)
        where s is the number of coefficients tested and n-p is the
        error df of the full model. */
     err_df = 287;            /* n-p, error df of the first regression */

     /* One coefficient (totscore): F is the squared t statistic. */
     F_diag = 4.08**2;
     a1 = 1*F_diag/(err_df+1*F_diag);
     print "Controlling for gpa and hscalc, diagnostic test explains";
     print a1;
     print "... of the remaining variation in grade.";
     print "";

     /* Two coefficients (gpa, hscalc): F from the hschool test. */
     F_hs = 65.47;
     a2 = 2*F_hs/(err_df+2*F_hs);
     print "Controlling for diagnostic test, gpa and hscalc explain";
     print a2;
     print "... of the remaining variation in grade.";
     print "";

/*  Many refinements of the regression analysis are possible. Here is one. */
/* First get rid of labels so correlation matrix will look nice */
/* Working copy of math.explore with the variable labels of the
   regression variables replaced by a single blank. */
data blank;
     set math.explore;
     label grade   = ' '
           gpa     = ' '
           hscalc  = ' '
           precalc = ' '
           calc    = ' ';
run;

/* Same regression, but with the diagnostic test split into its precalc
   and calc parts. The input data set is named explicitly instead of
   relying on the most recently created data set. */
proc reg data=blank simple corr;
     title3 'Calc and precalc separately';
     model grade = gpa hscalc precalc calc;
     /* Joint tests use the documented comma-separated equation list. */
     hschool: test gpa=0, hscalc=0;       /* both HS variables zero        */
     dtest:   test precalc=0, calc=0;     /* both diagnostic subtests zero */
     compare: test precalc=calc;          /* equal subtest coefficients    */
run;

/* Rescale the listed variables to mean 0 and standard deviation 1,
   writing the result to WITHZ. precalc and calc are not in the VAR
   list, so they stay in their original units; totscore is standardized
   but does not appear in the model that follows. */
proc standard data=blank out=withz mean=0 std=1;
     var gpa hscalc totscore;
run;

/* Refit the four-predictor model on the PROC STANDARD output, so the
   gpa and hscalc coefficients are on a per-standard-deviation scale.
   The input data set is named explicitly instead of relying on the
   most recently created data set. */
proc reg data=withz;
     title3 'Use standardized IVs';
     model grade = gpa hscalc precalc calc;
     /* Test whether the standardized gpa and hscalc coefficients are equal. */
     gpacalc: test gpa=hscalc;
run;
% cat mathreg1.lst

_______________________________________________________________________________

                                The SAS System                                1
                        Illustrate multiple regression
                                            22:27 Wednesday, September 29, 2004

                               The REG Procedure

                            Descriptive Statistics

                                        Uncorrected                    Standard
Variable            Sum          Mean            SS      Variance     Deviation

Intercept     291.00000       1.00000     291.00000             0             0
gpa               23551      80.93299       1916508      35.91325       5.99277
hscalc            22840      78.48797       1829830     128.15417      11.32052
totscore     2561.00000       8.80069         26239      12.76014       3.57213
grade             17633      60.59450       1165607     334.97983      18.30245

                            Descriptive Statistics

                 Variable    Label

                 Intercept   Intercept
                 gpa         High School GPA
                 hscalc      HS Calculus
                 totscore    Total # right on diagnostic test
                 grade       Final mark (if any)


                                  Correlation

Variable     Label                                         gpa           hscalc

gpa          High School GPA                            1.0000           0.6313
hscalc       HS Calculus                                0.6313           1.0000
totscore     Total # right on diagnostic test           0.3358           0.4350
grade        Final mark (if any)                        0.5968           0.5489

                                  Correlation

Variable     Label                                    totscore            grade

gpa          High School GPA                            0.3358           0.5968
hscalc       HS Calculus                                0.4350           0.5489
totscore     Total # right on diagnostic test           1.0000           0.4261
grade        Final mark (if any)                        0.4261           1.0000


_______________________________________________________________________________

                                The SAS System                                2
                        Illustrate multiple regression
                                            22:27 Wednesday, September 29, 2004

                               The REG Procedure
                                 Model: MODEL1
                 Dependent Variable: grade Final mark (if any)

                             Analysis of Variance

                                    Sum of           Mean
Source                   DF        Squares         Square    F Value    Pr > F

Model                     3          42546          14182      74.55    <.0001
Error                   287          54598      190.23637
Corrected Total         290          97144


             Root MSE             13.79262    R-Square     0.4380
             Dependent Mean       60.59450    Adj R-Sq     0.4321
             Coeff Var            22.76216


                              Parameter Estimates

                                                       Parameter       Standard
Variable     Label                             DF       Estimate          Error

Intercept    Intercept                          1      -73.12627       11.17387
gpa          High School GPA                    1        1.20851        0.17495
hscalc       HS Calculus                        1        0.34197        0.09688
totscore     Total # right on diagnostic        1        1.03078        0.25278
             test

                             Parameter Estimates

   Variable     Label                             DF    t Value    Pr > |t|

   Intercept    Intercept                          1      -6.54      <.0001
   gpa          High School GPA                    1       6.91      <.0001
   hscalc       HS Calculus                        1       3.53      0.0005
   totscore     Total # right on diagnostic        1       4.08      <.0001
                test


_______________________________________________________________________________

                                The SAS System                                3
                        Illustrate multiple regression
                                            22:27 Wednesday, September 29, 2004

                               The REG Procedure
                                 Model: MODEL1

               Test hschool Results for Dependent Variable grade

                                           Mean
           Source             DF         Square    F Value    Pr > F

           Numerator           2          12455      65.47    <.0001
           Denominator       287      190.23637

_______________________________________________________________________________

                                The SAS System                                4
                        Illustrate multiple regression
          Predicted Y and explained remaining variation with proc iml
                                            22:27 Wednesday, September 29, 2004

                      gpa=80     hscalc=75   totscore=15


                                            YHAT

                               Yhat =   64.66398



           Controlling for gpa and hscalc, diagnostic test explains


                                       A1

                                   0.0548217


                   ... of the remaining variation in grade.





            Controlling for diagnostic test, gpa and hscalc explain


                                       A2

                                   0.3132986


                   ... of the remaining variation in grade.




_______________________________________________________________________________

                                The SAS System                                5
                        Illustrate multiple regression
                          Calc and precalc separately
                                            22:27 Wednesday, September 29, 2004

                               The REG Procedure

                            Descriptive Statistics

                                        Uncorrected                    Standard
Variable            Sum          Mean            SS      Variance     Deviation

Intercept     291.00000       1.00000     291.00000             0             0
gpa               23551      80.93299       1916508      35.91325       5.99277
hscalc            22840      78.48797       1829830     128.15417      11.32052
precalc      1409.00000       4.84192    7603.00000       2.69217       1.64078
calc         1152.00000       3.95876    6406.00000       6.36381       2.52266
grade             17633      60.59450       1165607     334.97983      18.30245


                                 Correlation

Variable           gpa        hscalc       precalc          calc         grade

gpa             1.0000        0.6313        0.2824        0.2919        0.5968
hscalc          0.6313        1.0000        0.3185        0.4088        0.5489
precalc         0.2824        0.3185        1.0000        0.4475        0.3668
calc            0.2919        0.4088        0.4475        1.0000        0.3648
grade           0.5968        0.5489        0.3668        0.3648        1.0000

_______________________________________________________________________________

                                The SAS System                                6
                        Illustrate multiple regression
                          Calc and precalc separately
                                            22:27 Wednesday, September 29, 2004

                               The REG Procedure
                                 Model: MODEL1
                          Dependent Variable: grade

                             Analysis of Variance

                                    Sum of           Mean
Source                   DF        Squares         Square    F Value    Pr > F

Model                     4          42792          10698      56.29    <.0001
Error                   286          54352      190.04077
Corrected Total         290          97144



             Root MSE             13.78553    R-Square     0.4405
             Dependent Mean       60.59450    Adj R-Sq     0.4327
             Coeff Var            22.75046


                             Parameter Estimates

                          Parameter       Standard
     Variable     DF       Estimate          Error    t Value    Pr > |t|

     Intercept     1      -74.09971       11.20082      -6.62      <.0001
     gpa           1        1.19490        0.17527       6.82      <.0001
     hscalc        1        0.34928        0.09704       3.60      0.0004
     precalc       1        1.60250        0.56228       2.85      0.0047
     calc          1        0.71082        0.37797       1.88      0.0610

_______________________________________________________________________________

                                The SAS System                                7
                        Illustrate multiple regression
                          Calc and precalc separately
                                            22:27 Wednesday, September 29, 2004

                               The REG Procedure
                                 Model: MODEL1

               Test hschool Results for Dependent Variable grade

                                           Mean
           Source             DF         Square    F Value    Pr > F

           Numerator           2          12416      65.34    <.0001
           Denominator       286      190.04077

_______________________________________________________________________________

                                The SAS System                                8
                        Illustrate multiple regression
                          Calc and precalc separately
                                            22:27 Wednesday, September 29, 2004

                               The REG Procedure
                                 Model: MODEL1

                Test dtest Results for Dependent Variable grade

                                           Mean
           Source             DF         Square    F Value    Pr > F

           Numerator           2     1704.71633       8.97    0.0002
           Denominator       286      190.04077


_______________________________________________________________________________

                                The SAS System                                9
                        Illustrate multiple regression
                          Calc and precalc separately
                                            22:27 Wednesday, September 29, 2004

                               The REG Procedure
                                 Model: MODEL1

               Test compare Results for Dependent Variable grade

                                           Mean
           Source             DF         Square    F Value    Pr > F

           Numerator           1      246.17805       1.30    0.2560
           Denominator       286      190.04077

_______________________________________________________________________________

                                The SAS System                               10
                        Illustrate multiple regression
                             Use standardized IVs
                                            22:27 Wednesday, September 29, 2004

                               The REG Procedure
                                 Model: MODEL1
                          Dependent Variable: grade

                             Analysis of Variance

                                    Sum of           Mean
Source                   DF        Squares         Square    F Value    Pr > F

Model                     4          42792          10698      56.29    <.0001
Error                   286          54352      190.04077
Corrected Total         290          97144


             Root MSE             13.78553    R-Square     0.4405
             Dependent Mean       60.59450    Adj R-Sq     0.4327
             Coeff Var            22.75046


                             Parameter Estimates

                          Parameter       Standard
     Variable     DF       Estimate          Error    t Value    Pr > |t|

     Intercept     1       46.97005        2.62614      17.89      <.0001
     gpa           1        7.21537        1.05835       6.82      <.0001
     hscalc        1        4.25842        1.18314       3.60      0.0004
     precalc       1        1.60250        0.56228       2.85      0.0047
     calc          1        0.71082        0.37797       1.88      0.0610

_______________________________________________________________________________

                                The SAS System                               11
                        Illustrate multiple regression
                             Use standardized IVs
                                            22:27 Wednesday, September 29, 2004

                               The REG Procedure
                                 Model: MODEL1

               Test gpacalc Results for Dependent Variable grade

                                           Mean
           Source             DF         Square    F Value    Pr > F

           Numerator           1      419.85843       2.21    0.1383
           Denominator       286      190.04077