STA429/1007 F 2004 Handout 11

Logistic regression with more than 2 response categories (Math data)


/********************** mathlog3.sas **********************/
/* Global setup: second title line, listing layout options, and the two
   librefs that the rest of the program relies on. */
title2 'Logistic regression on math data: Part III Using proc catmod';
/* 79-column listing; formdlim='_' sets the delimiter used between pages. */
options linesize=79 pagesize=2000 noovp formdlim='_';
libname math '/homes/students/u0/stats/brunner/mathlib';
               /* Full path to permanent SAS datasets */
libname library '/homes/students/u0/stats/brunner/mathlib';
               /* SAS will search for permanently stored formats ONLY in a
                  place called "library."  */

/* Make proc catmod do the logistic regression LR test for dtest, G = 14.903 */

data quant; /* Keep only the cases that can enter the full model. */
     set math.explore;
     /* A sum is missing as soon as any one of its terms is missing, so
        goodcase flags observations with complete data on all four
        explanatory variables. */
     goodcase = gpa + hscalc + precalc + calc;
     /* Subsetting IF: only complete cases are written to quant. */
     if not missing(goodcase);
run;

proc catmod data=quant; /* Ordinary 2-category logistic regression, as a check */
     title3 'Test precalc & calc controlling for gpa & hscalc: Full model';
     /* DIRECT: use the variables as they are, with no dummy variables. */
     direct gpa hscalc precalc calc;
     model passed = gpa hscalc precalc calc / noprofile;
     /* Parameter order is Intercept, gpa, hscalc, precalc, calc; the two
        rows pick out precalc and calc.  This is only a Wald test, not the
        likelihood ratio test. */
     contrast 'dtest'
              all_parms 0 0 0 1 0,
              all_parms 0 0 0 0 1;
run;

proc catmod data=quant; /* Same cases as the full model, without precalc & calc */
     title3 'Test precalc & calc controlling for gpa & hscalc: Reducd model';
     /* Both remaining explanatory variables are used as-is (no dummy vars). */
     direct gpa hscalc;
     model passed = gpa hscalc / noprofile;
run;

/* Okay, got 380.90973 - 366.00715 = 14.90258. The parameter estimates have
reversed signs, but we can live with it. */

/* Now try a 4-category response */

data quant2; /* Cases with both sex and course observed */
     set math.explore;
     /* The sum is missing whenever sex or course is missing. */
     goodcase = sex + course;
     /* Subsetting IF: keep only the complete cases. */
     if not missing(goodcase);
run;

proc freq data=quant2; /* Two-way table of sex by course with chi-square tests */
     /* Show counts and row percentages only, plus the chi-square statistics. */
     tables sex*course / chisq nopercent nocol;
run;

proc catmod data=quant2; /* 4-category response, sex as the only predictor */
     title3 'Course is a 4-category DV: Full';
     /* sex is already coded 0=M, 1=F, so it is used directly. */
     direct sex;
     model course = sex;
run;

proc catmod data=quant2; /* Same response with no explanatory variables */
     title3 'Course is a 4-category DV: Reduced';
     /* Empty right-hand side: intercepts only. */
     model course = ;
run;


proc iml;
     title3 'Calculate G: Should be Likelihood Ratio Chi-Square = 15.4099';
     title4 '(from proc freq)';
     /* G is the difference between the final -2 Log Likelihood values of the
        reduced and full 4-category models, copied from the catmod output. */
     G = 895.64588 - 880.23601;
     /* Upper-tail chi-square probability with 3 degrees of freedom. */
     pval = 1 - probchi(G, 3);
     print "G = " G ", df = 3, p = " pval;
quit;


proc iml;
     title3 'Predicted probability of female taking course 2 = 0.8158 ?';
     /* The probabilities of the first three response categories are the
        solution of 3 linear equations in 3 unknowns. */
     male   = {1,0};   /* (intercept, sex) for a male; not used below */
     female = {1,1};   /* (intercept, sex) for a female */
     /* One column per response function; rows are intercept and sex.
        Values copied from the printout. */
     coef = { 0.2007  2.0025 -0.0770,
              0.6817  0.8925 -0.3285 };
     eta = coef` * female;              /* the three linear predictors */
     /* Ones everywhere, with 1+exp(-eta) on the diagonal */
     A = J(3,3,1) + diag(exp(-eta));
     one = {1,1,1}; print A one;
     print "Need to solve    A pi = one    for pi";
     pi = solve(A, one);
     /* Append the last (redundant) probability so the four sum to one;
        // is vertical concatenation */
     pi = pi // (1 - sum(pi));
     print "Predicted probabilities for females:";
     print pi;
     /* The male probabilities could be obtained the same way */
quit;

     /* Predict choice of class from HS & diagnos. To make things simpler,
        make course 2 (MAT132) the reference (last) category. But proc catmod
        does not use alphabetical order. */

proc format;
     /* Printable labels for the recoded course variable; code 4 (the last
        one) is Mat132Y. */
     value cfmt
           1 = 'Mat122Y?'
           2 = 'Mat138Y'
           3 = 'Other'
           4 = 'Mat132Y';
run;

     data quant3; /* Only cases used in full model */
          set math.explore;
          /* Build course2 from course so that Mat132Y (course=2) receives the
             largest code, 4, and therefore comes last.  Any other value of
             course, including missing, makes course2 missing.  The original
             variable course is never modified. */
          select (course);
             when (1) course2 = 1;
             when (2) course2 = 4;
             when (3) course2 = 2;
             when (4) course2 = 3;
             otherwise course2 = .;
          end;
          /* The sum is missing if any explanatory variable is missing. */
          good = precalc+calc+gpa+hscalc+english+sex;
          /* Drop cases lacking an explanatory variable or a usable response,
             since neither can contribute to the model. */
          if good=. or course2=. then delete;
          format course2 cfmt.;
     run;

     proc freq data=quant3; /* Cross-check the recoding of course into course2 */
          title3 '';
          /* Counts only: no row, column or overall percentages. */
          tables course2*course / nopercent nocol norow;
     run;


     proc catmod data=quant3;
          title3 'Choice of course (Mat132Y as reference category)';
          /* All explanatory variables are used as-is (no dummy variables). */
          direct precalc calc gpa hscalc english sex;
          /* The response is the recoded course2, not course, so that Mat132Y
             (code 4) is the last category and hence the reference. */
          model course2 = precalc calc gpa hscalc english sex
                          / noprofile;
          /* Wald test of precalc and calc together.  With 3 observed response
             levels there are 2 response functions and 14 parameters, ordered
             Intercept(1) Intercept(2) precalc(1) precalc(2) calc(1) calc(2)
             gpa(1) ...  The four rows select parameters 3-6, giving a 4 df
             test.  NOTE(review): the rows assume exactly 3 response levels
             occur in quant3; revise them if a fourth level appears. */
          contrast 'dtest' all_parms 0 0 1 0 0 0 0 0 0 0 0 0 0 0,
                           all_parms 0 0 0 1 0 0 0 0 0 0 0 0 0 0,
                           all_parms 0 0 0 0 1 0 0 0 0 0 0 0 0 0,
                           all_parms 0 0 0 0 0 1 0 0 0 0 0 0 0 0;
     run;


Here is mathlog3.lst


_______________________________________________________________________________

                                The SAS System                                1
         Logistic regression on math data: Part III Using proc catmod
         Test precalc & calc controlling for gpa & hscalc: Full model
                                             21:27 Wednesday, November 10, 2004

                             The CATMOD Procedure

                                 Data Summary

              Response           passed     Response Levels    2
              Weight Variable    None       Populations      374
              Data Set           QUANT      Total Frequency  375
              Frequency Missing  0          Observations     375


                          Maximum Likelihood Analysis

                              Sub           -2 Log    Convergence
              Iteration    Iteration    Likelihood      Criterion
              ---------------------------------------------------
                   0           0         519.86039         1.0000
                   1           0         380.37464         0.2683
                   2           0         366.86764         0.0355
                   3           0         366.01214       0.002332
                   4           0         366.00715      0.0000136
                   5           0         366.00715      5.337E-10

                         Maximum Likelihood Analysis

                                    Parameter Estimates
  Iteration            1            2            3            4            5
  --------------------------------------------------------------------------
       0               0            0            0            0            0
       1          8.4509      -0.0548      -0.0481      -0.1728      -0.0336
       2         12.9510      -0.1001      -0.0567      -0.2429      -0.0687
       3         14.5028      -0.1167      -0.0590      -0.2619      -0.0811
       4         14.6347      -0.1181      -0.0592      -0.2633      -0.0821
       5         14.6355      -0.1181      -0.0592      -0.2633      -0.0821

                  Maximum likelihood computations converged.



                   Maximum Likelihood Analysis of Variance

              Source               DF   Chi-Square    Pr > ChiSq
              --------------------------------------------------
              Intercept             1        41.20        <.0001
              gpa                   1        14.42        0.0001
              hscalc                1        18.91        <.0001
              precalc               1         8.75        0.0031
              calc                  1         1.60        0.2063

              Likelihood Ratio    369       366.01        0.5342


                   Analysis of Maximum Likelihood Estimates

                                  Standard        Chi-
          Parameter    Estimate      Error      Square    Pr > ChiSq
          ----------------------------------------------------------
          Intercept     14.6355     2.2803       41.20        <.0001
          gpa           -0.1181     0.0311       14.42        0.0001
          hscalc        -0.0592     0.0136       18.91        <.0001
          precalc       -0.2633     0.0890        8.75        0.0031
          calc          -0.0821     0.0650        1.60        0.2063


                   Contrasts of Maximum Likelihood Estimates

                   Contrast   DF    Chi-Square    Pr > ChiSq
                   -----------------------------------------
                   dtest       2         13.86        0.0010

_______________________________________________________________________________

                                The SAS System                                2
         Logistic regression on math data: Part III Using proc catmod
        Test precalc & calc controlling for gpa & hscalc: Reducd model
                                             21:27 Wednesday, November 10, 2004

                             The CATMOD Procedure

                                 Data Summary

              Response           passed     Response Levels    2
              Weight Variable    None       Populations      354
              Data Set           QUANT      Total Frequency  375
              Frequency Missing  0          Observations     375



                         Maximum Likelihood Analysis

              Sub         -2 Log  Convergence        Parameter Estimates
Iteration  Iteration  Likelihood    Criterion          1          2          3
------------------------------------------------------------------------------
     0         0       519.86039       1.0000          0          0          0
     1         0        391.3283       0.2472     9.0738    -0.0663    -0.0563
     2         0       381.37804       0.0254    13.3942    -0.1109    -0.0688
     3         0       380.91122     0.001224    14.6249    -0.1242    -0.0717
     4         0       380.90973    3.9307E-6    14.6999    -0.1250    -0.0719
     5         0       380.90973    4.506E-11    14.7001    -0.1250    -0.0719

                  Maximum likelihood computations converged.


                   Maximum Likelihood Analysis of Variance

              Source               DF   Chi-Square    Pr > ChiSq
              --------------------------------------------------
              Intercept             1        44.09        <.0001
              gpa                   1        17.05        <.0001
              hscalc                1        30.58        <.0001

              Likelihood Ratio    351       360.46        0.3524


                   Analysis of Maximum Likelihood Estimates

                                  Standard        Chi-
          Parameter    Estimate      Error      Square    Pr > ChiSq
          ----------------------------------------------------------
          Intercept     14.7001     2.2138       44.09        <.0001
          gpa           -0.1250     0.0303       17.05        <.0001
          hscalc        -0.0719     0.0130       30.58        <.0001

_______________________________________________________________________________


                                The SAS System                                3
         Logistic regression on math data: Part III Using proc catmod
        Test precalc & calc controlling for gpa & hscalc: Reducd model
                                             21:27 Wednesday, November 10, 2004

                              The FREQ Procedure

                            Table of sex by course

             sex       course

             Frequency|
             Row Pct  |      1 |      2 |      3 |No Resp |  Total
             ---------+--------+--------+--------+--------+
             Male     |     33 |    200 |     25 |     27 |    285
                      |  11.58 |  70.18 |   8.77 |   9.47 |
             ---------+--------+--------+--------+--------+
             Female   |     29 |    217 |      8 |     12 |    266
                      |  10.90 |  81.58 |   3.01 |   4.51 |
             ---------+--------+--------+--------+--------+
             Total          62      417       33       39      551


                     Statistics for Table of sex by course

            Statistic                     DF       Value      Prob
            ------------------------------------------------------
            Chi-Square                     3     14.8404    0.0020
            Likelihood Ratio Chi-Square    3     15.4099    0.0015
            Mantel-Haenszel Chi-Square     1      6.9148    0.0085
            Phi Coefficient                       0.1641
            Contingency Coefficient               0.1619
            Cramer's V                            0.1641

                               Sample Size = 551

_______________________________________________________________________________

                                The SAS System                                4
         Logistic regression on math data: Part III Using proc catmod
                        Course is a 4-category DV: Full
                                             21:27 Wednesday, November 10, 2004

                             The CATMOD Procedure

                                 Data Summary

              Response           course     Response Levels    4
              Weight Variable    None       Populations        2
              Data Set           QUANT2     Total Frequency  551
              Frequency Missing  0          Observations     551



                              Population Profiles

                        Sample    sex       Sample Size
                        -------------------------------
                            1     Male              285
                            2     Female            266


                               Response Profiles

                              Response    course
                              -------------------
                                  1             1
                                  2             2
                                  3             3
                                  4       No Resp


                          Maximum Likelihood Analysis

                             Sub           -2 Log    Convergence
               Iteration  Iteration    Likelihood      Criterion
               -------------------------------------------------
                    0         0         1527.6964         1.0000
                    1         0         906.57678         0.4066
                    2         0         884.29299         0.0246
                    3         0          880.2869       0.004530
                    4         0         880.23602      0.0000578
                    5         0         880.23601      1.6226E-8
                    6         0         880.23601      1.292E-15

                          Maximum Likelihood Analysis

                                    Parameter Estimates
  Iteration          1          2          3          4          5          6
  ---------------------------------------------------------------------------
       0             0          0          0          0          0          0
       1        0.0842     2.4281    -0.0281     0.1714     0.6546    -0.0321
       2        0.2527     1.9553    -0.0932     0.9879     0.9340    -0.2970
       3        0.2034     2.0023    -0.0773     0.7232     0.8930    -0.3287
       4        0.2007     2.0025    -0.0770     0.6825     0.8925    -0.3285
       5        0.2007     2.0025    -0.0770     0.6817     0.8925    -0.3285
       6        0.2007     2.0025    -0.0770     0.6817     0.8925    -0.3285

                  Maximum likelihood computations converged.


                   Maximum Likelihood Analysis of Variance

              Source               DF   Chi-Square    Pr > ChiSq
              --------------------------------------------------
              Intercept             3       227.38        <.0001
              sex                   3        13.90        0.0030

              Likelihood Ratio      0          .           .


                   Analysis of Maximum Likelihood Estimates

                Function               Standard        Chi-
      Parameter  Number     Estimate      Error      Square    Pr > ChiSq
      -------------------------------------------------------------------
      Intercept    1          0.2007     0.2595        0.60        0.4393
                   2          2.0025     0.2050       95.39        <.0001
                   3         -0.0770     0.2776        0.08        0.7816
      sex          1          0.6817     0.4303        2.51        0.1131
                   2          0.8925     0.3605        6.13        0.0133
                   3         -0.3285     0.5342        0.38        0.5386

_______________________________________________________________________________

                                The SAS System                                5
         Logistic regression on math data: Part III Using proc catmod
                      Course is a 4-category DV: Reduced

                             The CATMOD Procedure

                                 Data Summary

              Response           course     Response Levels    4
              Weight Variable    None       Populations        1
              Data Set           QUANT2     Total Frequency  551
              Frequency Missing  0          Observations     551


                              Population Profiles

                             Sample    Sample Size
                             ---------------------
                                 1             551


                               Response Profiles

                              Response    course
                              -------------------
                                  1             1
                                  2             2
                                  3             3
                                  4       No Resp


                         Maximum Likelihood Analysis

              Sub         -2 Log  Convergence        Parameter Estimates
Iteration  Iteration  Likelihood    Criterion          1          2          3
------------------------------------------------------------------------------
     0         0       1527.6964       1.0000          0          0          0
     1         0       920.21756       0.3976     0.1670     2.7441    -0.0436
     2         0       897.90453       0.0242     0.6237     2.3308    -0.1972
     3         0       895.65595     0.002504     0.4774     2.3699    -0.1669
     4         0       895.64588    0.0000112     0.4636     2.3695    -0.1671
     5         0       895.64588    3.033E-10     0.4636     2.3695    -0.1671

                  Maximum likelihood computations converged.


                   Maximum Likelihood Analysis of Variance

              Source               DF   Chi-Square    Pr > ChiSq
              --------------------------------------------------
              Intercept             3       499.35        <.0001

              Likelihood Ratio      0          .           .


                   Analysis of Maximum Likelihood Estimates

                Function               Standard        Chi-
      Parameter  Number     Estimate      Error      Square    Pr > ChiSq
      -------------------------------------------------------------------
      Intercept    1          0.4636     0.2044        5.14        0.0233
                   2          2.3695     0.1674      200.24        <.0001
                   3         -0.1671     0.2365        0.50        0.4800

_______________________________________________________________________________

         Calculate G: Should be Likelihood Ratio Chi-Square = 15.4099
                               (from proc freq)
                                             21:27 Wednesday, November 10, 2004

                                 G                     PVAL

                    G =   15.40987 , df = 3, p =  0.0014979

_______________________________________________________________________________

                                The SAS System                                7
         Logistic regression on math data: Part III Using proc catmod
          Predicted probability of female taking course 2 = 0.8158 ?
                                             21:27 Wednesday, November 10, 2004

                            A                           ONE

                    1.4137886         1         1         1
                            1  1.055299         1         1
                            1         1 2.5000523         1


                     Need to solve    A pi = one    for pi


                     Predicted probabilities for females:


                                       PI

                                   0.1090229
                                   0.8157908
                                   0.0300739
                                   0.0451124

_______________________________________________________________________________

                                The SAS System                                8
         Logistic regression on math data: Part III Using proc catmod
                                             21:27 Wednesday, November 10, 2004

                              The FREQ Procedure

                          Table of course2 by course

                 course2     course

                 Frequency|      1 |      2 |      3 |  Total
                 ---------+--------+--------+--------+
                 Mat122Y? |     20 |      0 |      0 |     20
                 ---------+--------+--------+--------+
                 Mat138Y  |      0 |      0 |     24 |     24
                 ---------+--------+--------+--------+
                 Mat132Y  |      0 |    324 |      0 |    324
                 ---------+--------+--------+--------+
                 Total          20      324       24      368

_______________________________________________________________________________

                                The SAS System                                9
         Logistic regression on math data: Part III Using proc catmod
          Predicted probability of female taking course 2 = 0.8158 ?
                                             21:27 Wednesday, November 10, 2004

                             The CATMOD Procedure

                                 Data Summary

              Response           course     Response Levels    3
              Weight Variable    None       Populations      368
              Data Set           QUANT3     Total Frequency  368
              Frequency Missing  0          Observations     368


                         Maximum Likelihood Analysis

              Sub         -2 Log  Convergence        Parameter Estimates
Iteration  Iteration  Likelihood    Criterion          1          2          3
------------------------------------------------------------------------------
     0         0       808.57864       1.0000          0          0          0
     1         0        310.6374       0.6158     1.1397     1.5742    -0.0490
     2         0       281.36458       0.0942     6.4226     2.8677    -0.2137
     3         0       274.79215       0.0234    10.0860     3.3236    -0.2707
     4         0        273.9678     0.003000    11.7154     3.4453    -0.2896
     5         0       273.94419    0.0000862    11.9603     3.4508    -0.2918
     6         0       273.94416    1.0121E-7    11.9671     3.4508    -0.2919
     7         0       273.94416    1.544E-13    11.9671     3.4508    -0.2919


                          Maximum Likelihood Analysis

                                    Parameter Estimates
  Iteration          4          5          6          7          8          9
  ---------------------------------------------------------------------------
       0             0          0          0          0          0          0
       1       -0.0568    -0.0321     0.0174   0.004018    -0.0173    -0.0198
       2       -0.1669    -0.1672  -0.007892   0.000429    -0.0207    -0.0819
       3       -0.1949    -0.2670  -0.002234    -0.0183  -0.005235    -0.1074
       4       -0.2009    -0.3299  -0.001903    -0.0296  -0.002096    -0.1172
       5       -0.2011    -0.3439  -0.001982    -0.0316  -0.002008    -0.1187
       6       -0.2011    -0.3444  -0.001985    -0.0316  -0.002008    -0.1187
       7       -0.2011    -0.3444  -0.001985    -0.0316  -0.002008    -0.1187

                         Maximum Likelihood Analysis

                                   Parameter Estimates
   Iteration         10           11           12           13           14
   ------------------------------------------------------------------------
        0             0            0            0            0            0
        1      0.004332     0.004316       0.0253       0.0426       0.3729
        2       -0.0193       0.0124       0.0440       0.2434       0.7878
        3       -0.0351       0.0122       0.0414       0.3387       0.8962
        4       -0.0389       0.0127       0.0410       0.3577       0.9171
        5       -0.0390       0.0129       0.0409       0.3576       0.9177
        6       -0.0390       0.0129       0.0409       0.3576       0.9177
        7       -0.0390       0.0129       0.0409       0.3576       0.9177

                  Maximum likelihood computations converged.


                   Maximum Likelihood Analysis of Variance

              Source               DF   Chi-Square    Pr > ChiSq
              --------------------------------------------------
              Intercept             2         5.10        0.0782
              precalc               2         1.98        0.3724
              calc                  2         3.65        0.1614
              gpa                   2         0.18        0.9133
              hscalc                2        10.76        0.0046
              english               2         2.47        0.2910
              sex                   2         4.21        0.1218

              Likelihood Ratio    722       273.94        1.0000



                   Analysis of Maximum Likelihood Estimates

                Function               Standard        Chi-
      Parameter  Number     Estimate      Error      Square    Pr > ChiSq
      -------------------------------------------------------------------
      Intercept    1         11.9671     5.3022        5.09        0.0240
                   2          3.4508     2.9503        1.37        0.2422
      precalc      1         -0.2919     0.2390        1.49        0.2220
                   2         -0.2011     0.1543        1.70        0.1925
      calc         1         -0.3444     0.2034        2.87        0.0904
                   2        -0.00198     0.1005        0.00        0.9842
      gpa          1         -0.0316     0.0906        0.12        0.7268
                   2        -0.00201     0.0597        0.00        0.9732
      hscalc       1         -0.1187     0.0400        8.82        0.0030
                   2         -0.0390     0.0306        1.62        0.2030
      english      1          0.0129     0.0495        0.07        0.7943
                   2          0.0409     0.0284        2.07        0.1500
      sex          1          0.3576     0.7373        0.24        0.6277
                   2          0.9177     0.5082        3.26        0.0710


                   Contrasts of Maximum Likelihood Estimates

                   Contrast   DF    Chi-Square    Pr > ChiSq
                   -----------------------------------------
                   dtest       2          2.98        0.2255