/* MathLogReg4.sas */
%include '/folders/myfolders/441s16/Lecture/readmath2b.sas'; /* Creates data table mathex */
title2 'Logistic regression with more than 2 resp. categories';

/*************** Data step continues ************************/
/* The included file leaves its data step open, so the statements
   below are compiled as part of that same data step.            */

   /* The + operator propagates missing values, so hsutil is missing
      whenever any one of the three high school variables is missing. */
   hsutil = hsgpa + hscalc + hsengl;
   hsmiss = (hsutil = .);        /* 1 if hsutil is missing, otherwise 0 */
   label  hsmiss = 'Missing Any High School Data';
   format hsmiss ynfmt.;         /* ynfmt. is not defined in this file;
                                    presumably it comes from the include */

   /* Three-category response. Any mark outside the two ranges,
      including a missing mark, falls through to 'Gone'.        */
   select;
      when (0 <= mark <= 49)    outcome = 'Fail';
      when (50 <= mark <= 100)  outcome = 'Pass';
      otherwise                 outcome = 'Gone';
   end;
run;
/*************************************************************/

/* Cross-tabulate the new outcome variable against passed, counts only;
   the MISSING option makes missing values a table category.          */
proc freq;
   tables outcome*passed / norow nocol nopercent missing;
run;

/* Each categorical explanatory variable by outcome, with chi-squared tests. */
proc freq;
   title3 'One at a time cat IVs with proc freq';
   tables (course2 sex ethnic tongue hsmiss) * outcome
          / nocol nopercent chisq;
run;

/* Multinomial Logit model is
      ln(pi1/pi3) = beta01 + beta11 x     Fail vs. Pass
      ln(pi2/pi3) = beta02 + beta12 x     Gone vs. Pass
*/
/* OUTEST= saves the parameter estimates in the data set ParmNames. */
proc logistic data=mathex outest=ParmNames;
   title3 'Multinomial logit model with proc logistic';
   model outcome (ref='Pass') = hsmiss / link = glogit;
   contrast 'HS Missing method 1' hsmiss 1;
run;

/* Find out the parameter names.
*/
/* Transposing the OUTEST= data set puts one variable per row, so the
   printout lists the parameter names needed by the TEST statements.  */
proc transpose data=ParmNames;
run;

proc print noobs;
run;

/* Same model as before, now testing the hsmiss effect two ways:
   with CONTRAST and with TEST on the named parameters.          */
proc logistic data = mathex;
   title3 'Multinomial logit model with proc logistic';
   model outcome (ref='Pass') = hsmiss / link = glogit;
   contrast 'HS Missing method 1' hsmiss 1;
   HS_MissingMethod2: test hsmiss_Fail = hsmiss_Gone = 0;
run;

/* Estimated probabilities from the two generalized logits:
      P(Fail) = exp(L1)/denom, P(Gone) = exp(L2)/denom, P(Pass) = 1/denom,
   where denom = 1 + exp(L1) + exp(L2).
   NOTE(review): the coefficients are typed in by hand, presumably from
   the parameter estimates of the hsmiss model above. The title says
   proc catmod although this file only runs proc logistic -- confirm
   the source of the numbers before changing the title.               */
proc iml;
   title3 'Estimate Probabilities using output from proc catmod';
   b01 = -1.3594; b11 = 0.6663;   /* Fail vs. Pass: intercept, slope */
   b02 = -0.8306; b12 = 1.2360;   /* Gone vs. Pass: intercept, slope */

   hsmiss = 0;
   L1 = b01 + b11*hsmiss;
   L2 = b02 + b12*hsmiss;
   denom = 1 + exp(L1) + exp(L2);
   Fail = exp(L1)/denom;
   Gone = exp(L2)/denom;
   Pass = 1/denom;
   print "No Missing HS Data:" Fail Gone Pass;

   hsmiss = 1;
   L1 = b01 + b11*hsmiss;
   L2 = b02 + b12*hsmiss;
   denom = 1 + exp(L1) + exp(L2);
   Fail = exp(L1)/denom;
   Gone = exp(L2)/denom;
   Pass = 1/denom;
   print "Yes Missing HS Data:" Fail Gone Pass;
quit;   /* End the IML session explicitly instead of relying on the next PROC. */

/* Row percentages here can be compared with the probabilities printed above. */
proc freq data = mathex;
   title3 'Hsmiss by outcome again for comparison';
   tables hsmiss * outcome / nocol nopercent;
run;

/* Now seek a good predictive model */
proc logistic data = mathex;
   title3 'HS variables';
   model outcome (ref='Pass') = hsgpa hscalc hsengl / link = glogit;
run;

/* Drop HS English */
proc logistic data = mathex;
   title3 'HS gpa and calc + course2 ';
   class course2 / param=ref;   /* Last category is reference by default. */
   model outcome (ref='Pass') = hsgpa hscalc course2 / link = glogit;
run;

/* Forget course2 */
proc logistic data = mathex;
   title3 'HS gpa and calc + diagnostic test';
   model outcome (ref='Pass') = hsgpa hscalc precalc calc / link = glogit;
run;

/* Drop calc subtest, keep precalc */
proc logistic data = mathex;
   title3 'Try gender, ethnic and mother tongue controlling for good stuff';
   class ethnic (param=ref ref='East Indian');
      /* Specifying a reference category that's not the last value */
   model outcome (ref='Pass') = hsgpa hscalc precalc
                                ethnic gender mtongue / link = glogit;
   /* Each contrast row sets one coefficient to zero; the rows are
      tested jointly.                                              */
   contrast 'Demographics'
            ethnic 1 0 0 0 0,
            ethnic 0 1 0 0 0,
            ethnic 0 0 1 0 0,
            ethnic 0 0 0 1 0,
            ethnic 0 0 0 0 1,
            gender 1,
            mtongue 1;
   contrast 'Ethnic and Gender'
            ethnic 1 0 0 0 0,
            ethnic 0 1 0 0 0,
            ethnic 0 0 1 0 0,
            ethnic 0 0 0 1 0,
            ethnic 0 0 0 0 1,
            gender 1;
run;

/* Mother tongue is significant. Still true when we drop ethnic and gender? */
/* This model was previously fitted twice in a row by two identical steps;
   one fit produces the same results.                                     */
proc logistic data = mathex;
   title3 'hsgpa hscalc precalc mtongue';
   model outcome (ref='Pass') = hsgpa hscalc precalc mtongue / link = glogit;
run;

/* Allowing for academic background, students whose first language is
   English are more likely to fail the course as opposed to passing, and
   less likely to disappear as opposed to passing. If this is replicated,
   it will be very interesting.

   Now explore in more detail. Recall the response categories are
   1=Fail 2=Gone 3=Pass. We want to know whether failing is different from
   disappearing in terms of their relationship to the explanatory
   variables. We are getting advanced here. What is H0?

   Model (using b instead of beta) is

   ln(pi1/pi3) = b01 + b11 hsgpa + b21 hscalc + b31 precalc + b41 mtongue
   ln(pi2/pi3) = b02 + b12 hsgpa + b22 hscalc + b32 precalc + b42 mtongue

   The null hypothesis is b11=b12, b21=b22, b31=b32, b41=b42

   Parameter names are easy to guess.
*/
/* Test whether the Fail-vs-Pass and Gone-vs-Pass coefficients are equal:
   all four explanatory variables at once, then one variable at a time. */
proc logistic data = mathex;
   title3 'Different coefficients for Gone and Fail?';
   model outcome (ref='Pass') = hsgpa hscalc precalc mtongue / link = glogit;
   DiffOverall:  test hsgpa_Fail   = hsgpa_Gone,
                      hscalc_Fail  = hscalc_Gone,
                      precalc_Fail = precalc_Gone,
                      mtongue_Fail = mtongue_Gone;
   Diff_hsgpa:   test hsgpa_Fail   = hsgpa_Gone;
   Diff_hscalc:  test hscalc_Fail  = hscalc_Gone;
   Diff_precalc: test precalc_Fail = precalc_Gone;
   Diff_mtongue: test mtongue_Fail = mtongue_Gone;
run;

/************************** Replication ***********************
   For interpretation, want to replicate 8 findings:
   Gone vs. Pass and Fail vs. Pass for each explanatory variable.
***************************************************************/
%include '/folders/myfolders/441s16/Lecture/readreplic.sas';
   /* The statements below continue the data step left open by the
      included file and rebuild the three-category response. Any mark
      outside the two ranges, including a missing mark, becomes 'Gone'. */
   if (0<=mark<=49) then outcome = 'Fail';
      else if (50<=mark<=100) then outcome = 'Pass';
      else outcome = 'Gone';
run;

/* Four explanatory variables times two logits gives the 8 findings,
   hence the Bonferroni-corrected level 0.05/8 = .00625 in the title.
   The title now lists only the variables that are in the model; it
   previously also named calc, which is not.                          */
proc logistic data = replic; /* That's the default anyway: presumably replic
                                is the data set just created above.         */
   title2 'Replicate hsgpa hscalc precalc mtongue 0.05/8 = .00625';
   model outcome (ref='Pass') = hsgpa hscalc precalc mtongue / link = glogit;
   Diff_mtongue: test mtongue_Fail = mtongue_Gone;
run;   /* Needed so that the last step executes in an interactive session. */

/* Final conclusions:

   Students with higher High School GPA were less likely to fail as
   opposed to passing and less likely to disappear as opposed to passing.

   Students with higher High School Calculus marks were less likely to
   disappear as opposed to passing.

   Students with higher scores on the pre-calculus portion of the
   diagnostic test were less likely to disappear as opposed to passing.

   There was no convincing evidence of a connection between Mother Tongue
   (English vs. Other) and outcome.
*/