/* MathReg1.sas
   Variable selection for predicting university calculus grade.
   Fits a sequence of regression models (A through I, then stepwise) and,
   after selected models, uses proc iml to compute the proportion of
   remaining variation explained by the variable(s) just tested. */

%include '/home/brunner0/441s20/readmath2b.sas';

/* readmath2b has dummy variable definitions
      e1-e4, e6 for ethnic (reference category is East Indian)
      gender  = 1 for Female
      mtongue = 1 for English
      c1-c3:  c1 = 'Catch-up'  c2 = 'Mainstream'  c3 = 'Elite' */

title2 'Variable Selection for Predicting Grade';

/* Cross-tabulate each dummy variable against the variable it was built
   from, including missing values, to verify the coding. */
proc freq;
     title3 'Check dummy variables';
     tables sex*gender             / norow nocol nopercent missing;
     tables tongue*mtongue         / norow nocol nopercent missing;
     tables (e1-e4 e6) * ethnic    / norow nocol nopercent missing;
     tables (c1-c3) * course       / norow nocol nopercent missing;
run;

proc reg plots = none; /* Suppress diagnostic plots for now */
     title3 'Model A: Predict University Calculus Grade from HS Information';
     model grade = hsgpa hscalc hsengl;
run;

/* It is very interesting to know what proportion of the remaining
   variation is explained by each variable, controlling for the other two.
   F = t-squared, and a = sF/(n-p + sF), where
      s = number of coefficients being tested (1 for a single t-test),
      p = number of regression coefficients including the intercept
          (3 predictors gives p = 4 here),
      n = sample size used by the model (presumably read off the
          proc reg output - confirm if the data change).
   NOTE: the n, t and F values in every proc iml step below are
   hard-coded, presumably transcribed from the preceding regression
   output; they must be re-checked by hand whenever the data or the
   models change. */
proc iml;
     title3 'Proportion of remaining variation for HS information';
     n = 323; p = 4; s = 1;
     print "hsgpa controlling for hscalc and hsengl";
     t = 8.00; F = t**2; a = s*F/(n-p + s*F);
     print a;
     print "hscalc controlling for hsgpa and hsengl";
     t = 3.14; F = t**2; a = s*F/(n-p + s*F);
     print a;
     print "hsengl controlling for hsgpa and hscalc";
     t = -3.26; F = t**2; a = s*F/(n-p + s*F);
     print a;
quit;

proc reg plots = none;
     title3 'Model B: Predict University Calculus Grade from Diagnostic Test';
     model grade = precalc calc;
run;

proc reg plots = none;
     title3 'Model C: Do the diagnostic test and HS info both contribute?';
     model grade = hsgpa hscalc hsengl precalc calc;
     Diagnostic_Test: test precalc=calc=0;
     HS_Information:  test hsgpa=hscalc=hsengl=0;
run;

proc iml;
     title3 'Proportion of remaining variation explained by diagnostic test';
     print "Precalc and calc controlling for hsgpa hscalc hsengl";
     n = 289; p = 6; s = 2; F = 8.28; a = s*F/(n-p + s*F);
     print a;
quit;

proc iml;
     title3 'Proportion of remaining variation explained by HS info';
     print "Hsgpa hscalc hsengl controlling for precalc and calc";
     n = 289; p = 6; s = 3; F = 46.97; a = s*F/(n-p + s*F);
     print a;
quit;

proc reg plots = none;
     title3 'Model D: See if Course makes a contribution';
     /* c2 is omitted from the model, so it serves as the reference
        category for course. */
     model grade = hsgpa hscalc hsengl precalc calc c1 c3;
     Course:          test c1=c3=0;
     Diagnostic_Test: test precalc=calc=0;
run;

proc glm;
     title3 'Model D again with proc glm';
     class course;
     model grade = hsgpa hscalc hsengl precalc calc course;
     /* Two rows of pairwise differences between adjacent course levels:
        together they test equality of all three course levels. */
     contrast 'Replicate Test of Course' course 1 -1 0,
                                         course 0 1 -1;
     contrast 'Diagnostic Test F = 9.06' precalc 1, calc 1;
run;

proc reg plots = none;
     title3 'Model E: Include Language, Sex and Ethnic Background';
     model grade = hsgpa hscalc hsengl precalc calc
                   mtongue gender e1-e4 e6;
     TroubleVars: test mtongue=gender=e1=e2=e3=e4=e6=0;
     Nationality: test e1=e2=e3=e4=e6=0;
run;

proc reg plots = none;
     title3 'Model F: Discarding Gender and Nationality';
     model grade = hsgpa hscalc hsengl precalc calc mtongue;
     EnglishTongue: test hsengl=mtongue=0;
run;

proc iml;
     title3 'Proportion of remaining variation explained by mother tongue';
     print "Mtongue controlling for hsgpa hscalc hsengl precalc calc";
     n = 287; p = 7; s = 1; t = -2.23; F = t**2; a = s*F/(n-p + s*F);
     print a;
quit;

proc reg plots = none;
     title3 'Model G: Drop mtongue and calc';
     /* The comparison values are those of Model C (the third model):
        with n = 289, p = 6 and R-Square = 0.4556 the adjusted R-Square
        is 1 - (1-0.4556)*288/283 = 0.4460. The label previously read
        "Model 3", a name no model in this file carries. */
     title4 'Compare R-Square = 0.4556, Adj R-Sq = 0.4460 From Model C';
     model grade = hsgpa hscalc hsengl precalc;
run;

proc iml;
     title3 'Proportion of remaining variation explained by Pre-calculus';
     print "precalc controlling for hsgpa hscalc hsengl";
     n = 289; p = 5; s = 1; t = 3.63; F = t**2; a = s*F/(n-p + s*F);
     print a;
quit;

proc reg plots = none;
     title3 'Model H: Combine precalc and calc instead of dropping calc';
     title4 'Compare R-Square = 0.4492 from Model G';
     model grade = hsgpa hscalc hsengl totscore;
run;

proc iml;
     /* This step is about totscore, not precalc; the title had been
        copied unchanged from the Model G step. */
     title3 'Proportion of remaining variation explained by totscore';
     print "totscore controlling for hsgpa hscalc hsengl";
     n = 289; p = 5; s = 1; t = 3.92; F = t**2; a = s*F/(n-p + s*F);
     print a;
     print "For prediction, I am happy with Model H: hsgpa hscalc hsengl totscore";
quit;

proc reg plots = none;
     title3 'Model I: Same as Model H but including Mother Tongue';
     model grade = hsgpa hscalc hsengl totscore mtongue;
     /* A slightly better model except for the lawsuit. */
run;

proc reg plots = none;
     title3 'Try automatic (stepwise) selection';
     model grade = hsgpa hscalc hsengl precalc calc totscore
                   mtongue gender e1-e4 e6
           / selection = stepwise slentry = 0.05 slstay = 0.05;
     /* Default slentry = slstay = 0.15 */
run;
quit;