1 OPTIONS NONOTES NOSTIMER NOSOURCE NOSYNTAXCHECK;
55
56 /* MathReg1.sas */
57 %include '/folders/myfolders/441s16/Lecture/readmath2b.sas';
NOTE: Format YNFMT is already on the library WORK.FORMATS.
NOTE: Format YNFMT has been output.
NOTE: Format CRSFMT is already on the library WORK.FORMATS.
NOTE: Format CRSFMT has been output.
NOTE: Format NFMT is already on the library WORK.FORMATS.
NOTE: Format NFMT has been output.
NOTE: PROCEDURE FORMAT used (Total process time):
      real time 0.00 seconds
      cpu time 0.00 seconds
159 /* readmath2b has dummy variable definitions
160 e1-e4,e6 for ethnic (Reference category is East Indian)
161 gender=1 for Female
162 mtongue=1 for English
163 c1-c3: c1 = 'Catch-up' c2 = 'Mainstream' c3 = 'Elite' */
164
165 title2 'Variable Selection for Predicting Grade';
166
NOTE: The infile '/folders/myfolders/exploremath.data.txt' is:
      Filename=/folders/myfolders/exploremath.data.txt,
      Owner Name=root,Group Name=vboxsf,
      Access Permission=-rwxrwx---,
      Last Modified=18Jan2016:17:34:49,
      File Size (bytes)=44583
NOTE: 579 records were read from the infile '/folders/myfolders/exploremath.data.txt'.
      The minimum record length was 75.
      The maximum record length was 75.
NOTE: Missing values were generated as a result of performing an operation on missing values.
      Each place is given by: (Number of times) at (Line):(Column).
      99 at 80:24 99 at 117:13
NOTE: The data set WORK.MATHEX has 579 observations and 34 variables.
NOTE: DATA statement used (Total process time):
      real time 0.02 seconds
      cpu time 0.02 seconds
167 proc freq;
168 title3 'Check dummy variables';
169 tables sex*gender / norow nocol nopercent missing;
170 tables tongue*mtongue / norow nocol nopercent missing;
171 tables (e1-e4 e6) * ethnic / norow nocol nopercent missing;
172 tables (c1-c3) * course / norow nocol nopercent missing;
173
NOTE: There were 579 observations read from the data set WORK.MATHEX.
NOTE: PROCEDURE FREQ used (Total process time):
      real time 0.24 seconds
      cpu time 0.23 seconds
174 proc reg plots = none; /* Suppress diagnostic 
plots for now*/
175 title3 'Model A: Predict University Calculus Grade from HS Information';
176 model grade = hsgpa hscalc hsengl;
177
178 /* It is very interesting to know what proportion of the remaining
179 variation is explained by each variable, controlling for the other two.
180 F = t-squared, and
181 a = sF/(n-p + sF)
182 */
183
NOTE: PROCEDURE REG used (Total process time):
      real time 0.06 seconds
      cpu time 0.06 seconds
184 proc iml;
NOTE: IML Ready
185 title3 'Proportion of remaining variation for HS information';
186 n = 323;
186 ! p = 4;
186 ! s = 1;
187 print "hsgpa controlling for hscalc and hsengl";
188 t = 8.00;
188 ! F = t**2;
188 ! a = s*F/(n-p + s*F);
189 print a;
190
191 print "hscalc controlling for hsgpa and hsengl";
192 t = 3.14;
192 ! F = t**2;
192 ! a = s*F/(n-p + s*F);
193 print a;
194
195 print "hsengl controlling for hsgpa and hscalc";
196 t = -3.26;
196 ! F = t**2;
196 ! a = s*F/(n-p + s*F);
197 print a;
198
NOTE: Exiting IML.
NOTE: PROCEDURE IML used (Total process time):
      real time 0.03 seconds
      cpu time 0.04 seconds
199 proc reg plots = none;
200 title3 'Model B: Predict University Calculus Grade from Diagnostic Test';
201 model grade = precalc calc;
202
NOTE: PROCEDURE REG used (Total process time):
      real time 0.05 seconds
      cpu time 0.05 seconds
203 proc reg plots = none;
204 title3 'Model C: Do the diagnostic test and HS info both contribute?';
205 model grade = hsgpa hscalc hsengl precalc calc;
206 Diagnostic_Test: test precalc=calc=0;
207 HS_Information: test hsgpa=hscalc=hsengl=0;
208
NOTE: PROCEDURE REG used (Total process time):
      real time 0.09 seconds
      cpu time 0.08 seconds
209 proc iml;
NOTE: IML Ready
210 title3 'Proportion of remaining variation explained by diagnostic test';
211 print "Precalc and calc controlling for hsgpa hscalc hsengl";
212 n = 289;
212 ! p = 6;
212 ! s = 2;
212 ! F = 8.28;
213 a = s*F/(n-p + s*F);
213 ! 
print a;
214
NOTE: Exiting IML.
NOTE: PROCEDURE IML used (Total process time):
      real time 0.02 seconds
      cpu time 0.02 seconds
215 proc reg plots = none;
216 title3 'Model D: See if Course makes a contribution';
217 model grade = hsgpa hscalc hsengl precalc calc c1 c3;
218 Course: test c1=c3=0;
219 Diagnostic_Test: test precalc=calc=0;
220
NOTE: PROCEDURE REG used (Total process time):
      real time 0.08 seconds
      cpu time 0.07 seconds
221 proc glm;
222 title3 'Model D again with proc glm';
223 class course;
224 model grade = hsgpa hscalc hsengl precalc calc course;
225 contrast 'Replicate Test of Course' course 1 -1 0,
226 course 0 1 -1;
227 contrast 'Diagnostic Test F = 9.06' precalc 1, calc 1;
228
NOTE: PROCEDURE GLM used (Total process time):
      real time 0.09 seconds
      cpu time 0.08 seconds
229 proc reg plots = none;
230 title3 'Model E: Include Language, Sex and Ethnic Background';
231 model grade = hsgpa hscalc hsengl precalc calc
232 mtongue gender e1-e4 e6;
233 TroubleVars: test mtongue=gender=e1=e2=e3=e4=e6=0;
234 Nationality: test e1=e2=e3=e4=e6=0;
235
NOTE: PROCEDURE REG used (Total process time):
      real time 0.10 seconds
      cpu time 0.09 seconds
236 proc reg plots = none;
237 title3 'Model F: Discarding Gender and Nationality';
238 model grade = hsgpa hscalc hsengl precalc calc mtongue;
239 EnglishTongue: test hsengl=mtongue=0;
240
NOTE: PROCEDURE REG used (Total process time):
      real time 0.07 seconds
      cpu time 0.07 seconds
241 proc iml;
NOTE: IML Ready
242 title3 'Proportion of remaining variation explained by mother tongue';
243 print "Mtongue controlling for hsgpa hscalc hsengl precalc calc";
244 n = 287;
244 ! p = 7;
244 ! s = 1;
244 ! t = -2.23 ;
244 ! F = t**2;
245 a = s*F/(n-p + s*F);
245 ! 
print a;
246
NOTE: Exiting IML.
NOTE: PROCEDURE IML used (Total process time):
      real time 0.02 seconds
      cpu time 0.01 seconds
247 proc reg plots = none;
248 title3 'Model G: Drop mtongue and calc';
249 title4 'Compare R-Square = 0.4556, Adj R-Sq = 0.4460 From Model 3';
250 model grade = hsgpa hscalc hsengl precalc;
251
NOTE: PROCEDURE REG used (Total process time):
      real time 0.06 seconds
      cpu time 0.05 seconds
252 proc iml;
NOTE: IML Ready
253 title3 'Proportion of remaining variation explained by Pre-calculus';
254 print "precalc controlling for hsgpa hscalc hsengl";
255 n = 289;
255 ! p = 5;
255 ! s = 1;
255 ! t = 3.63 ;
255 ! F = t**2;
256 a = s*F/(n-p + s*F);
256 ! print a;
257
NOTE: Exiting IML.
NOTE: PROCEDURE IML used (Total process time):
      real time 0.02 seconds
      cpu time 0.02 seconds
258 proc reg plots(only) = ResidualPlot ;
259 title3 'Model H: Combine precalc and calc instead of dropping calc';
260 title4 'Compare R-Square = 0.4492 from Model 7';
261 model grade = hsgpa hscalc hsengl totscore;
262
NOTE: PROCEDURE REG used (Total process time):
      real time 0.50 seconds
      cpu time 0.25 seconds
263 proc iml;
NOTE: IML Ready
264 title3 'Proportion of remaining variation explained by Pre-calculus';
265 print "totscore controlling for hsgpa hscalc hsengl";
266 n = 289;
266 ! p = 5;
266 ! s = 1;
266 ! t = 3.92 ;
266 ! F = t**2;
267 a = s*F/(n-p + s*F);
267 ! 
print a;
268 print "For prediction, I am happy with Model 8: hsgpa hscalc hsengl totscore";
269
NOTE: Exiting IML.
NOTE: PROCEDURE IML used (Total process time):
      real time 0.05 seconds
      cpu time 0.05 seconds
270 proc reg plots = none;
271 title3 'Model I: Same as Model H but including Mother Tongue';
272 model grade = hsgpa hscalc hsengl totscore mtongue;
273
NOTE: PROCEDURE REG used (Total process time):
      real time 0.10 seconds
      cpu time 0.06 seconds
274 proc reg plots = none;
275 title3 'Try automatic (stepwise) selection';
276 model grade = hsgpa hscalc hsengl precalc calc
277 mtongue gender e1-e4 e6
278 / selection = stepwise slentry = 0.05 slstay = 0.05 ;
279 /* Default slentry = slstay = 0.15 */
280
281
282 OPTIONS NONOTES NOSTIMER NOSOURCE NOSYNTAXCHECK;
294