1 OPTIONS NONOTES NOSTIMER NOSOURCE NOSYNTAXCHECK;
55
56 /* MathReg1.sas */
57 %include '/folders/myfolders/441s16/Lecture/readmath2b.sas';
NOTE: Format YNFMT is already on the library WORK.FORMATS.
NOTE: Format YNFMT has been output.
NOTE: Format CRSFMT is already on the library WORK.FORMATS.
NOTE: Format CRSFMT has been output.
NOTE: Format NFMT is already on the library WORK.FORMATS.
NOTE: Format NFMT has been output.
NOTE: PROCEDURE FORMAT used (Total process time):
real time 0.00 seconds
cpu time 0.00 seconds
159 /* readmath2b has dummy variable definitions
160 e1-e4,e6 for ethnic (Reference category is East Indian)
161 gender=1 for Female
162 mtongue=1 for English
163 c1-c3: c1 = 'Catch-up' c2 = 'Mainstream' c3 = 'Elite' */
164
165 title2 'Variable Selection for Predicting Grade';
166
NOTE: The infile '/folders/myfolders/exploremath.data.txt' is:
Filename=/folders/myfolders/exploremath.data.txt,
Owner Name=root,Group Name=vboxsf,
Access Permission=-rwxrwx---,
Last Modified=18Jan2016:17:34:49,
File Size (bytes)=44583
NOTE: 579 records were read from the infile '/folders/myfolders/exploremath.data.txt'.
The minimum record length was 75.
The maximum record length was 75.
NOTE: Missing values were generated as a result of performing an operation on missing values.
Each place is given by: (Number of times) at (Line):(Column).
99 at 80:24 99 at 117:13
NOTE: The data set WORK.MATHEX has 579 observations and 34 variables.
NOTE: DATA statement used (Total process time):
real time 0.02 seconds
cpu time 0.02 seconds
167 proc freq;
168 title3 'Check dummy variables';
169 tables sex*gender / norow nocol nopercent missing;
170 tables tongue*mtongue / norow nocol nopercent missing;
171 tables (e1-e4 e6) * ethnic / norow nocol nopercent missing;
172 tables (c1-c3) * course / norow nocol nopercent missing;
173
NOTE: There were 579 observations read from the data set WORK.MATHEX.
NOTE: PROCEDURE FREQ used (Total process time):
real time 0.24 seconds
cpu time 0.23 seconds
174 proc reg plots = none; /* Suppress diagnostic plots for now*/
175 title3 'Model A: Predict University Calculus Grade from HS Information';
176 model grade = hsgpa hscalc hsengl;
177
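178 /* It is very interesting to know what proportion of the remaining
179 variation is explained by each variable, controlling for the other two.
180 For a single coefficient (s = 1), F = t-squared, and in general
181 a = sF/(n-p + sF), where s = number of coefficients tested at once, p = number of regression coefficients (presumably counting the intercept), and n = sample size (presumably the cases actually used in the fit).
182 */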
183
NOTE: PROCEDURE REG used (Total process time):
real time 0.06 seconds
cpu time 0.06 seconds
184 proc iml;
NOTE: IML Ready
185 title3 'Proportion of remaining variation for HS information';
186 n = 323;
186 ! p = 4;
186 ! s = 1;
187 print "hsgpa controlling for hscalc and hsengl";
188 t = 8.00;
188 ! F = t**2;
188 ! a = s*F/(n-p + s*F);
189 print a;
190
191 print "hscalc controlling for hsgpa and hsengl";
192 t = 3.14;
192 ! F = t**2;
192 ! a = s*F/(n-p + s*F);
193 print a;
194
195 print "hsengl controlling for hsgpa and hscalc";
196 t = -3.26;
196 ! F = t**2;
196 ! a = s*F/(n-p + s*F);
197 print a;
198
NOTE: Exiting IML.
NOTE: PROCEDURE IML used (Total process time):
real time 0.03 seconds
cpu time 0.04 seconds
199 proc reg plots = none;
200 title3 'Model B: Predict University Calculus Grade from Diagnostic Test';
201 model grade = precalc calc;
202
NOTE: PROCEDURE REG used (Total process time):
real time 0.05 seconds
cpu time 0.05 seconds
203 proc reg plots = none;
204 title3 'Model C: Do the diagnostic test and HS info both contribute?';
205 model grade = hsgpa hscalc hsengl precalc calc;
206 Diagnostic_Test: test precalc=calc=0;
207 HS_Information: test hsgpa=hscalc=hsengl=0;
208
NOTE: PROCEDURE REG used (Total process time):
real time 0.09 seconds
cpu time 0.08 seconds
209 proc iml;
NOTE: IML Ready
210 title3 'Proportion of remaining variation explained by diagnostic test';
211 print "Precalc and calc controlling for hsgpa hscalc hsengl";
212 n = 289;
212 ! p = 6;
212 ! s = 2;
212 ! F = 8.28;
213 a = s*F/(n-p + s*F);
213 ! print a;
214
NOTE: Exiting IML.
NOTE: PROCEDURE IML used (Total process time):
real time 0.02 seconds
cpu time 0.02 seconds
215 proc reg plots = none;
216 title3 'Model D: See if Course makes a contribution';
217 model grade = hsgpa hscalc hsengl precalc calc c1 c3;
218 Course: test c1=c3=0;
219 Diagnostic_Test: test precalc=calc=0;
220
NOTE: PROCEDURE REG used (Total process time):
real time 0.08 seconds
cpu time 0.07 seconds
221 proc glm;
222 title3 'Model D again with proc glm';
223 class course;
224 model grade = hsgpa hscalc hsengl precalc calc course;
225 contrast 'Replicate Test of Course' course 1 -1 0,
226 course 0 1 -1;
227 contrast 'Diagnostic Test F = 9.06' precalc 1, calc 1;
228
NOTE: PROCEDURE GLM used (Total process time):
real time 0.09 seconds
cpu time 0.08 seconds
229 proc reg plots = none;
230 title3 'Model E: Include Language, Sex and Ethnic Background';
231 model grade = hsgpa hscalc hsengl precalc calc
232 mtongue gender e1-e4 e6;
233 TroubleVars: test mtongue=gender=e1=e2=e3=e4=e6=0;
234 Nationality: test e1=e2=e3=e4=e6=0;
235
NOTE: PROCEDURE REG used (Total process time):
real time 0.10 seconds
cpu time 0.09 seconds
236 proc reg plots = none;
237 title3 'Model F: Discarding Gender and Nationality';
238 model grade = hsgpa hscalc hsengl precalc calc mtongue;
239 EnglishTongue: test hsengl=mtongue=0;
240
NOTE: PROCEDURE REG used (Total process time):
real time 0.07 seconds
cpu time 0.07 seconds
241 proc iml;
NOTE: IML Ready
242 title3 'Proportion of remaining variation explained by mother tongue';
243 print "Mtongue controlling for hsgpa hscalc hsengl precalc calc";
244 n = 287;
244 ! p = 7;
244 ! s = 1;
244 ! t = -2.23 ;
244 ! F = t**2;
245 a = s*F/(n-p + s*F);
245 ! print a;
246
NOTE: Exiting IML.
NOTE: PROCEDURE IML used (Total process time):
real time 0.02 seconds
cpu time 0.01 seconds
247 proc reg plots = none;
248 title3 'Model G: Drop mtongue and calc';
249 title4 'Compare R-Square = 0.4556, Adj R-Sq = 0.4460 From Model C';
250 model grade = hsgpa hscalc hsengl precalc;
251
NOTE: PROCEDURE REG used (Total process time):
real time 0.06 seconds
cpu time 0.05 seconds
252 proc iml;
NOTE: IML Ready
253 title3 'Proportion of remaining variation explained by Pre-calculus';
254 print "precalc controlling for hsgpa hscalc hsengl";
255 n = 289;
255 ! p = 5;
255 ! s = 1;
255 ! t = 3.63 ;
255 ! F = t**2;
256 a = s*F/(n-p + s*F);
256 ! print a;
257
NOTE: Exiting IML.
NOTE: PROCEDURE IML used (Total process time):
real time 0.02 seconds
cpu time 0.02 seconds
258 proc reg plots(only) = ResidualPlot ;
259 title3 'Model H: Combine precalc and calc instead of dropping calc';
260 title4 'Compare R-Square = 0.4492 from Model G';
261 model grade = hsgpa hscalc hsengl totscore;
262
NOTE: PROCEDURE REG used (Total process time):
real time 0.50 seconds
cpu time 0.25 seconds
263 proc iml;
NOTE: IML Ready
264 title3 'Proportion of remaining variation explained by totscore';
265 print "totscore controlling for hsgpa hscalc hsengl";
266 n = 289;
266 ! p = 5;
266 ! s = 1;
266 ! t = 3.92 ;
266 ! F = t**2;
267 a = s*F/(n-p + s*F);
267 ! print a;
268 print "For prediction, I am happy with Model H: hsgpa hscalc hsengl totscore";
269
NOTE: Exiting IML.
NOTE: PROCEDURE IML used (Total process time):
real time 0.05 seconds
cpu time 0.05 seconds
270 proc reg plots = none;
271 title3 'Model I: Same as Model H but including Mother Tongue';
272 model grade = hsgpa hscalc hsengl totscore mtongue;
273
NOTE: PROCEDURE REG used (Total process time):
real time 0.10 seconds
cpu time 0.06 seconds
274 proc reg plots = none;
275 title3 'Try automatic (stepwise) selection';
276 model grade = hsgpa hscalc hsengl precalc calc
277 mtongue gender e1-e4 e6
278 / selection = stepwise slentry = 0.05 slstay = 0.05 ;
279 /* Default slentry = slstay = 0.15 */
280
281
282 OPTIONS NONOTES NOSTIMER NOSOURCE NOSYNTAXCHECK;
294