1 OPTIONS NONOTES NOSTIMER NOSOURCE NOSYNTAXCHECK;
72
73 /* mathreg1.sas */
74 title2 'Regression on the math data: Part One';
75 %include '/home/u1407221/441s24/SAS06/ReadLabelMath.sas';
NOTE: Format YNFMT is already on the library WORK.FORMATS.
NOTE: Format YNFMT has been output.
NOTE: Format CRSFMT is already on the library WORK.FORMATS.
NOTE: Format CRSFMT has been output.
NOTE: Format NFMT is already on the library WORK.FORMATS.
NOTE: Format NFMT has been output.
NOTE: Format NCFMT is already on the library WORK.FORMATS.
NOTE: Format NCFMT has been output.
NOTE: PROCEDURE FORMAT used (Total process time):
real time 0.00 seconds
user cpu time 0.00 seconds
system cpu time 0.00 seconds
memory 257.46k
OS Memory 28836.00k
Timestamp 02/14/2024 07:29:58 PM
Step Count 108 Switch Count 0
Page Faults 0
Page Reclaims 20
Page Swaps 0
Voluntary Context Switches 0
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 32
NOTE: The infile '/home/u1407221/441s24/data/math.data.txt' is:
Filename=/home/u1407221/441s24/data/math.data.txt,
Owner Name=u1407221,Group Name=oda,
Access Permission=-rw-r--r--,
Last Modified=10Feb2024:16:04:10,
File Size (bytes)=90324
NOTE: 1158 records were read from the infile '/home/u1407221/441s24/data/math.data.txt'.
The minimum record length was 76.
The maximum record length was 76.
NOTE: Missing values were generated as a result of performing an operation on missing values.
Each place is given by: (Number of times) at (Line):(Column).
180 at 121:24
NOTE: The data set WORK.MATH has 1158 observations and 26 variables.
NOTE: DATA statement used (Total process time):
real time 0.02 seconds
user cpu time 0.00 seconds
system cpu time 0.00 seconds
memory 989.34k
OS Memory 29608.00k
Timestamp 02/14/2024 07:29:58 PM
Step Count 109 Switch Count 2
Page Faults 0
Page Reclaims 136
Page Swaps 0
Voluntary Context Switches 18
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 520
NOTE: There were 1158 observations read from the data set WORK.MATH.
NOTE: The data set WORK.REPLIC has 579 observations and 26 variables.
NOTE: DATA statement used (Total process time):
real time 0.00 seconds
user cpu time 0.00 seconds
system cpu time 0.00 seconds
memory 1127.96k
OS Memory 29868.00k
Timestamp 02/14/2024 07:29:58 PM
Step Count 110 Switch Count 2
Page Faults 0
Page Reclaims 124
Page Swaps 0
Voluntary Context Switches 13
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 264
NOTE: There were 1158 observations read from the data set WORK.MATH.
NOTE: The data set WORK.EXPLORE has 579 observations and 17 variables.
NOTE: DATA statement used (Total process time):
real time 0.00 seconds
user cpu time 0.00 seconds
system cpu time 0.01 seconds
memory 1138.81k
OS Memory 29868.00k
Timestamp 02/14/2024 07:29:58 PM
Step Count 111 Switch Count 2
Page Faults 0
Page Reclaims 109
Page Swaps 0
Voluntary Context Switches 11
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 264
198
199 /* id sex tongue rater1 rater2 ethnic
200 hsgpa hscalc hsengl havecalc
201 class precalc calc totscore grade passed outcome */
202
203 data dummy; /* Define dummy variables here, for now.
204 Later move them to ReadLabelMath.sas. */
205 set explore;
206 /* Dummy variables for ethnic background */
207 if ethnic=. then e1=.;
208 else if ethnic=1 then e1=1;
209 else e1=0;
210 if ethnic=. then e2=.;
211 else if ethnic=2 then e2=1;
212 else e2=0;
213 if ethnic=. then e3=.;
214 else if ethnic=3 then e3=1;
215 else e3=0;
216 if ethnic=. then e4=.;
217 else if ethnic=4 then e4=1;
218 else e4=0;
219 if ethnic=. then e6=.;
220 else if ethnic=6 then e6=1;
221 else e6=0;
222
223 label e1 = 'Asian vs East Ind.'
224 e2 = 'East Eur. vs East Ind.'
225 e3 = 'Other Eur. vs East Ind.'
226 e4 = 'Mid. East & Pak. vs East Ind.'
227 e6 = 'Other/DK vs East Ind.';
228
229 if sex = 'Female' then gender=1; else if sex = 'Male' then gender=0;
230 if tongue = 'English' then mtongue=1; else if tongue='Other' then mtongue=0;
231 label mtongue = 'English vs. Other';
232 /* Only use 2 of these if the model has an intercept! */
233 if class=. then c1=.; else if class=1 then c1=1; else c1=0;
234 if class=. then c2=.; else if class=2 then c2=1; else c2=0;
235 if class=. then c3=.; else if class=3 then c3=1; else c3=0;
236 label c1 = 'Catch-up' c2 = 'Mainstream' c3 = 'Elite';
237
238 /* Commented out
239 proc freq;
240 title3 'Check dummy variables';
241 tables sex*gender / norow nocol nopercent missing;
242 tables tongue*mtongue / norow nocol nopercent missing;
243 tables (e1-e4 e6) * ethnic / norow nocol nopercent missing;
244 tables (c1-c3) * class / norow nocol nopercent missing;
245 */
246
NOTE: There were 579 observations read from the data set WORK.EXPLORE.
NOTE: The data set WORK.DUMMY has 579 observations and 27 variables.
NOTE: DATA statement used (Total process time):
real time 0.00 seconds
user cpu time 0.01 seconds
system cpu time 0.00 seconds
memory 1133.09k
OS Memory 29868.00k
Timestamp 02/14/2024 07:29:58 PM
Step Count 112 Switch Count 2
Page Faults 0
Page Reclaims 97
Page Swaps 0
Voluntary Context Switches 10
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 520
247 proc reg plots=none corr data=dummy; /* Suppress diagnostic plots for now*/
248 title3 'Model A: Predict University Calculus Grade from HS Information';
249 model grade = hsgpa hscalc hsengl;
250
251 /* It is very interesting to know what proportion of the remaining
252 variation is explained by each variable, controlling for the other two.
253 F = t-squared, and
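254 a = sF/(n-p + sF), where s = number of variables being tested and p = number of regression coefficients (including the intercept)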
255 */
256
NOTE: PROCEDURE REG used (Total process time):
real time 0.06 seconds
user cpu time 0.06 seconds
system cpu time 0.00 seconds
memory 5057.93k
OS Memory 31680.00k
Timestamp 02/14/2024 07:29:59 PM
Step Count 113 Switch Count 2
Page Faults 0
Page Reclaims 415
Page Swaps 0
Voluntary Context Switches 14
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 80
257 proc iml;
NOTE: IML Ready
258 title3 'Proportion of remaining variation for HS information';
259 n = 323;
259 ! p = 4;
259 ! s = 1;
260 print "hsgpa controlling for hscalc and hsengl";
261 t = 8.00;
261 ! F = t**2;
261 ! a = s*F/(n-p + s*F);
262 print a;
263
264 print "hscalc controlling for hsgpa and hsengl";
265 t = 3.14;
265 ! F = t**2;
265 ! a = s*F/(n-p + s*F);
266 print a;
267
268 print "hsengl controlling for hsgpa and hscalc";
269 t = -3.26;
269 ! F = t**2;
269 ! a = s*F/(n-p + s*F);
270 print a;
271
NOTE: Exiting IML.
NOTE: PROCEDURE IML used (Total process time):
real time 0.01 seconds
user cpu time 0.02 seconds
system cpu time 0.00 seconds
memory 543.46k
OS Memory 30116.00k
Timestamp 02/14/2024 07:29:59 PM
Step Count 114 Switch Count 1
Page Faults 0
Page Reclaims 58
Page Swaps 0
Voluntary Context Switches 11
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 8
272 proc reg plots = none;
273 title3 'Model B: Predict University Calculus Grade from Diagnostic Test';
274 model grade = precalc calc;
275
NOTE: PROCEDURE REG used (Total process time):
real time 0.03 seconds
user cpu time 0.04 seconds
system cpu time 0.00 seconds
memory 2551.12k
OS Memory 31936.00k
Timestamp 02/14/2024 07:29:59 PM
Step Count 115 Switch Count 2
Page Faults 0
Page Reclaims 273
Page Swaps 0
Voluntary Context Switches 23
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 48
276 proc reg plots = none;
277 title3 'Making a mistake on purpose';
278 model grade = totscore precalc calc;
279
NOTE: PROCEDURE REG used (Total process time):
real time 0.03 seconds
user cpu time 0.04 seconds
system cpu time 0.00 seconds
memory 2605.12k
OS Memory 32192.00k
Timestamp 02/14/2024 07:29:59 PM
Step Count 116 Switch Count 2
Page Faults 0
Page Reclaims 321
Page Swaps 0
Voluntary Context Switches 19
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 72
280 proc reg plots = none;
281 title3 'Model C: Do the diagnostic test and HS info both contribute?';
282 model grade = hsgpa hscalc hsengl precalc calc;
283 Diagnostic_Test: test precalc=calc=0;
284 HS_Information: test hsgpa=hscalc=hsengl=0;
285
NOTE: PROCEDURE REG used (Total process time):
real time 0.05 seconds
user cpu time 0.06 seconds
system cpu time 0.00 seconds
memory 2609.37k
OS Memory 32192.00k
Timestamp 02/14/2024 07:29:59 PM
Step Count 117 Switch Count 2
Page Faults 0
Page Reclaims 261
Page Swaps 0
Voluntary Context Switches 17
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 88
286 proc iml;
NOTE: IML Ready
287 title3 'Proportion of remaining variation explained by diagnostic test';
288 print "Precalc and calc controlling for hsgpa hscalc hsengl";
289 n = 289;
289 ! p = 6;
289 ! s = 2;
289 ! F = 8.28;
290 a = s*F/(n-p + s*F);
290 ! print a;
291
NOTE: Exiting IML.
NOTE: PROCEDURE IML used (Total process time):
real time 0.01 seconds
user cpu time 0.01 seconds
system cpu time 0.00 seconds
memory 543.56k
OS Memory 30372.00k
Timestamp 02/14/2024 07:29:59 PM
Step Count 118 Switch Count 1
Page Faults 0
Page Reclaims 57
Page Swaps 0
Voluntary Context Switches 10
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 16
292 proc iml;
NOTE: IML Ready
293 title3 'Proportion of remaining variation explained by HS info';
294 print "Hsgpa hscalc hsengl controlling for precalc and calc";
295 n = 289;
295 ! p = 6;
295 ! s = 3;
295 ! F = 46.97;
296 a = s*F/(n-p + s*F);
296 ! print a;
297
NOTE: Exiting IML.
NOTE: PROCEDURE IML used (Total process time):
real time 0.00 seconds
user cpu time 0.01 seconds
system cpu time 0.01 seconds
memory 499.03k
OS Memory 30372.00k
Timestamp 02/14/2024 07:29:59 PM
Step Count 119 Switch Count 1
Page Faults 0
Page Reclaims 55
Page Swaps 0
Voluntary Context Switches 11
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 8
298 proc reg plots = none;
299 title3 'Model D: See if class makes a contribution';
300 model grade = hsgpa hscalc hsengl precalc calc c1 c3;
301 class: test c1=c3=0;
302 Diagnostic_Test: test precalc=calc=0;
303
NOTE: PROCEDURE REG used (Total process time):
real time 0.05 seconds
user cpu time 0.06 seconds
system cpu time 0.00 seconds
memory 2616.81k
OS Memory 32192.00k
Timestamp 02/14/2024 07:29:59 PM
Step Count 120 Switch Count 2
Page Faults 0
Page Reclaims 260
Page Swaps 0
Voluntary Context Switches 22
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 72
304 proc glm;
305 title3 'Model D again with proc glm';
306 class class;
307 model grade = hsgpa hscalc hsengl precalc calc class;
308 contrast 'Replicate Test of Class' class 1 -1 0,
309 class 0 1 -1;
310 contrast 'Diagnostic Test F = 9.06' precalc 1, calc 1;
311
NOTE: PROCEDURE GLM used (Total process time):
real time 0.05 seconds
user cpu time 0.05 seconds
system cpu time 0.00 seconds
memory 1981.43k
OS Memory 31672.00k
Timestamp 02/14/2024 07:29:59 PM
Step Count 121 Switch Count 3
Page Faults 0
Page Reclaims 224
Page Swaps 0
Voluntary Context Switches 24
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 312
312 proc reg plots = none;
313 title3 'Model E: Include Language, Sex and Ethnic Background';
314 model grade = hsgpa hscalc hsengl precalc calc
315 mtongue gender e1-e4 e6;
316 TroubleVars: test mtongue=gender=e1=e2=e3=e4=e6=0;
317 Nationality: test e1=e2=e3=e4=e6=0;
318
NOTE: PROCEDURE REG used (Total process time):
real time 0.06 seconds
user cpu time 0.06 seconds
system cpu time 0.01 seconds
memory 2700.75k
OS Memory 32448.00k
Timestamp 02/14/2024 07:29:59 PM
Step Count 122 Switch Count 2
Page Faults 0
Page Reclaims 319
Page Swaps 0
Voluntary Context Switches 25
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 80
319 proc reg plots = none;
320 title3 'Model F: Discarding Gender and Nationality';
321 model grade = hsgpa hscalc hsengl precalc calc mtongue;
322 EnglishTongue: test hsengl=mtongue=0;
323
NOTE: PROCEDURE REG used (Total process time):
real time 0.04 seconds
user cpu time 0.05 seconds
system cpu time 0.00 seconds
memory 2550.09k
OS Memory 32704.00k
Timestamp 02/14/2024 07:29:59 PM
Step Count 123 Switch Count 2
Page Faults 0
Page Reclaims 289
Page Swaps 0
Voluntary Context Switches 17
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 136
324 proc iml;
NOTE: IML Ready
325 title3 'Proportion of remaining variation explained by mother tongue';
326 print "Mtongue controlling for hsgpa hscalc hsengl precalc calc";
327 n = 287;
327 ! p = 7;
327 ! s = 1;
327 ! t = -2.23 ;
327 ! F = t**2;
328 a = s*F/(n-p + s*F);
328 ! print a;
329
NOTE: Exiting IML.
NOTE: PROCEDURE IML used (Total process time):
real time 0.01 seconds
user cpu time 0.01 seconds
system cpu time 0.00 seconds
memory 528.12k
OS Memory 30884.00k
Timestamp 02/14/2024 07:29:59 PM
Step Count 124 Switch Count 1
Page Faults 0
Page Reclaims 57
Page Swaps 0
Voluntary Context Switches 11
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 24
330 proc reg plots = none;
331 title3 'Model G: Drop mtongue and calc';
332 title4 'Compare R-Square = 0.4556, Adj R-Sq = 0.4460 From Model 3';
333 model grade = hsgpa hscalc hsengl precalc;
334
NOTE: PROCEDURE REG used (Total process time):
real time 0.03 seconds
user cpu time 0.03 seconds
system cpu time 0.00 seconds
memory 2537.59k
OS Memory 32704.00k
Timestamp 02/14/2024 07:29:59 PM
Step Count 125 Switch Count 3
Page Faults 0
Page Reclaims 260
Page Swaps 0
Voluntary Context Switches 29
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 48
335 proc iml;
NOTE: IML Ready
336 title3 'Proportion of remaining variation explained by Pre-calculus';
337 print "precalc controlling for hsgpa hscalc hsengl";
338 n = 289;
338 ! p = 5;
338 ! s = 1;
338 ! t = 3.63 ;
338 ! F = t**2;
339 a = s*F/(n-p + s*F);
339 ! print a;
340
NOTE: Exiting IML.
NOTE: PROCEDURE IML used (Total process time):
real time 0.01 seconds
user cpu time 0.02 seconds
system cpu time 0.00 seconds
memory 500.40k
OS Memory 30884.00k
Timestamp 02/14/2024 07:29:59 PM
Step Count 126 Switch Count 1
Page Faults 0
Page Reclaims 57
Page Swaps 0
Voluntary Context Switches 10
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 8
341 proc reg plots = none;
342 title3 'Model H: Combine precalc and calc instead of dropping calc';
343 title4 'Compare R-Square = 0.4492 from Model G';
344 model grade = hsgpa hscalc hsengl totscore;
345
NOTE: PROCEDURE REG used (Total process time):
real time 0.03 seconds
user cpu time 0.03 seconds
system cpu time 0.00 seconds
memory 2536.25k
OS Memory 32704.00k
Timestamp 02/14/2024 07:29:59 PM
Step Count 127 Switch Count 3
Page Faults 0
Page Reclaims 260
Page Swaps 0
Voluntary Context Switches 28
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 64
346 proc iml;
NOTE: IML Ready
347 title3 'Proportion of remaining variation explained by Pre-calculus';
348 print "totscore controlling for hsgpa hscalc hsengl";
349 n = 289;
349 ! p = 5;
349 ! s = 1;
349 ! t = 3.92 ;
349 ! F = t**2;
350 a = s*F/(n-p + s*F);
350 ! print a;
351 print "For prediction, I am happy with Model H: hsgpa hscalc hsengl totscore";
352
NOTE: Exiting IML.
NOTE: PROCEDURE IML used (Total process time):
real time 0.01 seconds
user cpu time 0.01 seconds
system cpu time 0.00 seconds
memory 500.62k
OS Memory 30884.00k
Timestamp 02/14/2024 07:29:59 PM
Step Count 128 Switch Count 1
Page Faults 0
Page Reclaims 57
Page Swaps 0
Voluntary Context Switches 11
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 16
353 proc reg plots = none;
354 title3 'Model I: Same as Model H but including Mother Tongue';
355 model grade = hsgpa hscalc hsengl totscore mtongue;
356 /* A slightly better model except for the lawsuit. */
357
358 /* For stepwise selection, note Mallows' Cp is Cp = SSE_p/MSE_F - n + 2(p+1),
359 where p here counts explanatory variables only (not the intercept). Small is good. */
360
NOTE: PROCEDURE REG used (Total process time):
real time 0.03 seconds
user cpu time 0.03 seconds
system cpu time 0.00 seconds
memory 2541.90k
OS Memory 32704.00k
Timestamp 02/14/2024 07:29:59 PM
Step Count 129 Switch Count 2
Page Faults 0
Page Reclaims 259
Page Swaps 0
Voluntary Context Switches 18
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 64
361 proc reg plots = none;
362 title3 'Try automatic (stepwise) selection';
363 model grade = hsgpa hscalc hsengl precalc calc totscore
364 mtongue gender e1-e4 e6
365 / selection = stepwise slentry = 0.05 slstay = 0.05 ;
366 /* Default slentry = slstay = 0.15 */
367 quit;
NOTE: PROCEDURE REG used (Total process time):
real time 0.12 seconds
user cpu time 0.12 seconds
system cpu time 0.00 seconds
memory 2575.09k
OS Memory 32960.00k
Timestamp 02/14/2024 07:29:59 PM
Step Count 130 Switch Count 2
Page Faults 0
Page Reclaims 315
Page Swaps 0
Voluntary Context Switches 21
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 120
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387 OPTIONS NONOTES NOSTIMER NOSOURCE NOSYNTAXCHECK;
399