1 OPTIONS NONOTES NOSTIMER NOSOURCE NOSYNTAXCHECK;
72
73 /* mathlogreg3.sas */
74
75 /* Goal: Develop a prediction model that uses all the data and makes a
76 prediction for every case. */
77
78 %include '/home/u1407221/441s24/SAS08/ReadLabelMath2.sas';
NOTE: Format YNFMT is already on the library WORK.FORMATS.
NOTE: Format YNFMT has been output.
NOTE: Format CRSFMT is already on the library WORK.FORMATS.
NOTE: Format CRSFMT has been output.
NOTE: Format NFMT is already on the library WORK.FORMATS.
NOTE: Format NFMT has been output.
NOTE: Format NCFMT is already on the library WORK.FORMATS.
NOTE: Format NCFMT has been output.
NOTE: PROCEDURE FORMAT used (Total process time):
real time 0.00 seconds
user cpu time 0.00 seconds
system cpu time 0.00 seconds
memory 247.34k
OS Memory 28836.00k
Timestamp 03/10/2024 07:57:59 PM
Step Count 109 Switch Count 0
Page Faults 0
Page Reclaims 65
Page Swaps 0
Voluntary Context Switches 0
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 32
NOTE: The infile '/home/u1407221/441s24/data/math.data.txt' is:
Filename=/home/u1407221/441s24/data/math.data.txt,
Owner Name=u1407221,Group Name=oda,
Access Permission=-rw-r--r--,
Last Modified=10Feb2024:17:04:10,
File Size (bytes)=90324
NOTE: 1158 records were read from the infile '/home/u1407221/441s24/data/math.data.txt'.
The minimum record length was 76.
The maximum record length was 76.
NOTE: Missing values were generated as a result of performing an operation on missing values.
Each place is given by: (Number of times) at (Line):(Column).
180 at 124:24
NOTE: The data set WORK.MATH has 1158 observations and 37 variables.
NOTE: DATA statement used (Total process time):
real time 0.02 seconds
user cpu time 0.01 seconds
system cpu time 0.01 seconds
memory 1172.81k
OS Memory 29352.00k
Timestamp 03/10/2024 07:57:59 PM
Step Count 110 Switch Count 2
Page Faults 0
Page Reclaims 123
Page Swaps 0
Voluntary Context Switches 21
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 776
NOTE: There were 1158 observations read from the data set WORK.MATH.
NOTE: The data set WORK.REPLIC has 579 observations and 37 variables.
NOTE: DATA statement used (Total process time):
real time 0.00 seconds
user cpu time 0.00 seconds
system cpu time 0.00 seconds
memory 1413.65k
OS Memory 29740.00k
Timestamp 03/10/2024 07:57:59 PM
Step Count 111 Switch Count 2
Page Faults 0
Page Reclaims 157
Page Swaps 0
Voluntary Context Switches 12
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 520
NOTE: There were 1158 observations read from the data set WORK.MATH.
NOTE: The data set WORK.EXPLORE has 579 observations and 28 variables.
NOTE: DATA statement used (Total process time):
real time 0.00 seconds
user cpu time 0.00 seconds
system cpu time 0.00 seconds
memory 1409.09k
OS Memory 29740.00k
Timestamp 03/10/2024 07:57:59 PM
Step Count 112 Switch Count 2
Page Faults 0
Page Reclaims 131
Page Swaps 0
Voluntary Context Switches 14
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 520
239 title2 'Try using missing values for prediction';
240
241 /* Make missing data indicators (1 = missing, 0 = observed) from data set EXPLORE */
242 data mathex2;
243 set explore;
244 if gender = . then sexmiss = 1; else sexmiss=0; /* Includes mtongue */
245 if course2 = . then coursemiss = 1; else coursemiss=0;
246 if hsgpa = . then hsgpamiss = 1; else hsgpamiss=0;
247 if hscalc = . then hscalcmiss = 1; else hscalcmiss=0;
248 if hsengl = . then hsenglmiss = 1; else hsenglmiss=0;
249 if totscore = . then testmiss = 1; else testmiss=0;
250 nmiss = sum(of sexmiss--testmiss); /* Count of the six indicators. OF is required: without it the argument is the expression sexmiss - (-testmiss), i.e. sexmiss + testmiss only */
251 if hsgpa+hscalc+precalc = . then missused = 1 ; else missused = 0; /* The arithmetic sum is missing when any term is missing; this deliberately triggers the log NOTE about missing values */
252
253 format sexmiss -- testmiss missused ynfmt.;
254 label sexmiss = 'Gender and mother tongue missing'
255 coursemiss = 'Course missing'
256 hsgpamiss = 'HS GPA missing'
257 hscalcmiss = 'HS Calculus mark missing'
258 hsenglmiss = 'HS English mark missing'
259 testmiss = 'Diagnostic test scores missing'
260 missused = 'Any of hsgpa hscalc precalc missing';
261
262 /* Checks are commented out
263 proc freq;
264 tables gender*sexmiss / norow nocol nopercent missing;
265 tables course*coursemiss / norow nocol nopercent missing;
266 tables hsgpa*hsgpamiss / norow nocol nopercent missing;
267 tables hscalc*hscalcmiss / norow nocol nopercent missing;
268 tables hsengl*hsenglmiss / norow nocol nopercent missing;
269 tables totscore*testmiss / norow nocol nopercent missing;
270 tables (hsgpamiss hscalcmiss testmiss)*missused
271 / norow nocol nopercent missing;
272 */
273
NOTE: Missing values were generated as a result of performing an operation on missing values.
Each place is given by: (Number of times) at (Line):(Column).
142 at 251:14 204 at 251:21
NOTE: There were 579 observations read from the data set WORK.EXPLORE.
NOTE: The data set WORK.MATHEX2 has 579 observations and 36 variables.
NOTE: DATA statement used (Total process time):
real time 0.00 seconds
user cpu time 0.01 seconds
system cpu time 0.00 seconds
memory 1316.68k
OS Memory 29612.00k
Timestamp 03/10/2024 07:57:59 PM
Step Count 113 Switch Count 2
Page Faults 0
Page Reclaims 128
Page Swaps 0
Voluntary Context Switches 11
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 528
274 proc freq data=mathex2; /* Cross-tabulate each missingness indicator, and the nmiss count, against passed */
275 title2 'Check usefulness of missing data indicators one at a time';
276 tables (sexmiss -- testmiss nmiss) * passed / nocol nopercent chisq; /* Column and overall percentages suppressed, row percentages kept; chisq requests chi-squared tests for each table */
277
NOTE: There were 579 observations read from the data set WORK.MATHEX2.
NOTE: PROCEDURE FREQ used (Total process time):
real time 0.22 seconds
user cpu time 0.22 seconds
system cpu time 0.00 seconds
memory 4457.56k
OS Memory 30896.00k
Timestamp 03/10/2024 07:57:59 PM
Step Count 114 Switch Count 5
Page Faults 0
Page Reclaims 398
Page Swaps 0
Voluntary Context Switches 31
Involuntary Context Switches 14
Block Input Operations 0
Block Output Operations 632
278 proc freq data=mathex2; /* Is being missing on any of hsgpa, hscalc, precalc (missused) related to passing? */
279 title2 'Missingness on variables used, and passing the course';
280 tables missused * passed / nocol nopercent chisq; /* Row percentages plus chi-squared tests */
281 run;
NOTE: There were 579 observations read from the data set WORK.MATHEX2.
NOTE: PROCEDURE FREQ used (Total process time):
real time 0.04 seconds
user cpu time 0.04 seconds
system cpu time 0.00 seconds
memory 1416.78k
OS Memory 30896.00k
Timestamp 03/10/2024 07:58:00 PM
Step Count 115 Switch Count 5
Page Faults 0
Page Reclaims 257
Page Swaps 0
Voluntary Context Switches 34
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 544
282
283 /* Strategy: If missing on hsgpa, hscalc or precalc, predict they will
284 not pass. If not missing, use the model with hsgpa, hscalc and precalc.
285 Question: Will missingness on Gender/Mother tongue, Course or HS English
286 add to the ability of (hsgpa, hscalc or precalc) to predict?
287
288 However, the following table shows that every student who was missing
289 course was also missing on at least one of the good predictors, so
290 coursemiss is out. */
291
292 proc freq data=mathex2; /* Counts only: cross-classify coursemiss with missused */
293 tables coursemiss*missused / norow nocol nopercent missing; /* All percentages suppressed so only cell frequencies are shown */
294
NOTE: There were 579 observations read from the data set WORK.MATHEX2.
NOTE: PROCEDURE FREQ used (Total process time):
real time 0.02 seconds
user cpu time 0.01 seconds
system cpu time 0.00 seconds
memory 1303.81k
OS Memory 31152.00k
Timestamp 03/10/2024 07:58:00 PM
Step Count 116 Switch Count 4
Page Faults 0
Page Reclaims 191
Page Swaps 0
Voluntary Context Switches 29
Involuntary Context Switches 1
Block Input Operations 0
Block Output Operations 536
295 proc logistic data = mathex2; /* Do the sexmiss and hsenglmiss indicators add anything once hsgpa, hscalc and precalc are in the model? */
296 title3 'HS GPA, HS Calculus and Pre-calculus test';
297 model passed (event='Yes') = hsgpa hscalc precalc
298 sexmiss hsenglmiss;
299 MissingVars: test sexmiss=hsenglmiss=0; /* Joint test that both indicator coefficients are zero */
300
301 /* If missing on hsgpa, hscalc or precalc, give them an estimated
302 probability of passing = 0.348. If not missing, use the model with
303 hsgpa, hscalc and precalc to calculate the estimated probabilities. */
304
305 quit;
NOTE: PROC LOGISTIC is modeling the probability that passed='Yes'.
NOTE: Convergence criterion (GCONV=1E-8) satisfied.
NOTE: There were 579 observations read from the data set WORK.MATHEX2.
NOTE: PROCEDURE LOGISTIC used (Total process time):
real time 0.08 seconds
user cpu time 0.08 seconds
system cpu time 0.00 seconds
memory 2492.21k
OS Memory 31928.00k
Timestamp 03/10/2024 07:58:00 PM
Step Count 117 Switch Count 1
Page Faults 0
Page Reclaims 222
Page Swaps 0
Voluntary Context Switches 10
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 56
306
307
308
309
310
311 OPTIONS NONOTES NOSTIMER NOSOURCE NOSYNTAXCHECK;
323