1 OPTIONS NONOTES NOSTIMER NOSOURCE NOSYNTAXCHECK;
70
71 /* mathlogreg3.sas */
72
73 /* Goal: Develop a prediction model that uses all the data and makes a
74 prediction for every case. */
75
76 %include '/home/brunner0/441s20/readmath2b.sas';
NOTE: Format YNFMT is already on the library WORK.FORMATS.
NOTE: Format YNFMT has been output.
NOTE: Format CRSFMT is already on the library WORK.FORMATS.
NOTE: Format CRSFMT has been output.
NOTE: Format NFMT is already on the library WORK.FORMATS.
NOTE: Format NFMT has been output.
NOTE: PROCEDURE FORMAT used (Total process time):
real time 0.00 seconds
user cpu time 0.00 seconds
system cpu time 0.00 seconds
memory 244.75k
OS Memory 29860.00k
Timestamp 02/10/2020 12:43:38 AM
Step Count 95 Switch Count 0
Page Faults 0
Page Reclaims 25
Page Swaps 0
Voluntary Context Switches 0
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 32
183 title2 'Use missing values for prediction';
184
185 /* Make missing data indicators */
NOTE: The infile '/home/brunner0/441s20/exploremath.data.txt' is:
Filename=/home/brunner0/441s20/exploremath.data.txt,
Owner Name=brunner0,Group Name=oda,
Access Permission=-rw-r--r--,
Last Modified=26Jan2020:18:49:34,
File Size (bytes)=44583
NOTE: 579 records were read from the infile '/home/brunner0/441s20/exploremath.data.txt'.
The minimum record length was 75.
The maximum record length was 75.
NOTE: Missing values were generated as a result of performing an operation on missing values.
Each place is given by: (Number of times) at (Line):(Column).
99 at 99:24 99 at 138:13
NOTE: The data set WORK.MATHEX has 579 observations and 35 variables.
NOTE: DATA statement used (Total process time):
real time 0.00 seconds
user cpu time 0.01 seconds
system cpu time 0.00 seconds
memory 901.87k
OS Memory 30376.00k
Timestamp 02/10/2020 12:43:38 AM
Step Count 96 Switch Count 3
Page Faults 0
Page Reclaims 130
Page Swaps 0
Voluntary Context Switches 25
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 520
186 data mathex2; /* Copy of mathex plus 0/1 missing-data indicators and their count */
187 set mathex;
188 if gender = . then sexmiss = 1; else sexmiss=0; /* Includes mtongue */
189 if course = . then coursemiss = 1; else coursemiss=0;
190 if hsgpa = . then hsgpamiss = 1; else hsgpamiss=0;
191 if hscalc = . then hscalcmiss = 1; else hscalcmiss=0;
192 if hsengl = . then hsenglmiss = 1; else hsenglmiss=0;
193 if totscore = . then testmiss = 1; else testmiss=0;
194 if hsgpa+hscalc+precalc = . then missused = 1 ; else missused = 0;
195 nmiss = sum(of sexmiss--testmiss); /* OF is required: sum(a--b) is parsed as a - (-b) = a + b */
196 /* Line 194: "+" yields missing when any operand is missing, so missused = 1 if any of the three is missing. */
197 /* nmiss counts the indicators from sexmiss through testmiss in creation order; missused is not included. */
198 format sexmiss -- missused ynfmt.;
199 label sexmiss = 'Gender and mother tongue missing'
200 coursemiss = 'Course missing'
201 hsgpamiss = 'HS GPA missing'
202 hscalcmiss = 'HS Calculus mark missing'
203 hsenglmiss = 'HS English mark missing'
204 testmiss = 'Diagnostic test scores missing'
205 missused = 'Any of hsgpa hscalc precalc missing';
206
207 /* Checks are commented out: each table crosses a variable with its indicator
208 proc freq;
209 tables gender*sexmiss / norow nocol nopercent missing;
210 tables course*coursemiss / norow nocol nopercent missing;
211 tables hsgpa*hsgpamiss / norow nocol nopercent missing;
212 tables hscalc*hscalcmiss / norow nocol nopercent missing;
213 tables hsengl*hsenglmiss / norow nocol nopercent missing;
214 tables totscore*testmiss / norow nocol nopercent missing;
215 tables (hsgpamiss hscalcmiss testmiss)*missused
216 / norow nocol nopercent missing;
217 */
218 run; /* explicit step boundary instead of relying on the next PROC to end the DATA step */
NOTE: Missing values were generated as a result of performing an operation on missing values.
Each place is given by: (Number of times) at (Line):(Column).
142 at 194:14 204 at 194:21
NOTE: There were 579 observations read from the data set WORK.MATHEX.
NOTE: The data set WORK.MATHEX2 has 579 observations and 43 variables.
NOTE: DATA statement used (Total process time):
real time 0.00 seconds
user cpu time 0.00 seconds
system cpu time 0.00 seconds
memory 1343.84k
OS Memory 30636.00k
Timestamp 02/10/2020 12:43:38 AM
Step Count 97 Switch Count 2
Page Faults 0
Page Reclaims 130
Page Swaps 0
Voluntary Context Switches 11
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 520
219 proc freq data=mathex2; /* name the input explicitly rather than relying on the most recently created data set */
220 title2 'Check usefulness of missing data indicators one at a time';
221 tables (sexmiss -- missused nmiss) * passed / nocol nopercent chisq; /* each indicator and nmiss crossed with passed, chi-square tests requested */
222 run;
NOTE: There were 579 observations read from the data set WORK.MATHEX2.
NOTE: PROCEDURE FREQ used (Total process time):
real time 0.27 seconds
user cpu time 0.27 seconds
system cpu time 0.01 seconds
memory 4144.25k
OS Memory 32176.00k
Timestamp 02/10/2020 12:43:38 AM
Step Count 98 Switch Count 5
Page Faults 0
Page Reclaims 546
Page Swaps 0
Voluntary Context Switches 32
Involuntary Context Switches 5
Block Input Operations 0
Block Output Operations 656
223
224 proc freq data=mathex2; /* name the input explicitly rather than relying on the most recently created data set */
225 title2 'Missingness on variables used, and passing the course';
226 tables missused * passed / nocol nopercent chisq; /* missused crossed with passed; row percentages kept, chi-square tests requested */
227 run;
NOTE: There were 579 observations read from the data set WORK.MATHEX2.
NOTE: PROCEDURE FREQ used (Total process time):
real time 0.04 seconds
user cpu time 0.04 seconds
system cpu time 0.00 seconds
memory 1328.03k
OS Memory 32688.00k
Timestamp 02/10/2020 12:43:38 AM
Step Count 99 Switch Count 5
Page Faults 0
Page Reclaims 200
Page Swaps 0
Voluntary Context Switches 32
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 528
228
229 proc freq data=mathex2; /* name the input explicitly rather than relying on the most recently created data set */
230 title2 'Just those with missing on variables used';
231 where missused = 1; /* restrict to cases missing at least one of hsgpa, hscalc, precalc */
232 tables (sexmiss coursemiss) * passed / nocol nopercent chisq; /* within that subset, cross sexmiss and coursemiss with passed */
233 run;
NOTE: There were 204 observations read from the data set WORK.MATHEX2.
WHERE missused=1;
NOTE: PROCEDURE FREQ used (Total process time):
real time 0.07 seconds
user cpu time 0.08 seconds
system cpu time 0.00 seconds
memory 1495.15k
OS Memory 32688.00k
Timestamp 02/10/2020 12:43:39 AM
Step Count 100 Switch Count 7
Page Faults 0
Page Reclaims 205
Page Swaps 0
Voluntary Context Switches 38
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 576
234
235 /* Decision: If missing on hsgpa, hscalc or precalc, predict they will
236 not pass. If not missing, use the model with hsgpa, hscalc and precalc. */
237
238
239
240
241
242
243
244 OPTIONS NONOTES NOSTIMER NOSOURCE NOSYNTAXCHECK;
255