1 OPTIONS NONOTES NOSTIMER NOSOURCE NOSYNTAXCHECK;
61
62 /* mathlogreg3.sas */
63
64 /* Goal: Develop a prediction model that uses all the data and makes a
65 prediction for every case. */
66
67 %include '/folders/myfolders/441s18/Lecture/mathread1.sas';
NOTE: Format YNFMT is already on the library WORK.FORMATS.
NOTE: Format YNFMT has been output.
NOTE: Format CRSFMT is already on the library WORK.FORMATS.
NOTE: Format CRSFMT has been output.
NOTE: Format NFMT is already on the library WORK.FORMATS.
NOTE: Format NFMT has been output.
NOTE: PROCEDURE FORMAT used (Total process time):
real time 0.00 seconds
cpu time 0.00 seconds
181 title2 'Use missing values for prediction';
182
183 /* Make missing data indicators */
NOTE: The infile '/folders/myfolders/441s18/Lecture/exploremath.data.txt' is:
Filename=/folders/myfolders/441s18/Lecture/exploremath.data.txt,
Owner Name=root,Group Name=vboxsf,
Access Permission=-rwxrwx---,
Last Modified=18Jan2016:17:34:49,
File Size (bytes)=44583
NOTE: 579 records were read from the infile '/folders/myfolders/441s18/Lecture/exploremath.data.txt'.
The minimum record length was 75.
The maximum record length was 75.
NOTE: Missing values were generated as a result of performing an operation on missing values.
Each place is given by: (Number of times) at (Line):(Column).
99 at 104:24 99 at 105:18
NOTE: The data set WORK.MATHEX has 579 observations and 25 variables.
NOTE: DATA statement used (Total process time):
real time 0.02 seconds
cpu time 0.00 seconds
184 data mathex2; /* mathex plus 0/1 missing-data indicators and a count of them */
185 set mathex;
186 if gender = . then sexmiss = 1; else sexmiss=0; /* Includes mtongue */
187 if course = . then coursemiss = 1; else coursemiss=0;
188 if hsgpa = . then hsgpamiss = 1; else hsgpamiss=0;
189 if hscalc = . then hscalcmiss = 1; else hscalcmiss=0;
190 if hsengl = . then hsenglmiss = 1; else hsenglmiss=0;
191 if totscore = . then testmiss = 1; else testmiss=0;
192 if hsgpa+hscalc+precalc = . then missused = 1 ; else missused = 0; /* + yields missing if any term is missing */
193 nmiss = sum(of sexmiss--testmiss); /* Number of indicators equal to 1, sexmiss through testmiss */
194 /* OF is required above: without it, sum(sexmiss--testmiss) is parsed as the
195 single expression sexmiss - (-testmiss), i.e. only sexmiss + testmiss. */
196 format sexmiss -- missused ynfmt.; /* Name-range list; nmiss is created after missused, so it stays numeric */
197 label sexmiss = 'Gender and mother tongue missing'
198 coursemiss = 'Course missing'
199 hsgpamiss = 'HS GPA missing'
200 hscalcmiss = 'HS Calculus mark missing'
201 hsenglmiss = 'HS English mark missing'
202 testmiss = 'Diagnostic test scores missing'
203 missused = 'Any of hsgpa hscalc precalc missing';
204
205 /* Checks are commented out
206 proc freq;
207 tables gender*sexmiss / norow nocol nopercent missing;
208 tables course*coursemiss / norow nocol nopercent missing;
209 tables hsgpa*hsgpamiss / norow nocol nopercent missing;
210 tables hscalc*hscalcmiss / norow nocol nopercent missing;
211 tables hsengl*hsenglmiss / norow nocol nopercent missing;
212 tables totscore*testmiss / norow nocol nopercent missing;
213 tables (hsgpamiss hscalcmiss testmiss)*missused
214 / norow nocol nopercent missing;
215 */
216
NOTE: Missing values were generated as a result of performing an operation on missing values.
Each place is given by: (Number of times) at (Line):(Column).
142 at 192:14 204 at 192:21
NOTE: There were 579 observations read from the data set WORK.MATHEX.
NOTE: The data set WORK.MATHEX2 has 579 observations and 33 variables.
NOTE: DATA statement used (Total process time):
real time 0.00 seconds
cpu time 0.01 seconds
217 proc freq; /* No DATA= option, so the most recently created data set is analyzed */
218 title2 'Check usefulness of missing data indicators one at a time';
219 tables (sexmiss -- missused nmiss) * passed / nocol nopercent chisq; /* One two-way table per indicator (and nmiss) against passed, with chi-squared tests */
220 run;
NOTE: There were 579 observations read from the data set WORK.MATHEX2.
NOTE: PROCEDURE FREQ used (Total process time):
real time 0.30 seconds
cpu time 0.30 seconds
221
222 proc freq; /* No DATA= option, so the most recently created data set is analyzed */
223 title2 'Missingness on variables used, and passing the course';
224 tables missused * passed / norow nocol chisq; /* Row and column percentages suppressed; chi-squared test requested */
225 run;
NOTE: There were 579 observations read from the data set WORK.MATHEX2.
NOTE: PROCEDURE FREQ used (Total process time):
real time 0.06 seconds
cpu time 0.06 seconds
226
227 proc freq; /* No DATA= option, so the most recently created data set is analyzed */
228 title2 'Just those with missing on variables used';
229 where missused = 1; /* Restrict to cases missing at least one of hsgpa, hscalc, precalc */
230 tables (sexmiss coursemiss) * passed / norow nocol chisq; /* Within that subset: sexmiss and coursemiss against passed, with chi-squared tests */
231 run;
NOTE: There were 204 observations read from the data set WORK.MATHEX2.
WHERE missused=1;
NOTE: PROCEDURE FREQ used (Total process time):
real time 0.09 seconds
cpu time 0.09 seconds
232
233 /* Decision: If missing on hsgpa, hscalc or precalc, predict they will
234 not pass. If not missing, use the model with hsgpa, hscalc and precalc. */
235
236
237
238
239
240
241
242 OPTIONS NONOTES NOSTIMER NOSOURCE NOSYNTAXCHECK;
255