1 OPTIONS NONOTES NOSTIMER NOSOURCE NOSYNTAXCHECK;
61
62 /* mathlogreg4.sas */
63
64 %include '/folders/myfolders/441s18/Lecture/mathread1.sas';
NOTE: Format YNFMT is already on the library WORK.FORMATS.
NOTE: Format YNFMT has been output.
NOTE: Format CRSFMT is already on the library WORK.FORMATS.
NOTE: Format CRSFMT has been output.
NOTE: Format NFMT is already on the library WORK.FORMATS.
NOTE: Format NFMT has been output.
NOTE: PROCEDURE FORMAT used (Total process time):
real time 0.00 seconds
cpu time 0.00 seconds
178 title2 'Use missing values for prediction';
179
180 /* Make missing data indicators */
NOTE: The infile '/folders/myfolders/441s18/Lecture/exploremath.data.txt' is:
Filename=/folders/myfolders/441s18/Lecture/exploremath.data.txt,
Owner Name=root,Group Name=vboxsf,
Access Permission=-rwxrwx---,
Last Modified=18Jan2016:17:34:49,
File Size (bytes)=44583
NOTE: 579 records were read from the infile '/folders/myfolders/441s18/Lecture/exploremath.data.txt'.
The minimum record length was 75.
The maximum record length was 75.
NOTE: Missing values were generated as a result of performing an operation on missing values.
Each place is given by: (Number of times) at (Line):(Column).
99 at 101:24 99 at 102:18
NOTE: The data set WORK.MATHEX has 579 observations and 25 variables.
NOTE: DATA statement used (Total process time):
real time 0.01 seconds
cpu time 0.01 seconds
181 data mathex2;
182 set mathex;
183 if hsgpa+hscalc+precalc = . then missused = 1 ;
184 else missused = 0;
185 format missused ynfmt.;
186 label missused = 'Any of hsgpa hscalc precalc missing';
187
NOTE: Missing values were generated as a result of performing an operation on missing values.
Each place is given by: (Number of times) at (Line):(Column).
142 at 183:14 204 at 183:21
NOTE: There were 579 observations read from the data set WORK.MATHEX.
NOTE: The data set WORK.MATHEX2 has 579 observations and 26 variables.
NOTE: DATA statement used (Total process time):
real time 0.00 seconds
cpu time 0.00 seconds
188 proc freq data=mathex2;
189 title2 'Percent passed for the record';
190 tables passed;
191 run;
NOTE: There were 579 observations read from the data set WORK.MATHEX2.
NOTE: PROCEDURE FREQ used (Total process time):
real time 0.04 seconds
cpu time 0.04 seconds
192
193 proc freq data=mathex2;
194 title2 'Missingness on variables used, as a predictor of passing';
195 tables missused * passed / nocol nopercent chisq;
196 run;
NOTE: There were 579 observations read from the data set WORK.MATHEX2.
NOTE: PROCEDURE FREQ used (Total process time):
real time 0.05 seconds
cpu time 0.05 seconds
197
198 /* If missing on hsgpa, hscalc or precalc, give them an estimated
199 probability of passing of 0.348. If not missing, use the model with
200 hsgpa, hscalc and precalc to calculate the estimated probabilities. */
201
202 proc logistic data = mathex2 noprint;
203 title3 'Look at estimated probabilities';
204 model passed (event='Yes') = hsgpa hscalc precalc;
205 output out=mathex3 prob=pihat;
206 /* The data set mathex3 has everything in mathex2, and also pihat */
207 run;
NOTE: PROC LOGISTIC is modeling the probability that passed='Yes'.
NOTE: Convergence criterion (GCONV=1E-8) satisfied.
NOTE: There were 579 observations read from the data set WORK.MATHEX2.
NOTE: The data set WORK.MATHEX3 has 579 observations and 28 variables.
NOTE: PROCEDURE LOGISTIC used (Total process time):
real time 0.00 seconds
cpu time 0.01 seconds
208
209 proc print data=mathex3 (obs=13);
210 /* List only the first 13 observations */
211 var hsgpa hscalc precalc pihat;
212 run;
NOTE: There were 13 observations read from the data set WORK.MATHEX3.
NOTE: PROCEDURE PRINT used (Total process time):
real time 0.03 seconds
cpu time 0.03 seconds
213
214 /* Based on invariance and the Law of Total Probability (double expectation),
215 I predict that the mean pihat will be around 0.624, the proportion of
216 students with non-missing data who passed. */
217
218 proc univariate normal plot data=mathex3;
219 title2 'Explore the distribution of estimated probabilities';
220 where missused = 0; /* Should have n=375 */
221 var pihat;
222 run;
NOTE: PROCEDURE UNIVARIATE used (Total process time):
real time 0.66 seconds
cpu time 0.22 seconds
223
224 /* Where should we draw the line, and predict that the student will pass?
225 In the exploratory sample, 52.68% passed overall, so find the
226 pihat that cuts off the bottom 52.68%, and use that as a cutoff.*/
227
228 data mathex4;
229 set mathex3;
230 if pihat = . then pihat = 0.348;
231 /* Proportion with missing data who passed */
232
NOTE: There were 579 observations read from the data set WORK.MATHEX3.
NOTE: The data set WORK.MATHEX4 has 579 observations and 28 variables.
NOTE: DATA statement used (Total process time):
real time 0.01 seconds
cpu time 0.00 seconds
233 proc freq data = mathex4;
234 title2 'Seek the 52.68 percentile';
235 tables pihat;
236 run;
NOTE: There were 579 observations read from the data set WORK.MATHEX4.
NOTE: PROCEDURE FREQ used (Total process time):
real time 0.45 seconds
cpu time 0.41 seconds
237
238 /* There was a pihat right at the 52.68th percentile: 0.4063743825.
239 If pihat > 0.406, predict the student will pass. For the record and
240 before peeking, I predict 75% correct. */
241
242 /* Read and transform the replication data -- same code as mathread1.sas
243 except no keep statement. Creates the data set mathrep. */
244 %include '/folders/myfolders/441s18/Lecture/readexplor.sas';
NOTE: Format YNFMT is already on the library WORK.FORMATS.
NOTE: Format YNFMT has been output.
NOTE: Format CRSFMT is already on the library WORK.FORMATS.
NOTE: Format CRSFMT has been output.
NOTE: Format NFMT is already on the library WORK.FORMATS.
NOTE: Format NFMT has been output.
NOTE: PROCEDURE FORMAT used (Total process time):
real time 0.00 seconds
cpu time 0.00 seconds
358 /* The data step continues ... */
359
360 b0 = -14.7970; b1 = 0.1173; b2 = 0.0638; b3 = 0.2989; /* Exploratory sample values */
361 L = b0 + b1*hsgpa + b2*hscalc + b3*precalc;
362 pihat = exp(L)/(1+exp(L)); /* Some will be missing. */
363 if pihat = . then pihat = 0.348; /* Proportion passed among exploratory-sample students with missing data */
364 if pihat > 0.406 then Prediction = 'Pass '; else Prediction = 'Not Pass';
365
366 /* Commented out
367 proc freq data = mathrep;
368 title2 'Checking Prediction';
369 tables pihat * Prediction / norow nocol nopercent missing;
370 run;
371 */
372
NOTE: The infile '/folders/myfolders/441s18/Lecture/replicmath.data.txt' is:
Filename=/folders/myfolders/441s18/Lecture/replicmath.data.txt,
Owner Name=root,Group Name=vboxsf,
Access Permission=-rwxrwx---,
Last Modified=04Feb2018:14:39:27,
File Size (bytes)=38214
NOTE: 579 records were read from the infile '/folders/myfolders/441s18/Lecture/replicmath.data.txt'.
The minimum record length was 64.
The maximum record length was 64.
NOTE: Missing values were generated as a result of performing an operation on missing values.
Each place is given by: (Number of times) at (Line):(Column).
81 at 281:24 81 at 282:18 97 at 361:12 30 at 361:23 59 at 361:35 186 at 362:9 186 at 362:18
186 at 362:19
NOTE: The data set WORK.MATHREP has 579 observations and 41 variables.
NOTE: DATA statement used (Total process time):
real time 0.00 seconds
cpu time 0.01 seconds
373 proc freq data = mathrep;
374 title2 'How good is the prediction?';
375 tables Prediction*passed / nocol;
376
NOTE: There were 579 observations read from the data set WORK.MATHREP.
NOTE: PROCEDURE FREQ used (Total process time):
real time 0.02 seconds
cpu time 0.02 seconds
377 proc iml;
NOTE: IML Ready
378 PercentCorrect = 35.75 + 34.54;
379 print PercentCorrect;
380 run;
NOTE: Module MAIN is undefined in IML; cannot be RUN.
381
382 /* Prediction may be better at the extremes. */
383
NOTE: Exiting IML.
NOTE: PROCEDURE IML used (Total process time):
real time 0.01 seconds
cpu time 0.01 seconds
384 proc freq data = mathrep;
385 tables pihat*passed / norow nocol nopercent;
386 run;
NOTE: There were 579 observations read from the data set WORK.MATHREP.
NOTE: PROCEDURE FREQ used (Total process time):
real time 0.79 seconds
cpu time 0.78 seconds
387
388
389
390
391
392
393 OPTIONS NONOTES NOSTIMER NOSOURCE NOSYNTAXCHECK;
406