1 OPTIONS NONOTES NOSTIMER NOSOURCE NOSYNTAXCHECK;
70
71 /* mathlogreg4.sas */
72
73 %include '/home/brunner0/441s20/readmath2b.sas';
NOTE: Format YNFMT is already on the library WORK.FORMATS.
NOTE: Format YNFMT has been output.
NOTE: Format CRSFMT is already on the library WORK.FORMATS.
NOTE: Format CRSFMT has been output.
NOTE: Format NFMT is already on the library WORK.FORMATS.
NOTE: Format NFMT has been output.
NOTE: PROCEDURE FORMAT used (Total process time):
real time 0.00 seconds
user cpu time 0.00 seconds
system cpu time 0.00 seconds
memory 244.81k
OS Memory 32420.00k
Timestamp 02/10/2020 12:55:47 AM
Step Count 146 Switch Count 0
Page Faults 0
Page Reclaims 27
Page Swaps 0
Voluntary Context Switches 0
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 32
180 title2 'Use missing values for prediction';
181
182 /* Make missing data indicators */
NOTE: The infile '/home/brunner0/441s20/exploremath.data.txt' is:
Filename=/home/brunner0/441s20/exploremath.data.txt,
Owner Name=brunner0,Group Name=oda,
Access Permission=-rw-r--r--,
Last Modified=26Jan2020:18:49:34,
File Size (bytes)=44583
NOTE: 579 records were read from the infile '/home/brunner0/441s20/exploremath.data.txt'.
The minimum record length was 75.
The maximum record length was 75.
NOTE: Missing values were generated as a result of performing an operation on missing values.
Each place is given by: (Number of times) at (Line):(Column).
99 at 96:24 99 at 135:13
NOTE: The data set WORK.MATHEX has 579 observations and 35 variables.
NOTE: DATA statement used (Total process time):
real time 0.00 seconds
user cpu time 0.01 seconds
system cpu time 0.00 seconds
memory 963.50k
OS Memory 32936.00k
Timestamp 02/10/2020 12:55:47 AM
Step Count 147 Switch Count 3
Page Faults 0
Page Reclaims 124
Page Swaps 0
Voluntary Context Switches 27
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 520
183 data mathex2;
184 set mathex;
185 if hsgpa+hscalc+precalc = . then missused = 1 ;
186 else missused = 0;
187 format missused ynfmt.;
188 label missused = 'Any of hsgpa hscalc precalc missing';
189
NOTE: Missing values were generated as a result of performing an operation on missing values.
Each place is given by: (Number of times) at (Line):(Column).
142 at 185:14 204 at 185:21
NOTE: There were 579 observations read from the data set WORK.MATHEX.
NOTE: The data set WORK.MATHEX2 has 579 observations and 36 variables.
NOTE: DATA statement used (Total process time):
real time 0.00 seconds
user cpu time 0.00 seconds
system cpu time 0.01 seconds
memory 1306.68k
OS Memory 33196.00k
Timestamp 02/10/2020 12:55:47 AM
Step Count 148 Switch Count 2
Page Faults 0
Page Reclaims 114
Page Swaps 0
Voluntary Context Switches 14
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 520
190 proc freq data=mathex2;
191 title2 'Percent passed for the record';
192 tables passed;
193 run;
NOTE: There were 579 observations read from the data set WORK.MATHEX2.
NOTE: PROCEDURE FREQ used (Total process time):
real time 0.02 seconds
user cpu time 0.02 seconds
system cpu time 0.00 seconds
memory 2874.53k
OS Memory 33452.00k
Timestamp 02/10/2020 12:55:47 AM
Step Count 149 Switch Count 3
Page Faults 0
Page Reclaims 237
Page Swaps 0
Voluntary Context Switches 16
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 272
194
195 proc freq data=mathex2;
196 title2 'Missingness on variables used, as a predictor of passing';
197 tables missused * passed / nocol nopercent chisq;
198 run;
NOTE: There were 579 observations read from the data set WORK.MATHEX2.
NOTE: PROCEDURE FREQ used (Total process time):
real time 0.04 seconds
user cpu time 0.04 seconds
system cpu time 0.00 seconds
memory 1820.15k
OS Memory 33968.00k
Timestamp 02/10/2020 12:55:47 AM
Step Count 150 Switch Count 5
Page Faults 0
Page Reclaims 247
Page Swaps 0
Voluntary Context Switches 40
Involuntary Context Switches 1
Block Input Operations 0
Block Output Operations 528
199
200 /* If missing on hsgpa, hscalc or precalc, give them an estimated
201 probability of passing of 0.348. If not missing, use the model with
202 hsgpa, hscalc and precalc to calculate the estimated probabilities. */
203
204 proc logistic data = mathex2 noprint;
205 title3 'Look at estimated probabilities';
206 model passed (event='Yes') = hsgpa hscalc precalc;
207 output out=mathex3 prob=pihat;
208 /* The data set mathex3 has everything in mathex2, and also pihat */
209 run;
NOTE: PROC LOGISTIC is modeling the probability that passed='Yes'.
NOTE: Convergence criterion (GCONV=1E-8) satisfied.
NOTE: There were 579 observations read from the data set WORK.MATHEX2.
NOTE: The data set WORK.MATHEX3 has 579 observations and 38 variables.
NOTE: PROCEDURE LOGISTIC used (Total process time):
real time 0.00 seconds
user cpu time 0.00 seconds
system cpu time 0.01 seconds
memory 2342.03k
OS Memory 35004.00k
Timestamp 02/10/2020 12:55:47 AM
Step Count 151 Switch Count 3
Page Faults 0
Page Reclaims 229
Page Swaps 0
Voluntary Context Switches 22
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 552
210
211 proc print data=mathex3 (obs=13);
212 /* List only the first 13 observations */
213 var hsgpa hscalc precalc pihat;
214 run;
NOTE: There were 13 observations read from the data set WORK.MATHEX3.
NOTE: PROCEDURE PRINT used (Total process time):
real time 0.02 seconds
user cpu time 0.03 seconds
system cpu time 0.00 seconds
memory 934.43k
OS Memory 33704.00k
Timestamp 02/10/2020 12:55:47 AM
Step Count 152 Switch Count 0
Page Faults 0
Page Reclaims 62
Page Swaps 0
Voluntary Context Switches 0
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 24
215
216 /* Based on invariance and the Law of Total Probability (double expectation),
217 I predict that the mean pihat will be around 0.624, the proportion of
218 students with non-missing data who passed. */
219
220 proc univariate normal plot data=mathex3;
221 title2 'Explore the distribution of estimated probabilities';
222 where missused = 0; /* Should have n=375 */
223 var pihat;
224 run;
NOTE: PROCEDURE UNIVARIATE used (Total process time):
real time 0.38 seconds
user cpu time 0.17 seconds
system cpu time 0.01 seconds
memory 15070.68k
OS Memory 46508.00k
Timestamp 02/10/2020 12:55:47 AM
Step Count 153 Switch Count 4
Page Faults 0
Page Reclaims 3677
Page Swaps 0
Voluntary Context Switches 306
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 1008
225
226 /* Where should we draw the line, and predict that the student will pass?
227 In the exploratory sample, 52.68% passed overall, so find the
228 pihat that cuts off the bottom 52.68%, and use that as a cutoff.*/
229
230 data mathex4;
231 set mathex3;
232 if pihat = . then pihat = 0.348;
233 /* Proportion with missing data who passed */
234
NOTE: There were 579 observations read from the data set WORK.MATHEX3.
NOTE: The data set WORK.MATHEX4 has 579 observations and 38 variables.
NOTE: DATA statement used (Total process time):
real time 0.00 seconds
user cpu time 0.00 seconds
system cpu time 0.00 seconds
memory 1289.59k
OS Memory 46508.00k
Timestamp 02/10/2020 12:55:47 AM
Step Count 154 Switch Count 2
Page Faults 0
Page Reclaims 101
Page Swaps 0
Voluntary Context Switches 13
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 520
235 proc freq data = mathex4;
236 title2 'Seek the 52.68 percentile';
237 tables pihat;
238 run;
NOTE: There were 579 observations read from the data set WORK.MATHEX4.
NOTE: PROCEDURE FREQ used (Total process time):
real time 0.37 seconds
user cpu time 0.37 seconds
system cpu time 0.00 seconds
memory 1617.93k
OS Memory 47276.00k
Timestamp 02/10/2020 12:55:48 AM
Step Count 155 Switch Count 3
Page Faults 0
Page Reclaims 267
Page Swaps 0
Voluntary Context Switches 22
Involuntary Context Switches 1
Block Input Operations 0
Block Output Operations 528
239
240 /* There was a pihat right at the 52.68th percentile: 0.4063743825.
241 If pihat > 0.406, predict the student will pass. For the record and
242 before peeking, I predict 75% correct. */
243
244 /* Read and transform the replication data -- same code as mathread1.sas
245 except no keep statement. Creates the data set mathrep. */
246 %include '/home/brunner0/441s20/readreplic.sas';
NOTE: Format YNFMT is already on the library WORK.FORMATS.
NOTE: Format YNFMT has been output.
NOTE: Format CRSFMT is already on the library WORK.FORMATS.
NOTE: Format CRSFMT has been output.
NOTE: Format NFMT is already on the library WORK.FORMATS.
NOTE: Format NFMT has been output.
NOTE: PROCEDURE FORMAT used (Total process time):
real time 0.00 seconds
user cpu time 0.00 seconds
system cpu time 0.00 seconds
memory 351.71k
OS Memory 46756.00k
Timestamp 02/10/2020 12:55:48 AM
Step Count 156 Switch Count 0
Page Faults 0
Page Reclaims 14
Page Swaps 0
Voluntary Context Switches 0
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 0
359 /* The data step continues ... */
360
361 b0 = -14.7970; b1 = 0.1173; b2 = 0.0638; b3 = 0.2989; /* Exploratory sample values */
362 L = b0 + b1*hsgpa + b2*hscalc + b3*precalc;
363 pihat = exp(L)/(1+exp(L)); /* Some will be missing. */
364 if pihat = . then pihat = 0.348; /* Proportion who passed among exploratory-sample students with missing data */
365 if pihat > 0.406 then Prediction = 'Pass '; else Prediction = 'Not Pass';
366
367 /* Commented out
368 proc freq data = replic;
369 title2 'Checking Prediction';
370 tables pihat * Prediction / norow nocol nopercent missing;
371 run;
372 */
373
NOTE: The infile '/home/brunner0/441s20/replicmath.data.txt' is:
Filename=/home/brunner0/441s20/replicmath.data.txt,
Owner Name=brunner0,Group Name=oda,
Access Permission=-rw-r--r--,
Last Modified=26Jan2020:18:49:13,
File Size (bytes)=38214
NOTE: 579 records were read from the infile '/home/brunner0/441s20/replicmath.data.txt'.
The minimum record length was 64.
The maximum record length was 64.
NOTE: Missing values were generated as a result of performing an operation on missing values.
Each place is given by: (Number of times) at (Line):(Column).
81 at 274:24 81 at 310:18 97 at 362:12 30 at 362:23 59 at 362:35 186 at 363:9 186 at 363:18
186 at 363:19
NOTE: The data set WORK.REPLIC has 579 observations and 42 variables.
NOTE: DATA statement used (Total process time):
real time 0.00 seconds
user cpu time 0.00 seconds
system cpu time 0.01 seconds
memory 909.65k
OS Memory 47016.00k
Timestamp 02/10/2020 12:55:48 AM
Step Count 157 Switch Count 2
Page Faults 0
Page Reclaims 69
Page Swaps 0
Voluntary Context Switches 19
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 520
374 proc freq data = replic;
375 title2 'How good is the prediction?';
376 tables Prediction*passed / nocol;
377
NOTE: There were 579 observations read from the data set WORK.REPLIC.
NOTE: PROCEDURE FREQ used (Total process time):
real time 0.02 seconds
user cpu time 0.03 seconds
system cpu time 0.00 seconds
memory 1170.93k
OS Memory 47536.00k
Timestamp 02/10/2020 12:55:48 AM
Step Count 158 Switch Count 5
Page Faults 0
Page Reclaims 191
Page Swaps 0
Voluntary Context Switches 38
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 536
378 proc iml;
NOTE: IML Ready
379 PercentCorrect = 35.75 + 34.54;
380 print PercentCorrect;
381 run;
NOTE: Module MAIN is undefined in IML; cannot be RUN.
382
383 /* Prediction may be better at the extremes. */
384
NOTE: Exiting IML.
NOTE: PROCEDURE IML used (Total process time):
real time 0.00 seconds
user cpu time 0.01 seconds
system cpu time 0.00 seconds
memory 540.28k
OS Memory 46756.00k
Timestamp 02/10/2020 12:55:48 AM
Step Count 159 Switch Count 0
Page Faults 0
Page Reclaims 136
Page Swaps 0
Voluntary Context Switches 0
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 8
385 proc freq data = replic;
386 tables pihat*passed / norow nocol nopercent;
387 run;
NOTE: There were 579 observations read from the data set WORK.REPLIC.
NOTE: PROCEDURE FREQ used (Total process time):
real time 0.89 seconds
user cpu time 0.90 seconds
system cpu time 0.00 seconds
memory 1312.62k
OS Memory 47536.00k
Timestamp 02/10/2020 12:55:49 AM
Step Count 160 Switch Count 4
Page Faults 0
Page Reclaims 192
Page Swaps 0
Voluntary Context Switches 29
Involuntary Context Switches 2
Block Input Operations 0
Block Output Operations 864
388
389
390
391
392
393
394 OPTIONS NONOTES NOSTIMER NOSOURCE NOSYNTAXCHECK;
405