/* mathlogreg4.sas */
/* Purpose: fit a logistic regression for passing on hsgpa, hscalc and
   precalc in the exploratory sample, give cases with missing predictors a
   fixed estimated probability, pick a cutoff on pihat, and then check the
   resulting pass / not-pass predictions against the replication sample. */

/* Reads the exploratory data; the steps below expect it to supply the data
   set mathex and the format ynfmt. */
%include '/folders/myfolders/441s18/Lecture/mathread1.sas';
title2 'Use missing values for prediction';

/* Make missing data indicators */
data mathex2;
   set mathex;
   /* nmiss counts the missing arguments directly, so the log is not filled
      with "missing values were generated" notes from adding the variables. */
   if nmiss(hsgpa, hscalc, precalc) > 0 then missused = 1;
   else missused = 0;
   format missused ynfmt.;
   label missused = 'Any of hsgpa hscalc precalc missing';
run;

proc freq data=mathex2;
   title2 'Percent passed for the record';
   tables passed;
run;

proc freq data=mathex2;
   title2 'Missingness on variables used, as a predictor of passing';
   tables missused * passed / nocol nopercent chisq;
run;

/* If missing on hsgpa, hscalc or precalc, give them an estimated probability
   of passing of 0.348. If not missing, use the model with hsgpa, hscalc and
   precalc to calculate the estimated probabilities. */

proc logistic data = mathex2 noprint;
   title3 'Look at estimated probabilities';
   model passed (event='Yes') = hsgpa hscalc precalc;
   output out=mathex3 prob=pihat;
   /* The data set mathex3 has everything in mathex2, and also pihat */
run;

proc print data=mathex3 (obs=13);   /* List only the first 13 observations */
   var hsgpa hscalc precalc pihat;
run;

/* Based on invariance and the Law of Total Probability (double expectation),
   I predict that the mean pihat will be around 0.624, the proportion of
   students with non-missing data who passed. */

proc univariate normal plot data=mathex3;
   title2 'Explore the distribution of estimated probabilities';
   where missused = 0;   /* Should have n=375 */
   var pihat;
run;

/* Where should we draw the line, and predict that the student will pass?
   In the exploratory sample, 52.68% passed overall, so find the pihat that
   cuts off the bottom 52.68%, and use that as a cutoff. */

data mathex4;
   set mathex3;
   if pihat = . then pihat = 0.348;   /* Proportion with missing data who passed */
run;

proc freq data = mathex4;
   title2 'Seek the 52.68 percentile';
   tables pihat;
run;

/* There was a pihat right at the 52.68th percentile: 0.4063743825.
   If pihat > 0.406, predict the student will pass.
   For the record and before peeking, I predict 75% correct. */

/* Read and transform the replication data -- same code as mathread1.sas
   except no keep statement. Creates the data set mathrep. */
%include '/folders/myfolders/441s18/Lecture/readexplor.sas';

   /* The data step continues ... */
   /* Declare the length before the first assignment: otherwise the variable
      takes the length of the first literal assigned and 'Not Pass' would be
      truncated. */
   length Prediction $ 8;
   b0 = -14.7970; b1 = 0.1173; b2 = 0.0638; b3 = 0.2989;   /* Exploratory sample values */
   L = b0 + b1*hsgpa + b2*hscalc + b3*precalc;
   pihat = exp(L)/(1+exp(L));          /* Some will be missing. */
   /* Proportion who passed among exploratory-sample students with missing data */
   if pihat = . then pihat = 0.348;
   if pihat > 0.406 then Prediction = 'Pass';
   else Prediction = 'Not Pass';
run;

/* Commented out
proc freq data = mathrep;
   title2 'Checking Prediction';
   tables pihat * Prediction / norow nocol nopercent missing;
run;
*/

proc freq data = mathrep;
   title2 'How good is the prediction?';
   tables Prediction*passed / nocol;
run;

/* The two numbers added here are hard-coded percentages, presumably the
   correctly classified cells read off the table above -- re-check them if
   the data or the cutoff change. */
proc iml;
   PercentCorrect = 35.75 + 34.54;
   print PercentCorrect;
quit;   /* QUIT exits IML; RUN inside IML is for executing modules */

/* Prediction may be better at the extremes. */
proc freq data = mathrep;
   tables pihat*passed / norow nocol nopercent;
run;