/* mathlogreg3.sas */
/* Goal: Develop a prediction model that uses all the data and makes
         a prediction for every case. */

%include '/home/u1407221/441s24/SAS08/ReadLabelMath2.sas';
/* NOTE(review): the included program is presumably what creates the data
   set "explore" and the format ynfmt. used below; confirm. */

title2 'Try using missing values for prediction';

/* Make missing data indicators: 1 = value is missing, 0 = value is present */
data mathex2;
     set explore;

     if gender = .   then sexmiss = 1;    else sexmiss = 0;    /* Includes mtongue */
     if course2 = .  then coursemiss = 1; else coursemiss = 0;
     if hsgpa = .    then hsgpamiss = 1;  else hsgpamiss = 0;
     if hscalc = .   then hscalcmiss = 1; else hscalcmiss = 0;
     if hsengl = .   then hsenglmiss = 1; else hsenglmiss = 0;
     if totscore = . then testmiss = 1;   else testmiss = 0;

     /* Number of indicators that are set, out of the six created above.
        The OF keyword is required here. Without it SAS reads the argument
        as arithmetic, sexmiss minus (minus testmiss), so the result would
        be sexmiss + testmiss instead of the sum over the whole list.
        The name-range list relies on the six indicators being created
        consecutively in this step (assumes none of them already exists
        in explore; confirm). */
     nmiss = sum(of sexmiss--testmiss);

     /* Arithmetic on a missing operand yields missing, so the sum is
        missing when any one of the three predictors is missing. */
     if hsgpa+hscalc+precalc = . then missused = 1;
     else missused = 0;

     format sexmiss -- testmiss missused ynfmt.;
     label sexmiss    = 'Gender and mother tongue missing'
           coursemiss = 'Course missing'
           hsgpamiss  = 'HS GPA missing'
           hscalcmiss = 'HS Calculus mark missing'
           hsenglmiss = 'HS English mark missing'
           testmiss   = 'Diagnostic test scores missing'
           missused   = 'Any of hsgpa hscalc precalc missing';
run;

/* NOTE(review): the commented-out check below tabulates "course", while
   coursemiss is built from "course2" above; confirm which is intended
   before re-enabling it. */

/* Checks are commented out
proc freq;
     tables gender*sexmiss     / norow nocol nopercent missing;
     tables course*coursemiss  / norow nocol nopercent missing;
     tables hsgpa*hsgpamiss    / norow nocol nopercent missing;
     tables hscalc*hscalcmiss  / norow nocol nopercent missing;
     tables hsengl*hsenglmiss  / norow nocol nopercent missing;
     tables totscore*testmiss  / norow nocol nopercent missing;
     tables (hsgpamiss hscalcmiss testmiss)*missused
                               / norow nocol nopercent missing;
*/

/* Each indicator, and the count of indicators, crossed with passing */
proc freq data=mathex2;
     title2 'Check usefulness of missing data indicators one at a time';
     tables (sexmiss -- testmiss nmiss) * passed / nocol nopercent chisq;
run;

proc freq data=mathex2;
     title2 'Missingness on variables used, and passing the course';
     tables missused * passed / nocol nopercent chisq;
run;

/* Strategy: If missing on hsgpa, hscalc or precalc, predict they will
             not pass. If not missing, use the model with hsgpa, hscalc
             and precalc.

   Question: Will missingness on Gender/Mother tongue, Course or
             HS English add to the ability of (hsgpa, hscalc or precalc)
             to predict?

   However, the following table shows that every student who was missing
   course was also missing on at least one of the good predictors, so
   coursemiss is out. */

proc freq data=mathex2;
     tables coursemiss*missused / norow nocol nopercent missing;
run;

/* Logistic regression on the three predictors plus the two remaining
   missingness indicators; the labelled TEST statement tests the two
   indicator coefficients jointly against zero. */
proc logistic data = mathex2;
     title3 'HS GPA, HS Calculus and Pre-calculus test';
     model passed (event='Yes') = hsgpa hscalc precalc sexmiss hsenglmiss;
     MissingVars: test sexmiss=hsenglmiss=0;
run;

/* If missing on hsgpa, hscalc or precalc, give them an estimated
   probability of passing = 0.348. If not missing, use the model with
   hsgpa, hscalc and precalc to calculate the estimated probabilities. */

quit;