/* mathlogreg3.sas */
/* Goal: Develop a prediction model that uses all the data and makes
   a prediction for every case. */

/* The included program is expected to create the data set mathex and
   the format ynfmt. used below (not visible here -- confirm). */
%include '/folders/myfolders/441s18/Lecture/mathread1.sas';
title2 'Use missing values for prediction';

/* Make missing data indicators: 1 = value is missing, 0 = observed. */
data mathex2;
     set mathex;

     /* Includes mtongue */
     if gender = .   then sexmiss = 1;    else sexmiss = 0;
     if course = .   then coursemiss = 1; else coursemiss = 0;
     if hsgpa = .    then hsgpamiss = 1;  else hsgpamiss = 0;
     if hscalc = .   then hscalcmiss = 1; else hscalcmiss = 0;
     if hsengl = .   then hsenglmiss = 1; else hsenglmiss = 0;
     if totscore = . then testmiss = 1;   else testmiss = 0;

     /* Flag cases missing any of the three predictors used in the model.
        Explicit missing() tests give the same result as checking whether
        the sum hsgpa+hscalc+precalc is missing, without writing
        "Missing values were generated" notes to the log. */
     if missing(hsgpa) or missing(hscalc) or missing(precalc)
        then missused = 1;
        else missused = 0;

     /* Count of the six indicators sexmiss through testmiss.
        The OF keyword is required: without it, SAS evaluates
        sexmiss--testmiss as the arithmetic expression
        sexmiss - (-testmiss), i.e. just sexmiss + testmiss.
        The double-dash list depends on the creation order above. */
     nmiss = sum(of sexmiss--testmiss);

     format sexmiss -- missused ynfmt.;
     label sexmiss    = 'Gender and mother tongue missing'
           coursemiss = 'Course missing'
           hsgpamiss  = 'HS GPA missing'
           hscalcmiss = 'HS Calculus mark missing'
           hsenglmiss = 'HS English mark missing'
           testmiss   = 'Diagnostic test scores missing'
           missused   = 'Any of hsgpa hscalc precalc missing';
run;

/* Checks are commented out
proc freq data=mathex2;
     tables gender*sexmiss     / norow nocol nopercent missing;
     tables course*coursemiss  / norow nocol nopercent missing;
     tables hsgpa*hsgpamiss    / norow nocol nopercent missing;
     tables hscalc*hscalcmiss  / norow nocol nopercent missing;
     tables hsengl*hsenglmiss  / norow nocol nopercent missing;
     tables totscore*testmiss  / norow nocol nopercent missing;
     tables (hsgpamiss hscalcmiss testmiss)*missused
                               / norow nocol nopercent missing;
run;
*/

/* Cross-tabulate each indicator (and the count nmiss) against passed. */
proc freq data=mathex2;
     title2 'Check usefulness of missing data indicators one at a time';
     tables (sexmiss -- missused nmiss) * passed / nocol nopercent chisq;
run;

/* Is being missing on any model predictor related to passing? */
proc freq data=mathex2;
     title2 'Missingness on variables used, and passing the course';
     tables missused * passed / norow nocol chisq;
run;

/* Among cases missing a model predictor, do the other indicators
   still carry information about passing? */
proc freq data=mathex2;
     title2 'Just those with missing on variables used';
     where missused = 1;
     tables (sexmiss coursemiss) * passed / norow nocol chisq;
run;

/* Decision: If missing on hsgpa, hscalc or precalc, predict they
will not pass. If not missing, use the model with hsgpa, hscalc and precalc. */