/* MathReg2.sas
   Basic regression diagnostics for the model
       grade = hsgpa hscalc hsengl totscore mtongue
   Steps: fit the model and save case diagnostics, screen the hat values
   (leverage), re-fit without two suspect cases, screen the Studentized
   deleted residuals with a Bonferroni-corrected t critical value, and
   finish with a few residual plots.
   The data set "explore" is expected to come from the included file. */
%include '/home/u1407221/441s24/SAS08/ReadLabelMath2.sas';

title2 'Basic Regression Diagnostics';

/* Fit Model I and write per-case diagnostics to the data set Explor. */
proc reg data = explore plots(only) = ResidualPlot;
     title3 'Model I: hsgpa hscalc hsengl totscore mtongue';
     model grade = hsgpa hscalc hsengl totscore mtongue;
     output out = Explor
            H         = hatval
            predicted = yhat
            residual  = resid
            rstudent  = delstud; /* Deleted Studentized Residual */
     /* Could have included LCL and UCL for upper and lower limits of a 95%
        prediction interval for each case in the file */
run;
quit;

/* proc contents; */

/* Rules of thumb say investigate x values for outliers if
     * Hat value > 3p/n  (Is residual close to the error term?)
     * Hat value > 0.2   (Is betahat approximately multivariate normal?) */
proc iml;
     n = 289;   /* number of cases used in the regression            */
     p = 6;     /* number of betas: 5 explanatory variables + intercept */
     ceiling = 3*p/n;
     print "Investigate x if hat values are > 0.2 or greater than " ceiling;
quit;

proc univariate plot data=Explor;
     var hatval;
     where grade ne .;
run;

/* data= is named explicitly rather than relying on the most recently
   created data set. */
proc sgplot data=Explor;
     histogram hatval;
     where grade ne .;
run;

proc sort data=Explor;
     by hatval;
run;

/* 0.06228 is 3p/n = 3*6/289, the cutoff printed by proc iml above. */
proc print data = Explor;
     where hatval > 0.06228 and grade ne .;
     var id hatval hsgpa hscalc hsengl totscore mtongue grade delstud;
run;

/* Try re-running the analysis without the two suspect observations */
proc reg plots=none data=explore;
     title3 'Re-running without participants 50 and 340';
     model grade = hsgpa hscalc hsengl totscore mtongue;
     where id ne 340 and id ne 50;
run;
quit;

/* What is a big (Studentized deleted) residual? If the model is correct,
   each one has a t distribution with n-p-1 = 289-6-1 = 282 df (practically
   standard normal), so the Studentized deleted residual can be treated
   directly as a t-test statistic. Values that are too big in absolute value
   will cause rejection of the null hypothesis that x_i*beta is the same for
   this case and the other n-1 cases.

   Tests are NOT independent, but use a Bonferroni correction for n = 289
   tests. Get the critical value from proc iml. */
proc iml;
     title3 'Critical value for Joint t-test on Studentized Deleted Residuals';
     n = 289;
     p = 6;
     Alpha = 0.05/n;                   /* Bonferroni: one test per case      */
     print Alpha;
     df = n - p - 1;                   /* one case deleted, so n-1-p = 282   */
     Critval = tinv(1-Alpha/2, df);    /* two-sided critical value           */
     print Critval;
quit;

proc univariate data=Explor normal plot;
     title3 'Studentized Deleted Residuals';
     var delstud;
run;
/* Tests for normality indicate residuals are not normal.
   One Studentized deleted residual exceeds the critical value in
   absolute value. */

/* -3.81 is the (rounded) negative of the critical value printed above.
   Missing values compare as less than any number in SAS, so they are
   excluded explicitly. */
proc print data=Explor;
     title3 'Large Negative Studentized Deleted Residual';
     where delstud < -3.81 and delstud ne .;
run;

/* Next, a few more plots. */
proc sgplot data=Explor;
     title3 'Plot of Y-hat by Y';
     scatter y=grade x=yhat;
run;

/* NOTE(review): calc and precalc are not in the model; this assumes they
   are variables in "explore" (and so carried into Explor) - confirm against
   the included read/label file. */
proc sgplot data=Explor;
     title3 'Calculus sub-test by deleted studentized residual';
     scatter x=calc y=delstud;
run;

proc sgplot data=Explor;
     title3 'Pre-calculus sub-test by deleted studentized residual';
     scatter x=precalc y=delstud;
run;

proc sgplot data=Explor;
     title3 'Mother tongue by deleted studentized residual';
     scatter x=mtongue y=delstud;
run;