/************************** senic2.sas ***************************/
/* Reads the open SENIC hospital data, cleans up missing-value    */
/* codes, derives an age category, a composite "quality" index    */
/* and dummy (indicator) variables, then produces checks and      */
/* descriptive statistics and plots.                              */
/*****************************************************************/

title 'Open SENIC Data';
title2 'Descriptive stats with better labels';

proc format; /* Value labels used in data step below */
     value yesnofmt 1 = 'Yes'
                    0 = 'No' ;
     value regfmt   1 = 'Northeast'
                    2 = 'North Central'
                    3 = 'South'
                    4 = 'West' ;
run;

data senic;
     /* firstobs=2 starts reading at the second line of the raw file. */
     infile '/home/u1407221/441s24/SAS02/openSENIC2.data.txt' firstobs=2;
     input id region mdschl $ census nbeds nurses lngstay age
           xratio culratio infpercent;

     /* Declare the length before the first assignment so that the
        longer category value is never truncated. */
     length agecat $ 10;

     label region     = 'Region of U.S.A.'
           mdschl     = 'Medical school affiliation'
           census     = 'Aver # patients in hospital per day'
           nbeds      = 'Average # beds during study period'
           nurses     = 'Aver # nurses during study period'
           lngstay    = 'Av length of hospital stay, in days'
           age        = 'Average patient age'
           xratio     = '# x-rays / # no signs of pneumonia'
           culratio   = '# cultures / # no hosp acq infect'
           infpercent = 'Percent acquiring infection in hospital';

     /* Fix up problems */
     if mdschl = '?' then mdschl = ' '; /* Blank = missing for character vars */
     if age = 999 then age = .;
     if infpercent = 999 then infpercent = .;

     /***** Recodes, computes & ifs *****/

     /* Two-way age split at 53. The previous statement was
        "if 053 then ...", whose condition is the non-zero constant 53
        and is therefore always true, so every case (even those with
        missing age) was labelled 'Over 53'. A numeric missing value
        compares below any number, so when age is missing neither
        branch fires and agecat stays blank (missing). */
     if 0 < age <= 53 then agecat = '53 & under';
     else if age > 53 then agecat = 'Over 53';
     label agecat = 'Average patient age';

     quality = (2*nurses + nbeds + 10*culratio + 10*xratio - 2*lngstay);
     if mdschl eq 'No' then quality = quality/2;
     if (region eq 3) then quality = quality - 100;
     label quality = 'Jerry''s bogus hospital quality index';

     /* Make dummy variables. Proc reg needs them, proc glm does not. */

     /* mschool is left missing when mdschl is neither 'No' nor 'Yes'
        (for example, after '?' was blanked out above). */
     if mdschl = 'No' then mschool = 0;
     else if mdschl = 'Yes' then mschool = 1;

     /* Region: each indicator is 1 for its own region, missing when
        region is missing, and 0 otherwise. */
     if region = 1 then ne = 1;          /* Northeast */
     else if region = . then ne = .;
     else ne = 0;

     if region = 2 then nc = 1;          /* North Central */
     else if region = . then nc = .;
     else nc = 0;

     if region = 3 then s = 1;           /* South */
     else if region = . then s = .;
     else s = 0;

     if region = 4 then w = 1;           /* West */
     else if region = . then w = .;
     else w = 0;

     format nc ne s w yesnofmt.; /* Associate vars with printing format */
     format region regfmt.;      /* Note the period. */
run;

/* First priority is to check the dummy variables. */
proc freq;
     tables mschool*mdschl / norow nocol nopercent missing;
run;

proc freq;
     tables (ne nc s w) * region / norow nocol nopercent missing;
run;

/* Descriptive Statistics */
proc freq;
     title2 'Frequency distributions of categorical variables';
     tables region mdschl agecat nc ne s w;
run;

proc sgplot; /* Bar charts for categorical variables */
     title2 'Bar Chart of Region';
     vbar region;
run;

proc sgplot;
     title2 'Bar Chart of Medical School Affiliation';
     vbar mdschl;
run;

proc means;
     title2 'Table of means and standard deviations';
     var census nbeds nurses lngstay age xratio culratio infpercent quality;
run;

proc univariate normal plot;
     title2 'A close look at infection percent';
     var infpercent; /* Could be a list of variables */
run;

/* Boxplots for different values of a grouping variable */
/* Need to sort by the grouping variable first */
proc sort;
     by region;
run;

proc boxplot;
     title2 'Infection Risk by Region';
     plot infpercent*region;
run;