1 OPTIONS NONOTES NOSTIMER NOSOURCE NOSYNTAXCHECK;
61
62 /********************* 2101f17senic1.sas ***************************/
63 title 'Open SENIC Data';
64 /* I reluctantly changed NA to . with a text editor. */
65
66 data senic;
67 infile '/folders/myfolders/2101f17/openSENIC.data.txt' firstobs=2;
68 /* firstobs=2 skips the first line of the file, which is there for R */
69 length region $ 12; /* Otherwise region values are truncated to 8 characters */
70 input id region $ mdschl $ census nbeds nurses lngstay age
71 xratio culratio infpercent;
72 label region = 'Region of U.S.A.'
73 mdschl = 'Medical school affiliation'
74 census = 'Aver # patients in hospital per day'
75 nbeds = 'Average # beds during study period'
76 nurses = 'Aver # nurses during study period'
77 lngstay = 'Av length of hospital stay, in days'
78 age = 'Average patient age'
79 xratio = '# x-rays / # no signs of pneumonia'
80 culratio = '# cultures / # no hosp acq infect'
81 infpercent = 'Percent acquiring infection in hospital';
82 /* Make dummy variables. Proc reg needs them, proc glm does not. */
83 if mdschl = 'No' then mschool = 0;
84 else if mdschl = 'Yes' then mschool = 1;
85 /* Region */
86 if region = 'NorthCentral' then nc = 1;
87 else if region = ' ' then nc = .;
88 else nc = 0;
89 if region = 'Northeast' then ne = 1;
90 else if region = ' ' then ne = .;
91 else ne = 0;
92 if region = 'South' then s = 1;
93 else if region = ' ' then s = .;
94 else s = 0;
95 if region = 'West' then w = 1;
96 else if region = ' ' then w = .;
97 else w = 0;
98
99 /* First priority is to check the dummy variables. */
100
NOTE: The infile '/folders/myfolders/2101f17/openSENIC.data.txt' is:
Filename=/folders/myfolders/2101f17/openSENIC.data.txt,
Owner Name=root,Group Name=vboxsf,
Access Permission=-rwxrwx---,
Last Modified=07Sep2017:19:15:05,
File Size (bytes)=8585
NOTE: 100 records were read from the infile '/folders/myfolders/2101f17/openSENIC.data.txt'.
The minimum record length was 83.
The maximum record length was 83.
NOTE: The data set WORK.SENIC has 100 observations and 16 variables.
NOTE: DATA statement used (Total process time):
real time 0.00 seconds
cpu time 0.01 seconds
101 proc freq;
102 tables mschool*mdschl / norow nocol nopercent missing;
NOTE: There were 100 observations read from the data set WORK.SENIC.
NOTE: PROCEDURE FREQ used (Total process time):
real time 0.05 seconds
cpu time 0.04 seconds
103 proc freq;
104 tables (nc ne s w) * region / norow nocol nopercent missing;
105
NOTE: There were 100 observations read from the data set WORK.SENIC.
NOTE: PROCEDURE FREQ used (Total process time):
real time 0.07 seconds
cpu time 0.08 seconds
106 proc freq;
107 title2 'Frequency distributions of categorical variables';
108 tables region mdschl;
109
NOTE: There were 100 observations read from the data set WORK.SENIC.
NOTE: PROCEDURE FREQ used (Total process time):
real time 0.03 seconds
cpu time 0.03 seconds
110 proc means;
111 title2 'Table of means and standard deviations';
112 var census nbeds nurses lngstay age xratio culratio infpercent;
113
NOTE: There were 100 observations read from the data set WORK.SENIC.
NOTE: PROCEDURE MEANS used (Total process time):
real time 0.05 seconds
cpu time 0.05 seconds
114 proc univariate plot;
115 title2 'Detailed descriptive statistics with boxplots and histograms';
116 var census -- infpercent;
117 histogram;
118
NOTE: PROCEDURE UNIVARIATE used (Total process time):
real time 3.15 seconds
cpu time 1.68 seconds
119 proc sgplot; /* Bar charts for categorical variables */
120 vbar region;
121 proc sgplot;
NOTE: PROCEDURE SGPLOT used (Total process time):
real time 0.19 seconds
cpu time 0.08 seconds
NOTE: There were 100 observations read from the data set WORK.SENIC.
122 vbar mdschl;
123
124 /* Boxplots for different values of a grouping variable */
125 /* Need to sort by the grouping variable first */
126 proc sort;
NOTE: PROCEDURE SGPLOT used (Total process time):
real time 0.16 seconds
cpu time 0.08 seconds
NOTE: There were 100 observations read from the data set WORK.SENIC.
127 by mdschl;
NOTE: There were 100 observations read from the data set WORK.SENIC.
NOTE: The data set WORK.SENIC has 100 observations and 16 variables.
NOTE: PROCEDURE SORT used (Total process time):
real time 0.00 seconds
cpu time 0.00 seconds
128 proc boxplot;
129 plot infpercent*mdschl;
NOTE: Since an input data set was not specified, data set SENIC is assumed as a DATA= data set.
NOTE: Processing beginning for PLOT statement number 1.
NOTE: There were 100 observations read from the data set WORK.SENIC.
NOTE: PROCEDURE BOXPLOT used (Total process time):
real time 0.20 seconds
cpu time 0.10 seconds
130 proc freq;
131 title2 'Relationship between region and medical school affiliation';
132 tables mdschl*region / norow nopercent chisq;
133
NOTE: There were 100 observations read from the data set WORK.SENIC.
NOTE: PROCEDURE FREQ used (Total process time):
real time 0.05 seconds
cpu time 0.05 seconds
134 proc ttest;
135 title2 'Less risk at Hospitals with Med School Affiliation?';
136 title3 'Compare t = -2.542';
137 class mdschl;
138 var infpercent;
139
NOTE: PROCEDURE TTEST used (Total process time):
real time 0.67 seconds
cpu time 0.31 seconds
140 proc glm;
141 title2 'Regional differences in average infection risk?';
142 title3 'Compare F = 2.674, p = 0.0519';
143 class region;
144 model infpercent = region;
145 means region;
146
147 /* Could get the means from proc means, with no extra boxplots */
148
NOTE: PROCEDURE GLM used (Total process time):
real time 0.46 seconds
cpu time 0.24 seconds
149 proc means;
150 class region;
151 var infpercent;
152
NOTE: There were 100 observations read from the data set WORK.SENIC.
NOTE: PROCEDURE MEANS used (Total process time):
real time 0.03 seconds
cpu time 0.02 seconds
153 proc corr;
154 title2 'Correlation matrix of quantitative variables';
155 var census nbeds nurses lngstay age xratio culratio infpercent;
156 /* The nomiss option gives casewise deletion of missing values. */
157
NOTE: PROCEDURE CORR used (Total process time):
real time 0.12 seconds
cpu time 0.11 seconds
158 proc reg plots=none; /* Suppress diagnostic plots for now*/
159 title2 'Simple regression (One explanatory variable)';
160 model infpercent = nurses;
161
NOTE: PROCEDURE REG used (Total process time):
real time 0.10 seconds
cpu time 0.08 seconds
162 proc reg plots=none;
163 title2 'Just the hospital size variables';
164 model infpercent = census nbeds nurses;
165
166 /* Fit the model with all quantitative variables, and test the size
167 variables (census nbeds nurses) simultaneously */
168
NOTE: PROCEDURE REG used (Total process time):
real time 0.06 seconds
cpu time 0.05 seconds
169 proc reg plots=none;
170 title2 'All the quantitative explanatory variables';
171 title3 'Compare F-test for size: F = 2.7662';
172 model infpercent = census nbeds nurses lngstay age xratio culratio;
173 size: test census = nbeds = nurses = 0;
174
NOTE: PROCEDURE REG used (Total process time):
real time 0.08 seconds
cpu time 0.08 seconds
175 proc reg plots=none;
176 title2 'Full model including categorical variables';
177 title3 'Reference category for region is South';
178 model infpercent = census nbeds nurses lngstay age xratio culratio
179 mschool nc ne w;
180 region: test nc = ne = w = 0;
181 size: test census=nbeds=nurses = 0;
182
183 /* Proc glm will make dummy variables for you. */
184
NOTE: PROCEDURE REG used (Total process time):
real time 0.11 seconds
cpu time 0.09 seconds
185 proc glm;
186 title2 'Full model with proc glm';
187 title3 'Compare tests of region (F=4.0769) and size (F=3.5856)';
188 class region mdschl;
189 model infpercent = census nbeds nurses lngstay age xratio culratio
190 mdschl region / solution;
191 /* The solution option requests the regression coefficients. */
192 /* Test region and size */
193 contrast 'Test of Region'
194 region 1 -1 0 0,
195 region 0 1 -1 0,
196 region 0 0 1 -1;
197 contrast 'Test of size'
198 census 1,
199 nbeds 1,
200 nurses 1;
201 lsmeans region;
202
NOTE: PROCEDURE GLM used (Total process time):
real time 0.28 seconds
cpu time 0.20 seconds
203 proc glm;
204 title2 'Try lsmeans another way';
205 class region;
206 model infpercent = census nbeds nurses lngstay age xratio culratio
207 mschool region;
208 lsmeans region / at mschool=0;
209
NOTE: PROCEDURE GLM used (Total process time):
real time 0.25 seconds
cpu time 0.15 seconds
210 proc reg plots=none;
211 title2 'Just hospitals with no medical school affiliation';
212 where mdschl = 'No';
213 model infpercent = census nbeds nurses lngstay age xratio culratio
214 nc ne w;
215
216
217
218 OPTIONS NONOTES NOSTIMER NOSOURCE NOSYNTAXCHECK;
231