1 OPTIONS NONOTES NOSTIMER NOSOURCE NOSYNTAXCHECK;
68
69 /************************** senic2.sas ***************************/
70 title 'Open SENIC Data';
71 title2 'Descriptive stats with better labels';
72
73 proc format;
73 ! /* Value labels used in data step below */
74 value yesnofmt 1 = 'Yes' 0 = 'No' ;
NOTE: Format YESNOFMT is already on the library WORK.FORMATS.
NOTE: Format YESNOFMT has been output.
75 value regfmt 1 = 'Northeast'
76 2 = 'North Central'
77 3 = 'South'
78 4 = 'West' ;
NOTE: Format REGFMT is already on the library WORK.FORMATS.
NOTE: Format REGFMT has been output.
79
NOTE: PROCEDURE FORMAT used (Total process time):
real time 0.00 seconds
user cpu time 0.00 seconds
system cpu time 0.00 seconds
memory 264.37k
OS Memory 22944.00k
Timestamp 12/21/2023 06:56:44 PM
Step Count 147 Switch Count 0
Page Faults 0
Page Reclaims 14
Page Swaps 0
Voluntary Context Switches 0
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 32
80 data senic;
81 infile '/home/u1407221/441s24/SAS02/openSENIC2.data.txt' firstobs=2;
82 input id region mdschl $ census nbeds nurses lngstay age
83 xratio culratio infpercent;
84 label region = 'Region of U.S.A.'
85 mdschl = 'Medical school affiliation'
86 census = 'Aver # patients in hospital per day'
87 nbeds = 'Average # beds during study period'
88 nurses = 'Aver # nurses during study period'
89 lngstay = 'Av length of hospital stay, in days'
90 age = 'Average patient age'
91 xratio = '# x-rays / # no signs of pneumonia'
92 culratio = '# cultures / # no hosp acq infect'
93 infpercent = 'Percent acquiring infection in hospital';
94
95 /* Fix up problems */
96 if mdschl = '?' then mdschl = ' '; /* Blank = missing for character vars */
97 if age = 999 then age = .;
98 if infpercent = 999 then infpercent = .;
99
100 /***** Recodes, computes & ifs *****/
101 if 0<age<=53 then agecat = '53 & under';
102 else if age>53 then agecat='Over 53';
103 label agecat = 'Average patient age';
104 quality=(2*nurses+nbeds+10*culratio
105 +10*xratio-2*lngstay);
106 if mdschl eq 'No' then quality=quality/2;
107 if (region eq 3) then quality=quality-100;
108 label quality = 'Jerry''s bogus hospital quality index';
109
110 /* Make dummy variables. Proc reg needs them, proc glm does not. */
111 if mdschl = 'No' then mschool = 0;
112 else if mdschl = 'Yes' then mschool = 1;
113 /* Region */
114 if region = 1 then ne = 1; /* Northeast */
115 else if region = . then ne = .;
116 else ne = 0;
117 if region = 2 then nc = 1; /* North Central */
118 else if region = . then nc = .;
119 else nc = 0;
120 if region = 3 then s = 1; /* South */
121 else if region = . then s = .;
122 else s = 0;
123 if region = 4 then w = 1; /* West */
124 else if region = . then w = .;
125 else w = 0;
126
127 format nc ne s w yesnofmt.; /* Associate vars with printing format */
128 format region regfmt.; /* Note the period. */
129
130 /* First priority is to check the dummy variables. */
131
NOTE: The infile '/home/u1407221/441s24/SAS02/openSENIC2.data.txt' is:
Filename=/home/u1407221/441s24/SAS02/openSENIC2.data.txt,
Owner Name=u1407221,Group Name=oda,
Access Permission=-rw-r--r--,
Last Modified=21Dec2023:11:37:19,
File Size (bytes)=8585
NOTE: 100 records were read from the infile '/home/u1407221/441s24/SAS02/openSENIC2.data.txt'.
The minimum record length was 83.
The maximum record length was 83.
NOTE: Missing values were generated as a result of performing an operation on missing values.
Each place is given by: (Number of times) at (Line):(Column).
3 at 104:16 3 at 106:44 2 at 107:43
NOTE: The data set WORK.SENIC has 100 observations and 18 variables.
NOTE: DATA statement used (Total process time):
real time 0.02 seconds
user cpu time 0.00 seconds
system cpu time 0.00 seconds
memory 822.25k
OS Memory 23204.00k
Timestamp 12/21/2023 06:56:44 PM
Step Count 148 Switch Count 2
Page Faults 0
Page Reclaims 117
Page Swaps 0
Voluntary Context Switches 20
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 272
132 proc freq;
133 tables mschool*mdschl / norow nocol nopercent missing;
NOTE: There were 100 observations read from the data set WORK.SENIC.
NOTE: PROCEDURE FREQ used (Total process time):
real time 0.01 seconds
user cpu time 0.02 seconds
system cpu time 0.00 seconds
memory 2084.78k
OS Memory 23724.00k
Timestamp 12/21/2023 06:56:44 PM
Step Count 149 Switch Count 4
Page Faults 0
Page Reclaims 191
Page Swaps 0
Voluntary Context Switches 22
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 536
134 proc freq;
135 tables (ne nc s w) * region / norow nocol nopercent missing;
136
137 /* Descriptive Statistics */
138
NOTE: There were 100 observations read from the data set WORK.SENIC.
NOTE: PROCEDURE FREQ used (Total process time):
real time 0.04 seconds
user cpu time 0.05 seconds
system cpu time 0.00 seconds
memory 1291.75k
OS Memory 23980.00k
Timestamp 12/21/2023 06:56:44 PM
Step Count 150 Switch Count 4
Page Faults 0
Page Reclaims 204
Page Swaps 0
Voluntary Context Switches 24
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 544
139 proc freq;
140 title2 'Frequency distributions of categorical variables';
141 tables region mdschl agecat nc ne s w;
142
NOTE: There were 100 observations read from the data set WORK.SENIC.
NOTE: PROCEDURE FREQ used (Total process time):
real time 0.04 seconds
user cpu time 0.04 seconds
system cpu time 0.00 seconds
memory 1201.06k
OS Memory 23720.00k
Timestamp 12/21/2023 06:56:44 PM
Step Count 151 Switch Count 3
Page Faults 0
Page Reclaims 177
Page Swaps 0
Voluntary Context Switches 21
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 280
143 proc sgplot; /* Bar charts for categorical variables */
144 title2 'Bar Chart of Region';
145 vbar region;
146 proc sgplot;
NOTE: PROCEDURE SGPLOT used (Total process time):
real time 0.10 seconds
user cpu time 0.04 seconds
system cpu time 0.01 seconds
memory 8073.84k
OS Memory 31276.00k
Timestamp 12/21/2023 06:56:45 PM
Step Count 152 Switch Count 3
Page Faults 0
Page Reclaims 2222
Page Swaps 0
Voluntary Context Switches 153
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 616
NOTE: There were 100 observations read from the data set WORK.SENIC.
147 title2 'Bar Chart of Medical School Affiliation';
148 vbar mdschl;
149
150 proc means;
NOTE: PROCEDURE SGPLOT used (Total process time):
real time 0.07 seconds
user cpu time 0.02 seconds
system cpu time 0.00 seconds
memory 2330.43k
OS Memory 31660.00k
Timestamp 12/21/2023 06:56:45 PM
Step Count 153 Switch Count 3
Page Faults 0
Page Reclaims 579
Page Swaps 0
Voluntary Context Switches 152
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 360
NOTE: There were 100 observations read from the data set WORK.SENIC.
151 title2 'Table of means and standard deviations';
152 var census nbeds nurses lngstay age xratio culratio infpercent quality;
153
NOTE: There were 100 observations read from the data set WORK.SENIC.
NOTE: PROCEDURE MEANS used (Total process time):
real time 0.03 seconds
user cpu time 0.03 seconds
system cpu time 0.01 seconds
memory 6423.12k
OS Memory 35512.00k
Timestamp 12/21/2023 06:56:45 PM
Step Count 154 Switch Count 2
Page Faults 0
Page Reclaims 1362
Page Swaps 0
Voluntary Context Switches 24
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 8
154 proc univariate normal plot;
155 title2 'A close look at infection percent';
156 var infpercent; /* Could be a list of variables */
157
158 /* Boxplots for different values of a grouping variable */
159 /* Need to sort by the grouping variable first */
NOTE: PROCEDURE UNIVARIATE used (Total process time):
real time 0.14 seconds
user cpu time 0.08 seconds
system cpu time 0.00 seconds
memory 3929.43k
OS Memory 32856.00k
Timestamp 12/21/2023 06:56:45 PM
Step Count 155 Switch Count 1
Page Faults 0
Page Reclaims 520
Page Swaps 0
Voluntary Context Switches 224
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 440
160 proc sort;
161 by region;
NOTE: There were 100 observations read from the data set WORK.SENIC.
NOTE: The data set WORK.SENIC has 100 observations and 18 variables.
NOTE: PROCEDURE SORT used (Total process time):
real time 0.00 seconds
user cpu time 0.00 seconds
system cpu time 0.00 seconds
memory 933.78k
OS Memory 31912.00k
Timestamp 12/21/2023 06:56:45 PM
Step Count 156 Switch Count 2
Page Faults 0
Page Reclaims 122
Page Swaps 0
Voluntary Context Switches 12
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 264
162 proc boxplot;
163 title2 'Infection Risk by Region';
164 plot infpercent*region;
165
166 run;
NOTE: Since an input data set was not specified, data set SENIC is assumed as a DATA= data set.
NOTE: Processing beginning for PLOT statement number 1.
NOTE: There were 100 observations read from the data set WORK.SENIC.
NOTE: PROCEDURE BOXPLOT used (Total process time):
real time 0.13 seconds
user cpu time 0.06 seconds
system cpu time 0.01 seconds
memory 9437.37k
OS Memory 39184.00k
Timestamp 12/21/2023 06:56:45 PM
Step Count 157 Switch Count 1
Page Faults 0
Page Reclaims 2002
Page Swaps 0
Voluntary Context Switches 520
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 368
167
168
169
170
171
172
173
174
175
176 OPTIONS NONOTES NOSTIMER NOSOURCE NOSYNTAXCHECK;
186