1 OPTIONS NONOTES NOSTIMER NOSOURCE NOSYNTAXCHECK;
70
71 /********************* cars2.sas ***************************/
72 title 'Regression on Metric Cars Data';
73
74 /* Read data directly from Excel spreadsheet */
75 proc import datafile="/home/brunner0/441s20/mcars4.xlsx"
76 out=cars dbms=xlsx replace;
77 getnames=yes;
78 /* Input data file is mcars4.xlsx
79 Output data set is called cars
80 dbms=xlsx The input file is an Excel spreadsheet.
81 Necessary to read an Excel spreadsheet directly under unix/linux
82 Works in PC environment too except for Excel 4.0 spreadsheets
83 If there are multiple sheets, add a statement like sheet="sheet1"; to name the one to read.
84 replace If the data set cars already exists, replace it.
85 getnames=yes Use column names as variable names. */
86
NOTE: One or more variables were converted because the data type is not supported by the V9 engine. For more details, run with
options MSGLEVEL=I.
NOTE: The import data set has 100 observations and 4 variables.
NOTE: WORK.CARS data set was successfully created.
NOTE: PROCEDURE IMPORT used (Total process time):
real time 0.01 seconds
user cpu time 0.00 seconds
system cpu time 0.01 seconds
memory 2791.81k
OS Memory 29608.00k
Timestamp 01/20/2020 02:01:43 AM
Step Count 24 Switch Count 2
Page Faults 0
Page Reclaims 842
Page Swaps 0
Voluntary Context Switches 15
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 264
87 proc print;
88 title2 'Look at input data set';
89
NOTE: There were 100 observations read from the data set WORK.CARS.
NOTE: PROCEDURE PRINT used (Total process time):
real time 0.13 seconds
user cpu time 0.12 seconds
system cpu time 0.00 seconds
memory 2732.09k
OS Memory 29864.00k
Timestamp 01/20/2020 02:01:43 AM
Step Count 25 Switch Count 1
Page Faults 0
Page Reclaims 865
Page Swaps 0
Voluntary Context Switches 6
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 48
90 data auto;
91 set cars;
92 mpg = 100/lper100k * 0.6214/0.2642;
93 Country = Cntry; /* I just like the spelling more */
94 label Country = 'Location of Head Office'
95 lper100k = 'Litres per 100 kilometers'
96 mpg = 'Miles per Gallon'
97 weight = 'Weight in kg'
98 length = 'Length in meters';
99 /* Indicator dummy vars: Ref category is Japanese */
100 if country = 'US' then c1=1; else c1=0;
101 if country = 'Europ' then c2=1; else c2=0;
102 label c1 = 'US = 1'
103 c2 = 'Europe = 1';
104 /* Interaction Terms */
105 cw1 = c1*weight; cw2 = c2*weight;
106 cL1 = c1*length; cL2 = c2*length;
107 /* This way of creating dummy variables is safe only because
108 Country is never missing. If it could be missing, better is
109 if country = ' ' then c1 = .;
110 else if country = 'US' then c1=1;
111 else c1=0;
112 if country = ' ' then c2 = .;
113 else if country = 'Europ' then c2=1;
114 else c2=0;
115 Note that a blank space is the missing value code for character variables,
116 while a period is the missing value code for numeric variables. */
117
NOTE: There were 100 observations read from the data set WORK.CARS.
NOTE: The data set WORK.AUTO has 100 observations and 12 variables.
NOTE: DATA statement used (Total process time):
real time 0.00 seconds
user cpu time 0.01 seconds
system cpu time 0.00 seconds
memory 976.28k
OS Memory 30380.00k
Timestamp 01/20/2020 02:01:43 AM
Step Count 26 Switch Count 2
Page Faults 0
Page Reclaims 158
Page Swaps 0
Voluntary Context Switches 17
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 264
118 proc freq;
119 title2 'Check dummy variables';
120 tables (c1 c2)*country / norow nocol nopercent;
121
122 /* First an analysis with country only. */
123
124 /* Questions for every significance test:
125 * What is E(y|x) for the model SAS is using?
126 * Give the null hypothesis in symbols.
127 * Do you reject H0 at alpha = 0.05? Answer Yes or No.
128 * In plain, non-statistical language, what do you conclude? */
129
130
NOTE: There were 100 observations read from the data set WORK.AUTO.
NOTE: PROCEDURE FREQ used (Total process time):
real time 0.04 seconds
user cpu time 0.04 seconds
system cpu time 0.00 seconds
memory 1771.15k
OS Memory 31152.00k
Timestamp 01/20/2020 02:01:43 AM
Step Count 27 Switch Count 5
Page Faults 0
Page Reclaims 538
Page Swaps 0
Voluntary Context Switches 27
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 528
131 proc means;
132 title2 'Litres per 100 k Broken Down by Country';
133 class Country;
134 var lper100k;
135
NOTE: There were 100 observations read from the data set WORK.AUTO.
NOTE: PROCEDURE MEANS used (Total process time):
real time 0.02 seconds
user cpu time 0.02 seconds
system cpu time 0.01 seconds
memory 9036.65k
OS Memory 40124.00k
Timestamp 01/20/2020 02:01:43 AM
Step Count 28 Switch Count 2
Page Faults 0
Page Reclaims 2400
Page Swaps 0
Voluntary Context Switches 35
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 24
136 proc reg plots = none; /* Suppress diagnostic plots for now*/
137 title2 'Regression with Just Country';
138 model lper100k = c1 c2;
139 USvsEURO: test c1=c2;
140
NOTE: PROCEDURE REG used (Total process time):
real time 0.06 seconds
user cpu time 0.06 seconds
system cpu time 0.01 seconds
memory 2579.96k
OS Memory 34752.00k
Timestamp 01/20/2020 02:01:43 AM
Step Count 29 Switch Count 2
Page Faults 0
Page Reclaims 847
Page Swaps 0
Voluntary Context Switches 19
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 64
141 proc glm;
142 title2 'Compare Oneway with proc glm';
143 class country;
144 model lper100k = country;
145
NOTE: PROCEDURE GLM used (Total process time):
real time 2.56 seconds
user cpu time 0.12 seconds
system cpu time 0.02 seconds
memory 14954.23k
OS Memory 45240.00k
Timestamp 01/20/2020 02:01:46 AM
Step Count 30 Switch Count 3
Page Faults 0
Page Reclaims 4304
Page Swaps 0
Voluntary Context Switches 579
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 912
146 proc reg plots = none data = auto;
147 title2 'Country, Weight and Length';
148 model lper100k = c1 c2 weight length;
149 country: test c1 = c2 = 0; /* Country controlling for wgt, length */
150 USvsEURO: test c1=c2; /* US vs. Europe controlling for wgt, length */
151 wgt_len: test weight=length=0; /* wgt, length controlling for Country */
152
153 /* Proportions of remaining variation, using a = sF/(n-p+sF) */
154
NOTE: PROCEDURE REG used (Total process time):
real time 0.08 seconds
user cpu time 0.08 seconds
system cpu time 0.00 seconds
memory 2463.31k
OS Memory 46528.00k
Timestamp 01/20/2020 02:01:46 AM
Step Count 31 Switch Count 2
Page Faults 0
Page Reclaims 354
Page Swaps 0
Voluntary Context Switches 20
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 88
155 proc iml;
NOTE: IML Ready
156 title2 'Proportion of remaining variation';
157 print "Country controlling for Weight and Length";
158 n = 100;
158 ! p = 5;
158 ! s = 2;
159 F = 6.90;
159 ! a = s*F/(n-p + s*F);
160 print a;
161
162 print "Weight and Length controlling for Country";
163 F = 115.16;
163 ! a = s*F/(n-p + s*F);
164 print a;
165
NOTE: Exiting IML.
NOTE: PROCEDURE IML used (Total process time):
real time 0.01 seconds
user cpu time 0.02 seconds
system cpu time 0.00 seconds
memory 678.21k
OS Memory 44708.00k
Timestamp 01/20/2020 02:01:46 AM
Step Count 32 Switch Count 1
Page Faults 0
Page Reclaims 266
Page Swaps 0
Voluntary Context Switches 11
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 0
166 proc glm data=auto plots=none;
167 title2 'Country, weight and length with proc glm';
168 class country;
169 model lper100k = weight length country;
170 lsmeans country / pdiff tdiff adjust = bon;
171
NOTE: PROCEDURE GLM used (Total process time):
real time 0.08 seconds
user cpu time 0.09 seconds
system cpu time 0.00 seconds
memory 2293.25k
OS Memory 46520.00k
Timestamp 01/20/2020 02:01:46 AM
Step Count 33 Switch Count 3
Page Faults 0
Page Reclaims 340
Page Swaps 0
Voluntary Context Switches 25
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 312
172 proc reg plots = none;
173 title2 'Country, Weight and Length with Interactions';
174 model lper100k = c1 c2 weight length cw1 cw2 cL1 cL2;
175 country: test c1 = c2 = 0; /* Is it really still country? */
176 Interactions: test cw1 = cw2 = cL1 = cL2 = 0;
177
178 /* Centering an explanatory variable by subtracting off the mean affects the
179 intercept, but not the relationships among variables. I want to create a new
180 data set with weight and length centered, and to avoid confusion
181 I will make sure the variables are nicely labelled. */
182
NOTE: PROCEDURE REG used (Total process time):
real time 0.07 seconds
user cpu time 0.08 seconds
system cpu time 0.00 seconds
memory 2396.28k
OS Memory 47040.00k
Timestamp 01/20/2020 02:01:46 AM
Step Count 34 Switch Count 2
Page Faults 0
Page Reclaims 282
Page Swaps 0
Voluntary Context Switches 18
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 80
183 proc standard mean=0 data=auto out=cntrd;
184 var weight length;
185
186 /* In the new data set "cntrd," weight and length are adjusted to have mean
187 zero (the sample means have been subtracted from each observation). If I had
188 said mean=0 std=1, they would have been converted to z-scores. All the other
189 variables (including the product terms) are as they were before, and the
190 labels are the same as before too. */
191
NOTE: The data set WORK.CNTRD has 100 observations and 12 variables.
NOTE: PROCEDURE STANDARD used (Total process time):
real time 0.00 seconds
user cpu time 0.00 seconds
system cpu time 0.00 seconds
memory 841.75k
OS Memory 45740.00k
Timestamp 01/20/2020 02:01:46 AM
Step Count 35 Switch Count 2
Page Faults 0
Page Reclaims 118
Page Swaps 0
Voluntary Context Switches 14
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 264
192 data centered;
193 set cntrd; /* Now centered has everything in cntrd */
194 /* Re-create Interaction Terms and re-label explanatory vars*/
195 cw1 = c1*weight; cw2 = c2*weight;
196 cL1 = c1*length; cL2 = c2*length;
197 label weight = 'Weight in kg (Centered)'
198 length = 'Length in meters (Centered)';
199
200 /* By default, SAS procedures use the most recently created data set,
201 but specify it anyway. */
202
NOTE: There were 100 observations read from the data set WORK.CNTRD.
NOTE: The data set WORK.CENTERED has 100 observations and 12 variables.
NOTE: DATA statement used (Total process time):
real time 0.00 seconds
user cpu time 0.00 seconds
system cpu time 0.01 seconds
memory 958.03k
OS Memory 45996.00k
Timestamp 01/20/2020 02:01:46 AM
Step Count 36 Switch Count 2
Page Faults 0
Page Reclaims 139
Page Swaps 0
Voluntary Context Switches 14
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 264
203 proc reg plots=none simple data=centered;
204 title2 'Weight and length are now centered: Mean=0';
205 model lper100k = c1 c2 weight length cw1 cw2 cL1 cL2;
206 country: test c1 = c2 = 0; /* Does this make better sense? */
207 Interactions: test cw1 = cw2 = cL1 = cL2 = 0;
208
209 quit;
NOTE: PROCEDURE REG used (Total process time):
real time 0.10 seconds
user cpu time 0.11 seconds
system cpu time 0.00 seconds
memory 2398.71k
OS Memory 47296.00k
Timestamp 01/20/2020 02:01:46 AM
Step Count 37 Switch Count 2
Page Faults 0
Page Reclaims 263
Page Swaps 0
Voluntary Context Switches 18
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 104
210
211
212
213
214
215
216
217 OPTIONS NONOTES NOSTIMER NOSOURCE NOSYNTAXCHECK;
228