1 OPTIONS NONOTES NOSTIMER NOSOURCE NOSYNTAXCHECK;
70
71 /********************* cars2.sas ***************************/
72 title 'Regression on Metric Cars Data';
73
74 /* Read data directly from Excel spreadsheet */
75 proc import datafile="/home/brunner0/441s18/mcars4.xlsx"
76 out=cars dbms=xlsx replace;
77 getnames=yes;
78 /* Input data file is mcars4.xlsx
79 Output data set is called cars
80 dbms=xlsx The input file is an Excel spreadsheet.
81 Necessary to read an Excel spreadsheet directly under unix/linux
82 Works in PC environment too except for Excel 4.0 spreadsheets
83 If there are multiple sheets, use sheet="sheet1" or something.
84 replace If the data set cars already exists, replace it.
85 getnames=yes Use column names as variable names. */
86
NOTE: One or more variables were converted because the data type is not supported by the V9 engine. For more details, run with
options MSGLEVEL=I.
NOTE: The import data set has 100 observations and 4 variables.
NOTE: WORK.CARS data set was successfully created.
NOTE: PROCEDURE IMPORT used (Total process time):
real time 0.01 seconds
user cpu time 0.01 seconds
system cpu time 0.00 seconds
memory 2618.75k
OS Memory 28840.00k
Timestamp 01/06/2018 03:04:54 AM
Step Count 18 Switch Count 1
Page Faults 0
Page Reclaims 934
Page Swaps 0
Voluntary Context Switches 12
Involuntary Context Switches 0
Block Input Operations 24
Block Output Operations 264
87 proc print; /* No data= option, so this lists the most recently created data set: the imported cars */
88
NOTE: There were 100 observations read from the data set WORK.CARS.
NOTE: PROCEDURE PRINT used (Total process time):
real time 0.12 seconds
user cpu time 0.13 seconds
system cpu time 0.00 seconds
memory 2548.46k
OS Memory 29096.00k
Timestamp 01/06/2018 03:04:54 AM
Step Count 19 Switch Count 0
Page Faults 0
Page Reclaims 902
Page Swaps 0
Voluntary Context Switches 0
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 48
89 data auto;
90 set cars;
91 mpg = 100/lper100k * 0.6214/0.2642; /* km to miles (x 0.6214), litres to US gallons (x 0.2642) */
92 Country = Cntry; /* I just like the spelling more */
93 label Country = 'Location of Head Office'
94 lper100k = 'Litres per 100 kilometers'
95 mpg = 'Miles per Gallon'
96 weight = 'Weight in kg'
97 length = 'Length in meters';
98 /* Indicator dummy vars: Ref category is Japanese */
99 if country = 'US' then c1=1; else c1=0; /* c1 = 1 for US, 0 otherwise */
100 if country = 'Europ' then c2=1; else c2=0; /* c2 = 1 for Europ, 0 otherwise */
101 /* Interaction Terms */
102 cw1 = c1*weight; cw2 = c2*weight; /* country-by-weight products */
103 cL1 = c1*length; cL2 = c2*length; /* country-by-length products */
104 /* This way of creating dummy variables is safe only because
105 Country is never missing. If it could be missing, better is
106 if country = ' ' then c1 = .;
107 else if country = 'US' then c1=1;
108 else c1=0;
109 if country = ' ' then c2 = .;
110 else if country = 'Europ' then c2=1;
111 else c2=0;
112 Note that a blank space is the missing value code for character variables,
113 while a period is missing for numeric variables. */
114
NOTE: There were 100 observations read from the data set WORK.CARS.
NOTE: The data set WORK.AUTO has 100 observations and 12 variables.
NOTE: DATA statement used (Total process time):
real time 0.00 seconds
user cpu time 0.01 seconds
system cpu time 0.00 seconds
memory 809.96k
OS Memory 29612.00k
Timestamp 01/06/2018 03:04:54 AM
Step Count 20 Switch Count 1
Page Faults 0
Page Reclaims 183
Page Swaps 0
Voluntary Context Switches 7
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 264
115 proc freq; /* No data= option: uses the most recently created data set (auto) */
116 title2 'Check dummy variables';
117 tables (c1 c2)*country / norow nocol nopercent; /* two tables, c1 by country and c2 by country, cell counts only */
118
119 /* First an analysis with country only. */
120
121 /* Questions for every significance test:
122 * What is E(y|x) for the model SAS is using?
123 * Give the null hypothesis in symbols.
124 * Do you reject H0 at alpha = 0.05? Answer Yes or No.
125 * In plain, non-statistical language, what do you conclude? */
126
127
NOTE: There were 100 observations read from the data set WORK.AUTO.
NOTE: PROCEDURE FREQ used (Total process time):
real time 0.04 seconds
user cpu time 0.04 seconds
system cpu time 0.00 seconds
memory 1772.59k
OS Memory 30384.00k
Timestamp 01/06/2018 03:04:54 AM
Step Count 21 Switch Count 3
Page Faults 0
Page Reclaims 557
Page Swaps 0
Voluntary Context Switches 15
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 528
128 proc means;
129 title2 'Litres per 100 k Broken Down by Country';
130 class Country; /* separate summary statistics for each value of Country */
131 var lper100k; /* the only variable summarized */
132
NOTE: There were 100 observations read from the data set WORK.AUTO.
NOTE: PROCEDURE MEANS used (Total process time):
real time 0.02 seconds
user cpu time 0.03 seconds
system cpu time 0.01 seconds
memory 9047.03k
OS Memory 39356.00k
Timestamp 01/06/2018 03:04:54 AM
Step Count 22 Switch Count 2
Page Faults 0
Page Reclaims 2364
Page Swaps 0
Voluntary Context Switches 23
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 24
133 proc reg plots = none; /* Suppress diagnostic plots for now*/
134 title2 'Regression with Just Country';
135 model lper100k = c1 c2; /* c1 = US indicator, c2 = Europ indicator; all other cars are the reference category */
136 USvsEURO: test c1=c2; /* H0: the coefficients of c1 and c2 are equal */
137
NOTE: PROCEDURE REG used (Total process time):
real time 0.06 seconds
user cpu time 0.06 seconds
system cpu time 0.00 seconds
memory 2629.78k
OS Memory 33984.00k
Timestamp 01/06/2018 03:04:54 AM
Step Count 23 Switch Count 2
Page Faults 0
Page Reclaims 870
Page Swaps 0
Voluntary Context Switches 17
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 56
138 proc reg plots = none;
139 title2 'Country, Weight and Length';
140 model lper100k = c1 c2 weight length; /* intercept plus four explanatory variables: five regression coefficients */
141 country: test c1 = c2 = 0; /* Country controlling for wgt, length */
142 USvsEURO: test c1=c2; /* US vs. Europe controlling for wgt, length */
143 wgt_len: test weight=length=0; /* wgt, length controlling for Country */
144
145 /* Proportions of remaining variation, using a = sF/(n-p+sF) */
146
NOTE: PROCEDURE REG used (Total process time):
real time 0.08 seconds
user cpu time 0.08 seconds
system cpu time 0.00 seconds
memory 2371.21k
OS Memory 33984.00k
Timestamp 01/06/2018 03:04:54 AM
Step Count 24 Switch Count 2
Page Faults 0
Page Reclaims 264
Page Swaps 0
Voluntary Context Switches 12
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 96
147 proc iml; /* Converts F statistics to proportions of remaining variation, a = sF/(n-p+sF) */
NOTE: IML Ready
148 title2 'Proportion of remaining variation';
149 print "Country controlling for Weight and Length";
150 n = 100; /* sample size: 100 observations in the data set */
150 ! p = 5; /* coefficients in the full model: intercept, c1, c2, weight, length, so error df = n-p = 95 */
150 ! s = 2; /* number of coefficients set to zero by the test (c1 and c2) */
151 F = 6.90; /* NOTE(review): presumably copied by hand from the "country" test in the preceding PROC REG output -- confirm */
151 ! a = s*F/(n-p + s*F);
152 print a;
153
154 print "Weight and Length controlling for Country";
155 F = 115.16; /* n, p and s carry over: same full model, again two coefficients tested (weight and length) */
155 ! a = s*F/(n-p + s*F);
156 print a;
157
NOTE: Exiting IML.
NOTE: PROCEDURE IML used (Total process time):
real time 0.01 seconds
user cpu time 0.02 seconds
system cpu time 0.00 seconds
memory 418.56k
OS Memory 32164.00k
Timestamp 01/06/2018 03:04:54 AM
Step Count 25 Switch Count 1
Page Faults 0
Page Reclaims 196
Page Swaps 0
Voluntary Context Switches 8
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 0
158 proc reg plots = none;
159 title2 'Country, Weight and Length with Interactions';
160 model lper100k = c1 c2 weight length cw1 cw2 cL1 cL2; /* cw1 cw2 cL1 cL2 are the country-by-weight and country-by-length products from data step auto */
161 country: test c1 = c2 = 0; /* Is it really still country? */
162 Interactions: test cw1 = cw2 = cL1 = cL2 = 0; /* H0: all four product-term coefficients are zero */
163
164 /* Centering an explanatory variable by subtracting off the mean affects the
165 intercept, but not the relationships among variables. I want to create a new
166 data set with weight and length centered, and to avoid confusion
167 I will make sure the variables are nicely labelled. */
168
NOTE: PROCEDURE REG used (Total process time):
real time 0.07 seconds
user cpu time 0.08 seconds
system cpu time 0.00 seconds
memory 2526.53k
OS Memory 33984.00k
Timestamp 01/06/2018 03:04:54 AM
Step Count 26 Switch Count 2
Page Faults 0
Page Reclaims 262
Page Swaps 0
Voluntary Context Switches 14
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 72
169 proc standard mean=0 data=auto out=cntrd; /* reads auto and writes the result to the new data set cntrd */
170 var weight length; /* only these two variables are centered */
171
172 /* In the new data set "cntrd," weight and length are adjusted to have mean
173 zero (the sample means have been subtracted from each observation). If I had
174 said mean=0 std=1, they would have been converted to z-scores. All the other
175 variables (including the product terms) are as they were before, and the
176 labels are the same as before too. */
177
NOTE: The data set WORK.CNTRD has 100 observations and 12 variables.
NOTE: PROCEDURE STANDARD used (Total process time):
real time 0.00 seconds
user cpu time 0.00 seconds
system cpu time 0.00 seconds
memory 690.50k
OS Memory 32684.00k
Timestamp 01/06/2018 03:04:54 AM
Step Count 27 Switch Count 1
Page Faults 0
Page Reclaims 120
Page Swaps 0
Voluntary Context Switches 8
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 264
178 data centered;
179 set cntrd; /* Now centered has everything in cntrd */
180 /* Re-create Interaction Terms and re-label explanatory vars*/
181 cw1 = c1*weight; cw2 = c2*weight; /* overwrite the products inherited from cntrd, which used uncentered weight */
182 cL1 = c1*length; cL2 = c2*length; /* likewise for the length products */
183 label weight = 'Weight in kg (Centered)'
184 length = 'Length in meters (Centered)'; /* same units as the label in data step auto: centering subtracts the mean and does not rescale */
185
186 /* By default, SAS procedures use the most recently created data set,
187 but specify it anyway. */
188
NOTE: There were 100 observations read from the data set WORK.CNTRD.
NOTE: The data set WORK.CENTERED has 100 observations and 12 variables.
NOTE: DATA statement used (Total process time):
real time 0.00 seconds
user cpu time 0.01 seconds
system cpu time 0.00 seconds
memory 808.18k
OS Memory 32684.00k
Timestamp 01/06/2018 03:04:54 AM
Step Count 28 Switch Count 1
Page Faults 0
Page Reclaims 123
Page Swaps 0
Voluntary Context Switches 8
Involuntary Context Switches 0
Block Input Operations 0
Block Output Operations 264
189 proc reg plots=none simple data=centered; /* simple: also print descriptive statistics for the variables used */
190 title2 'Weight and length are now centered: Mean=0';
191 model lper100k = c1 c2 weight length cw1 cw2 cL1 cL2; /* same model as the uncentered interaction fit, now on data set centered */
192 country: test c1 = c2 = 0; /* Does this make better sense? */
193 Interactions: test cw1 = cw2 = cL1 = cL2 = 0;
194
195
196
197
198
199
200
201
202 OPTIONS NONOTES NOSTIMER NOSOURCE NOSYNTAXCHECK;
214