1 OPTIONS NONOTES NOSTIMER NOSOURCE NOSYNTAXCHECK;
NOTE: ODS statements in the SAS Studio environment may disable some output features.
62
63 /* mathlogreg1.sas */
64 %include '/folders/myfolders/441s18/Lecture/mathread2.sas';
NOTE: Format YNFMT is already on the library WORK.FORMATS.
NOTE: Format YNFMT has been output.
NOTE: Format CRSFMT is already on the library WORK.FORMATS.
NOTE: Format CRSFMT has been output.
NOTE: Format NFMT is already on the library WORK.FORMATS.
NOTE: Format NFMT has been output.
NOTE: PROCEDURE FORMAT used (Total process time):
real time 0.00 seconds
cpu time 0.00 seconds
191
NOTE: The infile '/folders/myfolders/441s18/Lecture/exploremath.data.txt' is:
Filename=/folders/myfolders/441s18/Lecture/exploremath.data.txt,
Owner Name=root,Group Name=vboxsf,
Access Permission=-rwxrwx---,
Last Modified=18Jan2016:17:34:49,
File Size (bytes)=44583
NOTE: 579 records were read from the infile '/folders/myfolders/441s18/Lecture/exploremath.data.txt'.
The minimum record length was 75.
The maximum record length was 75.
NOTE: Missing values were generated as a result of performing an operation on missing values.
Each place is given by: (Number of times) at (Line):(Column).
99 at 101:24 99 at 102:18 142 at 172:15 2 at 172:22
NOTE: The data set WORK.MATHEX has 579 observations and 27 variables.
NOTE: DATA statement used (Total process time):
real time 0.01 seconds
cpu time 0.02 seconds
192 proc freq;
193 title2 'Course by passed with proc freq';
194 tables course * passed / nocol nopercent chisq;
195
NOTE: There were 579 observations read from the data set WORK.MATHEX.
NOTE: PROCEDURE FREQ used (Total process time):
real time 0.07 seconds
cpu time 0.06 seconds
196 proc logistic;
197 title2 'Course by passed with dummy vars: Compare LR Chisq = 34.4171';
198 model passed (event='Yes') = c1 c3; /* Omit c2 so Mainstream is reference category */
199 /* Wald chi-squared tests */
200 course: test c1=c3=0;
201 Course1_vs_2: test c1=0;
202 Course1_vs_3: test c1=c3;
203 Course2_vs_3: test c3=0;
204
205 /*
206 Question: The estimated odds of passing the course are ___ times as great for a
207 student in the elite course, compared to a student in the mainstream course.
208
209 Question: With 95% confidence, the chances of a student passing the catch-up
210 course are between ___% and ___% as great as the chances of passing the
211 mainstream course.
212
213 Note the deliberately vague but useful word "chances."
214
215 A few details about the output:
216
217 The higher the minus 2 Log Likelihood, the lower the (estimated) maximum
218 probability of observing these responses. It is a measure of lack of
219 model fit. The Akaike information criterion and Schwarz's Bayesian
220 criterion both impose a further penalty for number of explanatory
221 variables. Small is good.
222
223 "Association of Predicted Probabilities and Observed Responses":
224 * Every case has Y=0 or Y=1.
225 * Every case has a p-hat.
226 * Pick a case with Y=0, and another case with Y=1. That's a pair.
227 * If the case with Y=0 has a lower p-hat than the case with Y=1,
228 the pair is concordant.
229 */
230
NOTE: PROC LOGISTIC is modeling the probability that passed='Yes'.
NOTE: Convergence criterion (GCONV=1E-8) satisfied.
NOTE: There were 579 observations read from the data set WORK.MATHEX.
NOTE: PROCEDURE LOGISTIC used (Total process time):
real time 0.10 seconds
cpu time 0.09 seconds
231 proc iml;
NOTE: IML Ready
232 title2 'Estimate prob. of passing for for course=3: Compare 31/39 = 0.7949';
233 b0 = 0.4077;
233 ! b1 = -1.4838;
233 ! b2 = 0.9468;
234 c1 = 0;
234 ! c3=1;
235 lcombo = b0 + b1*c1 + b2*c3;
236 probpass = exp(lcombo) / (1+exp(lcombo));
237 print "Estimated probability of passing course 3 (Elite) is " probpass;
238
239
NOTE: Exiting IML.
NOTE: PROCEDURE IML used (Total process time):
real time 0.01 seconds
cpu time 0.01 seconds
240 proc logistic;
241 title2 'Use the class and contrast statements';
242 class course / param=ref; /* This param option makes the ALPHABETICALLY
243 last category (Mainstream) the reference
244 category. Default is effect coding. */
245 model passed (event='Yes') = course;
246 contrast 'Catch-up vs Mainstream' course 1 0;
247 contrast 'Elite vs Mainstream' course 0 1;
248 contrast 'Catch-up vs Elite' course 1 -1;
249
250 /* Contrast is a little tricky in proc logistic compared to proc glm.
251 It lets you specify a set of linear combinations of regression
252 coefficients to test against zero. It is essential to know exactly
253 what the dummy variable coding scheme is. This can still be more
254 convenient than defining your own dummy variables in the data step. */
255
NOTE: PROC LOGISTIC is modeling the probability that passed='Yes'.
NOTE: Convergence criterion (GCONV=1E-8) satisfied.
NOTE: There were 579 observations read from the data set WORK.MATHEX.
NOTE: PROCEDURE LOGISTIC used (Total process time):
real time 0.11 seconds
cpu time 0.10 seconds
256 proc logistic;
257 title2 'Course controlling for score on diagnostic test';
258 class course / param=ref;
259 model passed (event='Yes') = course totscore;
260 contrast 'Course controlling for totscore' course 1 0,
261 course 0 1;
262 run;
NOTE: PROC LOGISTIC is modeling the probability that passed='Yes'.
NOTE: Convergence criterion (GCONV=1E-8) satisfied.
NOTE: There were 579 observations read from the data set WORK.MATHEX.
NOTE: PROCEDURE LOGISTIC used (Total process time):
real time 0.11 seconds
cpu time 0.10 seconds
263
264 /* Estimate a probability of passing without typing in the
265 * estimated regression coefficients: Use the Output Delivery
266 * System (ODS). All the tables in a SAS results file have names.
267 * You can find out what they are with a web search on
268 * "proc logistic ods table names" , which will take you to the
269 * manual. It is easier to do a preliminary run with ods trace on,
270 * which writes the table names on the log file as they are produced.
271 */
272
273 ods trace on;
274 proc logistic data=mathex;
275 title2 'What are the ods table names?';
276 model passed (event='Yes') = c1 c3 totscore;
277 run;
Output Added:
-------------
Name: ModelInfo
Label: Model Information
Template: Stat.Logistic.ModelInfo
Path: Logistic.ModelInfo
-------------
Output Added:
-------------
Name: NObs
Label: Observations Summary
Template: Stat.Logistic.NObs
Path: Logistic.NObs
-------------
Output Added:
-------------
Name: ResponseProfile
Label: Response Profile
Template: Stat.Logistic.ResponseProfile
Path: Logistic.ResponseProfile
-------------
NOTE: PROC LOGISTIC is modeling the probability that passed='Yes'.
Output Added:
-------------
Name: ConvergenceStatus
Label: Convergence Status
Template: Stat.Logistic.MConvergenceStatus
Path: Logistic.ConvergenceStatus
-------------
NOTE: Convergence criterion (GCONV=1E-8) satisfied.
Output Added:
-------------
Name: FitStatistics
Label: Fit Statistics
Template: Stat.Logistic.FitStatistics
Path: Logistic.FitStatistics
-------------
Output Added:
-------------
Name: GlobalTests
Label: Global Tests
Template: Stat.Logistic.GlobalTests
Path: Logistic.GlobalTests
-------------
Output Added:
-------------
Name: ParameterEstimates
Label: Parameter Estimates
Template: Stat.Logistic.ParameterEstimates
Path: Logistic.ParameterEstimates
-------------
Output Added:
-------------
Name: OddsRatios
Label: Odds Ratios
Template: Stat.Logistic.OddsRatios
Path: Logistic.OddsRatios
-------------
Output Added:
-------------
Name: Association
Label: Association Statistics
Template: Stat.Logistic.Association
Path: Logistic.Association
-------------
NOTE: There were 579 observations read from the data set WORK.MATHEX.
NOTE: PROCEDURE LOGISTIC used (Total process time):
real time 0.09 seconds
cpu time 0.08 seconds
277 ! /* A run statement is needed here so the proc executes before ods trace off */
278 ods trace off;
279
280 ods output ParameterEstimates = estimout;
281 /* The ParameterEstimates table will be written to a SAS data
282 set called estimout. */
283 proc logistic data=mathex;
284 title2 'Save parameter estimates using ods';
285 model passed (event='Yes') = c1 c3 totscore;
286
NOTE: PROC LOGISTIC is modeling the probability that passed='Yes'.
NOTE: Convergence criterion (GCONV=1E-8) satisfied.
NOTE: The data set WORK.ESTIMOUT has 4 observations and 7 variables.
NOTE: There were 579 observations read from the data set WORK.MATHEX.
NOTE: PROCEDURE LOGISTIC used (Total process time):
real time 0.09 seconds
cpu time 0.09 seconds
287 proc print data=estimout;
288
NOTE: There were 4 observations read from the data set WORK.ESTIMOUT.
NOTE: PROCEDURE PRINT used (Total process time):
real time 0.02 seconds
cpu time 0.02 seconds
289 proc iml;
NOTE: IML Ready
290 title2 'Estimated Probabilty of Passing';
291 use estimout;
292 read all var {Estimate} into b;
293 print "Estimated regression coefficients";
294 print b;
295 /* Student in the catch-up class who got 10 right out of 20 */
296 x1 = {1, 1, 0, 10};
296 ! /* Rows are separated by commas */
297 pihat1 = exp(x1`*b)/(1+exp(x1`*b));
298 print "Student in the catch-up class who got 10 right out of 20" pihat1;
299 /* Student in the elite class who got all 20 right */
300 x2 = {1, 0, 1, 20};
300 ! /* Rows are separated by commas */
301 Pihat2 = exp(x2`*b)/(1+exp(x2`*b));
302 print "Student in the elite class who got 20 right out of 20" pihat2;
303
304
305
306 /********************** Output not shown ****************************/
307
NOTE: Exiting IML.
NOTE: PROCEDURE IML used (Total process time):
real time 0.02 seconds
cpu time 0.02 seconds
308 proc logistic data=mathex noprint;
309 title2 'Course controlling for score on diagnostic test';
310 class course / param=ref;
311 model passed (event='Yes') = course totscore;
312 contrast 'Course controlling for totscore' course 1 0,
313 course 0 1 / e;
314 /* The e option gives the "effect" matrix C in H0: C beta = 0 */
315
NOTE: PROC LOGISTIC is modeling the probability that passed='Yes'.
NOTE: Convergence criterion (GCONV=1E-8) satisfied.
NOTE: There were 579 observations read from the data set WORK.MATHEX.
NOTE: PROCEDURE LOGISTIC used (Total process time):
real time 0.00 seconds
cpu time 0.01 seconds
316 proc logistic data=mathex noprint;
317 title2 'Course controlling for score on diagnostic test';
318 class course / param=ref;
319 model passed (event='Yes') = course totscore;
320 contrast 'Course controlling for totscore' course -19.7 0,
321 course 0 9;
322
323
324
325 run;
NOTE: PROC LOGISTIC is modeling the probability that passed='Yes'.
NOTE: Convergence criterion (GCONV=1E-8) satisfied.
NOTE: There were 579 observations read from the data set WORK.MATHEX.
NOTE: PROCEDURE LOGISTIC used (Total process time):
real time 0.00 seconds
cpu time 0.01 seconds
326
327
328 /********************************************************************/
329
330
331
332
333 OPTIONS NONOTES NOSTIMER NOSOURCE NOSYNTAXCHECK;
346