1 OPTIONS NONOTES NOSTIMER NOSOURCE NOSYNTAXCHECK;
72
73 /* multilogit.sas */
74 title2 'Multinomial Logit Model';
75 %include '/folders/myfolders/441s18/Lecture/mathread2.sas';
NOTE: Format YNFMT is already on the library WORK.FORMATS.
NOTE: Format YNFMT has been output.
NOTE: Format CRSFMT is already on the library WORK.FORMATS.
NOTE: Format CRSFMT has been output.
NOTE: Format NFMT is already on the library WORK.FORMATS.
NOTE: Format NFMT has been output.
NOTE: PROCEDURE FORMAT used (Total process time):
real time 0.00 seconds
cpu time 0.00 seconds
196 /* mathread2.sas creates missused (any of hsgpa hscalc precalc missing)
197 and outcome (Pass-Fail-Gone)*/
198
NOTE: The infile '/folders/myfolders/441s18/Lecture/exploremath.data.txt' is:
Filename=/folders/myfolders/441s18/Lecture/exploremath.data.txt,
Owner Name=root,Group Name=vboxsf,
Access Permission=-rwxrwx---,
Last Modified=January 18, 2016 17:34:49,
File Size (bytes)=44583
NOTE: 579 records were read from the infile '/folders/myfolders/441s18/Lecture/exploremath.data.txt'.
The minimum record length was 75.
The maximum record length was 75.
NOTE: Missing values were generated as a result of performing an operation on missing values.
Each place is given by: (Number of times) at (Line):(Column).
99 at 112:24 99 at 113:18 142 at 183:14 204 at 183:21
NOTE: The data set WORK.MATHEX has 579 observations and 36 variables.
NOTE: DATA statement used (Total process time):
real time 0.02 seconds
cpu time 0.01 seconds
199 proc freq data=mathex;
200 title3 'Outcome by passed just to check';
201 tables outcome*passed
202 / norow nocol nopercent missing;
203 run;
NOTE: There were 579 observations read from the data set WORK.MATHEX.
NOTE: PROCEDURE FREQ used (Total process time):
real time 0.10 seconds
cpu time 0.09 seconds
204
205 proc freq data=mathex;
206 title3 'Missused by Outcome with proc freq';
207 tables missused * outcome / nocol nopercent chisq;
208 run;
NOTE: There were 579 observations read from the data set WORK.MATHEX.
NOTE: PROCEDURE FREQ used (Total process time):
real time 0.08 seconds
cpu time 0.07 seconds
209
210 /* Multinomial Logit model for missused by outcome is
211
212 ln(pi1/pi3) = beta01 + beta11*missused   (Fail vs. Pass)
213 ln(pi2/pi3) = beta02 + beta12*missused   (Gone vs. Pass)
214 */
215
216
217 proc logistic data=mathex outest=ParmNames;
218 title3 'Misssused by Outcome with proc logistic';
219 model outcome (ref='Pass') = missused / link = glogit;
220 contrast 'Hsgpa, hscalc or precalc missing' missused 1;
221 run;
NOTE: PROC LOGISTIC is fitting the generalized logit model. The logits modeled contrast each response category against the
reference category (outcome='Pass').
NOTE: Convergence criterion (GCONV=1E-8) satisfied.
NOTE: There were 579 observations read from the data set WORK.MATHEX.
NOTE: The data set WORK.PARMNAMES has 1 observations and 10 variables.
NOTE: PROCEDURE LOGISTIC used (Total process time):
real time 0.18 seconds
cpu time 0.16 seconds
222
223 /* Find out the parameter names, written with outest. */
224
225 proc transpose data=ParmNames;
226 run;
NOTE: There were 1 observations read from the data set WORK.PARMNAMES.
NOTE: The data set WORK.DATA2 has 5 observations and 3 variables.
NOTE: PROCEDURE TRANSPOSE used (Total process time):
real time 0.01 seconds
cpu time 0.01 seconds
227 proc print;
228 run;
NOTE: There were 5 observations read from the data set WORK.DATA2.
NOTE: PROCEDURE PRINT used (Total process time):
real time 0.04 seconds
cpu time 0.04 seconds
229
230 proc logistic data = mathex;
231 title3 'Contrast versus test';
232 model outcome (ref='Pass') = missused / link = glogit;
233 contrast 'Missused method 1' missused 1;
234 MissusedMethod2: test missused_Fail = missused_Gone = 0;
235 run;
NOTE: PROC LOGISTIC is fitting the generalized logit model. The logits modeled contrast each response category against the
reference category (outcome='Pass').
NOTE: Convergence criterion (GCONV=1E-8) satisfied.
NOTE: There were 579 observations read from the data set WORK.MATHEX.
NOTE: PROCEDURE LOGISTIC used (Total process time):
real time 0.19 seconds
cpu time 0.18 seconds
236
237 proc iml;
NOTE: IML Ready
238 title3 'Estimate Probabilities using output from proc logistic';
239 b01 = -1.4123;
239 ! b11 = 0.6461;
239 ! /* Fail */
240 b02 = -1.0245;
240 ! b12 = 1.3670;
240 ! /* Gone */
241 missused = 0;
242 L1 = b01 + b11*missused;
243 L2 = b02 + b12*missused;
244 denom = 1 + exp(L1) + exp(L2);
245 Fail = exp(L1)/denom;
245 ! Gone = exp(L2)/denom;
245 ! Pass = 1/denom;
246 print "Not missing:" Fail Gone Pass;
247 missused = 1;
248 L1 = b01 + b11*missused;
249 L2 = b02 + b12*missused;
250 denom = 1 + exp(L1) + exp(L2);
251 Fail = exp(L1)/denom;
251 ! Gone = exp(L2)/denom;
251 ! Pass = 1/denom;
252 print "Yes Missing:" Fail Gone Pass;
253
NOTE: Exiting IML.
NOTE: PROCEDURE IML used (Total process time):
real time 0.08 seconds
cpu time 0.07 seconds
254 proc freq data = mathex;
255 title3 'Missused by outcome again for comparison';
256 tables missused * outcome / nocol nopercent;
257
258 /* Lots of exploration is not shown here. */
259
NOTE: There were 579 observations read from the data set WORK.MATHEX.
NOTE: PROCEDURE FREQ used (Total process time):
real time 0.09 seconds
cpu time 0.08 seconds
260 proc logistic data = mathex;
261 title3 'hsgpa hscalc precalc mtongue';
262 model outcome (ref='Pass') =
263 hsgpa hscalc precalc mtongue / link = glogit;
264 run;
NOTE: PROC LOGISTIC is fitting the generalized logit model. The logits modeled contrast each response category against the
reference category (outcome='Pass').
NOTE: Convergence criterion (GCONV=1E-8) satisfied.
NOTE: There were 579 observations read from the data set WORK.MATHEX.
NOTE: PROCEDURE LOGISTIC used (Total process time):
real time 0.20 seconds
cpu time 0.19 seconds
265
266 /* Allowing for academic background, students whose first language is English
267 are more likely to fail the course as opposed to passing, and less likely to
268 disappear as opposed to passing. If this is replicated, it will be very
269 interesting. Explore further.
270
271 We want to know whether failing is different from disappearing in terms
272 of their relationship to the explanatory variables. We are getting
273 advanced here. What is H0?
274
275 Recall the response categories are 1=Fail 2=Gone 3=Pass.
276
277 Model (using b instead of beta) is
278
279 ln(pi1/pi3) = b01 + b11 hsgpa + b21 hscalc + b31 precalc + b41 mtongue
280 ln(pi2/pi3) = b02 + b12 hsgpa + b22 hscalc + b32 precalc + b42 mtongue
281
282 The null hypothesis is b11=b12, b21=b22, b31=b32, b41=b42
283
284 Parameter names are easy to guess: variable_Category, e.g. hsgpa_Fail, hsgpa_Gone. */
285
286 proc logistic data = mathex;
287 title3 'Different coefficients for Gone and Fail?';
288 model outcome (ref='Pass') = hsgpa hscalc precalc mtongue / link = glogit;
289 DiffOverall: test hsgpa_Fail = hsgpa_Gone, hscalc_Fail = hscalc_Gone,
290 precalc_Fail = precalc_Gone, mtongue_Fail = mtongue_Gone;
291 Diff_hsgpa: test hsgpa_Fail = hsgpa_Gone;
292 Diff_hscalc: test hscalc_Fail = hscalc_Gone;
293 Diff_precalc: test precalc_Fail = precalc_Gone;
294 Diff_mtongue: test mtongue_Fail = mtongue_Gone;
295 run;
NOTE: PROC LOGISTIC is fitting the generalized logit model. The logits modeled contrast each response category against the
reference category (outcome='Pass').
NOTE: Convergence criterion (GCONV=1E-8) satisfied.
NOTE: There were 579 observations read from the data set WORK.MATHEX.
NOTE: PROCEDURE LOGISTIC used (Total process time):
real time 0.24 seconds
cpu time 0.24 seconds
296
297 /************************** Replication ***********************
298 For interpretation, we want to replicate 8 findings: Gone vs. Pass and Fail vs. Pass
299 for each of the 4 explanatory variables, testing each at the 0.05/8 = 0.00625 level.
300 ***************************************************************/
301
302 %include '/folders/myfolders/441s18/Lecture/readreplic.sas';
NOTE: Format YNFMT is already on the library WORK.FORMATS.
NOTE: Format YNFMT has been output.
NOTE: Format CRSFMT is already on the library WORK.FORMATS.
NOTE: Format CRSFMT has been output.
NOTE: Format NFMT is already on the library WORK.FORMATS.
NOTE: Format NFMT has been output.
NOTE: PROCEDURE FORMAT used (Total process time):
real time 0.00 seconds
cpu time 0.00 seconds
416 if (0<=mark<=49) then outcome = 'Fail';
417 else if (50<=mark<=100) then outcome = 'Pass';
418 else outcome = 'Gone';
419
NOTE: The infile '/folders/myfolders/441s18/Lecture/replicmath.data.txt' is:
Filename=/folders/myfolders/441s18/Lecture/replicmath.data.txt,
Owner Name=root,Group Name=vboxsf,
Access Permission=-rwxrwx---,
Last Modified=February 04, 2018 14:39:27,
File Size (bytes)=38214
NOTE: 579 records were read from the infile '/folders/myfolders/441s18/Lecture/replicmath.data.txt'.
The minimum record length was 64.
The maximum record length was 64.
NOTE: Missing values were generated as a result of performing an operation on missing values.
Each place is given by: (Number of times) at (Line):(Column).
81 at 339:24 81 at 340:18
NOTE: The data set WORK.MATHREP has 579 observations and 35 variables.
NOTE: DATA statement used (Total process time):
real time 0.02 seconds
cpu time 0.01 seconds
420 proc logistic data = mathrep;
421 title2 'Replicate hsgpa hscalc precalc calc mtongue 0.05/8 = .00625';
422 model outcome (ref='Pass') = hsgpa hscalc precalc mtongue / link = glogit;
423 Diff_mtongue: test mtongue_Fail = mtongue_Gone;
424
425
426
427 OPTIONS NONOTES NOSTIMER NOSOURCE NOSYNTAXCHECK;
440