1 OPTIONS NONOTES NOSTIMER NOSOURCE NOSYNTAXCHECK;
61
62 /************************ math0.3.sas *************************/
63 title 'Prediction of Performance in First-year Calculus'; /* First title line for the output of this program */
64 title2 'Read exploratory data, create and test new variables'; /* Second title line; the procedure steps below add their own title3 */
65
66 proc format; /* Define the value formats that the data step attaches to coded numeric variables */
67 value ynfmt 0 = 'No' 1 = 'Yes'; /* 0/1 indicator shown as No/Yes (applied to passed) */
NOTE: Format YNFMT is already on the library WORK.FORMATS.
NOTE: Format YNFMT has been output.
68 value crsfmt 1 = 'Catch-up' 2 = 'Mainstrm' 3 = 'Elite' 4 = 'No Resp'; /* Course codes; the data step recodes 4 to missing */
NOTE: Format CRSFMT is already on the library WORK.FORMATS.
NOTE: Format CRSFMT has been output.
69 value nfmt /* Nationality codes 1-6 (applied to nation1, nation2 and ethnic) */
70 1 = 'Asian'
71 2 = 'Eastern European'
72 3 = 'European not Eastern'
73 4 = 'Middle-Eastern and Pakistani'
74 5 = 'East Indian'
75 6 = 'Other and DK' ;
NOTE: Format NFMT is already on the library WORK.FORMATS.
NOTE: Format NFMT has been output.
76
NOTE: PROCEDURE FORMAT used (Total process time):
real time 0.00 seconds
cpu time 0.00 seconds
77 data mathex; /* Read the raw file, clean the recorded values and derive analysis variables; the log reports 579 observations and 32 variables */
78 infile '/folders/myfolders/441s18/Lecture/exploremath.data.txt';
79 input id course precalc calc gpa calculus english mark lang $ sex $
80 nation1 nation2 sample; /* List input: lang and sex are read as character, everything else as numeric */
81 /****** Fix problems located in first run ******/
82 if course = 4 then course = .; /* No response is missing */
83 /* Missing HS marks were zeros */
84 if 60 le gpa le 100 then hsgpa = gpa; /* Else missing is automatic */
85 if 0 < calculus < 101 then hscalc = calculus; /* Keep values strictly between 0 and 101; anything else leaves hscalc missing */
86 if 0 < english < 101 then hsengl = english; /* Same rule as hscalc */
87 /* Some missing university calculus marks were zero,
88 and 998=SDF and 999=WDR */
89 if mark=0 then grade=.;
90 else if mark > 100 then grade=.;
91 else grade=mark; /* grade is mark with zeros and values over 100 set to missing */
92 /* There were just a few French speakers */
93 if lang='French' then tongue='Other '; else tongue=lang; /* NOTE(review): tongue takes its length from the first literal assigned here; confirm that length is enough to hold every lang value without truncation */
94 label tongue = 'Mother Tongue (Eng or Other)'; /* The same label is given again in the label statement at the end of this step */
95
96 /****** Create and check more new variables ******/
97 totscore = precalc+calc; /* Total score on diagnostic test; missing when either part is missing (the log reports 99 such cases) */
98 if (50<=mark<=100) then passed=1; else passed=0; /* Uses mark, not grade: any mark outside 50-100, including a missing mark, gives passed=0 rather than missing */
99 /* Rater 1 knows Middle Eastern names -- otherwise believe Rater 2 */
100 if nation1=4 then ethnic=nation1; else ethnic=nation2;
101 if 0 le grade le 60 then gsplit='60orLess';
102 else if 60 lt grade le 100 then gsplit='Over60'; /* gsplit is left blank when grade is missing or outside 0-100 */
103 /* Got median=60 from proc univariate */
104 /* Dummy variables for ethnic background:
105 Only use 2 of these if the model has an intercept */
106 if ethnic=. then e1=.; /* Pattern for e1-e6: missing if ethnic is missing, 1 if ethnic equals that code, otherwise 0 */
107 else if ethnic=1 then e1=1;
108 else e1=0;
109 if ethnic=. then e2=.;
110 else if ethnic=2 then e2=1;
111 else e2=0;
112 if ethnic=. then e3=.;
113 else if ethnic=3 then e3=1;
114 else e3=0;
115 if ethnic=. then e4=.;
116 else if ethnic=4 then e4=1;
117 else e4=0;
118 if ethnic=. then e5=.;
119 else if ethnic=5 then e5=1;
120 else e5=0;
121 if ethnic=. then e6=.;
122 else if ethnic=6 then e6=1;
123 else e6=0;
124 label e1 = 'Asian'
125 e2 = 'East Eur.'
126 e3 = 'Other Eur.'
127 e4 = 'Mid. East & Pak.'
128 e5 = 'East Ind.'
129 e6 = 'Other/DK';
130 /* gender is a dummy variable for sex */
131 if sex = 'Female' then gender=1; else if sex = 'Male' then gender=0; /* Any other sex value leaves gender missing */
132 /* Course: Only use 2 of these if the model has an intercept */
133 if course=. then c1=.; else if course=1 then c1=1; else c1=0; /* Same missing/1/0 pattern as e1-e6, using course after code 4 was set to missing */
134 if course=. then c2=.; else if course=2 then c2=1; else c2=0;
135 if course=. then c3=.; else if course=3 then c3=1; else c3=0;
136 label c1 = 'Catch-up' c2 = 'Mainstream' c3 = 'Elite';
137
138
139
140
141 format course crsfmt.; /* Attach the value formats defined in the proc format step */
142 format nation1 nation2 ethnic nfmt.;
143 format passed ynfmt.;
144
145 label
146 precalc = 'Number precalculus correct'
147 calc = 'Number calculus correct'
148 totscore = 'Total # right on diagnostic test'
149 passed = 'Passed the course'
150 grade = 'Final mark (if any)'
151 hsgpa = 'High School GPA'
152 hscalc = 'HS Calculus'
153 hsengl = 'HS English'
154 lang = 'Mother Tongue'
155 nation1 = 'Nationality of name acc to rater1'
156 nation2 = 'Nationality of name acc to rater2'
157 tongue = 'Mother Tongue (Eng or Other)'
158 ethnic = 'Judged Nationality of name'
159 gsplit = 'Median split on final grade'
160 ;
161
NOTE: The infile '/folders/myfolders/441s18/Lecture/exploremath.data.txt' is:
Filename=/folders/myfolders/441s18/Lecture/exploremath.data.txt,
Owner Name=root,Group Name=vboxsf,
Access Permission=-rwxrwx---,
Last Modified=January 18, 2016 17:34:49 o'clock,
File Size (bytes)=44583
NOTE: 579 records were read from the infile '/folders/myfolders/441s18/Lecture/exploremath.data.txt'.
The minimum record length was 75.
The maximum record length was 75.
NOTE: Missing values were generated as a result of performing an operation on missing values.
Each place is given by: (Number of times) at (Line):(Column).
99 at 97:24
NOTE: The data set WORK.MATHEX has 579 observations and 32 variables.
NOTE: DATA statement used (Total process time):
real time 0.01 seconds
cpu time 0.01 seconds
162 proc freq data=mathex; /* Cross-tabulate each derived variable against the variable it was built from, to verify the recoding */
163 title3 'Check created variables';
164 tables lang * tongue / norow nocol nopercent missing; /* Options on every table: cell counts only, with missing values shown as their own level */
165 tables grade * (passed gsplit) / norow nocol nopercent missing; /* Two tables: grade by passed and grade by gsplit */
166 tables (nation1 nation2) * ethnic / norow nocol nopercent missing; /* Each rater's code against the combined ethnic code */
167 tables ethnic * (e1-e6) / norow nocol nopercent missing; /* One table per dummy variable e1 through e6 */
168 tables sex * gender / norow nocol nopercent missing;
169 tables (c1-c3) * course / norow nocol nopercent missing; /* One table per course dummy c1 through c3 */
170
171
NOTE: There were 579 observations read from the data set WORK.MATHEX.
NOTE: PROCEDURE FREQ used (Total process time):
real time 1.14 seconds
cpu time 1.12 seconds
172 proc freq data=mathex; /* One-way frequency tables for the variables intended for analysis */
173 title3 'Frequency distributions of variables to be used';
174 tables course sex nation1 nation2 calc precalc
175 hsgpa -- ethnic;
176 /* Note hsgpa -- ethnic is a range taken in data set order = hsgpa hscalc hsengl
177 grade tongue totscore passed ethnic */
178
179
NOTE: There were 579 observations read from the data set WORK.MATHEX.
NOTE: PROCEDURE FREQ used (Total process time):
real time 0.83 seconds
cpu time 0.82 seconds
180 proc freq; /* No data= option is given; the log shows WORK.MATHEX was the data set read */
181 title3 'Frequency distributions of categorical variables';
182 tables course sex ethnic passed / missing; /* missing: count missing values as a level in each table */
183
NOTE: There were 579 observations read from the data set WORK.MATHEX.
NOTE: PROCEDURE FREQ used (Total process time):
real time 0.10 seconds
cpu time 0.09 seconds
184 proc means; /* No data= option and no statistic keywords are given, so the procedure defaults apply */
185 title3 'Quantitative Variables';
186 var hsgpa hscalc hsengl calc precalc totscore grade; /* Cleaned high-school marks, diagnostic test scores and final grade */
187
188
189
190
191 OPTIONS NONOTES NOSTIMER NOSOURCE NOSYNTAXCHECK;
204