/* readreplic.sas
   Read the replication math data and do basic transformations.
   This version is based on readmath2c.sas.

   The raw file is the one named on the INFILE statement below. Its ID
   values are DIFFERENT from those in the exploratory sample.
   NOTE(review): an earlier version of this header named the input file
   replicmath2.data, while the INFILE statement reads replicmath.data.txt.
   The INFILE statement is what actually runs -- confirm it is the
   intended file.

   Creates data set REPLIC with the raw variables plus:
     totscore, passed, grade, hsgpa, hscalc, hsengl, tongue, ethnic,
     diff, course2, gsplit, e1-e4 and e6, gender, mtongue, c1-c3.
*/

title 'Prediction of Performance in First-year Calculus';

/* Value labels for the coded variables */
proc format;
    value ynfmt
        0 = 'No'
        1 = 'Yes';
    value crsfmt
        1 = 'Catch-up'
        2 = 'Mainstrm'
        3 = 'Elite'
        4 = 'No Resp';
    value nfmt
        1 = 'Asian'
        2 = 'Eastern European'
        3 = 'European not Eastern'
        4 = 'Middle-Eastern and Pakistani'
        5 = 'East Indian'
        6 = 'Other and DK';

data replic;
    infile '/home/brunner0/441s20/replicmath.data.txt';
    input id course precalc calc gpa calculus english mark
          lang $ sex $ nation1 nation2 sample;

    /* Computed Variables: totscore, passed, grade, hsgpa, hscalc, hsengl,
                           tongue, ethnic */
    totscore = precalc + calc;

    /* A missing or out-of-range mark counts as not passed */
    if (50 <= mark <= 100) then passed = 1;
    else passed = 0;

    /* Final mark, set to missing when it is outside 0-100. A missing mark
       compares less than 0 in SAS, so it also yields a missing grade.
       NOTE(review): the comparison operators in this statement were lost
       in the copy being edited ("if 0 100 then ..."); this range test is a
       reconstruction -- confirm against the original program. */
    if mark < 0 or mark > 100 then grade = .;
    else grade = mark;

    /* Missing HS marks were zeros */
    if 65 le gpa le 100 then hsgpa = gpa;   /* Else missing is automatic */
    if 0 < calculus < 101 then hscalc = calculus;
    if 0 < english  < 101 then hsengl = english;

    /* There were just a few French speakers.
       Declare the length explicitly: without it, tongue would take its
       length from the first literal assigned to it, which is shorter than
       'English', so 'English' would be truncated and the comparison used
       for mtongue below would never match. Length 8 matches lang, which
       is read with the default $ length. */
    length tongue $ 8;
    if lang = 'French' then tongue = 'Other';
    else tongue = lang;

    /* Rater 1 knows Middle Eastern names -- otherwise believe Rater 2 */
    if nation1 = 4 then ethnic = nation1;
    else ethnic = nation2;

    /********************************************************************/

    label precalc  = 'Number precalculus correct'
          calc     = 'Number calculus correct'
          totscore = 'Total # right on diagnostic test'
          passed   = 'Passed the course'
          grade    = 'Final mark (if any)'
          hsgpa    = 'High School GPA'
          hscalc   = 'HS Calculus'
          hsengl   = 'HS English'
          lang     = 'Mother Tongue'
          nation1  = 'Nationality of name acc to rater1'
          nation2  = 'Nationality of name acc to rater2'
          tongue   = 'Mother Tongue (Eng or Other)'
          ethnic   = 'Judged Nationality of name';

    /* Percent correct on each part of the diagnostic test, using the
       divisors 9 (precalc) and 11 (calc) */
    diff = (100 * precalc/9) - (100 * calc/11);
    label diff = 'Percentage correct: Precalc minus calc';

    /* And a couple more useful variables */
    if course = 4 then course2 = .;          /* Eliminate 'No Resp' */
    else course2 = course;

    /* Got median=60 from proc univariate */
    if 0 le grade le 60 then gsplit = '60orLess';
    else if 60 lt grade le 100 then gsplit = 'Over60';
    label gsplit = 'Median split on final grade';

    format course course2 crsfmt.;
    format passed ynfmt.;
    format nation1 nation2 ethnic nfmt.;

    /* Dummy variables for ethnic background. Category 5 (East Indian) has
       no indicator, so it is the reference category. A comparison such as
       (ethnic = 1) evaluates to 1 when true and 0 when false. */
    if ethnic = . then do;
        e1 = .; e2 = .; e3 = .; e4 = .; e6 = .;
    end;
    else do;
        e1 = (ethnic = 1);
        e2 = (ethnic = 2);
        e3 = (ethnic = 3);
        e4 = (ethnic = 4);
        e6 = (ethnic = 6);
    end;
    label e1 = 'Asian vs East Ind.'
          e2 = 'East Eur. vs East Ind.'
          e3 = 'Other Eur. vs East Ind.'
          e4 = 'Mid. East & Pak. vs East Ind.'
          e6 = 'Other/DK vs East Ind.';

    /* Any other value of sex leaves gender missing */
    if sex = 'Female' then gender = 1;
    else if sex = 'Male' then gender = 0;

    /* Any other value of tongue leaves mtongue missing */
    if tongue = 'English' then mtongue = 1;
    else if tongue = 'Other' then mtongue = 0;
    label mtongue = 'English vs. Other';

    /* Indicators for course type.
       Only use 2 of these if the model has an intercept! */
    if course2 = . then do;
        c1 = .; c2 = .; c3 = .;
    end;
    else do;
        c1 = (course2 = 1);
        c2 = (course2 = 2);
        c3 = (course2 = 3);
    end;
    label c1 = 'Catch-up'
          c2 = 'Mainstream'
          c3 = 'Elite';