/***************** mathread2.sas: The 2018 version *************************/
/* Read exploratory data, create and test new variables.                   */
/*                                                                         */
/* Defines the display formats ynfmt, crsfmt and nfmt, then builds the     */
/* data set mathex from the raw text file: cleans the raw marks, and       */
/* derives test scores, pass/fail indicators, dummy variables and labels.  */
/* No RUN statement is issued here, so the DATA step stays open for any    */
/* statements that follow this section.                                    */

title 'Prediction of Performance in First-year Calculus: Exploratory Sample';

proc format;
     value ynfmt  0 = 'No'
                  1 = 'Yes';
     value crsfmt 1 = 'Catch-up'
                  2 = 'Mainstrm'
                  3 = 'Elite'
                  4 = 'No Resp';
     value nfmt   1 = 'Asian'
                  2 = 'Eastern European'
                  3 = 'European not Eastern'
                  4 = 'Middle-Eastern and Pakistani'
                  5 = 'East Indian'
                  6 = 'Other and DK' ;

data mathex;
     infile '/folders/myfolders/441s18/Lecture/exploremath.data.txt';
     input id course precalc calc gpa calculus english mark
           lang $ sex $ nation1 nation2 sample;

     /****** Fix problems located in first run ******/

     if course = 4 then course = .;   /* No response is missing */

     /* Missing HS marks were zeros. Values outside the accepted range
        leave the new variable missing automatically. */
     if 60 le gpa le 100  then hsgpa  = gpa;
     if 0 < calculus < 101 then hscalc = calculus;
     if 0 < english < 101  then hsengl = english;

     /* Some missing university calculus marks were zero,
        and 998=SDF and 999=WDR */
     if mark = 0 or mark > 100 then grade = .;
     else grade = mark;

     /* There were just a few French speakers.
        The length of tongue is declared explicitly. Without it, SAS would
        take the length from the first assignment (the literal 'Other'),
        and 'English' copied from lang would be truncated, so the test
        tongue = 'English' further down could never be true.
        Eight is the default length of lang under list input. */
     length tongue $ 8;
     if lang = 'French' then tongue = 'Other';
     else tongue = lang;

     /****** Create and check more new variables ******/

     totscore = precalc + calc;       /* Total score on diagnostic test */
     diff = (100 * precalc/9) - (100 * calc/11);
     label diff = 'Percentage correct: Precalc minus calc';

     /* Anything that is not a mark from 50 to 100 counts as not passed,
        including missing, zero and the 998/999 codes. */
     if (50 <= mark <= 100) then passed = 1;
     else passed = 0;

     /* Rater 1 knows Middle Eastern names -- otherwise believe Rater 2 */
     if nation1 = 4 then ethnic = nation1;
     else ethnic = nation2;

     /* Median split on grade; gsplit stays blank when grade is missing.
        Got median=60 from proc univariate */
     if 0 le grade le 60 then gsplit = '60orLess';
     else if 60 lt grade le 100 then gsplit = 'Over60';

     /* Dummy variables for ethnic background:
        Only use 2 of these if the model has an intercept.
        A comparison evaluates to 1 (true) or 0 (false); when ethnic is
        missing nothing is assigned, so e1-e6 remain missing. */
     if ethnic ne . then do;
          e1 = (ethnic = 1);
          e2 = (ethnic = 2);
          e3 = (ethnic = 3);
          e4 = (ethnic = 4);
          e5 = (ethnic = 5);
          e6 = (ethnic = 6);
     end;
     label e1 = 'Asian'
           e2 = 'East Eur.'
           e3 = 'Other Eur.'
           e4 = 'Mid. East & Pak.'
           e5 = 'East Ind.'
           e6 = 'Other/DK';

     /* gender is a dummy variable for sex
        (missing for any value other than Female or Male) */
     if sex = 'Female' then gender = 1;
     else if sex = 'Male' then gender = 0;

     /* mtongue is a dummy variable for tongue
        (missing for any value other than English or Other) */
     if tongue = 'English' then mtongue = 1;
     else if tongue = 'Other' then mtongue = 0;
     label mtongue = 'English vs. Other';

     /* Course: Only use 2 of these if the model has an intercept.
        c1-c3 remain missing when course is missing. */
     if course ne . then do;
          c1 = (course = 1);
          c2 = (course = 2);
          c3 = (course = 3);
     end;
     label c1 = 'Catch-up'
           c2 = 'Mainstream'
           c3 = 'Elite';

     format course crsfmt.;
     format nation1 nation2 ethnic nfmt.;
     format passed ynfmt.;

     label precalc  = 'Number precalculus correct'
           calc     = 'Number calculus correct'
           totscore = 'Total # right on diagnostic test'
           passed   = 'Passed the course'
           grade    = 'Final mark (if any)'
           hsgpa    = 'High School GPA'
           hscalc   = 'HS Calculus'
           hsengl   = 'HS English'
           lang     = 'Mother Tongue'
           nation1  = 'Nationality of name acc to rater1'
           nation2  = 'Nationality of name acc to rater2'
           tongue   = 'Mother Tongue (Eng or Other)'
           ethnic   = 'Judged Nationality of name'
           gsplit   = 'Median split on final grade' ;

     /*********** Variables added to mathread1.sas ***********/

     /* Flag cases with any of the three variables missing. NMISS counts
        the missing arguments directly, so no arithmetic is done on
        missing values. */
     missused = (nmiss(hsgpa, hscalc, precalc) > 0);
     format missused ynfmt.;
     label missused = 'Any of hsgpa hscalc precalc missing';

     /* Outcome: Passed-Failed-Gone.
        The Fail range runs right up to 50, so a mark strictly between
        49 and 50 is a Fail rather than falling through to Gone.
        Missing marks and marks over 100 (998/999) are Gone. */
     if (0 <= mark < 50) then outcome = 'Fail';
     else if (50 <= mark <= 100) then outcome = 'Pass';
     else outcome = 'Gone';