sasfiles.txt These are the sas programs from STA442/1008, UTM Second term 2002. The programs are in aphabetical order. They include all the examples from the online class notes, plus a few more. You may want to save this file to a floppy disk and keep it. appdwaine1.sas appkenton1.sas mcars.sas sampvar2b.sas testor.sas appdwaine2.sas appkenton2.sas popvar.sas senic0.1.sas appgreen1.sas basicsenic.sas reading.sas senic0.sas appgreen2.sas gh91bread.sas sampvar1.sas senicread.sas appgreen3.sas gh91read.sas sampvar2.sas statmarks.sas ---------------------------------------------------------- /* appdwaine1.sas */ options linesize=79 noovp formdlim=' '; title 'Dwaine Studios Example from Chapter 6 (Section 6.9) of Neter et al'; title2 'Just the defaults'; data portrait; infile 'dwaine.dat'; input kids income sales; proc reg; model sales = kids income; /* model DV(s) = IV(s); */ /* appdwaine2.sas */ options linesize=79 pagesize=35 noovp formdlim=' '; title 'Dwaine Studios Example from Chapter 6 (Section 6.9) of Neter et al'; title2 'With bells and whistles'; data portrait; infile 'dwaine.dat'; input kids income sales; proc reg simple corr; /* "simple" prints simple descriptive statistics */ model sales = kids income / ss1; /* "ss1" prints Sequential SS */ output out=resdata predicted=presale residual=resale; /* Creates new SAS data set with Y-hat and e as additional variables*/ /* Now all the default F-test, in order */ allivs: test kids = 0, income = 0; inter: test intercept=0; child: test kids=0; money: test income=0; proc iml; /* Income controlling for kids: Full vs reduced by "hand" */ fcrit = finv(.95,1,18); print fcrit; /* Had to look at printout from an earlier run to get these numbers*/ f = 643.475809 / 121.16263; /* Using the first F formula */ pval = 1-probf(f,1,18); tsq = 2.305**2; /* t-squared should equal F*/ a = 643.475809/(26196.20952 - 23372); print f tsq pval; print "Proportion of remaining variation is " a; proc glm; /* Use proc glm to get a y-hat more easily */ model sales=kids income; estimate 'Xh p249' intercept 1 kids 65.4 income 17.6; proc print; /* To see the new data set with residuals*/ proc univariate normal plot; var resale; proc plot; plot resale * (kids income sales); /* appgreen1.sas */ %include 'gh91read.sas'; options pagesize=100; proc freq; tables plant*mcg /norow nocol nopercent; proc glm; class plant mcg; model meanlng = plant|mcg; means plant|mcg; proc tabulate; class mcg plant; var meanlng ; table (mcg all),(plant all) * (mean*meanlng); /* Replicate tests for main effects and interactions, using contrasts on a combination variable. This is the hard way to do it, but if you can do this, you understand interactions and you can test any collection of contrasts. The definition of the variable combo could have been in gh91read.sas */ data slime; set mould; /* mould was created by ghread91.sas */ if plant=1 and mcg=1 then combo = 1; else if plant=1 and mcg=2 then combo = 2; else if plant=1 and mcg=3 then combo = 3; else if plant=1 and mcg=7 then combo = 4; else if plant=1 and mcg=8 then combo = 5; else if plant=1 and mcg=9 then combo = 6; else if plant=2 and mcg=1 then combo = 7; else if plant=2 and mcg=2 then combo = 8; else if plant=2 and mcg=3 then combo = 9; else if plant=2 and mcg=7 then combo = 10; else if plant=2 and mcg=8 then combo = 11; else if plant=2 and mcg=9 then combo = 12; else if plant=3 and mcg=1 then combo = 13; else if plant=3 and mcg=2 then combo = 14; else if plant=3 and mcg=3 then combo = 15; else if plant=3 and mcg=7 then combo = 16; else if plant=3 and mcg=8 then combo = 17; else if plant=3 and mcg=9 then combo = 18; label combo = 'Plant-MCG Combo'; /* Getting main effects and the interaction with CONTRAST statements */ proc glm; class combo; model meanlng = combo; contrast 'Plant Main Effect' combo 1 1 1 1 1 1 -1 -1 -1 -1 -1 -1 0 0 0 0 0 0, combo 0 0 0 0 0 0 1 1 1 1 1 1 -1 -1 -1 -1 -1 -1; contrast 'MCG Main Effect' combo 1 -1 0 0 0 0 1 -1 0 0 0 0 1 -1 0 0 0 0, combo 0 1 -1 0 0 0 0 1 -1 0 0 0 0 1 -1 0 0 0, combo 0 0 1 -1 0 0 0 0 1 -1 0 0 0 0 1 -1 0 0, combo 0 0 0 1 -1 0 0 0 0 1 -1 0 0 0 0 1 -1 0, combo 0 0 0 0 1 -1 0 0 0 0 1 -1 0 0 0 0 1 -1; contrast 'Plant by MCG Interaction' combo -1 1 0 0 0 0 1 -1 0 0 0 0 0 0 0 0 0 0, combo 0 0 0 0 0 0 -1 1 0 0 0 0 1 -1 0 0 0 0, combo 0 -1 1 0 0 0 0 1 -1 0 0 0 0 0 0 0 0 0, combo 0 0 0 0 0 0 0 -1 1 0 0 0 0 1 -1 0 0 0, combo 0 0 -1 1 0 0 0 0 1 -1 0 0 0 0 0 0 0 0, combo 0 0 0 0 0 0 0 0 -1 1 0 0 0 0 1 -1 0 0, combo 0 0 0 -1 1 0 0 0 0 1 -1 0 0 0 0 0 0 0, combo 0 0 0 0 0 0 0 0 0 -1 1 0 0 0 0 1 -1 0, combo 0 0 0 0 -1 1 0 0 0 0 1 -1 0 0 0 0 0 0, combo 0 0 0 0 0 0 0 0 0 0 -1 1 0 0 0 0 1 -1; /* proc reg's test statement may be easier, but first we need to make 16 dummy variables for cell means coding. This will illustrate arrays and loops, too */ data yucky; set slime; array mu{18} mu1-mu18; do i=1 to 18; if combo=. then mu{i}=.; else if combo=i then mu{i}=1; else mu{i}=0; end; proc reg; model meanlng = mu1-mu18 / noint; alleq: test mu1=mu2=mu3=mu4=mu5=mu6=mu7=mu8=mu9=mu10=mu11=mu12 = mu13=mu14=mu15=mu16=mu17=mu18; plant: test mu1+mu2+mu3+mu4+mu5+mu6 = mu7+mu8+mu9+mu10+mu11+mu12, mu7+mu8+mu9+mu10+mu11+mu12 = mu13+mu14+mu15+mu16+mu17+mu18; fungus: test mu1+mu7+mu13 = mu2+mu8+mu14 = mu3+mu9+mu15 = mu4+mu10+mu16 = mu5+mu11+mu17 = mu6+mu12+mu18; p_by_f: test mu2-mu1=mu8-mu7=mu14-mu13, mu3-mu2=mu9-mu8=mu15-mu14, mu4-mu3=mu10-mu9=mu16-mu15, mu5-mu4=mu11-mu10=mu17-mu16, mu6-mu5=mu12-mu11=mu18-mu17; /* Now illustrate effect coding, with the interaction represented by a collection of product terms. */ data nasty; set yucky; /* Two dummy variables for plant */ if plant=. then p1=.; else if plant=1 then p1=1; else if plant=3 then p1=-1; else p1=0; if plant=. then p2=.; else if plant=2 then p2=1; else if plant=3 then p2=-1; else p2=0; /* Five dummy variables for mcg */ if mcg=. then f1=.; else if mcg=1 then f1=1; else if mcg=9 then f1=-1; else f1=0; if mcg=. then f2=.; else if mcg=2 then f2=1; else if mcg=9 then f2=-1; else f2=0; if mcg=. then f3=.; else if mcg=3 then f3=1; else if mcg=9 then f3=-1; else f3=0; if mcg=. then f4=.; else if mcg=7 then f4=1; else if mcg=9 then f4=-1; else f4=0; if mcg=. then f5=.; else if mcg=8 then f5=1; else if mcg=9 then f5=-1; else f5=0; /* Product terms for interactions */ p1f1 = p1*f1; p1f2=p1*f2 ; p1f3=p1*f3 ; p1f4=p1*f4; p1f5=p1*f5; p2f1 = p2*f1; p2f2=p2*f2 ; p2f3=p2*f3 ; p2f4=p2*f4; p2f5=p2*f5; proc reg; model meanlng = p1 -- p2f5; plant: test p1=p2=0; mcg: test f1=f2=f3=f4=f5=0; p_by_f: test p1f1=p1f2=p1f3=p1f4=p1f5=p2f1=p2f2=p2f3=p2f4=p2f5 = 0; /* appgreen2.sas: */ %include 'gh91bread.sas'; options pagesize=100; proc glm; title 'Repeating initial Plant by MCG ANOVA, full design'; class plant mcg; model meanlng = plant|mcg; means plant|mcg; /* A. Pairwise comparisons of marginal means for plant, full design B. Test all GP159 means equal, full design C. Test profiles for Hanna & Westar parallel, full design */ proc reg; model meanlng = mu1-mu18 / noint; A_GvsH: test mu1+mu2+mu3+mu4+mu5+mu6 = mu7+mu8+mu9+mu10+mu11+mu12; A_GvsW: test mu1+mu2+mu3+mu4+mu5+mu6 = mu13+mu14+mu15+mu16+mu17+mu18; A_HvsW: test mu7+mu8+mu9+mu10+mu11+mu12 = mu13+mu14+mu15+mu16+mu17+mu18; B_G159eq: test mu1=mu2=mu3=mu4=mu5=mu6; C_HWpar: test mu8-mu7=mu14-mu13, mu9-mu8=mu15-mu14, mu10-mu9=mu16-mu15, mu11-mu10=mu17-mu16, mu12-mu11=mu18-mu17; /* D. Oneway on mcg, GP158 subset */ data just159; /* This data set will have just GP159 */ set mould; if plant=1; proc glm data=just159; title 'D. Oneway on mcg, GP158 subset'; class mcg; model meanlng = mcg; /* E. Plant by MCG, Hanna-Westar subset */ data hanstar; /* This data set will have just Hanna and Westar */ set mould; if plant ne 1; proc glm data=hanstar; title 'E. Plant by MCG, Hanna-Westar subset'; class plant mcg; model meanlng = plant|mcg; /* F. Plant by MCG followup, Hanna-Westar subset Interaction: Follow with all pairwise differences of Westar minus Hanna differences G. Differences within Hanna? H. Differences within Westar? */ proc reg data=hanstar; model meanlng = mu7-mu18 / noint; F_inter: test mu13-mu7=mu14-mu8=mu15-mu9 = mu16-mu10=mu17-mu11=mu18-mu12; F_1vs2: test mu13-mu7=mu14-mu8; F_1vs3: test mu13-mu7=mu15-mu9; F_1vs7: test mu13-mu7=mu16-mu10; F_1vs8: test mu13-mu7=mu17-mu11; F_1vs9: test mu13-mu7=mu18-mu12; F_2vs3: test mu14-mu8=mu15-mu9; F_2vs7: test mu14-mu8=mu16-mu10; F_2vs8: test mu14-mu8=mu17-mu11; F_2vs9: test mu14-mu8=mu18-mu12; F_3vs7: test mu15-mu9=mu16-mu10; F_3vs8: test mu15-mu9=mu17-mu11; F_3vs9: test mu15-mu9=mu18-mu12; F_7vs8: test mu16-mu10=mu17-mu11; F_7vs9: test mu16-mu10=mu18-mu12; F_8vs9: test mu17-mu11=mu18-mu12; G_Hanaeq: test mu7=mu8=mu9=mu10=mu11=mu12; H_Westeq: test mu13=mu14=mu15=mu16=mu17=mu18; proc glm data=hanstar; class combo; model meanlng = combo; means combo / scheffe; proc iml; /* Critical values for Scheffe tests */ interac = finv(.95,5,60) ; print interac; oneway = finv(.95,11,60); print oneway; /* Limit discussion to just the Hanna-Westar subset. We've done Overall test comparing the 12 means PLANT MCG PLANT*MCG 15 pairwise comparisons of the Hanna-Westar difference One comparison of the 6 means for Hanna One comparison of the 6 means for Westar /* appgreen3.sas */ %include 'gh91read.sas'; options pagesize=100; proc mixed; class plant mcg; model meanlng=; random plant|mcg; /********************** appkenton1.sas **************************/ options linesize=79 pagesize=35 noovp formdlim=' '; title 'Kenton Oneway Example From Neter et al.'; proc format; value pakfmt 1 = '3Colour Cartoon' 2 = '3Col No Cartoon' 3 = '5Colour Cartoon' 4 = '5Col No Cartoon'; data food; infile 'kenton.dat'; input package sales; label package = 'Package Design' sales = 'Number of Cases Sold'; format package pakfmt.; /* Define ncolours and cartoon */ if package = 1 or package = 2 then ncolours = 3; else if package = 3 or package = 4 then ncolours = 5; if package = 1 or package = 3 then cartoon = 'No '; else if package = 2 or package = 4 then cartoon = 'Yes'; /* Indicator Coding for package: Use p4 only if no intercept */ if package = . then p1 = .; else if package = 1 then p1 = 1; else p1 = 0; if package = . then p2 = .; else if package = 2 then p2 = 1; else p2 = 0; if package = . then p3 = .; else if package = 3 then p3 = 1; else p3 = 0; if package = . then p4 = .; else if package = 4 then p4 = 1; else p4 = 0; /* Basic one-way ANOVA -- well, fairly basic */ proc glm; class package; model sales = package; means package; means package / bon tukey scheffe; estimate '3Colourvs5Colour' package 1 1 -1 -1 / divisor = 2; /* Now oneway using proc reg and dummy variables. First with intercept */ proc reg; model sales = p1 p2 p3; ncolour: test p1+p2 = p3; /* 3 vs 5 colours */ /* Special tests are easier with cell means coding: No intercept => No algebra */ proc reg; model sales = p1 p2 p3 p4 / noint; alleq: test p1=p2=p3=p4; numcol: test p1+p2 = p3+p4; cartoon: test p1+p3 = p2+p4; inter1: test p1-p2 = p3-p4; /* Effect of cartoon depends on ncolours */ inter2: test p1-p3 = p2-p4; /* Effect of ncolours depends on cartoon */ Y3_N3: test p1=p2; /* All pairwise tests */ Y3_Y5: test p1=p3; Y3_N5: test p1=p4; N3_Y5: test p2=p3; N3_N5: test p2=p4; Y5_N5: test p3=p4; /* Actually it's a two-way ANOVA */ proc glm; class ncolours cartoon; model sales = ncolours|cartoon; /* The model statement could have been model sales = ncolours cartoon ncolours*cartoon; */ /* Commented Out */ /* Checking the new variables */ proc freq; tables package * (ncolours cartoon p1-p4) / missprint nocol norow nopercent ; proc iml; /* Critical value for Scheffe tests */ fcrit = finv(.95,3,15); print fcrit; /* Single contrasts are just as convenient with the ESTIMATE statement of proc glm. Illustrate all pairwise. Note F = t-squared */ proc glm; class package; model sales=package; estimate 'Y3_N3' package 1 -1 0 0; estimate 'Y3_Y5' package 1 0 -1 0; estimate 'Y3_N5' package 1 0 0 -1; estimate 'N3_Y5' package 0 1 -1 0; estimate 'N3_N5' package 0 1 0 -1; estimate 'Y5_N5' package 0 0 1 -1; /********** Comments ************ Notice the if package = 1 or package = 3 then cartoon = 'No '; extra space Divisor on estimate does not affect the tests. I did it so ESTIMATE would really produce an estimate With no intercept, total sum of squares is no longer corrected - r^2 radically affected overall test is for whether ALL the betas are zero - usually uninteresting /********************** kenton2.sas **************************/ options linesize=79 pagesize=35 noovp formdlim=' '; title 'STA 320F98 Kenton HW'; data food; infile 'kenton.dat'; input package sales; label package = 'Package Design' sales = 'Number of Cases Sold'; if package eq 1 then p1 = 1 ; else if package = 4 then p1 = -1; else p1 = 0; if package eq 2 then p2 = 1 ; else if package = 4 then p2 = -1; else p2 = 0; if package eq 3 then p3 = 1 ; else if package = 4 then p3 = -1; else p3 = 0; proc reg; model sales = p1 p2 p3; /******************* basicsenic.sas ****************/ /* Basic stats on SENIC Data */ /***************************************************/ %include 'senicread.sas'; /* senicread.sas reads data, etc. */ proc univariate plot normal ; /* Plots and a test for normality */ title2 'Describe Quantitative Variables'; var stay -- nbeds census nurses service; /* single dash only works with numbered lists, like item1-item50 */ proc freq; title2 'Frequency distributions of categorical variables'; tables medschl region agecat; proc chart; title2 'Vertical bar charts'; vbar region medschl agecat /discrete ; proc chart ; title2 'Pie chart'; pie region/type=freq; proc chart; title2 'Pseudo 3-d chart - just playing around'; block region / sumvar=infrisk type=mean group=medschl discrete; /* Now elementary tests */ proc freq; /* use freq to do crosstabs */ tables region*medschl / nocol nopercent expected chisq; proc ttest; class medschl; var infrisk age ; proc glm; /* one-way anova */ class region; model infrisk=region; means region/ snk scheffe; proc plot; plot infrisk * nurses infrisk * nurses = medschl; proc corr; var stay -- nbeds census nurses service; proc glm; /* simple regression with glm*/ model infrisk=nurses; title '1991 Greenhouse Study'; /* Data definition file, with additions */ options linesize=79 noovp formdlim='-'; proc format; /* Value labels used in data step below */ value cultfmt 1 = 'GP159 ' 2 = 'HANNA ' 3 = 'WESTAR'; value flwrfmt 0 = 'No Flowers' 1 = 'Big Buds' 2 = 'Flowers' 3 = 'Flowers & Pods' 4 = 'Flowrs & DevPods'; value hlthfmt 0 = 'Alive' 1 = 'wilt' 2 = 'WILT!' 3 = 'Dead'; value ynfmt 0 = 'No' 1 = 'Yes'; value nevfmt 15 = 'Never'; data mould; infile 'ghouse91.dat'; input row1 plantn1 day1 mcg length1 width1 flower1 stem1 health1 plant row2 plantn2 day2 mcg2 length2 width2 flower2 stem2 health2 plant2 row3 plantn3 day3 mcg3 length3 width3 flower3 stem3 health3 plant3 row4 plantn4 day4 mcg4 length4 width4 flower4 stem4 health4 plant4 row5 plantn5 day5 mcg5 length5 width5 flower5 stem5 health5 plant5 row6 plantn6 day6 mcg6 length6 width6 flower6 stem6 health6 plant6 row7 plantn7 day7 mcg7 length7 width7 flower7 stem7 health7 plant7 row8 plantn8 day8 mcg8 length8 width8 flower8 stem8 health8 plant8 row9 plantn9 day9 mcg9 length9 width9 flower9 stem9 health9 plant9 row10 plantn10 day10 mcg10 length10 width10 flower10 stem10 health10 plant10 row11 plantn11 day11 mcg11 length11 width11 flower11 stem11 health11 plant11 row12 plantn12 day12 mcg12 length12 width12 flower12 stem12 health12 plant12 row13 plantn13 day13 mcg13 length13 width13 flower13 stem13 health13 plant13 row14 plantn14 day14 mcg14 length14 width14 flower14 stem14 health14 plant14 row15 plantn15 day15 mcg15 girdling timegird timedied; /* Of course there is no day 15 */ /********* Fix-ups & Added variables *********/ if mcg=4 then mcg=7; if mcg=5 then mcg=8; if mcg=6 then mcg=9; if timegird=0 then timegird=.; died=1; if timedied = . then died = 0; whendied=timedied; if whendied=. then whendied=15; whengird=timegird; if whengird=. then whengird=15; /*** Uncensored Lesion Length in leslng1 - leslng14 **/ array lng{14} length1-length14; array leslng{14} leslng1-leslng14; do i=1 to 14; lng{i} = lng{i}*10; /* Convert to mm */ if (lng{i}=.) then leslng{i}=leslng{i-1}; else leslng{i}=lng{i}; /* OK because there are no plants with lesion length missing on day 1 */ end; /*** Uncensoring flowering status: Status is last status recorded ***/ array flwr{14} flower1-flower14; do i=2 to 14; if (flwr{i}=.) then flwr{i}=flwr{i-1}; end; /*** Progression in flowering status: Increase at end or two consecutive ***/ progr=0; if (flower14 gt flower13) then progr=1; do i=2 to 13; if (flwr{i} gt flwr{i-1}) and (flwr{i+1} gt flwr{i-1}) then progr=1; end; /* if plantn1 ne 516; */ /* Getting rid of "Charlie Brown" */ /****************************************************/ meanlng=mean(of leslng1-leslng14); /**** Computing linear growth rate: regression through the origin with no plateau *****/ ngood = 0; sumyt = 0 ; sumtsq = 0; last = 0; maxles = max(of leslng1-leslng14); array y {14} y1-y14; array t {14} t1-t14; do i=1 to 14; if (leslng{i} ne . and last ne maxles) then do; ngood = ngood+1 ; sumyt = sumyt + leslng{i}*i ; sumtsq = sumtsq + i**2 ; end ; /* End if */ last = leslng{i} ; end ; /* Next i */ if ngood = 0 then growrate = .; else growrate = sumyt/sumtsq; /* End definition of growth rate (without plateau) */ /**** Computing grate2: regression through the origin with plateau *****/ ngood = 0; sumyt = 0 ; sumtsq = 0; last = 0; do i=1 to 14; if (leslng{i} ne .) then do; ngood = ngood+1 ; sumyt = sumyt + leslng{i}*i ; sumtsq = sumtsq + i**2 ; end ; /* End if */ end ; /* Next i */ if ngood = 0 then grate2 = .; else grate2 = sumyt/sumtsq; /* End definition of grate2: growth rate (with plateau) */ /**** Now average growth rate without plateau. Note that leslng14/14 would be average growth rate WITH plateau. ***/ do i = 1 to 14; if (leslng{i}=maxles) then avrate=maxles/i; end ; /* Next i */ label avrate = 'Average Growth Rate in mm.' /**********************************/ /***** Variable labels ********/ label mcg = 'Mycelial Compatibility Group'; label plant = 'Type of Plant'; label girdling = 'Was Plant Ever Girdled?'; label timegird = 'Time Plant Girdled (never is missing)'; label timedied = 'Time Plant Died (never is missing)'; label whengird = 'Day on which Plant was Girdled'; label whendied = 'Day on which Plant Died'; label died = 'Did Plant Ever die?'; label progr = 'Progression in flowering status?'; label growrate = 'Linear growth rate (No Plateau)'; label meanlng = 'Average Lesion length'; label grate2 = 'Growth rate (With Plateau)'; label avrate = 'Average growth rate (No Plateau)'; /****** Value labels defined above in proc format ******/ format flower1-flower14 flwrfmt.; format health1-health14 hlthfmt.; format girdling died progr ynfmt.; format plant plant2-plant14 cultfmt.; format whendied whengird nevfmt.; /******************* Additions, copied from appgreen1.sas ***************/ if plant=1 and mcg=1 then combo = 1; else if plant=1 and mcg=2 then combo = 2; else if plant=1 and mcg=3 then combo = 3; else if plant=1 and mcg=7 then combo = 4; else if plant=1 and mcg=8 then combo = 5; else if plant=1 and mcg=9 then combo = 6; else if plant=2 and mcg=1 then combo = 7; else if plant=2 and mcg=2 then combo = 8; else if plant=2 and mcg=3 then combo = 9; else if plant=2 and mcg=7 then combo = 10; else if plant=2 and mcg=8 then combo = 11; else if plant=2 and mcg=9 then combo = 12; else if plant=3 and mcg=1 then combo = 13; else if plant=3 and mcg=2 then combo = 14; else if plant=3 and mcg=3 then combo = 15; else if plant=3 and mcg=7 then combo = 16; else if plant=3 and mcg=8 then combo = 17; else if plant=3 and mcg=9 then combo = 18; label combo = 'Plant-MCG Combo'; /* Make 16 dummy variables for cell means coding. This will illustrate arrays and loops, too */ array mu{18} mu1-mu18; do i=1 to 18; if combo=. then mu{i}=.; else if combo=i then mu{i}=1; else mu{i}=0; end; /* Now illustrate effect coding, with the interaction represented by a collection of product terms. */ /* Two dummy variables for plant */ if plant=. then p1=.; else if plant=1 then p1=1; else if plant=3 then p1=-1; else p1=0; if plant=. then p2=.; else if plant=2 then p2=1; else if plant=3 then p2=-1; else p2=0; /* Five dummy variables for mcg */ if mcg=. then f1=.; else if mcg=1 then f1=1; else if mcg=9 then f1=-1; else f1=0; if mcg=. then f2=.; else if mcg=2 then f2=1; else if mcg=9 then f2=-1; else f2=0; if mcg=. then f3=.; else if mcg=3 then f3=1; else if mcg=9 then f3=-1; else f3=0; if mcg=. then f4=.; else if mcg=7 then f4=1; else if mcg=9 then f4=-1; else f4=0; if mcg=. then f5=.; else if mcg=8 then f5=1; else if mcg=9 then f5=-1; else f5=0; /* Product terms for interactions */ p1f1 = p1*f1; p1f2=p1*f2 ; p1f3=p1*f3 ; p1f4=p1*f4; p1f5=p1*f5; p2f1 = p2*f1; p2f2=p2*f2 ; p2f3=p2*f3 ; p2f4=p2*f4; p2f5=p2*f5; title '1991 Greenhouse Study'; /* Data definition file */ options linesize=79 noovp formdlim='-'; proc format; /* Value labels used in data step below */ value cultfmt 1 = 'GP159 ' 2 = 'HANNA ' 3 = 'WESTAR'; value flwrfmt 0 = 'No Flowers' 1 = 'Big Buds' 2 = 'Flowers' 3 = 'Flowers & Pods' 4 = 'Flowrs & DevPods'; value hlthfmt 0 = 'Alive' 1 = 'wilt' 2 = 'WILT!' 3 = 'Dead'; value ynfmt 0 = 'No' 1 = 'Yes'; value nevfmt 15 = 'Never'; data mould; infile 'ghouse91.dat'; input row1 plantn1 day1 mcg length1 width1 flower1 stem1 health1 plant row2 plantn2 day2 mcg2 length2 width2 flower2 stem2 health2 plant2 row3 plantn3 day3 mcg3 length3 width3 flower3 stem3 health3 plant3 row4 plantn4 day4 mcg4 length4 width4 flower4 stem4 health4 plant4 row5 plantn5 day5 mcg5 length5 width5 flower5 stem5 health5 plant5 row6 plantn6 day6 mcg6 length6 width6 flower6 stem6 health6 plant6 row7 plantn7 day7 mcg7 length7 width7 flower7 stem7 health7 plant7 row8 plantn8 day8 mcg8 length8 width8 flower8 stem8 health8 plant8 row9 plantn9 day9 mcg9 length9 width9 flower9 stem9 health9 plant9 row10 plantn10 day10 mcg10 length10 width10 flower10 stem10 health10 plant10 row11 plantn11 day11 mcg11 length11 width11 flower11 stem11 health11 plant11 row12 plantn12 day12 mcg12 length12 width12 flower12 stem12 health12 plant12 row13 plantn13 day13 mcg13 length13 width13 flower13 stem13 health13 plant13 row14 plantn14 day14 mcg14 length14 width14 flower14 stem14 health14 plant14 row15 plantn15 day15 mcg15 girdling timegird timedied; /* Of course there is no day 15 */ /********* Fix-ups & Added variables *********/ if mcg=4 then mcg=7; if mcg=5 then mcg=8; if mcg=6 then mcg=9; if timegird=0 then timegird=.; died=1; if timedied = . then died = 0; whendied=timedied; if whendied=. then whendied=15; whengird=timegird; if whengird=. then whengird=15; /*** Uncensored Lesion Length in leslng1 - leslng14 **/ array lng{14} length1-length14; array leslng{14} leslng1-leslng14; do i=1 to 14; lng{i} = lng{i}*10; /* Convert to mm */ if (lng{i}=.) then leslng{i}=leslng{i-1}; else leslng{i}=lng{i}; /* OK because there are no plants with lesion length missing on day 1 */ end; /*** Uncensoring flowering status: Status is last status recorded ***/ array flwr{14} flower1-flower14; do i=2 to 14; if (flwr{i}=.) then flwr{i}=flwr{i-1}; end; /*** Progression in flowering status: Increase at end or two consecutive ***/ progr=0; if (flower14 gt flower13) then progr=1; do i=2 to 13; if (flwr{i} gt flwr{i-1}) and (flwr{i+1} gt flwr{i-1}) then progr=1; end; /* if plantn1 ne 516; */ /* Getting rid of "Charlie Brown" */ /****************************************************/ meanlng=mean(of leslng1-leslng14); /**** Computing linear growth rate: regression through the origin with no plateau *****/ ngood = 0; sumyt = 0 ; sumtsq = 0; last = 0; maxles = max(of leslng1-leslng14); array y {14} y1-y14; array t {14} t1-t14; do i=1 to 14; if (leslng{i} ne . and last ne maxles) then do; ngood = ngood+1 ; sumyt = sumyt + leslng{i}*i ; sumtsq = sumtsq + i**2 ; end ; /* End if */ last = leslng{i} ; end ; /* Next i */ if ngood = 0 then growrate = .; else growrate = sumyt/sumtsq; /* End definition of growth rate (without plateau) */ /**** Computing grate2: regression through the origin with plateau *****/ ngood = 0; sumyt = 0 ; sumtsq = 0; last = 0; do i=1 to 14; if (leslng{i} ne .) then do; ngood = ngood+1 ; sumyt = sumyt + leslng{i}*i ; sumtsq = sumtsq + i**2 ; end ; /* End if */ end ; /* Next i */ if ngood = 0 then grate2 = .; else grate2 = sumyt/sumtsq; /* End definition of grate2: growth rate (with plateau) */ /**** Now average growth rate without plateau. Note that leslng14/14 would be average growth rate WITH plateau. ***/ do i = 1 to 14; if (leslng{i}=maxles) then avrate=maxles/i; end ; /* Next i */ label avrate = 'Average Growth Rate in mm.' /**********************************/ /***** Variable labels ********/ label mcg = 'Mycelial Compatibility Group'; label plant = 'Type of Plant'; label girdling = 'Was Plant Ever Girdled?'; label timegird = 'Time Plant Girdled (never is missing)'; label timedied = 'Time Plant Died (never is missing)'; label whengird = 'Day on which Plant was Girdled'; label whendied = 'Day on which Plant Died'; label died = 'Did Plant Ever die?'; label progr = 'Progression in flowering status?'; label growrate = 'Linear growth rate (No Plateau)'; label meanlng = 'Average Lesion length'; label grate2 = 'Growth rate (With Plateau)'; label avrate = 'Average growth rate (No Plateau)'; /****** Value labels defined above in proc format ******/ format flower1-flower14 flwrfmt.; format health1-health14 hlthfmt.; format girdling died progr ynfmt.; format plant plant2-plant14 cultfmt.; format whendied whengird nevfmt.; /********************** mcars.sas **************************/ options linesize=79 pagesize=100 noovp formdlim='-'; title 'Metric Cars Data: Dummy Vars and Interactions'; proc format; /* Used to label values of the categorical variables */ value carfmt 1 = 'US' 2 = 'Japanese' 3 = 'European' ; data auto; infile 'mcars.dat'; input id country kpl weight length; /* Indicator dummy vars: Ref category is Japanese */ if country = 1 then c1=1; else c1=0; if country = 3 then c2=1; else c2=0; /* Interaction Terms */ cw1 = c1*weight; cw2 = c2*weight; label country = 'Country of Origin' kpl = 'Kilometers per Litre'; format country carfmt.; proc means; class country; var weight kpl; proc glm; title 'One-way ANOVA'; class country; model kpl = country; means country / tukey; proc reg; title 'ANCOVA'; model kpl = weight c1 c2; country: test c1 = c2 = 0; proc reg; title 'Test parallel slopes (Interaction)'; model kpl = weight c1 c2 cw1 cw2; interac: test cw1 = cw2 = 0; useuro: test cw1=cw2; country: test c1 = c2 = 0; eqreg: test c1=c2=cw1=cw2=0; proc iml; /* Critical value for Scheffe tests */ critval = finv(.95,4,94) ; print critval; /* Could do most of it with proc glm: ANCOVA, then test interaction */ proc glm; class country; model kpl = weight country; lsmeans country; proc glm; class country; model kpl = weight country weight*country; /* Commented Out: proc reg; model mpg = weight length c1 c2; dvar: test c1 = 0, c2 = 0; proc reg; model mpg = weight length cm1-cm3 / noint; No intercept cellmean: test cm1=cm2=cm3; proc reg; model mpg = weight length ec1 ec2; effcode: test ec1=ec2=0; /*********************** popvar.sas *****************************/ options linesize = 79 pagesize = 200; data fpower; /* Replace alpha, s, p, and wantpow below */ alpha = 0.05; /* Significance level */ s = 6; /* Numerator df = # IVs being tested */ p = 26; /* There are p beta parameters */ a = .10 ; /* Effect size */ wantpow = .80; /* Find n to yield this power. */ power = 0; n = p+1; oneminus = 1-alpha; /* Initializing ... */ do until (power >= wantpow); ncp = (n-p)*a/(1-a); df2 = n-p; power = 1-probf(finv(oneminus,s,df2),s,df2,ncp); n = n+1 ; end; n = n-1; put ' *********************************************************'; put ' '; put ' For a multiple regression model with ' p 'betas, '; put ' testing ' s 'independent variables using alpha = ' alpha ','; put ' a sample size of ' n 'is needed'; put ' in order to have probability ' wantpow 'of rejecting H0'; put ' for an effect of size a = ' a ; put ' '; put ' *********************************************************'; /******************* reading.sas ********************** * Simple SAS job to illustrate a two-sample t-test * *******************************************************/ options linesize=79 noovp formdlim=' '; title 'More & McCabe (1993) textbook t-test Example 7.8'; data reading; infile 'drp.dat'; input group $ score; label group = 'Get Directed Reading Programme?' score = 'Degree of Reading Power Test Score'; proc ttest; class group; var score; /************************** sampvar1.sas **************************/ /* Finds n needed for significance, for a given proportion of */ /* remaining variation */ /*******************************************************************/ options linesize = 79 pagesize = 200; data explvar; /* Can replace alpha, s, p, and a below. */ alpha = 0.05; /* Significance level. */ s = 6; /* Numerator df = # IVs being tested. */ p = 26; /* There are p beta parameters. */ a = .10 ; /* Proportion of remaining variation after */ /* controlling for all other variables. */ /* Initializing ... */ pval = 1; n = p+1; do until (pval <= alpha); F = (n-p)/s * a/(1-a); df2 = n-p; pval = 1-probf(F,s,df2); n = n+1 ; end; /* When finished, n is one too many */ n = n-1; F = (n-p)/s * a/(1-a); df2 = n-p; pval = 1-probf(F,s,df2); put ' *********************************************************'; put ' '; put ' For a multiple regression model with ' p 'betas, '; put ' testing ' s ' variables controlling for the others,'; put ' a sample size of ' n 'is needed for significance at the'; put ' alpha = ' alpha 'level, when the effect explains a = ' a ; put ' of the remaining variation after allowing for all other '; put ' variables in the model. '; put ' F = ' F ',df = (' s ',' df2 '), p = ' pval; put ' '; put ' *********************************************************'; /************************** sampvar2.sas ****************************/ /* Finds proportion of remaining variation needed for significance, */ /* given sample size n */ /*********************************************************************/ options linesize = 79 pagesize = 200; data explvar; /* Replace alpha, s, p, and a below. */ alpha = 0.05; /* Significance level. */ s = 6; /* Numerator df = # IVs being tested. */ p = 26; /* There are p beta parameters. */ n = 120 ; /* Sample size */ /* Initializing ... */ pval = 1; a = 0; df2 = n-p; do until (pval <= alpha); F = (n-p)/s * a/(1-a); pval = 1-probf(F,s,df2); a = a + .001 ; end; /* When finished, a is .001 too much */ a = a-.001; F = (n-p)/s * a/(1-a); pval = 1-probf(F,s,df2); put ' ******************************************************'; put ' '; put ' For a multiple regression model with ' p 'betas, '; put ' testing ' s ' variables at significance level '; put ' alpha = ' alpha ' controlling for the other variables,'; put ' and a sample size of ' n', the variables need to explain'; put ' a = ' a ' of the remaining variation to be significant.'; put ' F = ' F ', df = (' s ',' df2 '), p = ' pval; put ' '; put ' *******************************************************'; /************************** sampvar2.sas ****************************/ /* Finds proportion of remaining variation needed for significance, */ /* given sample size n */ /*********************************************************************/ options linesize = 79 pagesize = 200; data explvar; /* Replace alpha, s, p, and a below. */ alpha = 0.05; /* Significance level. */ s = 6; /* Numerator df = # IVs being tested. */ p = 26; /* There are p beta parameters. */ n = 155 ; /* Sample size */ /* Initializing ... */ pval = 1; a = 0; df2 = n-p; do until (pval <= alpha); F = (n-p)/s * a/(1-a); pval = 1-probf(F,s,df2); a = a + .001 ; end; /* When finished, a is .001 too much */ a = a-.001; F = (n-p)/s * a/(1-a); pval = 1-probf(F,s,df2); put ' ******************************************************'; put ' '; put ' For a multiple regression model with ' p 'betas, '; put ' testing ' s ' variables at significance level '; put ' alpha = ' alpha ' controlling for the other variables,'; put ' and a sample size of ' n', the variables need to explain'; put ' a = ' a ' of the remaining variation to be significant.'; put ' F = ' F ', df = (' s ',' df2 '), p = ' pval; put ' '; put ' *******************************************************'; /* senic0.1.sas */ options linesize = 79 noovp formdlim=' '; data simple; infile 'senic.dat'; input id stay age infrisk culratio xratio nbeds medschl region census nurses service; /*** sas doesn't like numeric missing value codes. a period . is best for missing. however .... ***/ if stay eq 9999 then stay = . ; if age eq 9999 then age = . ; if xratio eq 9999 then xratio = . ; if culratio eq 9999 then culratio = . ; if infrisk = 999 then infrisk = . ; if nbeds = 9 then nbeds = . ; if medschl = 9 then medschl = . ; if region = 9 then region = . ; if census = 9 then census = . ; if service = 9 then service = . ; if nurses eq (0 or .999) then nurses = . ; proc freq; tables _all_; /* senic0.sas */ options linesize = 79 noovp formdlim=' '; data simple; infile 'senic.dat'; input id stay age infrisk culratio xratio nbeds medschl region census nurses service; proc freq; tables _all_; /******* senicread.sas just reads and labels data ***********/ title 'SENIC data'; options linesize=79 noovp formdlim=' '; proc format; /* value labels used in data step below */ value yesnofmt 1 = 'Yes' 2 = 'No' ; value regfmt 1 = 'Northeast' 2 = 'North Central' 3 = 'South' 4 = 'West' ; value acatfmt 1 = '53 & under' 2 = 'Over 53'; data senic; infile 'senic.raw' missover ; /* in senic.raw, missing=blank */ /* missover causes blanks to be missing */ input #1 id 1-5 stay 7-11 age 13-16 infrisk 18-20 culratio 22-25 xratio 27-31 nbeds 33-35 medschl 37 region 39 census 41-43 nurses 45-47 service 49-52 ; label id = 'Hospital identification number' stay = 'Av length of hospital stay, in days' age = 'Average patient age' infrisk = 'Prob of acquiring infection in hospital' culratio = '# cultures / # no hosp acq infect' xratio = '# x-rays / # no signs of pneumonia' nbeds = 'Average # beds during study period' medschl = 'Medical school affiliation' region = 'Region of country (usa)' census = 'Aver # patients in hospital per day' nurses = 'Aver # nurses during study period' service = '% of 35 potential facil. & services' ; /* associating variables with their value labels */ format medschl yesnofmt.; format region regfmt.; /***** recodes, computes & ifs *****/ if 053 then agecat=2; label agecat = 'av patient age category'; format agecat acatfmt.; /* compute ad hoc index of hospital quality */ quality=(2*service+nurses+nbeds+10*culratio +10*xratio-2*stay)/medschl; if (region eq 3) then quality=quality-100; label quality = 'jerry''s bogus hospital quality index'; /* Commented out proc freq; tables _all_; */options linesize=79 pagesize=35 noovp formdlim=' '; title 'Grades from STA3000 at Roosevelt University: Fall, 1957'; title2 'Illustrate Elementary Tests'; proc format; /* Used to label values of the categorical variables */ value sexfmt 0 = 'Male' 1 = 'Female'; value ethfmt 1 = 'Chinese' 2 = 'European' 3 = 'Other' ; data grades; infile 'statclass.dat'; input sex ethnic quiz1-quiz8 comp1-comp9 midterm final; /* Drop lowest score for quiz & computer */ quizave = ( sum(of quiz1-quiz8) - min(of quiz1-quiz8) ) / 7; compave = ( sum(of comp1-comp9) - min(of comp1-comp9) ) / 8; label ethnic = 'Apparent ethnic background (ancestry)' quizave = 'Quiz Average (drop lowest)' compave = 'Computer Average (drop lowest)'; mark = .3*quizave*10 + .1*compave*10 + .3*midterm + .3*final; label mark = 'Final Mark'; diff = quiz8-quiz1; /* To illustrate matched t-test */ label diff = 'Quiz 8 minus Quiz 1'; format sex sexfmt.; /* Associates sex & ethnic */ format ethnic ethfmt.; /* with formats defined above */ proc freq; tables sex ethnic; proc means n mean std; var quiz1 -- mark; /* single dash only works with numbered lists, like quiz1-quiz8 */ proc ttest; title 'Independent t-test'; class sex; var mark; proc means n mean std t; title 'Matched t-test: Quiz 1 versus 8'; var quiz1 quiz8 diff; proc glm; title 'One-way anova'; class ethnic; model mark = ethnic; means ethnic; means ethnic / Tukey Bon Scheffe; proc freq; title 'Chi-squared Test of Independence'; tables sex*ethnic / chisq; proc freq; /* Added after seeing warning from chisq test above */ title 'Chi-squared Test of Independence: Version 2'; tables sex*ethnic / norow nopercent chisq expected; proc corr; title 'Correlation Matrix'; var final midterm quizave compave; proc plot; title 'Scatterplot'; plot final*midterm; /* Really should do all combinations */ proc reg; title 'Simple regression'; model final=midterm; /* Predict final exam score from midterm, quiz & computer */ proc reg simple; title 'Multiple Regression'; model final = midterm quizave compave / ss1; smalstuf: test quizave = 0, compave = 0; %include 'senicread.sas'; if _n_ < 6 then region = .; /* Indicator dummy variables for region */ if region = . then r1=.; else if region = 1 then r1 = 1; else r1 = 0; if region = . then r2=.; else if region = 2 then r2 = 1; else r2 = 0; if region = . then r3=.; else if region = 3 then r3 = 1; else r3 = 0; proc freq; tables (r1 r2 r3) * region / missprint norow nocol nopercent;