# boot1.R Working on the bootstrap # On unix, run with R --vanilla < boot1.R > boot1.out & # grades.data has 4 columns: ID, Verbal SAT, Math SAT and 1st year GPA marks <- read.table("grades.data") n <- length(marks$verbal) n marks[1:10,] obscorr <- cor(marks) obscorr # Question: Is the correlation between Verbal SAT and GPA the same as # the correlation between math SAT and GPA? # What is the sampling distribution of the difference between correlation # coefficients? # obsdiff <- obscorr[3,1]-obscorr[3,2] # Verbal minus math obsdiff # The strategy will be to obtain a 95% bootstrap confidence interval for # the difference between verbal correlation and math correlation. This # confidence interval will be approximately centered around the observed # difference obsdiff = .128. If the confidence interval does not include # zero, we will conclude that the observed difference is significantly # different from zero. BOOT <- 100 ; bsdiff <- NULL ; set.seed(9999) # Accumulate bootstrap values in bsdiff # For clarity, do operations in several separate steps inside the loop for(i in 1:BOOT) { bootmarks <- marks[sample(1:n,replace=TRUE),] # sample rows with # replacement bcorr <- cor(bootmarks) # Correlation matrix of bootstrap sample bdiffer <- bcorr[3,1]-bcorr[3,2] # Differencce between correlation # coefficients bsdiff <- c(bsdiff,bdiffer) # Add bdiffer to the end of bsdiff } # Next bootstrap sample bsdiff <- sort(bsdiff) # Now get lower and upper limits of 95% CI low <- bsdiff[.025*BOOT] ; up <- bsdiff[.975*BOOT + 1] low ; up (low+up)/2 obsdiff write(bsdiff,"bsdiff.dat") # Maybe for later analysis hist(bsdiff)