# boot1.R    Working on the bootstrap
# On unix, run with    R --vanilla < boot1.R > boot1.out &
# grades.data has 4 columns: ID, Verbal SAT, Math SAT and 1st year GPA

marks <- read.table("grades.data")
n <- length(marks$verbal)
n
marks[1:10,]
obscorr <- cor(marks)
obscorr
# Question: Is the correlation between Verbal SAT and GPA the same as
# the correlation between math SAT and GPA?
# What is the sampling distribution of the difference between correlation
# coefficients?
#
obsdiff <- obscorr[3,1]-obscorr[3,2] # Verbal minus math
obsdiff
# The strategy will be to obtain a 95% bootstrap confidence interval for
# the difference between verbal correlation and math correlation. This
# confidence interval will be approximately centered around the observed
# difference obsdiff = .128.  If the confidence interval does not include
# zero, we will conclude that the observed difference is significantly
# different from zero.

BOOT <- 100  ;  bsdiff <- NULL ; set.seed(9999)
# Accumulate bootstrap values in bsdiff
# For clarity, do operations in several separate steps inside the loop
for(i in 1:BOOT)
    {
    bootmarks <- marks[sample(1:n,replace=TRUE),] # sample rows with
                                                  # replacement
    bcorr <- cor(bootmarks)  # Correlation matrix of bootstrap sample
    bdiffer <- bcorr[3,1]-bcorr[3,2] # Differencce between correlation
                                     # coefficients
    bsdiff <- c(bsdiff,bdiffer) # Add bdiffer to the end of bsdiff
    } # Next bootstrap sample
bsdiff <- sort(bsdiff)
# Now get lower and upper limits of 95% CI
low <- bsdiff[.025*BOOT] ; up <- bsdiff[.975*BOOT + 1]
low ; up
(low+up)/2
obsdiff
write(bsdiff,"bsdiff.dat") # Maybe for later analysis
hist(bsdiff)