######################################################### # Wine1.R.txt Repeated measures randomization test: # # Compare F = 57.5, df=3,15, p<.0001 # ######################################################### ####################################################################### # Define margin of error functions merror <- function(phat,M,alpha) # (1-alpha)*100% merror for a proportion { z <- qnorm(1-alpha/2) merror <- z * sqrt(phat*(1-phat)/M) # M is (Monte Carlo) sample size merror } mmargin <- function(p,cc,alpha) # Choose m to get (1-alpha)*100% margin of error equal to cc # If true p-value is p { mmargin <- p*(1-p)*qnorm(1-alpha/2)^2/cc^2 mmargin <- trunc(mmargin+1) # Round up to next integer mmargin } # End definition of function mmargin ####################################################################### wine <- read.table("Wine.data",header=T) means <- apply(wine[,2:5],2,mean) ; means T1 <- var(means) ; T1 # This will be our test statistic # How many values are n the permutation distribution? # (4!)^6 = 24^6 = 191,102,976 Could do them all, but ... # How many random permutations should we use? # If true p-value is 0.06, want 99% margin of error for estimated p to be # less than 0.01 mmargin(p=0.06,cc=0.01,alpha=0.01) M <- 3800 winemat <- as.matrix(wine)[,2:5] # Data frames are funny - can't # randomize rows separately wine; winemat # Before doing it, show how it works simdat <- NULL for(j in 1:6) simdat <- rbind(simdat,sample(winemat[j,])) simdat T1 ; var(apply(simdat,2,mean)) # Okay, here we go. t1rand <- numeric(M) # Save random T1 values here. for(i in 1:M) { simdat <- NULL for(j in 1:6) simdat <- rbind(simdat,sample(winemat[j,])) t1rand[i] <- var(apply(simdat,2,mean)) } hist(t1rand) length(t1rand[t1rand>T1]) # Oh well means # Compare wines 1 and 2 T2 <- abs(means[1]-means[2]) ; T2 winemat <- as.matrix(wine)[,2:3] ; # Just cols 2 and 3 winemat 2^6 # Could do them all 1:5/64 # Only the top 3 could be significant. Look at winemat. It is in a # 4-way tie for first place. Ouch. Proportion Greater than or equal # to 2 (results this good or better) is 4/64 = 0.0625, not significant. # Do by simulation as an example. t2rand <- numeric(M) # Save random T2 values here. for(i in 1:M) { simdat <- NULL for(j in 1:6) simdat <- rbind(simdat,sample(winemat[j,])) meanz <- apply(simdat,2,mean) t2rand[i] <- abs(meanz[1]-meanz[2]) } hist(t2rand) randp <- length(t2rand[t2rand >= T2])/M margin <- merror(randp,M,.01) cat ("\n") cat ("Randomization p-value = ",randp,"\n") cat("99% CI from ",(randp-margin)," to ",(randp+margin),"\n") cat ("\n")