Permutation Test Handout One


Suppose we want to test differences between means. We have a small sample and are unable to justify parametric assumptions. Permutation tests are distribution free under H0. Here the null hypothesis will be that the two samples come from the same distribution. There are lot of alternatives hypotheses. You pick the test statistic to suit the one(s) you believe in. You believe the means may be different, so ...


expl <- rexp(5) ; contr <- rnorm(2,3,1) 
group <- c(1,1,1,1,1,2,2) ; dv <- c(expl,contr)
data <- cbind(group,dv) ; data
     group        dv
[1,]     1 2.0520230
[2,]     1 0.2533341
[3,]     1 1.7989084
[4,]     1 1.7394712
[5,]     1 1.5314931
[6,]     2 1.8975719
[7,]     2 6.0948437

summary(lm(dv ~ group))

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)   -1.046      1.677  -0.624   0.5600
group          2.521      1.230   2.049   0.0957 .

mdiff <- mean(expl)-mean(contr) ; mdiff

[1] -2.521162


> indices # all 7 choose 2 combinations

      [,1] [,2] [,3] [,4] [,5] [,6] [,7]
 [1,]    2    2    1    1    1    1    1
 [2,]    2    1    2    1    1    1    1
 [3,]    2    1    1    2    1    1    1
 [4,]    2    1    1    1    2    1    1
 [5,]    2    1    1    1    1    2    1
 [6,]    2    1    1    1    1    1    2
 [7,]    1    2    2    1    1    1    1
 [8,]    1    2    1    2    1    1    1
 [9,]    1    2    1    1    2    1    1
[10,]    1    2    1    1    1    2    1
[11,]    1    2    1    1    1    1    2
[12,]    1    1    2    2    1    1    1
[13,]    1    1    2    1    2    1    1
[14,]    1    1    2    1    1    2    1
[15,]    1    1    2    1    1    1    2
[16,]    1    1    1    2    2    1    1
[17,]    1    1    1    2    1    2    1
[18,]    1    1    1    2    1    1    2
[19,]    1    1    1    1    2    2    1
[20,]    1    1    1    1    2    1    2
[21,]    1    1    1    1    1    2    2

pdist <- numeric(21)
for(i in 1:21) pdist[i] <- 
               mean(dv[indices[i,]==1]) - mean(dv[indices[i,]==2])


> sort(pdist)
 [1] -2.6292776 -2.5211619 -2.4520974 -2.4104913 -2.2649066 -1.3701954
 [7]  0.3088126  0.3778771  0.4194831  0.4859929  0.5275989  0.5650678
[13]  0.5966634  0.6731836  0.7422481  0.7838541  1.4597791  1.5678948
[19]  1.6369593  1.6785654  1.8241501
> 2/21
[1] 0.0952381


Now an example where getting the exact permutation distribution is not feasible.


> expl <- rnorm(80,8,3) ; contr <- rpois(225,8)
> mean(expl) ; var(expl)
[1] 7.847295
[1] 9.690027
> mean(contr) ; var(contr)
[1] 7.915556
[1] 7.783016
> var(expl)/var(contr)
[1] 1.245022


> group <- c(numeric(80)+1,numeric(225)+2) ; dv <- c(expl,contr)
> F <- var(dv[group==1])/var(dv[group==2]) ; F
[1] 1.245022

> m <- 1000; rperm <- numeric(m) # How SHOULD we choose MC sample size?
> for(sim in 1:m)
+     {
+     rdat <- sample(dv) # Sample whole thing without replacement
+     v1 <- var(rdat[group==1]) ; v2 <- var(rdat[group==2])
+     Frand <- max(v1,v2)/min(v1,v2) # Use upper tail
+     rperm[sim] <- Frand
+     }# Next sim
>
> pval <- length(rperm[rperm>=F])/m
> cat("\n F = ",F," and p = ",pval," based on ", m, " randomizations.\n")

 F =  1.245022  and p =  0.256  based on  1000  randomizations.