Power Calculation Handout 1


Approximating Probabilities

Here are some power calculations with S, for the following problem. This handout should make sense in conjunction with the lecture. Otherwise, it probably will not.

The infant mortality rate for babies with birth weight under 1 kg. is thought to be approximately 30%. A new treatment is expected to cut this in half. If a randomized clinical trial with treatment and control groups is run to confirm the effectiveness of the treatment, what sample size is required?



> 
> g <- function(x) # Assign k first
+    {
+    theta <- .15*(x/k+1)
+    g <- 1.645*sqrt( theta*(1-theta)*k/(x*(k-x)) )
+    g <- (g-.15)/sqrt( .21/x + .1275/(k-x) )
+    g
+    }
> 
> k <- 100 ; n1 <- seq(from=10,to=k-10,by=10)
> cbind(n1,g(n1))
             [,1]        [,2]
 [1,]          10  0.35753383
 [2,]          20  0.07271961
 [3,]          30 -0.08279654
 [4,]          40 -0.15408311
 [5,]          50 -0.15354993
 [6,]          60 -0.08061208
 [7,]          70  0.07587374
 [8,]          80  0.34340572
 [9,]          90  0.79409033
> 
> # Not minimal at 50, but pretty close. Try larger sample.
> 
> k <- 200 ; n1 <- seq(from=10,to=k-10,by=10)
> cbind(n1,g(n1))
             [,1]        [,2]
 [1,]          10  0.30170864
 [2,]          20 -0.05744893
 [3,]          30 -0.30583290
 [4,]          40 -0.49226300
 [5,]          50 -0.63536844
 [6,]          60 -0.74432131
 [7,]          70 -0.82413290
 [8,]          80 -0.87757649
 [9,]          90 -0.90601786
[10,]         100 -0.90979697
[11,]         110 -0.88838338
[12,]         120 -0.84038476
[13,]         130 -0.76342183
[14,]         140 -0.65383000
[15,]         150 -0.50606772
[16,]         160 -0.31152343
[17,]         170 -0.05585308
[18,]         180  0.28818773
[19,]         190  0.78830154
> 
> 
> cbind(90:110,g(90:110))
            [,1]       [,2]
 [1,]         90 -0.9060179
 [2,]         91 -0.9075077
 [3,]         92 -0.9087515
 [4,]         93 -0.9097490
 [5,]         94 -0.9104999
 [6,]         95 -0.9110037
 [7,]         96 -0.9112600
 [8,]         97 -0.9112681
 [9,]         98 -0.9110275
[10,]         99 -0.9105374
[11,]        100 -0.9097970
[12,]        101 -0.9088053
[13,]        102 -0.9075614
[14,]        103 -0.9060643
[15,]        104 -0.9043126
[16,]        105 -0.9023053
[17,]        106 -0.9000409
[18,]        107 -0.8975181
[19,]        108 -0.8947353
[20,]        109 -0.8916910
[21,]        110 -0.8883834
> 
> # Minimal at 97 -- not too bad. So we'll assume equal n's and 
> # then re-visit the issue after we select a total sample size.


> 
> power1 <- function(n)
+     {
+     n1 <- n/2 ; n2 <- n/2
+     theta <- (.3*n1+.15*n2)/(n1+n2)
+     num <-  1.645*sqrt(theta*(1-theta)*(1/n1+1/n2))   -.15
+     den <- sqrt( .21/n1 + .1275/n2 )
+     power1 <- 1-pnorm(num/den)
+     power1
+     }
> nvec <- seq(from=50,to=300,by=10) 
> cbind(nvec,power1(nvec))
           [,1]      [,2]
 [1,]        50 0.3515284
 [2,]        60 0.3982118
 [3,]        70 0.4424870
 [4,]        80 0.4843660
 [5,]        90 0.5238660
 [6,]       100 0.5610177
 [7,]       110 0.5958665
 [8,]       120 0.6284716
 [9,]       130 0.6589041
[10,]       140 0.6872445
[11,]       150 0.7135808
[12,]       160 0.7380057
[13,]       170 0.7606156
[14,]       180 0.7815085
[15,]       190 0.8007825
[16,]       200 0.8185352
[17,]       210 0.8348623
[18,]       220 0.8498571
[19,]       230 0.8636099
[20,]       240 0.8762075
[21,]       250 0.8877329
[22,]       260 0.8982652
[23,]       270 0.9078792
[24,]       280 0.9166459
[25,]       290 0.9246317
[26,]       300 0.9318992
> 
> cbind(180:190,power1(180:190))
           [,1]      [,2]
 [1,]       180 0.7815085
 [2,]       181 0.7835071
 [3,]       182 0.7854896
 [4,]       183 0.7874562
 [5,]       184 0.7894068
 [6,]       185 0.7913417
 [7,]       186 0.7932609
 [8,]       187 0.7951644
 [9,]       188 0.7970525
[10,]       189 0.7989252
[11,]       190 0.8007825
> 
> 
> k <- 190 ; cbind(90:100,g(90:100))
            [,1]       [,2]
 [1,]         90 -0.8460242
 [2,]         91 -0.8462407
 [3,]         92 -0.8461893
 [4,]         93 -0.8458692
 [5,]         94 -0.8452796
 [6,]         95 -0.8444196
 [7,]         96 -0.8432881
 [8,]         97 -0.8418840
 [9,]         98 -0.8402061
[10,]         99 -0.8382531
[11,]        100 -0.8360235
> # So n1 = 91 and n2 = 99 may be best. How much better than 0.8007825?
> power1a <- function(n1,n2)
+     {
+     theta <- (.3*n1+.15*n2)/(n1+n2)
+     num <-  1.645*sqrt(theta*(1-theta)*(1/n1+1/n2))   -.15
+     den <- sqrt( .21/n1 + .1275/n2 )
+     power1a <- 1-pnorm(num/den)
+     power1a
+     }

> power1a(91,99)
[1] 0.8012908
> Not much better at all. n1=n2=95 is okay.


> # This is approximate power. Check with simulation
> n1 <- 95 ; n2 <- 95
> nsim <- 1000 # Simulations
> theta1 <- .3 ; theta2 <- .15
> xbar <- rbinom(nsim,n1,theta1)/n1 
> ybar <- rbinom(nsim,n2,theta2)/n2
> m <- (xbar*n1+ybar*n2)/(n1+n2)
> Z <- (xbar-ybar) / sqrt(m*(1-m)*(1/n1+1/n2))
> power <- length(Z[Z>1.645])/nsim ; power
[1] 0.828


> nsim <- 10000 # Simulations
> theta1 <- .3 ; theta2 <- .15
> xbar <- rbinom(nsim,n1,theta1)/n1 
> ybar <- rbinom(nsim,n2,theta2)/n2
> m <- (xbar*n1+ybar*n2)/(n1+n2)
> Z <- (xbar-ybar) / sqrt(m*(1-m)*(1/n1+1/n2))
> power <- length(Z[Z>1.645])/nsim ; power
[1] 0.8074