STA313 F 2004 Handout 2

Large-Sample Likelihood Ratio Tests with R


Test a single proportion

Let X1, ..., Xn be a random sample from a Bernoulli distribution with parameter theta. Want to test H0: theta=theta0. When theta0=1/2, this is called a sign test. Here is a function that takes the sample proportion theta-hat, the sample size n and theta0 as input, and returns G, df (always 1) and the p-value.


proptest <- function(thetahat,nn,theta0=1/2) 
            # Test whether a sample proportion thetahat differs from theta0; 
            # nn is the sample size
    {
    proptest <- numeric(3)
    names(proptest) <- c("G","df","p-value")
    proptest <- cbind(proptest) # Makes it a 3x1 matrix - nicer for display
    sumx <- nn*thetahat ;xbar <- thetahat
    proptest[1] <- -2*( sumx*log(theta0) + (nn-sumx)*log(1-theta0) ) -
                   -2*( sumx*log(xbar) +   (nn-sumx)*log(1-xbar)   )
    proptest[2] <-1 
    proptest[3] <- 1-pchisq(proptest[1],1)
    proptest
    } # End of function proptest

>
> proptest(.6,100,.75)
            proptest
G       10.823064182
df       1.000000000
p-value  0.001002434
>
> proptest(.6,10,.5)
         proptest
G       0.4027103
df      1.0000000
p-value 0.5256929
>
> proptest(.6,10)
         proptest
G       0.4027103
df      1.0000000
p-value 0.5256929
>
> proptest(.6,100)
          proptest
G       4.02710271
df      1.00000000
p-value 0.04477477
>
> proptest(.6,1000)
            proptest
G       4.027103e+01
df      1.000000e+00
p-value 2.210632e-10
>

Test Difference between two proportions


###########################################################################
# berntest2.R   Two independent Bernoulli samples. Test H0: theta1=theta2 #
#               Execute with   source("berntest2.R")                      #
###########################################################################
#
# Re-use the function BernLL
#
BernLL <- function(theta,xx) # -2 Log likelihood for Bernoulli distribution
                             # Data are in xx
   {
   n <- length(xx) ; sumx <- sum(xx)
   BernLL <- -2*sumx*log(theta) - 2*(n-sumx)*log(1-theta)
   BernLL
   } # end of function BernLL
#
# Read the data. The file bern2.dat has two columns. The first column is
# 1 for Group 1 or 2 for Group 2. The second column has 0s and 1s. We
# could pretend that 1 means the customer bought the product, and 0 means
# she did not.
#
datamat <- read.table("bern2.dat")
alldat <- datamat[,2] # Just the second column
x <- alldat[datamat[,1]==1]
# x gets numbers in the second column such that the number in the first column
# that the number in the first column equals one.
y <- alldat[datamat[,1]==2]

xbar1 <- mean(x) ; xbar2 <- mean(y) # Unrestricted MLEs
xbar <- mean(alldat)                # Restricted MLE
cat("\n")
cat("             Group 1                Group 2 \n")
cat("Thetahat   ",xbar1,"               ",xbar2,"\n")
cat("n          ",length(x),"                      ",length(y),"\n")
cat("\n")
rest <- BernLL(xbar,alldat)                 # -2LL at unrestricted MLE
unrest <- BernLL(xbar1,x) + BernLL(xbar2,y) # -2LL at restricted MLE
G <- rest-unrest ; p <- 1-pchisq(G,1)       # Test statistic & p-value
cat("G = ",G,"   p = ",p,"\n")
cat("\n")
cat("\n")
################# End of program berntest2.R ####################

> source("berntest2.R")

             Group 1                Group 2
Thetahat    0.2333333                 0.08
n           30                        100

G =  4.63282    p =  0.03136596


Two independent Gamma samples: Test H0: alpha1=alpha2

#######################################################################
# gammatest.R   Two independent Gamma samples. Test H0: alpha1=alpha2 #
#               Execute with   source("gammatest.R")                  #
#######################################################################
#
# Define functions
#
GamLL <- function(theta,xx) # -2 Log likelihood for a simgle Gamma distribution
                            # Data are in xx
   {
   alpha <- theta[1] ; beta <- theta[2]
   n <- length(xx) ; sumx <- sum(xx) ; sumlogx <- sum(log(xx))
   GamLL <- 2*n*alpha*log(beta) + 2*n*lgamma(alpha) + 2*sumx/beta -
            2*(alpha-1)*sumlogx
    # Fix up to avoid negative alpha, beta
    # The nlm function will replace Inf with the largest machine value
    if(alpha <= 0) GamLL <- Inf ;  if(beta <= 0) GamLL <- Inf
   GamLL
   } # end of function GamLL

alphaeq <- function(theta,xx,yy)  # Restricted MLE for two independent Gamma
                                  # samples. H0: alpha1=alpha2
    {
    alpha <- theta[1] ; beta1 <- theta[2] ; beta2 <- theta[3]
    n1 <- length(xx) ; n2 <- length(yy)
    sumx <- sum(xx) ; sumy <- sum(yy)
    sumlogx <- sum(log(xx)) ;  sumlogy <- sum(log(yy))
    alphaeq <- 2*alpha*(n1*log(beta1)+n2*log(beta2)) +
               2*(n1+n2)*lgamma(alpha) +
               (2/beta1)*sumx + (2/beta2)*sumy -
               2*(alpha-1)*(sumlogx+sumlogy)
    # Fix up to avoid negative alpha, betas
    # The nlm function will replace Inf with the largest machine value
    if(alpha <= 0) alphaeq <- Inf
    if(beta1 <= 0) alphaeq <- Inf ; if(beta2 <= 0) alphaeq <- Inf
    alphaeq
    } # End of function alphaeq
#
# Read the data
#
datta <- read.table("gammatest.dat")
datta[1:10,]    # Look at first 10 rows
datta[120:125,] # Last 6 rows
x <- datta[,2][datta[,1]==1] # x gets numbers in the second column such
                             # that the number in the first column equals one.
y <- datta[,2][datta[,1]==2]
#
# Compute and print MLEs
#
#    First, unrestricted. Get starting values with method of moments
#
xbar <- mean(x) ; s2 <- var(x)
astart <- xbar^2/s2 ; bstart <- s2/xbar
samp1 <- nlm(GamLL,c(astart,bstart),xx=x)[1:2]
         # First element is minimum value
         # Second element is pair (alphahat,betahat)
xbar <- mean(y) ; s2 <- var(y)
astart <- xbar^2/s2 ; bstart <- s2/xbar
samp2 <- nlm(GamLL,c(astart,bstart),xx=y)[1:2]
alphahat1 <- samp1$estimate[1] ; betahat1 <- samp1$estimate[2]
alphahat2 <- samp2$estimate[1] ; betahat2 <- samp2$estimate[2]
unrest <- samp1$minimum + samp2$minimum # -2LL at unrestricted MLE
#
#     Now restricted. Use crude modification of the unrestricted MLEs
#     as starting values.
#
astart <- (samp1$estimate[1]+samp2$estimate[1])/2 # Mean of unrestricted MLEs
b1start <- samp1$estimate[2] ; b2start <- samp1$estimate[2]
together <- nlm(alphaeq,c(astart,b1start,b2start),xx=x,yy=y)[1:2]
alphahathat <- together$estimate[1] ; betahathat1 <- together$estimate[2]
betahathat2 <- together$estimate[3]
rest <- together$minimum # -2LL at restricted MLE
#
cat("\n")
cat("Unrestricted MLE: \n")
cat("  alphahat1 = ",alphahat1," betahat1 = ",betahat1,"\n")
cat("  alphahat2 = ",alphahat2," betahat2 = ",betahat2,"\n")
cat("        -2LL at this parameter value = ",unrest,"\n")
cat("\n")
cat("\n")
cat("Restricted MLE: \n")
cat("  alphahathat = ",alphahathat,"\n")
cat("  betahathat1 = ",betahathat1,"  betahathat2 = ",betahathat2,"\n")
cat("        -2LL at this parameter value = ",rest,"\n")
cat("\n")
cat("\n")
# Test statistic & p-value
G <- rest-unrest ; p <- 1-pchisq(G,1)
cat("G = ",G,"   p = ",p,"\n")
cat("\n")
cat("\n")
################# End of program gammatest.R ####################

Here is the output.

> source("gammatest.R")

Unrestricted MLE:
  alphahat1 =  1.204890  betahat1 =  2.693892
  alphahat2 =  1.631417  betahat2 =  3.544673
        -2LL at this parameter value =  620.5435

Restricted MLE:
  alphahathat =  1.426550
  betahathat1 =  2.27531   betahathat2 =  4.053725
        -2LL at this parameter value =  622.253

G =  1.709518    p =  0.1910479

Warning messages:
1: NA/Inf replaced by maximum positive value
2: NaNs produced in: log(x)
3: NA/Inf replaced by maximum positive value