STA313 F 2004 Handout 2
Large-Sample Likelihood Ratio Tests with R
Let X1, ..., Xn be a random sample from a Bernoulli distribution with parameter theta. Want to test H0: theta=theta0. When theta0=1/2, this is called a sign test. Here is a function that takes the sample proportion theta-hat, the sample size n and theta0 as input, and returns G, df (always 1) and the p-value.
proptest <- function(thetahat,nn,theta0=1/2)
# Test whether a sample proportion thetahat differs from theta0;
# nn is the sample size
{
proptest <- numeric(3)
names(proptest) <- c("G","df","p-value")
proptest <- cbind(proptest) # Makes it a 3x1 matrix - nicer for display
sumx <- nn*thetahat ;xbar <- thetahat
proptest[1] <- -2*( sumx*log(theta0) + (nn-sumx)*log(1-theta0) ) -
-2*( sumx*log(xbar) + (nn-sumx)*log(1-xbar) )
proptest[2] <-1
proptest[3] <- 1-pchisq(proptest[1],1)
proptest
} # End of function proptest
>
> proptest(.6,100,.75)
proptest
G 10.823064182
df 1.000000000
p-value 0.001002434
>
> proptest(.6,10,.5)
proptest
G 0.4027103
df 1.0000000
p-value 0.5256929
>
> proptest(.6,10)
proptest
G 0.4027103
df 1.0000000
p-value 0.5256929
>
> proptest(.6,100)
proptest
G 4.02710271
df 1.00000000
p-value 0.04477477
>
> proptest(.6,1000)
proptest
G 4.027103e+01
df 1.000000e+00
p-value 2.210632e-10
>
###########################################################################
# berntest2.R Two independent Bernoulli samples. Test H0: theta1=theta2 #
# Execute with source("berntest2.R") #
###########################################################################
#
# Re-use the function BernLL
#
BernLL <- function(theta,xx) # -2 Log likelihood for Bernoulli distribution
# Data are in xx
{
n <- length(xx) ; sumx <- sum(xx)
BernLL <- -2*sumx*log(theta) - 2*(n-sumx)*log(1-theta)
BernLL
} # end of function BernLL
#
# Read the data. The file bern2.dat has two columns. The first column is
# 1 for Group 1 or 2 for Group 2. The second column has 0s and 1s. We
# could pretend that 1 means the customer bought the product, and 0 means
# she did not.
#
datamat <- read.table("bern2.dat")
alldat <- datamat[,2] # Just the second column
x <- alldat[datamat[,1]==1]
# x gets numbers in the second column such that the number in the first column
# that the number in the first column equals one.
y <- alldat[datamat[,1]==2]
xbar1 <- mean(x) ; xbar2 <- mean(y) # Unrestricted MLEs
xbar <- mean(alldat) # Restricted MLE
cat("\n")
cat(" Group 1 Group 2 \n")
cat("Thetahat ",xbar1," ",xbar2,"\n")
cat("n ",length(x)," ",length(y),"\n")
cat("\n")
rest <- BernLL(xbar,alldat) # -2LL at unrestricted MLE
unrest <- BernLL(xbar1,x) + BernLL(xbar2,y) # -2LL at restricted MLE
G <- rest-unrest ; p <- 1-pchisq(G,1) # Test statistic & p-value
cat("G = ",G," p = ",p,"\n")
cat("\n")
cat("\n")
################# End of program berntest2.R ####################
> source("berntest2.R")
Group 1 Group 2
Thetahat 0.2333333 0.08
n 30 100
G = 4.63282 p = 0.03136596
#######################################################################
# gammatest.R Two independent Gamma samples. Test H0: alpha1=alpha2 #
# Execute with source("gammatest.R") #
#######################################################################
#
# Define functions
#
GamLL <- function(theta,xx) # -2 Log likelihood for a simgle Gamma distribution
# Data are in xx
{
alpha <- theta[1] ; beta <- theta[2]
n <- length(xx) ; sumx <- sum(xx) ; sumlogx <- sum(log(xx))
GamLL <- 2*n*alpha*log(beta) + 2*n*lgamma(alpha) + 2*sumx/beta -
2*(alpha-1)*sumlogx
# Fix up to avoid negative alpha, beta
# The nlm function will replace Inf with the largest machine value
if(alpha <= 0) GamLL <- Inf ; if(beta <= 0) GamLL <- Inf
GamLL
} # end of function GamLL
alphaeq <- function(theta,xx,yy) # Restricted MLE for two independent Gamma
# samples. H0: alpha1=alpha2
{
alpha <- theta[1] ; beta1 <- theta[2] ; beta2 <- theta[3]
n1 <- length(xx) ; n2 <- length(yy)
sumx <- sum(xx) ; sumy <- sum(yy)
sumlogx <- sum(log(xx)) ; sumlogy <- sum(log(yy))
alphaeq <- 2*alpha*(n1*log(beta1)+n2*log(beta2)) +
2*(n1+n2)*lgamma(alpha) +
(2/beta1)*sumx + (2/beta2)*sumy -
2*(alpha-1)*(sumlogx+sumlogy)
# Fix up to avoid negative alpha, betas
# The nlm function will replace Inf with the largest machine value
if(alpha <= 0) alphaeq <- Inf
if(beta1 <= 0) alphaeq <- Inf ; if(beta2 <= 0) alphaeq <- Inf
alphaeq
} # End of function alphaeq
#
# Read the data
#
datta <- read.table("gammatest.dat")
datta[1:10,] # Look at first 10 rows
datta[120:125,] # Last 6 rows
x <- datta[,2][datta[,1]==1] # x gets numbers in the second column such
# that the number in the first column equals one.
y <- datta[,2][datta[,1]==2]
#
# Compute and print MLEs
#
# First, unrestricted. Get starting values with method of moments
#
xbar <- mean(x) ; s2 <- var(x)
astart <- xbar^2/s2 ; bstart <- s2/xbar
samp1 <- nlm(GamLL,c(astart,bstart),xx=x)[1:2]
# First element is minimum value
# Second element is pair (alphahat,betahat)
xbar <- mean(y) ; s2 <- var(y)
astart <- xbar^2/s2 ; bstart <- s2/xbar
samp2 <- nlm(GamLL,c(astart,bstart),xx=y)[1:2]
alphahat1 <- samp1$estimate[1] ; betahat1 <- samp1$estimate[2]
alphahat2 <- samp2$estimate[1] ; betahat2 <- samp2$estimate[2]
unrest <- samp1$minimum + samp2$minimum # -2LL at unrestricted MLE
#
# Now restricted. Use crude modification of the unrestricted MLEs
# as starting values.
#
astart <- (samp1$estimate[1]+samp2$estimate[1])/2 # Mean of unrestricted MLEs
b1start <- samp1$estimate[2] ; b2start <- samp1$estimate[2]
together <- nlm(alphaeq,c(astart,b1start,b2start),xx=x,yy=y)[1:2]
alphahathat <- together$estimate[1] ; betahathat1 <- together$estimate[2]
betahathat2 <- together$estimate[3]
rest <- together$minimum # -2LL at restricted MLE
#
cat("\n")
cat("Unrestricted MLE: \n")
cat(" alphahat1 = ",alphahat1," betahat1 = ",betahat1,"\n")
cat(" alphahat2 = ",alphahat2," betahat2 = ",betahat2,"\n")
cat(" -2LL at this parameter value = ",unrest,"\n")
cat("\n")
cat("\n")
cat("Restricted MLE: \n")
cat(" alphahathat = ",alphahathat,"\n")
cat(" betahathat1 = ",betahathat1," betahathat2 = ",betahathat2,"\n")
cat(" -2LL at this parameter value = ",rest,"\n")
cat("\n")
cat("\n")
# Test statistic & p-value
G <- rest-unrest ; p <- 1-pchisq(G,1)
cat("G = ",G," p = ",p,"\n")
cat("\n")
cat("\n")
################# End of program gammatest.R ####################
Here is the output.
> source("gammatest.R")
Unrestricted MLE:
alphahat1 = 1.204890 betahat1 = 2.693892
alphahat2 = 1.631417 betahat2 = 3.544673
-2LL at this parameter value = 620.5435
Restricted MLE:
alphahathat = 1.426550
betahathat1 = 2.27531 betahathat2 = 4.053725
-2LL at this parameter value = 622.253
G = 1.709518 p = 0.1910479
Warning messages:
1: NA/Inf replaced by maximum positive value
2: NaNs produced in: log(x)
3: NA/Inf replaced by maximum positive value