STA313 F 2004 Handout 2
Large-Sample Likelihood Ratio Tests with R
Let X1, ..., Xn be a random sample from a Bernoulli distribution with parameter theta. Want to test H0: theta=theta0. When theta0=1/2, this is called a sign test. Here is a function that takes the sample proportion theta-hat, the sample size n and theta0 as input, and returns G, df (always 1) and the p-value.
proptest <- function(thetahat,nn,theta0=1/2) # Test whether a sample proportion thetahat differs from theta0; # nn is the sample size { proptest <- numeric(3) names(proptest) <- c("G","df","p-value") proptest <- cbind(proptest) # Makes it a 3x1 matrix - nicer for display sumx <- nn*thetahat ;xbar <- thetahat proptest[1] <- -2*( sumx*log(theta0) + (nn-sumx)*log(1-theta0) ) - -2*( sumx*log(xbar) + (nn-sumx)*log(1-xbar) ) proptest[2] <-1 proptest[3] <- 1-pchisq(proptest[1],1) proptest } # End of function proptest > > proptest(.6,100,.75) proptest G 10.823064182 df 1.000000000 p-value 0.001002434 > > proptest(.6,10,.5) proptest G 0.4027103 df 1.0000000 p-value 0.5256929 > > proptest(.6,10) proptest G 0.4027103 df 1.0000000 p-value 0.5256929 > > proptest(.6,100) proptest G 4.02710271 df 1.00000000 p-value 0.04477477 > > proptest(.6,1000) proptest G 4.027103e+01 df 1.000000e+00 p-value 2.210632e-10 >
########################################################################### # berntest2.R Two independent Bernoulli samples. Test H0: theta1=theta2 # # Execute with source("berntest2.R") # ########################################################################### # # Re-use the function BernLL # BernLL <- function(theta,xx) # -2 Log likelihood for Bernoulli distribution # Data are in xx { n <- length(xx) ; sumx <- sum(xx) BernLL <- -2*sumx*log(theta) - 2*(n-sumx)*log(1-theta) BernLL } # end of function BernLL # # Read the data. The file bern2.dat has two columns. The first column is # 1 for Group 1 or 2 for Group 2. The second column has 0s and 1s. We # could pretend that 1 means the customer bought the product, and 0 means # she did not. # datamat <- read.table("bern2.dat") alldat <- datamat[,2] # Just the second column x <- alldat[datamat[,1]==1] # x gets numbers in the second column such that the number in the first column # that the number in the first column equals one. y <- alldat[datamat[,1]==2] xbar1 <- mean(x) ; xbar2 <- mean(y) # Unrestricted MLEs xbar <- mean(alldat) # Restricted MLE cat("\n") cat(" Group 1 Group 2 \n") cat("Thetahat ",xbar1," ",xbar2,"\n") cat("n ",length(x)," ",length(y),"\n") cat("\n") rest <- BernLL(xbar,alldat) # -2LL at unrestricted MLE unrest <- BernLL(xbar1,x) + BernLL(xbar2,y) # -2LL at restricted MLE G <- rest-unrest ; p <- 1-pchisq(G,1) # Test statistic & p-value cat("G = ",G," p = ",p,"\n") cat("\n") cat("\n") ################# End of program berntest2.R #################### > source("berntest2.R") Group 1 Group 2 Thetahat 0.2333333 0.08 n 30 100 G = 4.63282 p = 0.03136596
####################################################################### # gammatest.R Two independent Gamma samples. Test H0: alpha1=alpha2 # # Execute with source("gammatest.R") # ####################################################################### # # Define functions # GamLL <- function(theta,xx) # -2 Log likelihood for a simgle Gamma distribution # Data are in xx { alpha <- theta[1] ; beta <- theta[2] n <- length(xx) ; sumx <- sum(xx) ; sumlogx <- sum(log(xx)) GamLL <- 2*n*alpha*log(beta) + 2*n*lgamma(alpha) + 2*sumx/beta - 2*(alpha-1)*sumlogx # Fix up to avoid negative alpha, beta # The nlm function will replace Inf with the largest machine value if(alpha <= 0) GamLL <- Inf ; if(beta <= 0) GamLL <- Inf GamLL } # end of function GamLL alphaeq <- function(theta,xx,yy) # Restricted MLE for two independent Gamma # samples. H0: alpha1=alpha2 { alpha <- theta[1] ; beta1 <- theta[2] ; beta2 <- theta[3] n1 <- length(xx) ; n2 <- length(yy) sumx <- sum(xx) ; sumy <- sum(yy) sumlogx <- sum(log(xx)) ; sumlogy <- sum(log(yy)) alphaeq <- 2*alpha*(n1*log(beta1)+n2*log(beta2)) + 2*(n1+n2)*lgamma(alpha) + (2/beta1)*sumx + (2/beta2)*sumy - 2*(alpha-1)*(sumlogx+sumlogy) # Fix up to avoid negative alpha, betas # The nlm function will replace Inf with the largest machine value if(alpha <= 0) alphaeq <- Inf if(beta1 <= 0) alphaeq <- Inf ; if(beta2 <= 0) alphaeq <- Inf alphaeq } # End of function alphaeq # # Read the data # datta <- read.table("gammatest.dat") datta[1:10,] # Look at first 10 rows datta[120:125,] # Last 6 rows x <- datta[,2][datta[,1]==1] # x gets numbers in the second column such # that the number in the first column equals one. y <- datta[,2][datta[,1]==2] # # Compute and print MLEs # # First, unrestricted. Get starting values with method of moments # xbar <- mean(x) ; s2 <- var(x) astart <- xbar^2/s2 ; bstart <- s2/xbar samp1 <- nlm(GamLL,c(astart,bstart),xx=x)[1:2] # First element is minimum value # Second element is pair (alphahat,betahat) xbar <- mean(y) ; s2 <- var(y) astart <- xbar^2/s2 ; bstart <- s2/xbar samp2 <- nlm(GamLL,c(astart,bstart),xx=y)[1:2] alphahat1 <- samp1$estimate[1] ; betahat1 <- samp1$estimate[2] alphahat2 <- samp2$estimate[1] ; betahat2 <- samp2$estimate[2] unrest <- samp1$minimum + samp2$minimum # -2LL at unrestricted MLE # # Now restricted. Use crude modification of the unrestricted MLEs # as starting values. # astart <- (samp1$estimate[1]+samp2$estimate[1])/2 # Mean of unrestricted MLEs b1start <- samp1$estimate[2] ; b2start <- samp1$estimate[2] together <- nlm(alphaeq,c(astart,b1start,b2start),xx=x,yy=y)[1:2] alphahathat <- together$estimate[1] ; betahathat1 <- together$estimate[2] betahathat2 <- together$estimate[3] rest <- together$minimum # -2LL at restricted MLE # cat("\n") cat("Unrestricted MLE: \n") cat(" alphahat1 = ",alphahat1," betahat1 = ",betahat1,"\n") cat(" alphahat2 = ",alphahat2," betahat2 = ",betahat2,"\n") cat(" -2LL at this parameter value = ",unrest,"\n") cat("\n") cat("\n") cat("Restricted MLE: \n") cat(" alphahathat = ",alphahathat,"\n") cat(" betahathat1 = ",betahathat1," betahathat2 = ",betahathat2,"\n") cat(" -2LL at this parameter value = ",rest,"\n") cat("\n") cat("\n") # Test statistic & p-value G <- rest-unrest ; p <- 1-pchisq(G,1) cat("G = ",G," p = ",p,"\n") cat("\n") cat("\n") ################# End of program gammatest.R ####################
Here is the output.
> source("gammatest.R") Unrestricted MLE: alphahat1 = 1.204890 betahat1 = 2.693892 alphahat2 = 1.631417 betahat2 = 3.544673 -2LL at this parameter value = 620.5435 Restricted MLE: alphahathat = 1.426550 betahathat1 = 2.27531 betahathat2 = 4.053725 -2LL at this parameter value = 622.253 G = 1.709518 p = 0.1910479 Warning messages: 1: NA/Inf replaced by maximum positive value 2: NaNs produced in: log(x) 3: NA/Inf replaced by maximum positive value