STA313 F 2004 Handout 1: MLE with R
Here are a couple of command files that do numerical maximum likelihood, as follows: 1.Define the function that computes the likelihood, or log likelihood, or minus log likelihood or whatever. 2.How we got the data into R -- usually a scan statement. 3.Listing of the data 4.The nlm statement and resulting output. 5.Calculation of the MLE using an explicit formula, if this is possible. # ex1.R Bernoulli MLE (Example 1): true theta = 1/10 # execute with source("ex1.R") at the R prompt cat("\n") cat("1. Define the function BernLL \n") BernLL <- function(theta,x) # -2 Log likelihood for Bernoulli distribution # Data are in x { n <- length(x) ; sumx <- sum(x) BernLL <- -2*sumx*log(theta) - 2*(n-sumx)*log(1-theta) BernLL } # end of function BernLL print(BernLL) cat("\n") cat("2. Read the data \n") ex1dat <- scan("ex1.dat") cat("\n") cat("3. Print the data \n") print(ex1dat) cat("\n") cat("4. Minimize the function with nlm \n") print(nlm(BernLL,.5,x=ex1dat)) cat("\n") cat("5. Compare exact MLE \n") print(mean(ex1dat)) # Another way to do it. The output of nlm (like most R output) # is a list of matrices, and this list caqn be assigned toa variable # so you can get at it easliy. This is very unlike most stats packages. # Bernsearch <- nlm(BernLL,.5,x=ex1dat) # print(Bernsearch) # thetahat <- Bernsearch$estimate ; print(thetahat) ##################### End of ex1.r ############################## > source("ex1.R") 1. Define the function BernLL function(theta,x) # -2 Log likelihood for Bernoulli distribution # Data are in x { n <- length(x) ; sumx <- sum(x) BernLL <- -2*sumx*log(theta) - 2*(n-sumx)*log(1-theta) BernLL } # end of function BernLL 2. Read the data Read 100 items 3. Print the data [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 1 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 [38] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 [75] 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 4. Minimize the function with nlm $minimum [1] 55.75387 $estimate [1] 0.0799995 $gradient [1] 1.210765e-05 $code [1] 1 $iterations [1] 8 In addition: Warning messages: 1: NaNs produced in: log(x) 2: NA/Inf replaced by maximum positive value 3: NaNs produced in: log(x) 4: NA/Inf replaced by maximum positive value 5: NaNs produced in: log(x) 6: NA/Inf replaced by maximum positive value 7: NaNs produced in: log(x) 8: NA/Inf replaced by maximum positive value # ex2.R Gamma MLE (Example 2) # Run from the unix prompt with R --vanilla < ex2.R > ex2.out # # 1. Define the function GamLL <- function(theta,x) # -2 Log likelihood for Gamma distribution # Data are in x { alpha <- theta[1] ; beta <- theta[2] n <- length(x) ; sumx <- sum(x) ; sumlogx <- sum(log(x)) GamLL <- 2*n*alpha*log(beta) + 2*n*lgamma(alpha) + 2*sumx/beta - 2*(alpha-1)*sumlogx GamLL } # end of function GamLL # # 2. Read the data gamdat <- scan("ex2.dat") # 3. Print the data print(gamdat) # # 4. Minimize the function with nlm. Except, watch out! # The starting value can matter. # print(nlm(GamLL,c(1,1),x=gamdat)[1:2]) print(nlm(GamLL,c(10,10),x=gamdat)[1:2]) print(nlm(GamLL,c(100,100),x=gamdat)[1:2]) # Need good starting values - Try method of moments. xbar <- mean(gamdat) ; s2 <- var(gamdat) astart <- xbar^2/s2 ; bstart <- s2/xbar print(nlm(GamLL,c(astart,bstart),x=gamdat)[1:2]) # # 5. There is no explicit formula for the MLE, but we can look at # the Method of Moments estimates astart and bstart. astart ; bstart # Invisible with source function. ##################### End of ex2.R ############################## tuzo.erin > R --vanilla < ex2.R > ex2.out tuzo.erin > cat ex2.out R : Copyright 2001, The R Development Core Team Version 1.4.0 (2001-12-19) > # ex2.R Gamma MLE (Example 2) > # Run from the unix prompt with R --vanilla < ex2.R > ex2.out > # > # 1. Define the function > GamLL <- function(theta,x) # -2 Log likelihood for Gamma distribution + # Data are in x + { + alpha <- theta[1] ; beta <- theta[2] + n <- length(x) ; sumx <- sum(x) ; sumlogx <- sum(log(x)) + GamLL <- 2*n*alpha*log(beta) + 2*n*lgamma(alpha) + 2*sumx/beta - + 2*(alpha-1)*sumlogx + GamLL + } # end of function GamLL > # > # 2. Read the data > gamdat <- scan("ex2.dat") Read 25 items > # 3. Print the data > print(gamdat) [1] 310.3775 389.4431 378.3313 691.3620 359.8532 352.0103 569.4055 573.7330 [9] 362.8067 287.6562 450.0208 456.9386 362.7775 258.0109 442.6085 492.4364 [17] 375.2044 464.7285 468.0111 400.6817 238.6555 469.2342 551.1889 434.2065 [25] 303.7339 > # > # 4. Minimize the function with nlm. Except, watch out! > # The starting value can matter. > # > print(nlm(GamLL,c(1,1),x=gamdat)[1:2]) $minimum [1] 4249.545 $estimate [1] 23.32949 1415.03727 > print(nlm(GamLL,c(10,10),x=gamdat)[1:2]) $minimum [1] 302.0773 $estimate [1] 16.16075 25.84882 > print(nlm(GamLL,c(100,100),x=gamdat)[1:2]) $minimum [1] -905203624 $estimate [1] -901316.8 645789.8 > # Need good starting values - Try method of moments. > xbar <- mean(gamdat) ; s2 <- var(gamdat) > astart <- xbar^2/s2 ; bstart <- s2/xbar > print(nlm(GamLL,c(astart,bstart),x=gamdat)[1:2]) $minimum [1] 302.0773 $estimate [1] 16.16085 25.84867 > # > # 5. There is no explicit formula for the MLE, but we can look at > # the Method of Moments estimates astart and bstart. > astart ; bstart # Invisible with source function. [1] 15.30497 [1] 27.29418 > ##################### End of ex2.R ##############################