R Intro

STA313 F 2004 Handout 1: MLE with R
Here are a couple of command files that do numerical maximum likelihood, as follows: 

    1.Define the function that computes the likelihood, or log likelihood, or     
      minus log likelihood or whatever.  
    2.How we got the data into R -- usually a scan statement. 
    3.Listing of the data 
    4.The nlm statement and resulting output. 
    5.Calculation of the MLE using an explicit formula, if this is possible. 



# ex1.R    Bernoulli MLE (Example 1): true theta = 1/10
# execute with source("ex1.R")   at the R prompt          
cat("\n")
cat("1. Define the function BernLL \n")
BernLL <- function(theta,x) # -2 Log likelihood for Bernoulli distribution
                            # Data are in x
   {
   n <- length(x) ; sumx <- sum(x)
   BernLL <- -2*sumx*log(theta) - 2*(n-sumx)*log(1-theta)
   BernLL
   } # end of function BernLL
print(BernLL)
cat("\n")
cat("2. Read the data  \n")
ex1dat <- scan("ex1.dat")
cat("\n")
cat("3. Print the data \n")
print(ex1dat)
cat("\n")
cat("4. Minimize the function with nlm \n")
print(nlm(BernLL,.5,x=ex1dat))
cat("\n")
cat("5. Compare exact MLE \n")
print(mean(ex1dat))
# Another way to do it. The output of nlm (like most R output)
# is a list of matrices, and this list caqn be assigned toa variable
# so you can get at it easliy. This is very unlike most stats packages.
# Bernsearch <- nlm(BernLL,.5,x=ex1dat)
# print(Bernsearch)
# thetahat <- Bernsearch$estimate ; print(thetahat)
##################### End of ex1.r ##############################

> source("ex1.R")



1. Define the function BernLL
function(theta,x) # -2 Log likelihood for Bernoulli distribution
                            # Data are in x
   {
   n <- length(x) ; sumx <- sum(x)
   BernLL <- -2*sumx*log(theta) - 2*(n-sumx)*log(1-theta)
   BernLL
   } # end of function BernLL

2. Read the data
Read 100 items

3. Print the data
  [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 1 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
 [38] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 [75] 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0

4. Minimize the function with nlm
$minimum
[1] 55.75387

$estimate
[1] 0.0799995

$gradient
[1] 1.210765e-05

$code
[1] 1

$iterations
[1] 8

In addition: Warning messages:
1: NaNs produced in: log(x)
2: NA/Inf replaced by maximum positive value
3: NaNs produced in: log(x)
4: NA/Inf replaced by maximum positive value
5: NaNs produced in: log(x)
6: NA/Inf replaced by maximum positive value
7: NaNs produced in: log(x)
8: NA/Inf replaced by maximum positive value




# ex2.R       Gamma MLE (Example 2)
# Run from the unix prompt with   R --vanilla < ex2.R > ex2.out
#
# 1. Define the function
GamLL <- function(theta,x) # -2 Log likelihood for Gamma distribution
                           # Data are in x
   {
   alpha <- theta[1] ; beta <- theta[2]
   n <- length(x) ; sumx <- sum(x) ; sumlogx <- sum(log(x))
   GamLL <- 2*n*alpha*log(beta) + 2*n*lgamma(alpha) + 2*sumx/beta -
            2*(alpha-1)*sumlogx
   GamLL
   } # end of function GamLL
#
# 2. Read the data
gamdat <- scan("ex2.dat")
# 3. Print the data
print(gamdat)
#
# 4. Minimize the function with nlm. Except, watch out!
#    The starting value can matter.
#
print(nlm(GamLL,c(1,1),x=gamdat)[1:2])
print(nlm(GamLL,c(10,10),x=gamdat)[1:2])
print(nlm(GamLL,c(100,100),x=gamdat)[1:2])
# Need good starting values - Try method of moments.
xbar <- mean(gamdat) ; s2 <- var(gamdat)
astart <- xbar^2/s2 ; bstart <- s2/xbar
print(nlm(GamLL,c(astart,bstart),x=gamdat)[1:2])
#
# 5. There is no explicit formula for the MLE, but we can look at
#    the Method of Moments estimates astart and bstart.
astart ; bstart # Invisible with source function.
##################### End of ex2.R ##############################




tuzo.erin > R --vanilla < ex2.R > ex2.out
tuzo.erin > cat ex2.out

R : Copyright 2001, The R Development Core Team
Version 1.4.0  (2001-12-19)

> # ex2.R       Gamma MLE (Example 2)
> # Run from the unix prompt with   R --vanilla < ex2.R > ex2.out
> #
> # 1. Define the function
> GamLL <- function(theta,x) # -2 Log likelihood for Gamma distribution
+                            # Data are in x
+    {
+    alpha <- theta[1] ; beta <- theta[2]
+    n <- length(x) ; sumx <- sum(x) ; sumlogx <- sum(log(x))
+    GamLL <- 2*n*alpha*log(beta) + 2*n*lgamma(alpha) + 2*sumx/beta -
+             2*(alpha-1)*sumlogx
+    GamLL
+    } # end of function GamLL
> #
> # 2. Read the data
> gamdat <- scan("ex2.dat")
Read 25 items
> # 3. Print the data
> print(gamdat)
 [1] 310.3775 389.4431 378.3313 691.3620 359.8532 352.0103 569.4055 573.7330
 [9] 362.8067 287.6562 450.0208 456.9386 362.7775 258.0109 442.6085 492.4364
[17] 375.2044 464.7285 468.0111 400.6817 238.6555 469.2342 551.1889 434.2065
[25] 303.7339
> #
> # 4. Minimize the function with nlm. Except, watch out!
> #    The starting value can matter.
> #
> print(nlm(GamLL,c(1,1),x=gamdat)[1:2])
$minimum
[1] 4249.545

$estimate
[1]   23.32949 1415.03727

> print(nlm(GamLL,c(10,10),x=gamdat)[1:2])
$minimum
[1] 302.0773

$estimate
[1] 16.16075 25.84882

> print(nlm(GamLL,c(100,100),x=gamdat)[1:2])
$minimum
[1] -905203624

$estimate
[1] -901316.8  645789.8

> # Need good starting values - Try method of moments.
> xbar <- mean(gamdat) ; s2 <- var(gamdat)
> astart <- xbar^2/s2 ; bstart <- s2/xbar
> print(nlm(GamLL,c(astart,bstart),x=gamdat)[1:2])
$minimum
[1] 302.0773

$estimate
[1] 16.16085 25.84867

> #
> # 5. There is no explicit formula for the MLE, but we can look at
> #    the Method of Moments estimates astart and bstart.
> astart ; bstart # Invisible with source function.
[1] 15.30497
[1] 27.29418
> ##################### End of ex2.R ##############################