Maximum Likelihood Estimation

Statistics for Laboratory Scientists ( 140.615 )

Example from class: Binomial log likelihood

Some Binomial probabilities.

dbinom(22,100,prob=22/100)
## [1] 0.09591895
dbinom(22,100,prob=0.10)
## [1] 0.0001977617
dbinom(22,100,prob=0.20)
## [1] 0.08489953
dbinom(22,100,prob=0.25)
## [1] 0.07493508
p <- seq(0,1,by=0.01)
p
##   [1] 0.00 0.01 0.02 0.03 0.04 0.05 0.06 0.07 0.08 0.09 0.10 0.11 0.12 0.13 0.14 0.15 0.16 0.17 0.18 0.19 0.20
##  [22] 0.21 0.22 0.23 0.24 0.25 0.26 0.27 0.28 0.29 0.30 0.31 0.32 0.33 0.34 0.35 0.36 0.37 0.38 0.39 0.40 0.41
##  [43] 0.42 0.43 0.44 0.45 0.46 0.47 0.48 0.49 0.50 0.51 0.52 0.53 0.54 0.55 0.56 0.57 0.58 0.59 0.60 0.61 0.62
##  [64] 0.63 0.64 0.65 0.66 0.67 0.68 0.69 0.70 0.71 0.72 0.73 0.74 0.75 0.76 0.77 0.78 0.79 0.80 0.81 0.82 0.83
##  [85] 0.84 0.85 0.86 0.87 0.88 0.89 0.90 0.91 0.92 0.93 0.94 0.95 0.96 0.97 0.98 0.99 1.00
plot(p,dbinom(22,100,prob=p))

Plot the likelihood function.

p <- seq(0.1,0.4,length=501)
plot(p,dbinom(22,100,prob=p),type="l",ylab="likelihood")
abline(v=22/100,lwd=2,col="blue")     

The log likelihood function from class.

ll <- function(x,n,p){
  x*log(p)+(n-x)*log(1-p)+log(choose(n,x))
}

The log likelihood for some values of p.

p <- seq(0,1,length=51)
p
##  [1] 0.00 0.02 0.04 0.06 0.08 0.10 0.12 0.14 0.16 0.18 0.20 0.22 0.24 0.26 0.28 0.30 0.32 0.34 0.36 0.38 0.40
## [22] 0.42 0.44 0.46 0.48 0.50 0.52 0.54 0.56 0.58 0.60 0.62 0.64 0.66 0.68 0.70 0.72 0.74 0.76 0.78 0.80 0.82
## [43] 0.84 0.86 0.88 0.90 0.92 0.94 0.96 0.98 1.00
ll(x=22,n=100,p=p)
##  [1]        -Inf  -37.293773  -23.652839  -16.374773  -11.723251   -8.528448   -6.270256   -4.672124
##  [9]   -3.569812   -2.858194   -2.466287   -2.344252   -2.456089   -2.775273   -3.282018   -3.961503
## [17]   -4.802683   -5.797473   -6.940177   -8.227097   -9.656250  -11.227188  -12.940870  -14.799608
## [25]  -16.807042  -18.968174  -21.289434  -23.778796  -26.445946  -29.302498  -32.362296  -35.641797
## [33]  -39.160569  -42.941949  -47.013904  -51.410183  -56.171868  -61.349512  -67.006142  -73.221569
## [41]  -80.098771  -87.773654  -96.430584 -106.328362 -117.846346 -131.573024 -148.494685 -170.460750
## [49] -201.623854 -255.235710        -Inf
ll(22,100,p)
##  [1]        -Inf  -37.293773  -23.652839  -16.374773  -11.723251   -8.528448   -6.270256   -4.672124
##  [9]   -3.569812   -2.858194   -2.466287   -2.344252   -2.456089   -2.775273   -3.282018   -3.961503
## [17]   -4.802683   -5.797473   -6.940177   -8.227097   -9.656250  -11.227188  -12.940870  -14.799608
## [25]  -16.807042  -18.968174  -21.289434  -23.778796  -26.445946  -29.302498  -32.362296  -35.641797
## [33]  -39.160569  -42.941949  -47.013904  -51.410183  -56.171868  -61.349512  -67.006142  -73.221569
## [41]  -80.098771  -87.773654  -96.430584 -106.328362 -117.846346 -131.573024 -148.494685 -170.460750
## [49] -201.623854 -255.235710        -Inf

The plot.

p <- seq(0,1,length=501)
plot(p,ll(22,100,p),type="l",xlab="p",ylab="log likelihood",main="n=100, x=22")

p <- seq(0.1,0.4,length=501)
plot(p,ll(22,100,p),type="l",xlab="p",ylab="log likelihood",main="n=100, x=22")
abline(v=22/100,lwd=2,col="blue")

Note: you can also use the dbinom() function with argument log=TRUE.

p <- seq(0.1,0.4,by=0.01)
p
##  [1] 0.10 0.11 0.12 0.13 0.14 0.15 0.16 0.17 0.18 0.19 0.20 0.21 0.22 0.23 0.24 0.25 0.26 0.27 0.28 0.29 0.30
## [22] 0.31 0.32 0.33 0.34 0.35 0.36 0.37 0.38 0.39 0.40
ll(22,100,p)
##  [1] -8.528448 -7.303141 -6.270256 -5.400755 -4.672124 -4.066572 -3.569812 -3.170213 -2.858194 -2.625783
## [11] -2.466287 -2.374048 -2.344252 -2.372779 -2.456089 -2.591133 -2.775273 -3.006227 -3.282018 -3.600936
## [21] -3.961503 -4.362448 -4.802683 -5.281284 -5.797473 -6.350610 -6.940177 -7.565771 -8.227097 -8.923957
## [31] -9.656250
dbinom(22,100,p)
##  [1] 1.977617e-04 6.734200e-04 1.891744e-03 4.513172e-03 9.352386e-03 1.713603e-02 2.816115e-02 4.199464e-02
##  [9] 5.737227e-02 7.238309e-02 8.489953e-02 9.310307e-02 9.591895e-02 9.322135e-02 8.576971e-02 7.493508e-02
## [17] 6.233245e-02 4.947802e-02 3.755241e-02 2.729817e-02 1.903449e-02 1.274714e-02 8.207694e-03 5.085899e-03
## [25] 3.035216e-03 1.745682e-03 9.680982e-04 5.178777e-04 2.673113e-04 1.331603e-04 6.402414e-05
dbinom(22,100,p,log=TRUE)
##  [1] -8.528448 -7.303141 -6.270256 -5.400755 -4.672124 -4.066572 -3.569812 -3.170213 -2.858194 -2.625783
## [11] -2.466287 -2.374048 -2.344252 -2.372779 -2.456089 -2.591133 -2.775273 -3.006227 -3.282018 -3.600936
## [21] -3.961503 -4.362448 -4.802683 -5.281284 -5.797473 -6.350610 -6.940177 -7.565771 -8.227097 -8.923957
## [31] -9.656250
plot(p,dbinom(22,100,prob=p,log=TRUE),type="l",ylab="log likelihood")
abline(v=22/100,lwd=2,col="blue")     

Also note: you can also omit the constant in the log likelihood function (e.g., the last part that does not depend on p).

ll2 <- function(x,n,p){
  x*log(p)+(n-x)*log(1-p)
}

p <- seq(0.1,0.4,length=501)
plot(p,ll2(22,100,p),type="l",xlab="p",ylab="log likelihood")
abline(v=22/100,lwd=2,col="blue")

Example from class: Poisson log likelihood

The log likelihood function.

ll <- function(x,lambda){
  -length(x)*lambda+sum(x)*log(lambda)-sum(log(gamma(x+1)))
}

The log likelihood for some values of lambda.

x <- c(1,1,0,1,2,5,2,4,0,0,1,0,3,1,0,0,1,1,3,4)
x
##  [1] 1 1 0 1 2 5 2 4 0 0 1 0 3 1 0 0 1 1 3 4
table(x)
## x
## 0 1 2 3 4 5 
## 6 7 2 2 2 1
length(x)
## [1] 20
mean(x)
## [1] 1.5
lambda <- seq(0.5,4.5,by=0.1)
lambda
##  [1] 0.5 0.6 0.7 0.8 0.9 1.0 1.1 1.2 1.3 1.4 1.5 1.6 1.7 1.8 1.9 2.0 2.1 2.2 2.3 2.4 2.5 2.6 2.7 2.8 2.9 3.0
## [27] 3.1 3.2 3.3 3.4 3.5 3.6 3.7 3.8 3.9 4.0 4.1 4.2 4.3 4.4 4.5
ll(x,lambda)
##  [1] -46.90783 -43.43818 -40.81366 -38.80772 -37.27423 -36.11341 -35.25411 -34.64377 -34.24248 -34.01925
## [11] -33.94946 -34.01330 -34.19457 -34.47981 -34.85780 -35.31900 -35.85529 -36.45969 -37.12614 -37.84935
## [21] -38.62469 -39.44807 -40.31586 -41.22483 -42.17209 -43.15504 -44.17135 -45.21889 -46.29574 -47.40015
## [31] -48.53052 -49.68540 -50.86343 -52.06338 -53.28412 -54.52458 -55.78380 -57.06088 -58.35496 -59.66528
## [41] -60.99109

The plot.

lambda <- seq(0.5,4,length=501)
plot(lambda,ll(x,lambda),type="l",xlab=expression(lambda),ylab="log likelihood",main="n=20, mean=1.5")
abline(v=mean(x),lwd=2,col="blue")

Note: you can also use the dpois() function with argument log=TRUE.

ll(x,lambda=1.5)
## [1] -33.94946
dpois(x,lambda=1.5)
##  [1] 0.33469524 0.33469524 0.22313016 0.33469524 0.25102143 0.01411996 0.25102143 0.04706652 0.22313016
## [10] 0.22313016 0.33469524 0.22313016 0.12551072 0.33469524 0.22313016 0.22313016 0.33469524 0.33469524
## [19] 0.12551072 0.04706652
dpois(x,lambda=1.5,log=TRUE)
##  [1] -1.094535 -1.094535 -1.500000 -1.094535 -1.382217 -4.260166 -1.382217 -3.056193 -1.500000 -1.500000
## [11] -1.094535 -1.500000 -2.075364 -1.094535 -1.500000 -1.500000 -1.094535 -1.094535 -2.075364 -3.056193
sum(dpois(x,lambda=1.5,log=TRUE))
## [1] -33.94946