o <- c(35,43,22)
p <- c(0.25,0.50,0.25)
e <- sum(o)*p
e
## [1] 25 50 25
Functions to calculate the test statistics.
f.xsq <- function(o,p){
e <- (p*sum(o))
sum((o-e)^2/e)
}
f.lrt <- function(o,p){
e <- (p*sum(o))
2*sum(o*log(o/e),na.rm=TRUE)
}
xsq <- f.xsq(o,p)
xsq
## [1] 5.34
lrt <- f.lrt(o,p)
lrt
## [1] 4.95762
The Multinomial distribution.
set.seed(1)
rmultinom(1,100,c(0.25,0.50,0.25))
## [,1]
## [1,] 22
## [2,] 53
## [3,] 25
rmultinom(5,100,c(0.25,0.50,0.25))
## [,1] [,2] [,3] [,4] [,5]
## [1,] 26 21 32 26 21
## [2,] 44 47 44 55 57
## [3,] 30 32 24 19 22
Simulate 10,000 test statistics under the null.
set.seed(45)
simdat <- rmultinom(10000,100,c(0.25,0.50,0.25))
dim(simdat)
## [1] 3 10000
res.xsq <- apply(simdat,2,f.xsq,p=c(0.25,0.50,0.25))
str(res.xsq)
## num [1:10000] 0.22 0.54 0.64 0.54 7.26 1.92 0.24 0.16 4.54 0.72 ...
res.lrt <- apply(simdat,2,f.lrt,p=c(0.25,0.50,0.25))
str(res.lrt)
## num [1:10000] 0.224 0.552 0.641 0.551 7.516 ...
Plot the histograms of the test statistics, and estimate the p-values.
hist(res.xsq,col="lightgrey",prob=TRUE,breaks=seq(0,20,1))
abline(v=xsq,col="red",lwd=2)
mean(res.xsq>xsq)
## [1] 0.0699
hist(res.lrt,col="lightgrey",prob=TRUE,breaks=seq(0,20,1))
abline(v=lrt,col="red",lwd=2)
mean(res.lrt>lrt)
## [1] 0.0909
x <- c(2, 2, 0, 0, 0, 0, 0, 1, 0, 0,
0, 0, 4, 0, 0, 3, 0, 0, 0, 3,
0, 5, 0, 1, 0, 1, 2, 0, 2, 1,
0, 0, 0, 0, 0, 0, 0, 0)
ob <- table(x)
ob
## x
## 0 1 2 3 4 5
## 26 4 4 2 1 1
mu <- mean(x)
mu
## [1] 0.7105263
Binning 5+ together.
dpois(0:10,mu)
## [1] 4.913855e-01 3.491423e-01 1.240374e-01 2.937728e-02 5.218333e-03 7.415526e-04 8.781543e-05 8.913597e-06
## [9] 7.916681e-07 6.250012e-08 4.440798e-09
dpois(0:4,mu)
## [1] 0.491385505 0.349142333 0.124037408 0.029377281 0.005218333
ppois(4,mu)
## [1] 0.9991609
ppois(4,mu,lower=FALSE)
## [1] 0.0008391405
ppois(4,mu)+ppois(4,mu,lower=FALSE)
## [1] 1
p <- c(dpois(0:4,mu),ppois(4,mu,lower=FALSE))
p
## [1] 0.4913855054 0.3491423328 0.1240374077 0.0293772808 0.0052183328 0.0008391405
e <- p*length(x)
e
## [1] 18.67264921 13.26740865 4.71342149 1.11633667 0.19829665 0.03188734
The parametric hypothesis tests.
lrt <- f.lrt(ob,p)
lrt
## [1] 18.76827
pchisq(lrt,4,lower=FALSE)
## [1] 0.0008727531
xsq <- f.xsq(ob,p)
xsq
## [1] 42.78971
pchisq(xsq,4,lower=FALSE)
## [1] 1.144142e-08
Inference via simulations.
n <- length(x)
n
## [1] 38
set.seed(1)
tmp <- rpois(n,mu)
tmp
## [1] 0 0 1 2 0 2 2 1 1 0 0 0 1 0 1 1 1 3 0 1 2 0 1 0 0 0 0 0 2 0 0 1 1 0 1 1 1 0
table(tmp)
## tmp
## 0 1 2 3
## 18 14 5 1
table(factor(tmp))
##
## 0 1 2 3
## 18 14 5 1
table(factor(tmp,levels=0:10))
##
## 0 1 2 3 4 5 6 7 8 9 10
## 18 14 5 1 0 0 0 0 0 0 0
tmp[tmp>5] <-5
table(factor(tmp,levels=0:5))
##
## 0 1 2 3 4 5
## 18 14 5 1 0 0
nit <- 10000
res.lrt <- rep(NA,nit)
res.xsq <- rep(NA,nit)
set.seed(1)
for(j in 1:nit){
tmp <- rpois(n,mu)
tmp[tmp>5] <-5
x <- table(factor(tmp,levels=0:5))
res.lrt[j] <- f.lrt(x,p)
res.xsq[j] <- f.xsq(x,p)
}
Plot the histograms of the test statistics, and estimate the p-values.
hist(res.lrt,col="lightgrey",prob=TRUE,breaks=101)
abline(v=lrt,col="red",lwd=2)
mean(res.lrt>lrt)
## [1] 5e-04
hist(res.xsq,col="lightgrey",prob=TRUE,breaks=101)
abline(v=xsq,col="red",lwd=2)
mean(res.xsq>xsq)
## [1] 0.0011