Methods in Biostatistics 2 (140.652)

Confidence intervals for proportions

Tick example 1: exact test

Suppose X ~ binomial(n=29,p) and we wish to test H0: p=1/2.

Our observed data are X = 24.

# The easy way:
binom.test(24, 29)
## 
##  Exact binomial test
## 
## data:  24 and 29
## number of successes = 24, number of trials = 29, p-value =
## 0.0005461
## alternative hypothesis: true probability of success is not equal to 0.5
## 95 percent confidence interval:
##  0.6422524 0.9415439
## sample estimates:
## probability of success 
##              0.8275862
# The drawn-out way:
# Find the lower end of the rejection region, starting from qbinom:
qbinom(0.025, 29, 0.5) 
## [1] 9
pbinom(9, 29, 0.5) # Pr(X <= 9) > 0.025, so 9 is too big
## [1] 0.03071417
pbinom(8, 29, 0.5) # Pr(X <= 8) <= 0.025, so 8 is the lower critical value
## [1] 0.01205977
# By symmetry, 29 - 8 = 21 is the upper critical value

# Actual significance level = Pr(X <= 8 or X >= 21 | p = 1/2)
pbinom(8, 29, 0.5) + 1 - pbinom(20, 29, 0.5) 
## [1] 0.02411954
# P-value for the data X = 24: 2*Pr(X >= 24 | p = 1/2)
2*(1 - pbinom(23, 29, 0.5)) 
## [1] 0.0005461127
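
Because the null distribution binomial(29, 1/2) is symmetric, doubling the upper tail is the same as adding up the probabilities of both tails. As a check (this line is not part of the original output), the following should reproduce the p-value above:

# two-sided p-value as the sum of the two tails; equivalent to the doubling
# above because binomial(29, 0.5) is symmetric
sum(dbinom(c(0:5, 24:29), 29, 0.5))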

Tick example 2: exact test

Suppose X ~ binomial(n=25,p) and we wish to test H0: p=1/2.

Our observed data are X = 17.

# The easy way:
binom.test(17, 25)
## 
##  Exact binomial test
## 
## data:  17 and 25
## number of successes = 17, number of trials = 25, p-value = 0.1078
## alternative hypothesis: true probability of success is not equal to 0.5
## 95 percent confidence interval:
##  0.4649993 0.8505046
## sample estimates:
## probability of success 
##                   0.68
# The drawn-out way:
# Find the lower end of the rejection region, starting from qbinom:
qbinom(0.025, 25, 0.5) 
## [1] 8
pbinom(8, 25, 0.5) # Pr(X <= 8) > 0.025, so 8 is too big
## [1] 0.05387607
pbinom(7, 25, 0.5) # Pr(X <= 7) <= 0.025, so 7 is the lower critical value
## [1] 0.02164263
# By symmetry, 25 - 7 = 18 is the upper critical value

# Actual significance level = Pr(X <= 7 or X >= 18 | p = 1/2)
pbinom(7, 25, 0.5) + 1 - pbinom(17, 25, 0.5) 
## [1] 0.04328525
# P-value for the data X = 17: 2*Pr(X >= 17 | p = 1/2)
2*(1 - pbinom(16, 25, 0.5)) 
## [1] 0.1077521

Tick example 1: confidence interval

X ~ binomial(n=29,p), observe X = 24.

Want 95% confidence interval for p.

# The easy way:
binom.test(24,29)$conf.int
## [1] 0.6422524 0.9415439
## attr(,"conf.level")
## [1] 0.95
# The drawn-out way: invert the exact test over a grid of candidate p values
p <- seq(0, 1, by=0.001) 

# upper limit: smallest p with Pr(X <= 24 | p) <= 0.025
min(p[pbinom(24, 29, p) <= 0.025])
## [1] 0.942
# lower limit: largest p with Pr(X >= 24 | p) <= 0.025
max(p[1-pbinom(23, 29, p) <= 0.025]) 
## [1] 0.642
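
The grid search above is only accurate to the grid spacing of 0.001. If you want the exact Clopper-Pearson endpoints without a grid, they can also be written as beta quantiles (a standard identity, not part of the original notes):

# exact limits for x = 24 successes in n = 29 trials:
# lower = qbeta(alpha/2, x, n-x+1), upper = qbeta(1-alpha/2, x+1, n-x)
qbeta(0.025, 24, 29 - 24 + 1)   # should match the lower limit 0.6422524
qbeta(0.975, 24 + 1, 29 - 24)   # should match the upper limit 0.9415439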

Tick example 2: confidence interval

X ~ binomial(n=25,p), observe X = 17.

Want 95% confidence interval for p.

# The easy way:
binom.test(17,25)$conf.int 
## [1] 0.4649993 0.8505046
## attr(,"conf.level")
## [1] 0.95
# The drawn-out way:
p <- seq(0, 1, by=0.001) 

# upper limit: smallest p with Pr(X <= 17 | p) <= 0.025
min(p[pbinom(17, 25, p) <= 0.025]) 
## [1] 0.851
# lower limit: largest p with Pr(X >= 17 | p) <= 0.025
max(p[1-pbinom(16, 25, p) <= 0.025])
## [1] 0.464

The case X = 0

X ~ binomial(n=15,p), observe X = 0.

Want 95% confidence interval for p.

# The easy way:
binom.test(0,15)$conf.int
## [1] 0.0000000 0.2180194
## attr(,"conf.level")
## [1] 0.95
# The direct way:
# lower limit = 0 (no value of p can be ruled out from below when X = 0)
# upper limit: solve Pr(X = 0 | p) = (1-p)^15 = 0.025 for p
1-(0.025)^(1/15)
## [1] 0.2180194
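
As a quick check (not in the original notes), plugging the upper limit back into the binomial model should return exactly 0.025 for the probability of the observed outcome X = 0:

# Pr(X = 0 | p = 1 - 0.025^(1/15)) = (0.025^(1/15))^15 = 0.025
dbinom(0, 15, 1 - 0.025^(1/15))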

Coverage

# parameters: true values of p and sample sizes n

p <- c(0.01,0.05,seq(0.1,0.5,by=0.1))
n <- c(10,25,50,100,250,500,1000)

cvr <- matrix(NA,nrow=length(n),ncol=length(p))   # coverage; rows = n, columns = p

# calculate the exact coverage of the 95% interval for each (n, p)

for(i in 1:length(p)){
  for(j in 1:length(n)){
    prbs <- dbinom(0:n[j],n[j],p[i])              # Pr(X = k), k = 0,...,n
    z <- rep(NA,n[j]+1)                           # indicator: does the CI for X = k cover p?
    for(k in 0:n[j]){
      tmp <- binom.test(k,n[j])$conf[1:2]
      z[k+1] <- ifelse(tmp[1]<p[i]&p[i]<tmp[2],1,0)
    }
    cvr[j,i] <- sum(z*prbs)                       # exact coverage probability
  }
}

# output

cvr <- data.frame(cvr)
names(cvr) <- as.character(p)
row.names(cvr) <- as.character(n)
print(round(100*cvr,1))
##      0.01 0.05  0.1  0.2  0.3  0.4  0.5
## 10   99.6 98.8 98.7 99.4 98.9 98.2 97.9
## 25   99.8 99.3 99.1 97.9 97.4 97.7 95.7
## 50   98.6 98.8 97.0 96.7 96.9 97.1 96.7
## 100  98.2 98.3 95.6 96.7 96.3 95.8 96.5
## 250  98.6 97.2 96.6 96.0 95.5 95.5 95.0
## 500  98.0 96.0 96.3 95.6 95.5 95.5 95.6
## 1000 97.6 95.8 95.5 95.2 95.5 95.1 95.4
par(las=1,yaxs="i")
plot(range(p),range(cvr),type="n",xlab="p",ylab="coverage",xlim=c(0,0.5),ylim=c(0.94,1))
abline(h=0.95,lwd=2,col="red")   # nominal 95% level
for(j in 1:length(n)) lines(p,cvr[j,],type="b",cex=0.5,pch=19)   # one curve per sample size
axis(4,cvr[,length(p)],as.character(n),mgp=c(3,0.2,0),tcl=0,cex.axis=0.3)   # label curves with n at right
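
The same coverage calculation is repeated below for a single n, so it may help to see it packaged as a function. This is a sketch, not part of the original notes, and the name coverage_cp is purely illustrative:

# exact coverage of the Clopper-Pearson interval for a single (n, p)
coverage_cp <- function(n, p, conf.level = 0.95) {
  prbs <- dbinom(0:n, n, p)                       # Pr(X = k), k = 0,...,n
  covers <- sapply(0:n, function(k) {
    ci <- binom.test(k, n, conf.level = conf.level)$conf.int
    ci[1] < p & p < ci[2]                         # does the interval for X = k cover p?
  })
  sum(prbs * covers)                              # expected coverage
}
# e.g. coverage_cp(25, 0.5) should reproduce the n = 25, p = 0.5 entry of the table above (95.7%)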

# coverage - small n

n <- 25
x <- 7
binom.test(x,n)$conf
## [1] 0.1207167 0.4938768
## attr(,"conf.level")
## [1] 0.95
p <- seq(0.01,0.5,by=0.01)
cvr95 <- rep(NA,length(p))   # coverage of the 95% intervals
cvr90 <- rep(NA,length(p))   # coverage of the 90% intervals

for(i in 1:length(p)){
  prbs <- dbinom(0:n,n,p[i])
  z <- rep(NA,n+1)
  for(k in 0:n){
    tmp <- binom.test(k,n)$conf[1:2]
    z[k+1] <- ifelse(tmp[1]<p[i]&p[i]<tmp[2],1,0)
  }
  cvr95[i] <- sum(z*prbs)
  z <- rep(NA,n+1)
  for(k in 0:n){
    tmp <- binom.test(k,n,conf.level=0.90)$conf[1:2]
    z[k+1] <- ifelse(tmp[1]<p[i]&p[i]<tmp[2],1,0)
  }
  cvr90[i] <- sum(z*prbs)
}

par(las=1,yaxs="i")
plot(range(p),range(cvr95),type="n",xlab="p",ylab="coverage",xlim=c(0,0.5),ylim=c(0.9,1))
abline(h=0.95,lwd=2,col="red")                       # nominal 95% level
lines(p,cvr95,type="b",cex=0.25,pch=19)              # 95% intervals (black)
lines(p,cvr90,type="b",cex=0.25,pch=19,col="blue")   # 90% intervals (blue)

# does it matter?

round(binom.test(x,n)$conf,2)
## [1] 0.12 0.49
## attr(,"conf.level")
## [1] 0.95
round(binom.test(x,n,conf.level=0.90)$conf,2)
## [1] 0.14 0.46
## attr(,"conf.level")
## [1] 0.9
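
One way to make "does it matter?" concrete (this comparison is not in the original output) is to look at the widths of the two intervals:

# widths of the 95% and 90% intervals for x = 7 successes in n = 25 trials
diff(binom.test(x, n)$conf.int)
diff(binom.test(x, n, conf.level = 0.90)$conf.int)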

Simulation from class

set.seed(1)
x <- 11; n1 <- 20; alpha1 <- 1; beta1 <- 1   # group 1: 11/20 successes, Beta(1,1) prior
y <- 5; n2 <- 20; alpha2 <- 1; beta2 <- 1    # group 2: 5/20 successes, Beta(1,1) prior
p1 <- rbeta(1000, x + alpha1, n1 - x + beta1)   # draws from the posterior, Beta(12, 10)
p2 <- rbeta(1000, y + alpha2, n2 - y + beta2)   # draws from the posterior, Beta(6, 16)
rd <- p1 - p2                                   # posterior draws of the risk difference
quantile(rd, c(.025, .975))                     # 95% credible interval
##        2.5%       97.5% 
## -0.01649329  0.54630326
mean(rd)
## [1] 0.273871
median(rd)
## [1] 0.2831055
plot(density(rd))                                              # posterior density of the risk difference
abline(v=quantile(rd,c(0.025,0.975)),lwd=2,lty=2,col="blue")   # 95% credible limits
abline(v=0,lwd=2,col="red")                                    # zero = no difference between groups
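
As a sanity check on the simulation (not in the original notes), the exact posterior means are available in closed form, since a Beta(a, b) distribution has mean a/(a + b); the simulated mean of about 0.274 should be close to the exact value:

# exact posterior means of p1 and p2 (Beta(12, 10) and Beta(6, 16))
m1 <- (x + alpha1)/(n1 + alpha1 + beta1)   # 12/22
m2 <- (y + alpha2)/(n2 + alpha2 + beta2)   # 6/22
m1 - m2                                    # exact posterior mean of the risk difference, 6/22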

End of code