Methods in Biostatistics 2 (140.652)

Fisher’s exact test

Example 1

x <- rbind(c(18, 2), c(11, 9))
x
##      [,1] [,2]
## [1,]   18    2
## [2,]   11    9
fisher.test(x) 
## 
##  Fisher's Exact Test for Count Data
## 
## data:  x
## p-value = 0.03095
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##   1.147793 78.183838
## sample estimates:
## odds ratio 
##   6.994073

Example 2

x <- rbind(c(9,9), c(20, 62))
x
##      [,1] [,2]
## [1,]    9    9
## [2,]   20   62
fisher.test(x) 
## 
##  Fisher's Exact Test for Count Data
## 
## data:  x
## p-value = 0.04386
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##   0.9356744 10.0979315
## sample estimates:
## odds ratio 
##   3.059762

Hypergeometric distribution

Consider an urn with 30 balls, of which 12 are white and 18 are black.

Suppose we draw 10 balls without replacement and count the number of white balls obtained.

# Simulate this
rhyper(1, 12, 18, 10) 
## [1] 6
# Simulate this 100 times
rhyper(100, 12, 18, 10)
##   [1] 4 2 5 6 2 3 4 3 5 2 4 4 5 3 5 3 4 5 4 6 6 4 3 2 5 4 5 2 3 6 7 6 5 4 3
##  [36] 6 6 5 2 4 4 6 2 3 3 2 5 3 1 5 4 3 2 4 4 3 3 4 5 5 4 4 3 4 5 5 3 5 4 4
##  [71] 2 3 3 4 3 6 3 4 3 4 5 2 4 2 4 3 7 5 4 4 4 2 4 6 5 4 2 7 4 5
# Probability of getting no white balls in the sample
dhyper(0, 12, 18, 10)
## [1] 0.001456415
# Probability of getting exactly 2 white balls in the sample
dhyper(2, 12, 18, 10)
## [1] 0.09612337
# Probability of getting 2 or fewer white balls in the sample
phyper(2, 12, 18, 10)
## [1] 0.1169986
# 2.5th and 97.5th %iles of number of white balls drawn
qhyper(c(0.025, 0.975), 12, 18, 10)
## [1] 2 6

Blood groups by state

# The data
x <- rbind(c(122, 117, 19, 244),
           c(1781, 1351, 288,3301),
           c(353, 269, 60, 713))
x
##      [,1] [,2] [,3] [,4]
## [1,]  122  117   19  244
## [2,] 1781 1351  288 3301
## [3,]  353  269   60  713

Approximate Fisher’s exact test

fisher <- function(tab, n.sim=1000, return.all=FALSE, prnt=FALSE){
  bot0 <- sum(lgamma(tab+1))

  bot <- 1:n.sim
  a <- list(rep(row(tab),tab), rep(col(tab),tab))
  for(i in 1:n.sim) {
    a[[1]] <- sample(a[[1]])
    bot[i] <- sum(lgamma(table(a)+1))
    if(prnt) { if(i == round(i/10)*10) cat(i,"\n") }
  }
  if(return.all) return(list(bot0,bot))
  mean(bot0 <= bot)
}

# Apply Fisher's exact test, using 1000 simulated tables
set.seed(5)
fisher(x) 
## [1] 0.482
# Built-in function (faster!)
fisher.test(x,simulate=TRUE,B=1000)
## 
##  Fisher's Exact Test for Count Data with simulated p-value (based
##  on 1000 replicates)
## 
## data:  x
## p-value = 0.4975
## alternative hypothesis: two.sided

End of code