Statistics for Laboratory Scientists ( 140.615 )

Multiple Comparisons - Advanced

For this part, you need to install the ‘qvalue’ package from Bioconductor. Uncomment the first two lines for the installation. Note this can take a wee bit.

# install.packages("BiocManager")
# BiocManager::install("qvalue")
library(qvalue)

Null results

# 100,000 independent p-values under the null
set.seed(1)
n <- 1e5
p <- runif(n)
hist(p,breaks=101,col="lightgrey")

hist(p,breaks=101,col="lightgrey",prob=T)
abline(h=1,lwd=2,col="red")

Example 1: enrichment for low p-values

# 100,000 independent p-values under the null
set.seed(1)
p <- runif(n)
# 5,000 p-values under some alternatives
ns <- 5000
psmall <- rbeta(ns,1,10)
hist(psmall,breaks=101,col="lightgrey",xlim=c(0,1))

# replace 5,000 null p-values at random entries
wh <- sample(1:n,size=ns)
p[wh] <- psmall
hist(p,breaks=101,col="lightgrey",prob=T)
abline(h=1,lwd=2,col="red")

# bonferroni cut-off
cut.b <- 0.05/n 
cut.b
## [1] 5e-07
min(p)
## [1] 3.895489e-06
min(p)*n
## [1] 0.3895489
which(p<cut.b)
## integer(0)
p.b <- p.adjust(p,method="bonferroni")
min(p.b)
## [1] 0.3895489
# q-values
q <- qvalue(p)
q$pi0
## [1] 0.9546252
wh <- order(q$pvalues)
cbind(q$pvalues,q$qvalues)[wh,][1:20,]
##               [,1]      [,2]
##  [1,] 3.895489e-06 0.2911962
##  [2,] 8.431729e-06 0.2911962
##  [3,] 1.139240e-05 0.2911962
##  [4,] 1.220149e-05 0.2911962
##  [5,] 1.992821e-05 0.3804794
##  [6,] 6.533717e-05 0.6379691
##  [7,] 8.663668e-05 0.6379691
##  [8,] 9.427452e-05 0.6379691
##  [9,] 9.680678e-05 0.6379691
## [10,] 9.693165e-05 0.6379691
## [11,] 1.064336e-04 0.6379691
## [12,] 1.066752e-04 0.6379691
## [13,] 1.113729e-04 0.6379691
## [14,] 1.127592e-04 0.6379691
## [15,] 1.152959e-04 0.6379691
## [16,] 1.199543e-04 0.6379691
## [17,] 1.206559e-04 0.6379691
## [18,] 1.276392e-04 0.6379691
## [19,] 1.368246e-04 0.6379691
## [20,] 1.377927e-04 0.6379691

Example 2: enrichment for low p-values

# 100,000 independent p-values under the null
set.seed(1)
p <- runif(n)
# 10 p-values under some alternatives
ns <- 10
psmall <- runif(10,0,1e-6)
# replace 10 null p-values at random entries
wh <- sample(1:n,size=ns)
p[wh] <- psmall
hist(p,breaks=101,col="lightgrey",prob=T)
abline(h=1,lwd=2,col="red")

# bonferroni cut-off
cut.b <- 0.05/n 
cut.b
## [1] 5e-07
min(p)
## [1] 3.129363e-07
min(p)*n
## [1] 0.03129363
which(p<cut.b)
## [1] 43078 78294
p[which(p<cut.b)]
## [1] 3.129363e-07 3.354800e-07
p.b <- p.adjust(p,method="bonferroni")
min(p.b)
## [1] 0.03129363
which(p.b<0.05)
## [1] 43078 78294
p.b[which(p.b<0.05)]
## [1] 0.03129363 0.03354800
# q-values
q <- qvalue(p)
wh <- order(q$pvalues)
cbind(q$pvalues,q$qvalues)[wh,][1:20,]
##               [,1]        [,2]
##  [1,] 3.129363e-07 0.008929532
##  [2,] 3.354800e-07 0.008929532
##  [3,] 5.523553e-07 0.008929532
##  [4,] 6.811237e-07 0.008929532
##  [5,] 7.005180e-07 0.008929532
##  [6,] 7.017648e-07 0.008929532
##  [7,] 7.471679e-07 0.008929532
##  [8,] 8.724921e-07 0.008929532
##  [9,] 8.878426e-07 0.008929532
## [10,] 8.929532e-07 0.008929532
## [11,] 3.895489e-06 0.035413541
## [12,] 8.431729e-06 0.070264408
## [13,] 1.139240e-05 0.087153499
## [14,] 1.220149e-05 0.087153499
## [15,] 1.992821e-05 0.132854717
## [16,] 6.533717e-05 0.408357300
## [17,] 9.427452e-05 0.548435756
## [18,] 1.064336e-04 0.548435756
## [19,] 1.113729e-04 0.548435756
## [20,] 1.152959e-04 0.548435756
# benjamini-hochberg
psort <- sort(p)
table(psort < (1:n)/n*0.05)
## 
## FALSE  TRUE 
## 99989    11
max(which(psort < (1:n)/n*0.05))
## [1] 11
psort[1:11]
##  [1] 3.129363e-07 3.354800e-07 5.523553e-07 6.811237e-07 7.005180e-07 7.017648e-07 7.471679e-07 8.724921e-07
##  [9] 8.878426e-07 8.929532e-07 3.895489e-06
p.bh <- p.adjust(p,method="BH")
which(p.bh<0.05)
##  [1]   557  1558  3597 20953 40200 43078 58792 68378 78294 90960 99242

End of code