Load the following packages:
library(limma)
library(qvalue)
library(RColorBrewer)
Set parameters:
n <- 1394 # number of proteins
d0 <- 4.428296 # prior degrees of freedom
s02 <- 0.03197716 # prior variance
nit <- 1000 # number of iterations
hm <- 100 # number of significant proteins
mv <- 0.1 # proportion of missing values (rows) in each data set (optional)
Code case and control groups, label 3 experiments, and define the design:
tr <- as.factor(rep(c(1,1,1,1,2,2,2,2),3))
ex <- as.factor(c(rep(1,8),rep(2,8),rep(3,8)))
design <- model.matrix(~ ex+tr)
Load all functions for simulations:
source("http://www.biostat.jhsph.edu/~kkammers/software/eupa/sim.functions.r")
source("http://www.biostat.jhsph.edu/~kkammers/software/eupa/sim.eb.mult.r")
source("http://www.biostat.jhsph.edu/~kkammers/software/eupa/sim.eb.md.mult.r")
Perform simulations with full data:
tp.25 <- sim.eb.mult(n,d0,s02,hm,hm.fc=1.25,nit,design)
tp.50 <- sim.eb.mult(n,d0,s02,hm,hm.fc=1.50,nit,design)
Perform simulations with missing data:
tp.md.25 <- sim.eb.md.mult(n,d0,s02,mv,hm,hm.fc=1.25,nit,design)
tp.md.50 <- sim.eb.md.mult(n,d0,s02,mv,hm,hm.fc=1.50,nit,design)
Create ROC curves for all different setups:
nfp <- 21
mfp <- 0.1
fp <- seq(0,mfp,length=nfp)
mcol <- c(brewer.pal(9,"Blues")[c(8,8)],brewer.pal(9,"Reds")[c(7,7)])
par(las=1,mar=c(5,7,5,3),xaxs="i",yaxs="i",font.lab=2,cex.lab=1.7,font.axis=2,cex.axis=1.7)
plot(c(-0.005,mfp+0.005),c(0.6,1),xlab="false positive rate",ylab="",type="n",xaxt="n",yaxt="n")
abline(v=seq(0,0.5,0.01),h=seq(0.5,1,0.05),col="lightgray",lty="dotted")
lines(fp[-1],tp.25$tp.ord[-1],col=mcol[3],type="b",cex=1.1,lwd=2,pch=15)
lines(fp[-1],tp.25$tp.mod[-1],col=mcol[1],type="b",cex=1.1,lwd=2,pch=15)
lines(fp[-1],tp.50$tp.ord[-1],col=mcol[4],type="b",cex=1.1,lwd=2,pch=19)
lines(fp[-1],tp.50$tp.mod[-1],col=mcol[2],type="b",cex=1.1,lwd=2,pch=19)
lines(fp[-1],tp.md.25$tp.ord[-1],col=mcol[3],type="b",cex=0.8,lwd=1,pch=22,bg="white")
lines(fp[-1],tp.md.25$tp.mod[-1],col=mcol[1],type="b",cex=0.8,lwd=1,pch=22,bg="white")
lines(fp[-1],tp.md.50$tp.ord[-1],col=mcol[4],type="b",cex=0.8,lwd=1,pch=21,bg="white")
lines(fp[-1],tp.md.50$tp.mod[-1],col=mcol[2],type="b",cex=0.8,lwd=1,pch=21,bg="white")
axis(1,seq(0,0.1,0.005),rep("",length(seq(0,0.1,0.005))),tck=-0.01)
axis(1,seq(0,0.1,0.01),rep("",length(seq(0,0.1,0.01))),tck=-0.02)
axis(1,seq(0,0.1,0.05),paste0(seq(0,10,5),"%"))
axis(2,seq(0.5,1,0.1),paste0(seq(50,100,10),"%"))
mtext("true positive rate",side=2,line=5,las=0,font=2,cex=1.7)
legend("bottomright",col=rep(mcol,2),pch=c(19,21,19,21,15,22,15,22),bg="white",cex=1.3,
c("50% fold change / moderated / complete",
"50% fold change / moderated / incomplete",
"50% fold change / ordinary / complete",
"50% fold change / ordinary / incomplete",
"25% fold change / moderated / complete",
"25% fold change / moderated / incomplete",
"25% fold change / ordinary / complete",
"25% fold change / ordinary / incomplete"))
eb.fit <- function(dat, design){
n <- dim(dat)[1]
fit <- lmFit(dat, design)
fit.eb <- eBayes(fit)
logFC <- fit.eb$coefficients[, 2]
df.r <- fit.eb$df.residual
df.0 <- rep(fit.eb$df.prior, n)
s2.0 <- rep(fit.eb$s2.prior, n)
s2 <- (fit.eb$sigma)^2
s2.post <- fit.eb$s2.post
t.ord <- fit.eb$coefficients[, 2]/fit.eb$sigma/fit.eb$stdev.unscaled[, 2]
t.mod <- fit.eb$t[, 2]
p.ord <- 2*pt(-abs(t.ord), fit.eb$df.residual)
p.mod <- fit.eb$p.value[, 2]
q.ord <- qvalue(p.ord)$q
q.mod <- qvalue(p.mod)$q
results.eb <- data.frame(logFC, t.ord, t.mod, p.ord, p.mod, q.ord, q.mod, df.r, df.0, s2.0, s2, s2.post)
return(results.eb)
}
eb.fit.mult <- function(dat, design){
n <- dim(dat)[1]
fit <- lmFit(dat, design)
fit.eb <- eBayes(fit)
logFC <- fit.eb$coef[, "tr2"]
df.0 <- rep(fit.eb$df.prior, n)
df.r <- fit.eb$df.residual
s2.0 <- rep(fit.eb$s2.prior, n)
s2 <- (fit.eb$sigma)^2
s2.post <- fit.eb$s2.post
t.ord <- fit.eb$coef[, "tr2"]/fit.eb$sigma/fit.eb$stdev.unscaled[, "tr2"]
t.mod <- fit.eb$t[, "tr2"]
p.ord <- 2*pt(-abs(t.ord), fit.eb$df.residual)
p.mod <- fit.eb$p.value[, "tr2"]
q.ord <- qvalue(p.ord)$q
q.mod <- qvalue(p.mod)$q
results.eb.mult <- data.frame(logFC, t.ord, t.mod, p.ord, p.mod, q.ord, q.mod, df.0, df.r, s2.0, s2, s2.post)
return(results.eb.mult)
}
# in "fit <- lmFit(dat, design)" of all NAs is taken care; only the warning message is suppressed
eb.fit.md.mult <- function(dat, design){
n <- dim(dat)[1]
suppressWarnings(fit <- lmFit(dat, design))
fit.eb <- eBayes(fit)
logFC <- fit.eb$coef[, "tr2"]
df.0 <- rep(fit.eb$df.prior, n)
df.r <- fit.eb$df.residual
s2.0 <- rep(fit.eb$s2.prior, n)
s2 <- (fit.eb$sigma)^2
s2.post <- fit.eb$s2.post
t.ord <- fit.eb$coef[, "tr2"]/fit.eb$sigma/fit.eb$stdev.unscaled[, "tr2"]
t.mod <- fit.eb$t[, "tr2"]
p.ord <- 2*pt(-abs(t.ord), fit.eb$df.residual)
p.mod <- fit.eb$p.value[, "tr2"]
q.ord <- q.mod <- rep(NA,n)
ids <- which(!is.na(p.ord))
k <- 0
q1 <- qvalue(p.ord[!is.na(p.ord)])$q
q2 <- qvalue(p.mod[!is.na(p.mod)])$q
for(i in ids){
k <- k+1
q.ord[i] <- q1[k]
q.mod[i] <- q2[k]
}
results.eb.md.mult <- data.frame(logFC, t.ord, t.mod, p.ord, p.mod, q.ord, q.mod, df.0, df.r, s2.0, s2, s2.post)
return(results.eb.md.mult)
}
sim.eb.mult <- function(n,d0,s02,hm,hm.fc,nit,design){
set.seed(1)
p.mod <- p.ord <- matrix(nrow=n,ncol=nit)
for(l in 1:nit){
# dat 1
v <- rchisq(n,d0)
v <- d0*s02/v
x <- matrix(NA,nrow=n,ncol=8)
for(k in 1:n) x[k,] <- round(rnorm(8,0,sqrt(v[k])),5)
x[1:hm,1:4] <- x[1:hm,1:4] - log2(hm.fc)
dat1 <- x
# dat 2
v <- rchisq(n,d0)
v <- d0*s02/v
x <- matrix(NA,nrow=n,ncol=8)
for(k in 1:n) x[k,] <- round(rnorm(8,0,sqrt(v[k])),5)
x[1:hm,1:4] <- x[1:hm,1:4] - log2(hm.fc)
dat2 <- x
# dat 3
v <- rchisq(n,d0)
v <- d0*s02/v
x <- matrix(NA,nrow=n,ncol=8)
for(k in 1:n) x[k,] <- round(rnorm(8,0,sqrt(v[k])),5)
x[1:hm,1:4] <- x[1:hm,1:4] - log2(hm.fc)
dat3 <- x
# limma, factorial design
dat <- data.frame(dat1, dat2, dat3)
res.eb <- eb.fit.mult(dat, design)
p.ord[,l] <- res.eb$p.ord
p.mod[,l] <- res.eb$p.mod
}
nfp <- 21
mfp <- 0.1
x.ord <- x.mod <- matrix(nrow=nfp,ncol=nit)
fp <- seq(0,mfp,length=nfp)
for(l in 1:nit){
p <- p.ord[-(1:hm),l]
cut <- quantile(p,fp)
p <- p.ord[1:hm,l]
x.ord[,l] <- unlist(lapply(lapply(cut,">",p),mean))
p <- p.mod[-(1:hm),l]
cut <- quantile(p,fp)
p <- p.mod[1:hm,l]
x.mod[,l] <- unlist(lapply(lapply(cut,">",p),mean))
}
tp.ord <- apply(x.ord,1,mean)
tp.mod <- apply(x.mod,1,mean)
tp <- data.frame(tp.ord, tp.mod)
return(tp)
}
sim.eb.md.mult <- function(n,d0,s02,mv,hm,hm.fc,nit,design){
set.seed(1)
p.mod <- p.ord <- matrix(nrow=n,ncol=nit)
for(l in 1:nit){
# dat 1
v <- rchisq(n,d0)
v <- d0*s02/v
x <- matrix(NA,nrow=n,ncol=8)
for(k in 1:n) x[k,] <- round(rnorm(8,0,sqrt(v[k])),5)
x[1:hm,1:4] <- x[1:hm,1:4] - log2(hm.fc)
# add missing values
del1 <- sample(n, n*mv) # delete the following data points in matrix
x[del1, ] <- NA
dat1 <- x
# dat 2
v <- rchisq(n,d0)
v <- d0*s02/v
x <- matrix(NA,nrow=n,ncol=8)
for(k in 1:n) x[k,] <- round(rnorm(8,0,sqrt(v[k])),5)
x[1:hm,1:4] <- x[1:hm,1:4] - log2(hm.fc)
# add missing values
del2 <- sample(n, n*mv) # delete the following data points in matrix
x[del2, ] <- NA
dat2 <- x
# dat 3
v <- rchisq(n,d0)
v <- d0*s02/v
x <- matrix(NA,nrow=n,ncol=8)
for(k in 1:n) x[k,] <- round(rnorm(8,0,sqrt(v[k])),5)
x[1:hm,1:4] <- x[1:hm,1:4] - log2(hm.fc)
# add missing values
del3 <- sample(n, n*mv) # delete the following data points in matrix
x[del3, ] <- NA
dat3 <- x
# limma, factorial design
dat <- data.frame(dat1, dat2, dat3)
res.eb <- eb.fit.md.mult(dat, design)
p.ord[,l] <- res.eb$p.ord
p.mod[,l] <- res.eb$p.mod
}
nfp <- 21
mfp <- 0.1
x.ord <- x.mod <- matrix(nrow=nfp,ncol=nit)
fp <- seq(0,mfp,length=nfp)
for(l in 1:nit){
p <- p.ord[-(1:hm),l]
cut <- quantile(p,fp, na.rm=TRUE)
p <- p.ord[1:hm,l]
x.ord[,l] <- unlist(lapply(lapply(cut,">",p),mean, na.rm=TRUE))
p <- p.mod[-(1:hm),l]
cut <- quantile(p,fp, na.rm=TRUE)
p <- p.mod[1:hm,l]
x.mod[,l] <- unlist(lapply(lapply(cut,">",p),mean, na.rm=TRUE))
}
tp.ord <- apply(x.ord,1,mean)
tp.mod <- apply(x.mod,1,mean)
tp <- data.frame(tp.ord, tp.mod)
return(tp)
}