R code: Figure 4


Load the following packages:

library(limma)
library(qvalue)
library(RColorBrewer)


Set parameters:

n <- 1394 # number of proteins
d0 <- 4.428296 # prior degrees of freedom
s02 <- 0.03197716 # prior variance

nit <- 1000 # number of iterations
hm <- 100 # number of significant proteins
mv <- 0.1 # proportion of missing values (rows) in each data set (optional)


Code case and control groups, label 3 experiments, and define the design:

tr <- as.factor(rep(c(1,1,1,1,2,2,2,2),3))
ex <- as.factor(c(rep(1,8),rep(2,8),rep(3,8)))
design <- model.matrix(~ ex+tr)


Load all functions for simulations:

source("http://www.biostat.jhsph.edu/~kkammers/software/eupa/sim.functions.r")
source("http://www.biostat.jhsph.edu/~kkammers/software/eupa/sim.eb.mult.r")
source("http://www.biostat.jhsph.edu/~kkammers/software/eupa/sim.eb.md.mult.r")


Perform simulations with full data:

tp.25 <- sim.eb.mult(n,d0,s02,hm,hm.fc=1.25,nit,design)
tp.50 <- sim.eb.mult(n,d0,s02,hm,hm.fc=1.50,nit,design)


Perform simulations with missing data:

tp.md.25 <- sim.eb.md.mult(n,d0,s02,mv,hm,hm.fc=1.25,nit,design)
tp.md.50 <- sim.eb.md.mult(n,d0,s02,mv,hm,hm.fc=1.50,nit,design)


Create ROC curves for all different setups:

nfp <- 21
mfp <- 0.1
fp <- seq(0,mfp,length=nfp)

mcol <- c(brewer.pal(9,"Blues")[c(8,8)],brewer.pal(9,"Reds")[c(7,7)])
par(las=1,mar=c(5,7,5,3),xaxs="i",yaxs="i",font.lab=2,cex.lab=1.7,font.axis=2,cex.axis=1.7)
plot(c(-0.005,mfp+0.005),c(0.6,1),xlab="false positive rate",ylab="",type="n",xaxt="n",yaxt="n")
abline(v=seq(0,0.5,0.01),h=seq(0.5,1,0.05),col="lightgray",lty="dotted")
lines(fp[-1],tp.25$tp.ord[-1],col=mcol[3],type="b",cex=1.1,lwd=2,pch=15)
lines(fp[-1],tp.25$tp.mod[-1],col=mcol[1],type="b",cex=1.1,lwd=2,pch=15)
lines(fp[-1],tp.50$tp.ord[-1],col=mcol[4],type="b",cex=1.1,lwd=2,pch=19)
lines(fp[-1],tp.50$tp.mod[-1],col=mcol[2],type="b",cex=1.1,lwd=2,pch=19)
lines(fp[-1],tp.md.25$tp.ord[-1],col=mcol[3],type="b",cex=0.8,lwd=1,pch=22,bg="white")
lines(fp[-1],tp.md.25$tp.mod[-1],col=mcol[1],type="b",cex=0.8,lwd=1,pch=22,bg="white")
lines(fp[-1],tp.md.50$tp.ord[-1],col=mcol[4],type="b",cex=0.8,lwd=1,pch=21,bg="white")
lines(fp[-1],tp.md.50$tp.mod[-1],col=mcol[2],type="b",cex=0.8,lwd=1,pch=21,bg="white")
axis(1,seq(0,0.1,0.005),rep("",length(seq(0,0.1,0.005))),tck=-0.01)
axis(1,seq(0,0.1,0.01),rep("",length(seq(0,0.1,0.01))),tck=-0.02)
axis(1,seq(0,0.1,0.05),paste0(seq(0,10,5),"%"))
axis(2,seq(0.5,1,0.1),paste0(seq(50,100,10),"%"))
mtext("true positive rate",side=2,line=5,las=0,font=2,cex=1.7)
legend("bottomright",col=rep(mcol,2),pch=c(19,21,19,21,15,22,15,22),bg="white",cex=1.3,
      c("50% fold change  /  moderated  /  complete",
        "50% fold change  /  moderated  /  incomplete",
        "50% fold change  /  ordinary  /  complete",
        "50% fold change  /  ordinary  /  incomplete",
        "25% fold change  /  moderated  /  complete",
        "25% fold change  /  moderated  /  incomplete",
        "25% fold change  /  ordinary  /  complete",
        "25% fold change  /  ordinary  /  incomplete"))


Appendix: R code for source functions

eb.fit <- function(dat, design){
  n <- dim(dat)[1]
  fit <- lmFit(dat, design)
  fit.eb <- eBayes(fit)
  logFC <- fit.eb$coefficients[, 2]
  df.r <- fit.eb$df.residual
  df.0 <- rep(fit.eb$df.prior, n)
  s2.0 <- rep(fit.eb$s2.prior, n)
  s2 <- (fit.eb$sigma)^2
  s2.post <- fit.eb$s2.post
  t.ord <- fit.eb$coefficients[, 2]/fit.eb$sigma/fit.eb$stdev.unscaled[, 2]
  t.mod <- fit.eb$t[, 2]
  p.ord <- 2*pt(-abs(t.ord), fit.eb$df.residual)
  p.mod <- fit.eb$p.value[, 2]
  q.ord <- qvalue(p.ord)$q
  q.mod <- qvalue(p.mod)$q
  results.eb <- data.frame(logFC, t.ord, t.mod, p.ord, p.mod, q.ord, q.mod, df.r, df.0, s2.0, s2, s2.post)
  return(results.eb)
}


eb.fit.mult <- function(dat, design){
  n <- dim(dat)[1]
  fit <- lmFit(dat, design)
  fit.eb <- eBayes(fit)
  logFC <- fit.eb$coef[, "tr2"]
  df.0 <- rep(fit.eb$df.prior, n)
  df.r <- fit.eb$df.residual
  s2.0 <- rep(fit.eb$s2.prior, n)
  s2 <- (fit.eb$sigma)^2
  s2.post <- fit.eb$s2.post
  t.ord <- fit.eb$coef[, "tr2"]/fit.eb$sigma/fit.eb$stdev.unscaled[, "tr2"]
  t.mod <- fit.eb$t[, "tr2"]
  p.ord <- 2*pt(-abs(t.ord), fit.eb$df.residual)
  p.mod <- fit.eb$p.value[, "tr2"]
  q.ord <- qvalue(p.ord)$q
  q.mod <- qvalue(p.mod)$q
  results.eb.mult <- data.frame(logFC, t.ord, t.mod, p.ord, p.mod, q.ord, q.mod, df.0, df.r, s2.0, s2, s2.post)
  return(results.eb.mult)
}


#  in "fit <- lmFit(dat, design)"  of all NAs is taken care; only the warning message is suppressed
eb.fit.md.mult <- function(dat, design){
  n <- dim(dat)[1]
  suppressWarnings(fit <- lmFit(dat, design))
  fit.eb <- eBayes(fit)
  logFC <- fit.eb$coef[, "tr2"]
  df.0 <- rep(fit.eb$df.prior, n)
  df.r <- fit.eb$df.residual
  s2.0 <- rep(fit.eb$s2.prior, n)
  s2 <- (fit.eb$sigma)^2
  s2.post <- fit.eb$s2.post
  t.ord <- fit.eb$coef[, "tr2"]/fit.eb$sigma/fit.eb$stdev.unscaled[, "tr2"]
  t.mod <- fit.eb$t[, "tr2"]
  p.ord <- 2*pt(-abs(t.ord), fit.eb$df.residual)
  p.mod <- fit.eb$p.value[, "tr2"]
  q.ord <- q.mod <- rep(NA,n)
  ids <- which(!is.na(p.ord))
  k <- 0
  q1 <- qvalue(p.ord[!is.na(p.ord)])$q
  q2 <- qvalue(p.mod[!is.na(p.mod)])$q
  for(i in ids){ 
    k <- k+1
    q.ord[i] <- q1[k] 
    q.mod[i] <- q2[k]
  }
  results.eb.md.mult <- data.frame(logFC, t.ord, t.mod, p.ord, p.mod, q.ord, q.mod, df.0, df.r, s2.0, s2, s2.post)
  return(results.eb.md.mult)
}


sim.eb.mult <- function(n,d0,s02,hm,hm.fc,nit,design){
  set.seed(1)
  
  p.mod <- p.ord <- matrix(nrow=n,ncol=nit)
  for(l in 1:nit){
    
    # dat 1
    v <- rchisq(n,d0)
    v <- d0*s02/v
    x <- matrix(NA,nrow=n,ncol=8)
    for(k in 1:n) x[k,] <- round(rnorm(8,0,sqrt(v[k])),5)
    x[1:hm,1:4] <- x[1:hm,1:4] - log2(hm.fc)
    dat1 <- x
    
    # dat 2
    v <- rchisq(n,d0)
    v <- d0*s02/v
    x <- matrix(NA,nrow=n,ncol=8)
    for(k in 1:n) x[k,] <- round(rnorm(8,0,sqrt(v[k])),5)
    x[1:hm,1:4] <- x[1:hm,1:4] - log2(hm.fc)
    dat2 <- x
    
    # dat 3
    v <- rchisq(n,d0)
    v <- d0*s02/v
    x <- matrix(NA,nrow=n,ncol=8)
    for(k in 1:n) x[k,] <- round(rnorm(8,0,sqrt(v[k])),5)
    x[1:hm,1:4] <- x[1:hm,1:4] - log2(hm.fc)
    dat3 <- x
    
    # limma, factorial design
    dat <- data.frame(dat1, dat2, dat3)
    res.eb <- eb.fit.mult(dat, design)
    p.ord[,l] <- res.eb$p.ord
    p.mod[,l] <- res.eb$p.mod
  }
  
  nfp <- 21
  mfp <- 0.1
  x.ord <- x.mod <- matrix(nrow=nfp,ncol=nit)
  fp <- seq(0,mfp,length=nfp)
  
  for(l in 1:nit){
    p <- p.ord[-(1:hm),l]
    cut <- quantile(p,fp)
    p <- p.ord[1:hm,l]
    x.ord[,l] <- unlist(lapply(lapply(cut,">",p),mean))
    p <- p.mod[-(1:hm),l]
    cut <- quantile(p,fp)
    p <- p.mod[1:hm,l]
    x.mod[,l] <- unlist(lapply(lapply(cut,">",p),mean))
  }
  
  tp.ord <- apply(x.ord,1,mean)
  tp.mod <- apply(x.mod,1,mean)

  tp <- data.frame(tp.ord, tp.mod)
  return(tp)
}


sim.eb.md.mult <- function(n,d0,s02,mv,hm,hm.fc,nit,design){
  set.seed(1)
  
  p.mod <- p.ord <- matrix(nrow=n,ncol=nit)
  for(l in 1:nit){
    
    # dat 1
    v <- rchisq(n,d0)
    v <- d0*s02/v
    x <- matrix(NA,nrow=n,ncol=8)
    for(k in 1:n) x[k,] <- round(rnorm(8,0,sqrt(v[k])),5)
    x[1:hm,1:4] <- x[1:hm,1:4] - log2(hm.fc)
    # add missing values
    del1 <- sample(n, n*mv) # delete the following data points in matrix
    x[del1, ] <- NA
    dat1 <- x
    
    # dat 2
    v <- rchisq(n,d0)
    v <- d0*s02/v
    x <- matrix(NA,nrow=n,ncol=8)
    for(k in 1:n) x[k,] <- round(rnorm(8,0,sqrt(v[k])),5)
    x[1:hm,1:4] <- x[1:hm,1:4] - log2(hm.fc)
    # add missing values
    del2 <- sample(n, n*mv) # delete the following data points in matrix
    x[del2, ] <- NA
    dat2 <- x
    
    # dat 3
    v <- rchisq(n,d0)
    v <- d0*s02/v
    x <- matrix(NA,nrow=n,ncol=8)
    for(k in 1:n) x[k,] <- round(rnorm(8,0,sqrt(v[k])),5)
    x[1:hm,1:4] <- x[1:hm,1:4] - log2(hm.fc)
    # add missing values
    del3 <- sample(n, n*mv) # delete the following data points in matrix
    x[del3, ] <- NA
    dat3 <- x
    
    # limma, factorial design
    dat <- data.frame(dat1, dat2, dat3)
    res.eb <- eb.fit.md.mult(dat, design)
    p.ord[,l] <- res.eb$p.ord
    p.mod[,l] <- res.eb$p.mod
  }
  
  nfp <- 21
  mfp <- 0.1
  x.ord <- x.mod <- matrix(nrow=nfp,ncol=nit)
  fp <- seq(0,mfp,length=nfp)
  
  for(l in 1:nit){
    p <- p.ord[-(1:hm),l]
    cut <- quantile(p,fp, na.rm=TRUE)
    p <- p.ord[1:hm,l]
    x.ord[,l] <- unlist(lapply(lapply(cut,">",p),mean, na.rm=TRUE))
    p <- p.mod[-(1:hm),l]
    cut <- quantile(p,fp, na.rm=TRUE)
    p <- p.mod[1:hm,l]
    x.mod[,l] <- unlist(lapply(lapply(cut,">",p),mean, na.rm=TRUE))
  }
  
  tp.ord <- apply(x.ord,1,mean)
  tp.mod <- apply(x.mod,1,mean)

  tp <- data.frame(tp.ord, tp.mod)
  return(tp)
}