Simulate some highly correlated bivariate data, and plot the data.
set.seed(1)
m <- rpois(1000,50)
eps1 <- rnorm(1000)
eps2 <- rnorm(1000)
x1 <- m + eps1
x2 <- m + eps2
par(pty="s")
plot(x1, x2, xlim=c(10,90), ylim=c(10,90))
abline(c(0,1), col="blue")
The Pearson correlation and the concordance correlation coefficient.
cor.con <- function(x,y) 2*cov(x,y)/(var(x)+var(y)+(mean(x)-mean(y))^2)
cor(x1,x2)
## [1] 0.9789568
cor.con(x1,x2)
## [1] 0.9788988
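As a rough sanity check (an addition, not part of the original code): x1 and x2 share the Poisson component m, so their correlation should be close to var(m) / (var(m) + 1), with var(m) = 50 for a Poisson(50) variable and unit-variance noise.
# expected correlation when x1 = m + eps1 and x2 = m + eps2
50/(50 + 1)
## [1] 0.9803922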
A linear transformation of x2.
x3 <- -10 + 1.25*x2
par(pty="s")
plot(x1, x3, xlim=c(10,90), ylim=c(10,90))
abline(c(0,1), col="blue")
cor(x1,x3)
## [1] 0.9789568
cor.con(x1,x3)
## [1] 0.9110233
Another linear transformation of x2.
x4 <- -40 + 1.75*x2
par(pty="s")
plot(x1, x4, xlim=c(10,90), ylim=c(10,90))
abline(c(0,1), col="blue")
cor(x1,x4)
## [1] 0.9789568
cor.con(x1,x4)
## [1] 0.8188843
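The drop in concordance under a linear transformation can be made explicit with Lin's decomposition: the concordance equals the Pearson correlation multiplied by a bias-correction factor that penalises shifts in scale and location. A minimal sketch, where the helper cb() is hypothetical and not part of the original code:
cb <- function(x,y) {
  v <- sd(x)/sd(y)                           # scale shift
  u <- (mean(x)-mean(y))/sqrt(sd(x)*sd(y))   # location shift
  2/(v + 1/v + u^2)
}
cor(x1,x4)*cb(x1,x4)   # should reproduce cor.con(x1,x4)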
Generate some hypothetical, independent log2 measurements, and plot the data. Note that the difference of the logs is the same as the log of the ratio.
set.seed(1)
x <- rnorm(1000)
y <- rnorm(1000)
r1 <- max(abs(c(x,y)))
r1 <- c(-r1,r1)
par(las=1, pty="s")
plot(x, y, xlab="log2(X)", ylab="log2(Y)", xlim=r1, ylim=r1, pch=1, cex=0.7)
Simulate an independent “reference standard”, and plot the log2 measurements relative to the standard.
z <- rnorm(1000,sd=2)
r2 <- max(abs(c(x-z,y-z)))
r2 <- c(-r2,r2)
par(las=1, pty="s")
plot(x-z, y-z, xlab="log2(X/Z)", ylab="log2(Y/Z)", xlim=r2, ylim=r2, pch=1, cex=0.7)
The respective correlations.
cor(x,y)
## [1] 0.006401211
cor(x-z,y-z)
## [1] 0.7934214
Another example, now with a less variable reference standard (sd = 1 instead of 2).
z <- rnorm(1000,sd=1)
r3 <- max(abs(c(x-z,y-z)))
r3 <- c(-r3,r3)
par(las=1, pty="s")
plot(x-z, y-z, xlab="log2(X/Z)", ylab="log2(Y/Z)", xlim=r3, ylim=r3, pch=1, cex=0.7)
cor(x-z,y-z)
## [1] 0.4939203
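The induced correlation is not surprising: for independent x, y and z, cov(x-z, y-z) = var(z), so the population correlation of the two log ratios is var(z) / (1 + var(z)) when x and y have unit variance. A quick check of the two settings used above (an added sketch, not original code):
sd_z <- c(2, 1)          # the two reference-standard settings above
sd_z^2/(1 + sd_z^2)      # expected cor(x-z, y-z)
## [1] 0.8 0.5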
Assume that some measurements in the reference standard failed, so their intensities are near zero (background), and the corresponding log2 intensities are therefore strongly negative.
z[1:20] <- log2(runif(20,0,1e-3))
z[1:20]
## [1] -12.92858 -12.93505 -10.32844 -11.21681 -10.44795 -12.67132 -13.46826 -12.25190 -15.52666 -10.71385
## [11] -10.43458 -10.85769 -13.63112 -13.34472 -10.04536 -11.33832 -10.01398 -10.43432 -10.09490 -10.23836
r4 <- max(abs(c(x-z,y-z)))
r4 <- c(-r4,r4)
par(las=1, pty="s")
plot(x-z, y-z, xlab="log2(X/Z)", ylab="log2(Y/Z)", xlim=r4, ylim=r4, pch=1, cex=0.7)
points((x-z)[1:20], (y-z)[1:20], pch=21, bg="red", cex=0.7)
cor(x-z,y-z)
## [1] 0.7750309
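Dropping the 20 failed measurements (a check added here, not in the original code) should bring the correlation back to roughly the value seen before the artifacts were introduced.
cor((x-z)[-(1:20)], (y-z)[-(1:20)])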
To show how useful MA plots are, we simulate some (log2) gene expression data from two technical replicates, with and without artifacts.
n <- 1000
set.seed(1)
x <- runif(n,6,14)
No artifacts.
x1 <- x + rnorm(n,sd=1/x^2)
x2 <- x + rnorm(n,sd=1/x^2)
r <- max(abs(x1-x2))
r <- c(-r,r)
par(mfrow=c(1,2), las=1)
par(pty="s")
plot(x1, x2, xlab="log2(R1)", ylab="log2(R2)")
abline(c(0,1), col="red", lwd=2)
par(pty="m")
plot((x1+x2)/2, x1-x2, xlab="( log2(R1) + log2(R2) ) / 2", ylab="log2( R1 / R2 )", ylim=r)
abline(h=0, col="red", lwd=2)
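The remaining examples repeat the same pair of panels, so a small helper such as the hypothetical ma_panels() below (an added sketch, not part of the original code) could remove the duplication.
ma_panels <- function(a, b, r = max(abs(a-b))) {
  # left: scatter plot of the two replicates; right: MA plot
  par(mfrow=c(1,2), las=1)
  par(pty="s")
  plot(a, b, xlab="log2(R1)", ylab="log2(R2)")
  abline(c(0,1), col="red", lwd=2)
  par(pty="m")
  plot((a+b)/2, a-b, xlab="( log2(R1) + log2(R2) ) / 2", ylab="log2( R1 / R2 )", ylim=c(-r,r))
  abline(h=0, col="red", lwd=2)
}
ma_panels(x1, x2)   # reproduces the two panels above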
Replicate 1 has on average a 5% higher abundance.
x1b <- x1 + log2(1.05)
x2b <- x2
r <- max(abs(x1b-x2b))
r <- c(-r,r)
par(mfrow=c(1,2), las=1)
par(pty="s")
plot(x1b, x2b, xlab="log2(R1)", ylab="log2(R2)")
abline(c(0,1), col="red", lwd=2)
par(pty="m")
plot((x1b+x2b)/2, x1b-x2b, xlab="( log2(R1) + log2(R2) ) / 2", ylab="log2( R1 / R2 )", ylim=r)
abline(h=0, col="red", lwd=2)
Replicate 1 has on average a 1% higher abundance.
x1b <- x1 + log2(1.01)
x2b <- x2
r <- max(abs(x1b-x2b))
r <- c(-r,r)
par(mfrow=c(1,2), las=1)
par(pty="s")
plot(x1b, x2b, xlab="log2(R1)", ylab="log2(R2)")
abline(c(0,1), col="red", lwd=2)
par(pty="m")
plot((x1b+x2b)/2, x1b-x2b, xlab="( log2(R1) + log2(R2) ) / 2", ylab="log2( R1 / R2 )", ylim=r)
abline(h=0, col="red", lwd=2)
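The constant vertical offsets visible in the last two MA plots are simply the log2 fold changes of the simulated global shifts (a note added here, not original output):
log2(c(1.05, 1.01))   # roughly 0.070 and 0.014 on the log2 scale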
Assume there is a bias that depends on the gene abundance.
m <- mean(c(x1,x2))
x1b <- x1 - 0.01*(x1-m)
x2b <- x2
r <- max(abs(x1b-x2b))
r <- c(-r,r)
par(mfrow=c(1,2), las=1)
par(pty="s")
plot(x1b, x2b, xlab="log2(R1)", ylab="log2(R2)")
abline(c(0,1), col="red", lwd=2)
par(pty="m")
plot((x1b+x2b)/2, x1b-x2b, xlab="( log2(R1) + log2(R2) ) / 2", ylab="log2( R1 / R2 )", ylim=r)
abline(h=0, col="red", lwd=2)
We often observe something like this in real data.
m <- mean(c(x1,x2))
x1b <- x1 - (x1-m)/(x1^2)
x2b <- x2
r <- max(abs(x1b-x2b))
r <- c(-r,r)
par(mfrow=c(1,2), las=1)
par(pty="s")
plot(x1b, x2b, xlab="log2(R1)", ylab="log2(R2)")
abline(c(0,1), col="red", lwd=2)
par(pty="m")
plot((x1b+x2b)/2, x1b-x2b, xlab="( log2(R1) + log2(R2) ) / 2", ylab="log2( R1 / R2 )", ylim=r)
abline(h=0, col="red", lwd=2)
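Such abundance-dependent trends are often summarised with a smooth trend line on the MA plot. As a sketch (added here, not in the original code), a lowess fit can be overlaid on the last plot:
a <- (x1b + x2b)/2
lines(lowess(a, x1b - x2b), col="blue", lwd=2)   # smooth trend of M versus A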