x <- c(15.1,13.1,21.5)
y <- c(35.1,39.5,58.8)
par(mar=c(4,4,2,1),mfrow=c(1,2),las=1)
barplot(c(mean(x),mean(y)),width=1,space=c(0.5,0.5),
col=c("white","gray40"),xlim=c(0,3),names=c("A","B"),
ylim=c(0,76))
segments(1,mean(x),1,mean(x)+sd(x),lwd=2)
segments(0.8,mean(x)+sd(x),1.2,mean(x)+sd(x),lwd=2)
segments(2.5,mean(y),2.5,mean(y)+sd(y),lwd=2)
segments(2.3,mean(y)+sd(y),2.7,mean(y)+sd(y),lwd=2)
mtext("Bad plot",cex=1.5,line=0.5)
plot(rep(0:1,c(3,3)),c(x,y),xaxt="n",ylim=c(0,76),xlim=c(-0.5,1.5),ylab="",xlab="")
abline(v=0:1,col="gray40",lty=2)
points(rep(0:1,c(3,3)),c(x,y),lwd=2)
mtext("Good plot",cex=1.5,line=0.5)
xci <- t.test(x)$conf.int
yci <- t.test(y)$conf.int
segments(0.25,xci[1],0.25,xci[2],lwd=2,col="blue")
segments(c(0.23,0.23,0.2),c(xci,mean(x)),c(0.27,0.27,0.3),c(xci,mean(x)),lwd=2,col="blue")
segments(1-0.25,yci[1],1-0.25,yci[2],lwd=2,col="red")
segments(1-c(0.23,0.23,0.2),c(yci,mean(y)),1-c(0.27,0.27,0.3),c(yci,mean(y)),lwd=2,col="red")
u <- par("usr")
segments(0:1,u[3],0:1,u[3]-diff(u[3:4])*0.03,xpd=TRUE)
text(0:1,u[3]-diff(u[3:4])*0.08,c("A","B"),xpd=TRUE)
Simulate ten cases, ten controls, under the null of no difference in population means (here the population means are zero, and the population standard deviations are one in each group).
set.seed(5)
n <- 10
x <- rnorm(n)
y <- rnorm(n)
par(las=1)
stripchart(list(x,y),vertical=T,pch=20,col=rgb(0,0,0,0.5),cex=2,xlim=c(0.5,2.5),method="jitter",jitter=0.05)
segments(0.8,mean(x),1.2,mean(x),col="red",lwd=3)
segments(1.8,mean(y),2.2,mean(y),col="blue",lwd=3)
t.test(x,y,var.equal=TRUE)
##
## Two Sample t-test
##
## data: x and y
## t = 0.96902, df = 18, p-value = 0.3454
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.4712247 1.2780496
## sample estimates:
## mean of x mean of y
## -0.07885155 -0.48226403
attributes(t.test(x,y,var.equal=TRUE))
## $names
## [1] "statistic" "parameter" "p.value" "conf.int" "estimate" "null.value" "stderr"
## [8] "alternative" "method" "data.name"
##
## $class
## [1] "htest"
t.test(x,y,var.equal=TRUE)$stat
## t
## 0.9690169
t.test(x,y,var.equal=TRUE)$p.val
## [1] 0.3453781
Do this ten times, and look at the test statistics and the p-values.
for(k in 1:10){
x <- rnorm(n)
y <- rnorm(n)
tst <- round(t.test(x,y,var.equal=TRUE)$stat,3)
pvl <- round(t.test(x,y,var.equal=TRUE)$p.val,3)
print(c(tst,pvl))
}
## t
## 0.336 0.741
## t
## 2.053 0.055
## t
## -0.016 0.987
## t
## 0.545 0.592
## t
## 0.558 0.583
## t
## -0.061 0.952
## t
## 0.775 0.448
## t
## -0.794 0.438
## t
## 0.248 0.807
## t
## 2.227 0.039
Now let’s do this 10,000 times, and make histograms of the test statistics and the p-values. Note that the type I error is protected!
nit <- 10000
p1 <- t1 <- rep(NA,nit)
set.seed(1)
for(j in 1:nit){
x <- rnorm(n)
y <- rnorm(n)
tmp <- t.test(x,y,var.equal=TRUE)
t1[j] <- tmp$stat
p1[j] <- tmp$p.value
}
mean(ifelse(p1<0.05,1,0))
## [1] 0.0528
cval <- qt(0.975,2*n-2)
mean(ifelse(t1<(-cval),1,0))
## [1] 0.0278
mean(ifelse(t1>cval,1,0))
## [1] 0.025
par(mfrow=c(1,2),las=1)
xr <- c(-6,6)
cval <- qt(0.975,2*n-2)
hist(t1,xlim=xr,ylim=c(0,0.45),prob=TRUE,col=grey(0.9),breaks=21,xlab="test statistic",ylab="",main="")
curve(dt(x,2*n-2),col="blue",lwd=3,add=TRUE)
abline(v=c(-cval,cval),lty="dotted",col="blue",lwd=1.5)
title("Under the null")
hist(p1,xlim=c(0,1),ylim=c(0,5),prob=TRUE,col="lightgray",breaks=21,xlab="p-value",ylab="",main="")
title(paste0("Significance level : ",formatC(mean(ifelse(p1<0.05,1,0)),format="f",digits=2)))
Now simulate ten cases, ten controls, under the alternative. Here the difference in population means is 0.5, and the population standard deviations are one in each group. How much power do we have to detect the difference in means?
x <- rnorm(n)
y <- rnorm(n)-0.5
par(las=1)
stripchart(list(x,y),vertical=T,pch=20,col=rgb(0,0,0,0.5),cex=2,xlim=c(0.5,2.5),method="jitter",jitter=0.05)
segments(0.8,mean(x),1.2,mean(x),col="red",lwd=3)
segments(1.8,mean(y),2.2,mean(y),col="blue",lwd=3)
t.test(x,y,var.equal=TRUE)
##
## Two Sample t-test
##
## data: x and y
## t = 2.5607, df = 18, p-value = 0.01966
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.190364 1.930136
## sample estimates:
## mean of x mean of y
## 0.4965427 -0.5637071
attributes(t.test(x,y,var.equal=TRUE))
## $names
## [1] "statistic" "parameter" "p.value" "conf.int" "estimate" "null.value" "stderr"
## [8] "alternative" "method" "data.name"
##
## $class
## [1] "htest"
t.test(x,y,var.equal=TRUE)$stat
## t
## 2.560684
t.test(x,y,var.equal=TRUE)$p.val
## [1] 0.01965551
Do this ten times, and look at the test statistics and the p-values.
for(k in 1:10){
x <- rnorm(n)
y <- rnorm(n)-0.5
tst <- round(t.test(x,y,var.equal=TRUE)$stat,3)
pvl <- round(t.test(x,y,var.equal=TRUE)$p.val,3)
print(c(tst,pvl))
}
## t
## 2.082 0.052
## t
## 1.172 0.257
## t
## 2.132 0.047
## t
## 1.114 0.280
## t
## 0.763 0.456
## t
## 2.623 0.017
## t
## -0.725 0.478
## t
## 0.330 0.745
## t
## 1.103 0.285
## t
## -0.049 0.962
Now let’s do this 10,000 times, and make histograms of the test statistics and the p-values. How often would we reject the null hypothesis?
p2 <- t2 <- rep(NA,nit)
set.seed(1)
for(j in 1:nit){
x <- rnorm(n)
y <- rnorm(n)-0.5
tmp <- t.test(x,y,var.equal=TRUE)
t2[j] <- tmp$stat
p2[j] <- tmp$p.value
}
mean(ifelse(p2<0.05,1,0))
## [1] 0.1813
cval <- qt(0.975,2*n-2)
mean(ifelse(t2<(-cval),1,0))
## [1] 0.0012
mean(ifelse(t2>cval,1,0))
## [1] 0.1801
par(mfrow=c(1,2),las=1)
cval <- qt(0.975,2*n-2)
hist(t2,xlim=xr,ylim=c(0,0.45),prob=TRUE,col=grey(0.95),breaks=21,xlab="test statistic",ylab="",main="")
curve(dt(x,2*n-2,ncp=0.5/sqrt(2/n)),col="red",lwd=3,add=TRUE)
abline(v=c(-cval,cval),lty="dotted",col="red",lwd=1.5)
title("Under the alternative")
hist(p2,xlim=c(0,1),ylim=c(0,5),prob=TRUE,col="lightgray",breaks=21,xlab="p-value",ylab="",main="")
title(paste0("Power : ",formatC(mean(ifelse(p2<0.05,1,0)),format="f",digits=2)))
Simulate five cases, five controls, under the null of no difference in population means (here the population means are zero, and the population standard deviations are one in each group). Assume we have three technical replicates for each experimental unit, with technical variability 0.1.
n1 <- 5
n2 <- 3
s2 <- 0.1
set.seed(10)
# ten measurements (the biological replicates) under the null
m <- rnorm(2*n1)
m
## [1] 0.01874617 -0.18425254 -1.37133055 -0.59916772 0.29454513 0.38979430 -1.20807618 -0.36367602
## [9] -1.62667268 -0.25647839
# repeat each measure three times (the technical replicates)
m <- rep(m,rep(n2,2*n1))
m
## [1] 0.01874617 0.01874617 0.01874617 -0.18425254 -0.18425254 -0.18425254 -1.37133055 -1.37133055
## [9] -1.37133055 -0.59916772 -0.59916772 -0.59916772 0.29454513 0.29454513 0.29454513 0.38979430
## [17] 0.38979430 0.38979430 -1.20807618 -1.20807618 -1.20807618 -0.36367602 -0.36367602 -0.36367602
## [25] -1.62667268 -1.62667268 -1.62667268 -0.25647839 -0.25647839 -0.25647839
# add experimental error (1/10th of the biological standard deviation)
m <- m+rnorm(length(m),sd=s2)
m
## [1] 0.128924121 0.094324322 -0.005077185 -0.085508072 -0.110113529 -0.175317815 -1.466824936 -1.390845588
## [9] -1.278778424 -0.550869863 -0.658798779 -0.817696400 0.227058533 0.082639007 0.168025324 0.352428145
## [17] 0.321038758 0.302578418 -1.218252276 -1.233454228 -1.393450221 -0.371470624 -0.266819383 -0.345183421
## [25] -1.764667040 -1.770224118 -1.590463959 -0.432387069 -0.288932795 -0.321634693
# plot the data
par(las=1)
plot(rep(1:10,rep(3,10)),m,pch=20,col=c(rep(rgb(1,0,0,0.5),15),rep(rgb(0,0,1,0.5),15)),cex=2,xlab="",ylab="",xaxt="n")
abline(v=5.5)
axis(1,c(3,8),c("cases","contols"))
These data are not independent - technical replicates from an experimental unit are more alike than replicates across experimental units.
Assume we ignore the dependence, and simply carry out a 2-sample t-test.
x <- m[1:(n1*n2)]
y <- m[-(1:(n1*n2))]
x
## [1] 0.128924121 0.094324322 -0.005077185 -0.085508072 -0.110113529 -0.175317815 -1.466824936 -1.390845588
## [9] -1.278778424 -0.550869863 -0.658798779 -0.817696400 0.227058533 0.082639007 0.168025324
y
## [1] 0.3524281 0.3210388 0.3025784 -1.2182523 -1.2334542 -1.3934502 -0.3714706 -0.2668194 -0.3451834
## [10] -1.7646670 -1.7702241 -1.5904640 -0.4323871 -0.2889328 -0.3216347
t.test(x,y,var.equal=TRUE)
##
## Two Sample t-test
##
## data: x and y
## t = 1.1176, df = 28, p-value = 0.2732
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.2321859 0.7897906
## sample estimates:
## mean of x mean of y
## -0.3892573 -0.6680596
Do this ten times, and look at the test statistics and the p-values.
for(k in 1:10){
m <- rnorm(2*n1)
m <- rep(m,rep(n2,2*n1))
m <- m+rnorm(length(m),sd=s2)
x <- m[1:(n1*n2)]
y <- m[-(1:(n1*n2))]
tst <- round(t.test(x,y,var.equal=TRUE)$stat,3)
pvl <- round(t.test(x,y,var.equal=TRUE)$p.val,3)
print(c(tst,pvl))
}
## t
## -0.248 0.806
## t
## 0.155 0.878
## t
## 2.100 0.045
## t
## 1.450 0.158
## t
## 2.146 0.041
## t
## 3.791 0.001
## t
## 0.088 0.931
## t
## 2.519 0.018
## t
## -0.845 0.405
## t
## 3.500 0.002
Now let’s do this 10,000 times, and make histograms of the test statistics and the p-values. How often would we reject the null hypothesis?
p1 <- t1 <- rep(NA,nit)
set.seed(1)
for(j in 1:nit){
m <- rnorm(2*n1)
m <- rep(m,rep(n2,2*n1))
m <- m+rnorm(length(m),sd=s2)
x <- m[1:(n1*n2)]
y <- m[-(1:(n1*n2))]
tmp <- t.test(x,y,var.equal=TRUE)
t1[j] <- tmp$stat
p1[j] <- tmp$p.value
}
mean(ifelse(p1<0.05,1,0))
## [1] 0.3006
cval <- qt(0.975,2*n1*n2-2)
mean(ifelse(t1<(-cval),1,0))
## [1] 0.1462
mean(ifelse(t1>cval,1,0))
## [1] 0.1544
par(mfrow=c(1,2),las=1)
hist(t1,xlim=xr,ylim=c(0,0.45),prob=TRUE,col=grey(0.95),breaks=21,xlab="test statistic",ylab="",main="")
curve(dt(x,2*n1*n2-2),col="blue",lwd=3,add=TRUE)
abline(v=c(-cval,cval),lty="dotted",col="blue",lwd=1.5)
title("Ignoring dependence")
hist(p1,xlim=c(0,1),ylim=c(0,5),prob=TRUE,col="lightgray",breaks=21,xlab="p-value",ylab="",main="")
title(paste0("Significance level : ",formatC(mean(ifelse(p1<0.05,1,0)),format="f",digits=2)))
We have what’s called type I error inflation. Since we simulated under the null of no difference between group means, we should be rejecting the null on average in 5% of the simulations, not 30%.
What can we do? In case the number of technical replicates is the same for each experimental unit, we can just take the average across technical replicates, and carry out a two-sample t-test comparing the 5 case averages to the 5 control averages.
set.seed(1)
m <- rnorm(2*n1)
m <- rep(m,rep(n2,2*n1))
m <- m+rnorm(length(m),sd=s2)
m
## [1] -0.47527569 -0.58746949 -0.68857787 -0.03782666 0.29613642 0.17914996 -0.83724764 -0.74124499
## [9] -0.75350649 1.65467093 1.68717854 1.67349443 0.33696427 0.13057260 0.39149035 -0.82608126
## [17] -0.83604793 -0.96754362 0.43961405 0.52922321 0.62329701 0.72804593 0.77709187 0.73294420
## [25] 0.43807540 0.53428190 0.53635236 -0.31131973 -0.19538585 -0.22907081
m <- apply(matrix(m,byrow=TRUE,ncol=n2),1,mean)
plot(1:10,m,pch=20,col=c(rep(rgb(1,0,0,0.5),n1),rep(rgb(0,0,1,0.5),n1)),cex=2,xlab="",ylab="",xaxt="n")
abline(v=5.5)
axis(1,c(3,8),c("cases","contols"))
x <- m[1:(n1)]
y <- m[-(1:n1)]
x
## [1] -0.5837743 0.1458199 -0.7773330 1.6717813 0.2863424
y
## [1] -0.8765576 0.5307114 0.7460273 0.5029032 -0.2452588
t.test(x,y,var.equal=TRUE)
##
## Two Sample t-test
##
## data: x and y
## t = 0.032229, df = 8, p-value = 0.9751
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1.199496 1.233500
## sample estimates:
## mean of x mean of y
## 0.1485672 0.1315651
Now let’s do this 10,000 times as well, and make histograms of the test statistics and the p-values. The type I error is protected! Note this only works when the number of technical replicates is the same for each experimental unit. In 140.616 you will learn what to do when this is not the case.
p2 <- t2 <- rep(NA,nit)
set.seed(1)
for(j in 1:nit){
m <- rnorm(2*n1)
m <- rep(m,rep(n2,2*n1))
m <- m+rnorm(length(m),sd=s2)
x <- m[1:(n1*n2)]
y <- m[-(1:(n1*n2))]
x <- apply(matrix(x,byrow=TRUE,ncol=n2),1,mean)
y <- apply(matrix(y,byrow=TRUE,ncol=n2),1,mean)
tmp <- t.test(x,y,var.equal=TRUE)
t2[j] <- tmp$stat
p2[j] <- tmp$p.value
}
mean(ifelse(p2<0.05,1,0))
## [1] 0.0526
cval <- qt(0.975,2*n1-2)
mean(ifelse(t2<(-cval),1,0))
## [1] 0.0259
mean(ifelse(t2>cval,1,0))
## [1] 0.0267
par(mfrow=c(1,2),las=1)
hist(t2,xlim=xr,ylim=c(0,0.45),prob=TRUE,col=grey(0.95),breaks=21,xlab="test statistic",ylab="",main="")
curve(dt(x,2*n1-2),col="blue",lwd=3,add=TRUE)
abline(v=c(-cval,cval),lty="dotted",col="blue",lwd=1.5)
title("Accounting for dependence")
hist(p2,xlim=c(0,1),ylim=c(0,5),prob=TRUE,col="lightgray",breaks=21,xlab="p-value",ylab="",main="")
title(paste0("Significance level : ",formatC(mean(ifelse(p2<0.05,1,0)),format="f",digits=2)))
Another example: when you have paired data and ignore the pairing, your type I error will be affected. Assume paired data (before and after vaccination, say) in ten experimental units, and the between unit (biological) standard deviation to be four times as large as the measurement error (1 versus 0.25). Assume there is no vaccination effect.
n <- 10
s2 <- 0.25
set.seed(1)
# sample ten measurements
m <- rnorm(n)
m
## [1] -0.6264538 0.1836433 -0.8356286 1.5952808 0.3295078 -0.8204684 0.4874291 0.7383247 0.5757814
## [10] -0.3053884
# repeat twice (before / after)
m <- rep(m,2)
m
## [1] -0.6264538 0.1836433 -0.8356286 1.5952808 0.3295078 -0.8204684 0.4874291 0.7383247 0.5757814
## [10] -0.3053884 -0.6264538 0.1836433 -0.8356286 1.5952808 0.3295078 -0.8204684 0.4874291 0.7383247
## [19] 0.5757814 -0.3053884
# add measurements error
m <- m+rnorm(length(m),sd=s2)
m
## [1] -0.2485085 0.2811041 -0.9909388 1.0416058 0.6107405 -0.8317018 0.4833815 0.9742838 0.7810867
## [10] -0.1569131 -0.3967095 0.3791774 -0.8169874 1.0979429 0.4844642 -0.8345006 0.4484802 0.3706366
## [19] 0.4562438 -0.2009030
par(las=1)
plot(c(rep(1,n),rep(2,n)),m,pch=20,col=c(rep(rgb(1,0,0,0.5),n),rep(rgb(0,0,1,0.5),n)),cex=2,
xlim=c(0.5,2.5),xlab="",ylab="",xaxt="n")
for(k in 1:n) lines(c(1,2),c(m[k],m[n+k]),lty=2)
axis(1,1:2,c("before","after"))
x <- m[1:n]
y <- m[-(1:n)]
# This is the wrong thing to do - ignoring the pairing!
t.test(x,y,var.equal=TRUE)
##
## Two Sample t-test
##
## data: x and y
## t = 0.31477, df = 18, p-value = 0.7566
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.5426378 0.7338969
## sample estimates:
## mean of x mean of y
## 0.19441402 0.09878447
# This is the correct thing to do
t.test(x,y,paired=TRUE)
##
## Paired t-test
##
## data: x and y
## t = 1.3339, df = 9, p-value = 0.215
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.06655031 0.25780941
## sample estimates:
## mean of the differences
## 0.09562955
t.test(x-y)
##
## One Sample t-test
##
## data: x - y
## t = 1.3339, df = 9, p-value = 0.215
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.06655031 0.25780941
## sample estimates:
## mean of x
## 0.09562955
How often do we reject the null hypothesis if we carried out a 2-sample t-test, instead of a paired t-test (or equivalently, a one-sample t-test on the differences)? Let’s do this 10,000 times, and make histograms of the test statistics and the p-values.
p1 <- t1 <- rep(NA,nit)
p2 <- t2 <- rep(NA,nit)
set.seed(1)
for(j in 1:nit){
m <- rnorm(n)
m <- rep(m,2)
m <- m+rnorm(length(m),sd=s2)
x <- m[1:n]
y <- m[-(1:n)]
tmp <- t.test(x,y,var.equal=TRUE)
t1[j] <- tmp$stat
p1[j] <- tmp$p.value
z <- x-y
tmp <- t.test(z)
t2[j] <- tmp$stat
p2[j] <- tmp$p.value
}
mean(ifelse(p1<0.05,1,0))
## [1] 0
mean(ifelse(p2<0.05,1,0))
## [1] 0.0477
cval <- qt(0.975,2*n-2)
mean(ifelse(t1<(-cval),1,0))
## [1] 0
mean(ifelse(t1>cval,1,0))
## [1] 0
cval <- qt(0.975,n-1)
mean(ifelse(t2<(-cval),1,0))
## [1] 0.0237
mean(ifelse(t2>cval,1,0))
## [1] 0.024
par(mfrow=c(1,2),las=1)
hist(t1,xlim=xr,ylim=c(0,0.45),prob=TRUE,col=grey(0.95),breaks=21,xlab="test statistic",ylab="",main="")
curve(dt(x,2*n-2),col="blue",lwd=3,add=TRUE)
abline(v=c(-cval,cval),lty="dotted",col="blue",lwd=1.5)
title("Ignoring dependence")
hist(p1,xlim=c(0,1),ylim=c(0,5),prob=TRUE,col="lightgray",breaks=21,xlab="p-value",ylab="",main="")
title(paste0("Significance level : ",formatC(mean(ifelse(p1<0.05,1,0)),format="f",digits=2)))
hist(t2,xlim=xr,ylim=c(0,0.45),prob=TRUE,col=grey(0.95),breaks=21,xlab="test statistic",ylab="",main="")
curve(dt(x,n-1),col="blue",lwd=3,add=TRUE)
abline(v=c(-cval,cval),lty="dotted",col="blue",lwd=1.5)
title("Accounting for dependence")
hist(p2,xlim=c(0,1),ylim=c(0,5),prob=TRUE,col="lightgray",breaks=21,xlab="p-value",ylab="",main="")
title(paste0("Significance level : ",formatC(mean(ifelse(p2<0.05,1,0)),format="f",digits=2)))
When you ignore the pairing you don’t reject the null as often as you should - why is that? You fail to eliminate the between subject variability!
set.seed(1)
m <- rnorm(n)
m <- rep(m,2)
m <- m+rnorm(length(m),sd=s2)
x <- m[1:n]
y <- m[-(1:n)]
r <- range(m)
par(las=1,mfrow=c(1,2))
plot(c(rep(1,n),rep(2,n)),m,pch=20,col=c(rep(rgb(1,0,0,0.5),n),rep(rgb(0,0,1,0.5),n)),cex=2,
xlim=c(0.5,2.5),xlab="",ylab="",xaxt="n",ylim=r)
for(k in 1:n) lines(c(1,2),c(m[k],m[n+k]),lty=2)
axis(1,1:2,c("before","after"))
plot(rep(1,n),x-y,xaxt="n",xlab="",ylab="",ylim=r,pch=20,col=rgb(0,0,0,0.5),cex=2)
axis(1,1,"difference")