Statistics for Laboratory Scientists ( 140.615 )

Hypothesis Testing

One sample / paired t-test

Example 1

# Paired measurements on the same 10 subjects, taken before and after
# treatment (per the text below, antibody levels around vaccination).
before <- c(13.04,11.23,15.27,7.56,10.12,10.02,8.25,13.09,12.62,10.15)
after <- c(15.15,13.96,16.41,12.52,12.57,11.07,13.23,13.13,10.01,12.63)

Plot the data.

# Scatterplot of after vs. before; points above the identity line
# increased after treatment.
par(las=1)                         # axis labels always horizontal
plot(before,after)
abline(0,1,lwd=2,lty=2,col="blue") # identity line: intercept 0, slope 1

Make a square plot.

# Common axis range over both variables so the identity line is at 45 degrees.
r <- range(c(before,after))
r
## [1]  7.56 16.41
par(pty="s",las=1)                 # pty="s": force a square plotting region
plot(before,after,xlim=r,ylim=r)
abline(0,1,lwd=2,lty=2,col="blue")

Bind the paired measurements in one data object.

# Combine the paired measurements column-wise into a 10 x 2 matrix;
# plot(dat,...) then uses column 1 (before) as x and column 2 (after) as y.
dat <- cbind(before,after)
dat
##       before after
##  [1,]  13.04 15.15
##  [2,]  11.23 13.96
##  [3,]  15.27 16.41
##  [4,]   7.56 12.52
##  [5,]  10.12 12.57
##  [6,]  10.02 11.07
##  [7,]   8.25 13.23
##  [8,]  13.09 13.13
##  [9,]  12.62 10.01
## [10,]  10.15 12.63
par(pty="s",las=1)
plot(dat,xlim=r,ylim=r)
abline(0,1,lwd=2,lty=2,col="blue")

The differences.

# Within-pair differences (after minus before) — the quantity the paired
# t-test is based on.
d <- after-before
d
##  [1]  2.11  2.73  1.14  4.96  2.45  1.05  4.98  0.04 -2.61  2.48
# Same result computed row-wise from the matrix: diff() on each row of dat.
apply(dat,1,diff)
##  [1]  2.11  2.73  1.14  4.96  2.45  1.05  4.98  0.04 -2.61  2.48

Plot the differences.

# One-dimensional scatter of the differences, jittered horizontally to
# reduce overplotting; the dashed line marks "no change" (d = 0).
par(las=1)
stripchart(d,method="jitter",vertical=TRUE,pch=1,ylab="difference")
abline(h=0,lwd=2,lty=2,col="blue")

Hypothesis test of the null hypothesis \(\mu = 0\) (no mean difference).

# Paired t-test "by hand": test H0: mu = 0 for the mean of the differences.
m <- mean(d)
m
## [1] 1.933
s <- sd(d)
s
## [1] 2.243777
n <- length(d)
n
## [1] 10
se <- s/sqrt(n)        # standard error of the sample mean difference
se
## [1] 0.7095445
tq <- qt(0.975,n-1)    # two-sided 5% critical value from t with n-1 df
tq
## [1] 2.262157
Tstat <- m/se          # observed t statistic (under H0, t-distributed, n-1 df)
Tstat
## [1] 2.724283

The test statistic is larger than the critical value, so we reject the null hypothesis.

Plot the null distribution, and calculate the p-value.

# Density of the null distribution t(n-1), with the rejection boundaries
# (red, at +/- tq) and the observed statistic (blue).
curve(dt(x,n-1),-3.5,3.5)
abline(h=0)
abline(v=c(-tq,tq),col="red",lty=2)
abline(v=Tstat,col="blue")

# Upper-tail area beyond Tstat ...
pt(Tstat,n-1,lower=FALSE)
## [1] 0.01172144
# ... equals the lower-tail area beyond -Tstat, by symmetry of the t density.
pt(-Tstat,n-1)
## [1] 0.01172144
# Two-sided p-value: twice the one-tail area.
2*pt(-Tstat,n-1)
## [1] 0.02344289

The built-in function.

# A one-sample t-test on the differences is identical to the paired test;
# same t, df, p-value, and confidence interval as computed by hand above.
t.test(d)
## 
##  One Sample t-test
## 
## data:  d
## t = 2.7243, df = 9, p-value = 0.02344
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.3278988 3.5381012
## sample estimates:
## mean of x 
##     1.933
# Equivalent call: pass both samples and request a paired test.
t.test(after,before,paired=TRUE)
## 
##  Paired t-test
## 
## data:  after and before
## t = 2.7243, df = 9, p-value = 0.02344
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  0.3278988 3.5381012
## sample estimates:
## mean difference 
##           1.933
# t.test() returns a list of class "htest"; these are its components.
attributes(t.test(d))
## $names
##  [1] "statistic"   "parameter"   "p.value"     "conf.int"    "estimate"    "null.value"  "stderr"     
##  [8] "alternative" "method"      "data.name"  
## 
## $class
## [1] "htest"
# Individual components can be extracted with $; note that $stat, $conf and
# $param rely on partial matching of the component names above.
t.test(d)$p.value
## [1] 0.02344289
t.test(d)$stat
##        t 
## 2.724283
t.test(d)$conf
## [1] 0.3278988 3.5381012
## attr(,"conf.level")
## [1] 0.95
t.test(d)$param
## df 
##  9

Imagine you knew that, in truth, antibody levels cannot decrease after vaccination. In that case a one-sided test is appropriate; carry it out.

# One-sided test H0: mu = 0 vs. HA: mu > 0; the p-value is half the
# two-sided one, and the confidence interval becomes one-sided (upper
# bound Inf).
t.test(d,alternative="greater")
## 
##  One Sample t-test
## 
## data:  d
## t = 2.7243, df = 9, p-value = 0.01172
## alternative hypothesis: true mean is greater than 0
## 95 percent confidence interval:
##  0.6323247       Inf
## sample estimates:
## mean of x 
##     1.933

The critical value for the one-sided test.

# One-sided 5% critical value: 95th percentile of t with n-1 df (all the
# rejection probability sits in the upper tail).
qt(0.95,n-1)
## [1] 1.833113

Plot the null distribution, the critical value (red) and the test statistic (blue).

# Null distribution with the single upper-tail critical value (red) and
# the observed statistic (blue).
curve(dt(x,n-1),-3.5,3.5)
abline(h=0)
abline(v=qt(0.95,n-1),col="red",lty=2)
abline(v=Tstat,col="blue")

Note the confidence interval in the output from the t.test function. Here is how you calculate the lower bound.

# Lower bound of the one-sided 95% confidence interval:
# mean - t(0.95, df=9) * SE, matching the t.test output above.
mean(d) - qt(0.95,9) * sd(d) / sqrt(length(d))
## [1] 0.6323247

Example 2

# Six repeat measurements; interest is whether the true mean equals 10.
x <- c(9.98,9.87,10.05,10.08,9.99,9.90)
# Default null value is mu = 0 — not a meaningful hypothesis here, hence
# the enormous t statistic and tiny p-value.
t.test(x)
## 
##  One Sample t-test
## 
## data:  x
## t = 298.68, df = 5, p-value = 7.984e-12
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##   9.892455 10.064212
## sample estimates:
## mean of x 
##  9.978333
t.test(x)$p.val
## [1] 7.984179e-12
t.test(x)$conf
## [1]  9.892455 10.064212
## attr(,"conf.level")
## [1] 0.95
# The relevant test: null value mu = 10.
t.test(x,mu=10)
## 
##  One Sample t-test
## 
## data:  x
## t = -0.64854, df = 5, p-value = 0.5452
## alternative hypothesis: true mean is not equal to 10
## 95 percent confidence interval:
##   9.892455 10.064212
## sample estimates:
## mean of x 
##  9.978333
t.test(x,mu=10)$p.val
## [1] 0.5452379
ci <- t.test(x)$conf
ci
## [1]  9.892455 10.064212
## attr(,"conf.level")
## [1] 0.95
# Duality of test and confidence interval: testing against either endpoint
# of the 95% CI gives a p-value of exactly 0.05.
t.test(x,mu=ci[1])$p.val
## [1] 0.05
t.test(x,mu=ci[2])$p.val
## [1] 0.05

Two-sample t-test

Example 1

# Two independent samples of unequal size (12 and 9 observations).
x <- c(102.5,106.6,99.8,106.5,103.7,105.5,98.2,104.1,85.6,105.5,114.0,112.2)
y <- c(93.7,90.9,100.4,92.0,100.2,104.6,95.4,96.6,99.2)

Plot the data, using a custom function. If you have not yet installed the devtools package, first uncomment and run the line calling install.packages.

# dot.plot() is a custom plotting function from the course package
# SPH.140.615, installed from GitHub via devtools.
# install.packages("devtools")
library(devtools)
## Loading required package: usethis
devtools::install_github("bllfrg/SPH.140.615",quiet=TRUE)
library(SPH.140.615)
dot.plot(x,y)

Test whether the population means are the same in the two groups versus the alternative that they differ, allowing for different population standard deviations.

# Default two-sample t.test is Welch's test: it does not assume equal
# variances, so the degrees of freedom are estimated (here non-integer).
t.test(x,y)
## 
##  Welch Two Sample t-test
## 
## data:  x and y
## t = 2.6041, df = 18.475, p-value = 0.01769
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##   1.30124 12.06543
## sample estimates:
## mean of x mean of y 
##  103.6833   97.0000
# Estimated Welch degrees of freedom ($param partially matches "parameter").
t.test(x,y)$param
##       df 
## 18.47511
# Two-sided 5% critical value at the Welch df.
qt(0.975,t.test(x,y)$param)
## [1] 2.097056
t.test(x,y)$stat
##        t 
## 2.604066

# Null distribution at the Welch df, with both critical values (red)
# and the observed statistic (blue).
curve(dt(x,t.test(x,y)$param),-3.5,3.5)
abline(h=0)
abline(v=qt(0.025,t.test(x,y)$param),col="red",lty=2)
abline(v=qt(0.975,t.test(x,y)$param),col="red",lty=2)
abline(v=t.test(x,y)$stat,col="blue")

Example 2

# Low-salt (x, "L") and high-salt (y, "H") groups, six observations each.
x <- c(59.4,52.3,42.6,45.1,65.9,40.8)
y <- c(82.7,56.7,46.9,67.8,74.8,85.7)
dot.plot(x,y,labels=c("L","H"))

Test whether the population means are the same versus the alternative that the high salt population mean is larger than the low salt population mean, allowing for different population standard deviations. Since x holds the low-salt and y the high-salt measurements, this alternative corresponds to mean(x) − mean(y) &lt; 0, i.e. alternative = "less".

# One-sided Welch test: HA is mean(x) - mean(y) < 0, i.e. the high-salt
# mean exceeds the low-salt mean. Note "alt" relies on partial argument
# matching for "alternative"; spelling it out would be clearer.
t.test(x,y,alt="less")
## 
##  Welch Two Sample t-test
## 
## data:  x and y
## t = -2.4421, df = 8.6937, p-value = 0.01907
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
##       -Inf -4.454703
## sample estimates:
## mean of x mean of y 
##  51.01667  69.10000
# One-sided 5% critical value (lower tail). The df and the t statistic do
# not depend on the alternative, so the default t.test(x,y) can supply them.
qt(0.05,t.test(x,y)$param)
## [1] -1.840493
t.test(x,y)$stat
##         t 
## -2.442083

# Null distribution with the single lower-tail critical value (red) and
# the observed statistic (blue).
curve(dt(x,t.test(x,y)$param),-3.5,3.5)
abline(h=0)
abline(v=qt(0.05,t.test(x,y)$param),col="red",lty=2)
abline(v=t.test(x,y)$stat,col="blue")

Note the confidence interval in the output from the t.test function. Here is how you calculate the upper bound.

# Upper bound of the one-sided 95% confidence interval for mean(x)-mean(y):
# (difference in sample means) + t(0.95, Welch df) * SE of the difference,
# matching the t.test output above.
mean(x)-mean(y) + qt(0.95,t.test(x,y)$param) * t.test(x,y)$stderr
## [1] -4.454703