Statistics for Laboratory Scientists ( 140.615 )

Analysis of Variance - Non-Parametric Methods - Advanced


Example from class: diets and blood coagulation times

coag <- c(62, 60, 63, 59,
          63, 67, 71, 64, 65, 66,
          68, 66, 71, 67, 68, 68,
          56, 62, 60, 61, 63, 64, 63, 59)
ttt <- factor(rep(LETTERS[1:4],c(4,6,6,8)))

The Kruskal-Wallis test by brute-force.

ranks <- rank(coag)
ranks
##  [1]  7.5  4.5 10.5  2.5 10.5 18.5 23.5 13.5 15.0 16.5 21.0 16.5 23.5 18.5 21.0 21.0  1.0  7.5  4.5  6.0 10.5 13.5 10.5  2.5
rbar <- tapply(ranks, ttt, mean)
rbar
##     A     B     C     D 
##  6.25 16.25 20.25  7.00
nt <- tapply(ranks, ttt, length)
nt
## A B C D 
## 4 6 6 8
N <- sum(nt)
N
## [1] 24
e <- (N+1)/2
e
## [1] 12.5
H <- 12/(N*(N+1))*sum(nt*(rbar-e)^2)
H
## [1] 16.86
ties <- table(coag)
ties
## coag
## 56 59 60 61 62 63 64 65 66 67 68 71 
##  1  2  2  1  2  4  2  1  2  2  3  2
D <- 1-sum(ties^3-ties)/(N^3-N)
D
## [1] 0.9908696
kw <- H/D
kw
## [1] 17.01536
pchisq(kw, length(nt)-1, lower.tail=FALSE)
## [1] 0.000701621

The Kruskal-Wallis test with the built-in function.

kruskal.test(coag ~ ttt)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  coag by ttt
## Kruskal-Wallis chi-squared = 17.015, df = 3, p-value = 0.0007016

Permutation tests

Functions for the permutation tests using the ANOVA and Kruskal-Wallis test statistics.

perm.aov <- function(x,g,n.perm=1000){
  obs <- anova(aov(x~g))[1,4]
  perm <- 1:n.perm
  for(i in 1:n.perm) {
    perm[i] <- anova(aov(x~sample(g)))[1,4]
  }
  return(list(obs=obs,perm=perm))
}

perm.kw <- function(x,g,n.perm=1000){
  obs <- kruskal.test(x~g)$stat
  perm <- 1:n.perm
  for(i in 1:n.perm) {
    perm[i] <- kruskal.test(x~sample(g))$stat
  }
  return(list(obs=obs,perm=perm))
}

Example from class: IL10 cytokines

library(SPH.140.615)
il10 <- cbind(il10, logIL10=log10(il10$IL10))

The permutation test with the ANOVA F-statistic, on the original scale.

set.seed(1)
pm <- perm.aov(il10$IL10, il10$Strain)
str(pm)
## List of 2
##  $ obs : num 1.7
##  $ perm: num [1:1000] 0.995 2.268 1.024 1.068 0.721 ...

The estimated p-value.

mean(pm$perm>pm$obs)
## [1] 0.041

Plot the histogram of the permuted test statistics, the observed test statistic (blue), and the parametric F distribution.

hist(pm$perm, breaks=seq(0,4,by=0.1), main="", xlab="F statistic", prob=TRUE)
abline(v=pm$obs, col="blue", lwd=2)
curve(df(x,20,125), col="red", add=TRUE, lwd=2)

Here is the permutation test with the ANOVA F-statistic, on the log10 scale.

pm <- perm.aov(il10$logIL10, il10$Strain)
mean(pm$perm>pm$obs)
## [1] 0.002
hist(pm$perm, breaks=seq(0,4,by=0.1), main="", xlab="F statistic", prob=TRUE)
abline(v=pm$obs, col="blue", lwd=2)
curve(df(x,20,125), col="red", add=TRUE, lwd=2)

Here is the permutation test with Kruskal-Wallis test statistic. Note, it does not matter whether or not you use the log-transformed data.

pm <- perm.kw(il10$logIL10, il10$Strain)
mean(pm$perm>pm$obs)
## [1] 0.003
hist(pm$perm, breaks=seq(0,50,by=1), main="", xlab="KW statistic", prob=TRUE)
abline(v=pm$obs, col="blue", lwd=2)
curve(dchisq(x,20), col="red", add=TRUE, lwd=2)

Example from class: the fake data with the outlier

datA <- data.frame(x=c(33,34,35,36,39,  30,31,32,32,33,  31,33,33,34,36),
                   g=factor(rep(c("A","B","C"), rep(5,3))))
datB <- datA
datB$x[5] <- 50

Plot the data.

set.seed(1)
par(las=1, mfrow=c(1,2))
stripchart(x ~ g, data=datA, pch=1, method="jitter", jitter=0.05, xlim=c(30,50))
abline(h=1:3, lty="dotted", col="lightgrey")
stripchart(x ~ g, data=datB, pch=1, method="jitter", jitter=0.05, xlim=c(30,50))
abline(h=1:3, lty="dotted", col="lightgrey")

The permutation test with the ANOVA F-statistic for dataset A.

set.seed(1)
pm <- perm.aov(datA$x, datA$g)
mean(pm$perm>pm$obs)
## [1] 0.019
hist(pm$perm, breaks=seq(0,10,by=0.25), main="", xlab="F statistic", prob=TRUE)
abline(v=pm$obs, col="blue", lwd=2)
curve(df(x,2,12), col="red", add=TRUE, lwd=2)

The permutation test with the ANOVA F-statistic for dataset B.

set.seed(1)
pm <- perm.aov(datB$x, datB$g)
mean(pm$perm>pm$obs)
## [1] 0.025
hist(pm$perm, breaks=seq(0,10,by=0.25), main="", xlab="F statistic", prob=TRUE)
abline(v=pm$obs, col="blue", lwd=2)
curve(df(x,2,12), col="red", add=TRUE, lwd=2)

The permutation test with the Kruskal-Wallis test statistic. Note, with the same random seed you get the same results for datasets A and B.

Here is dataset A.

set.seed(1)
pm <- perm.kw(datA$x, datA$g)
mean(pm$perm>pm$obs)
## [1] 0.009
hist(pm$perm, breaks=seq(0,10,by=0.25), main="", xlab="KW statistic", prob=TRUE)
abline(v=pm$obs, col="blue", lwd=2)
curve(dchisq(x,2), col="red", add=TRUE, lwd=2)

And here is dataset B.

set.seed(1)
pm <- perm.kw(datB$x, datB$g)
mean(pm$perm>pm$obs)
## [1] 0.009
hist(pm$perm, breaks=seq(0,10,by=0.25), main="", xlab="KW statistic", prob=TRUE)
abline(v=pm$obs, col="blue", lwd=2)
curve(dchisq(x,2), col="red", add=TRUE, lwd=2)