Statistics for Laboratory Scientists ( 140.615 )

Analysis of Variance - Model Assumptions and Diagnostics - Advanced

Example from class: IL10 cytokines

Fancier plot. Re-order the factor levels (strain) for plotting.

library(SPH.140.615)
il10 <- cbind(il10, logIL10=log10(il10$IL10))
il10$Strain <- factor(il10$Strain, levels=rev(c("A","B6",as.character(c(1,2,4:8,10:15,17:19,24:26)))))

Calculate the strain means.

m <- tapply(il10$IL10, il10$Strain, mean)
logm <- tapply(il10$logIL10, il10$Strain, mean)

Plot the data. Note: ‘segments’ adds line segments at the strain means, ‘abline’ adds horizontal lines to indicate the groups, and ‘expression’ allows you to put fancy things in the axis labels.

par(las=1, mfrow=c(1,2))
stripchart(il10$IL10~il10$Strain, 
           method="jitter", pch=1, ylab="Strain", xlab="IL10 response", jitter=0.2)
segments(m, (1:21)-0.3, m, (1:21)+0.3, lwd=2, col="blue")
abline(h=1:21, lty=2, col="gray")
stripchart(il10$logIL10~il10$Strain, 
           method="jitter", pch=1, ylab="Strain", xlab=expression(paste(log[10]," IL10 response")), jitter=0.2)
segments(logm, (1:21)-0.3, logm, (1:21)+0.3, lwd=2, col="blue")
abline(h=1:21, lty=2, col="gray")

You can generate a bunch of diagnostic plots using the ‘plot’ function.

aov.il10 <- aov(IL10 ~ Strain, data=il10)
aov.logil10 <- aov(logIL10 ~ Strain, data=il10)
par(mfrow=c(2,2))
plot(aov.il10)

plot(aov.logil10)

Bartlett’s test for equality of variances

The brute-force method for IL10.

s <- tapply(il10$IL10/1000, il10$Strain, sd)
s
##        26        25        24        19        18        17        15        14        13        12        11 
## 0.1734159 0.3655768 0.8545685 1.2901688 0.6531433 0.5424332 1.0700615 0.1712304 0.4702388 1.1606007 0.5594971 
##        10         8         7         6         5         4         2         1        B6         A 
## 0.6338465 0.8701644 0.2195076 1.2141755 1.6383134 1.6405483 0.4498755 1.9180254 0.7141536 0.5934426
n <- table(il10$Strain)
n
## 
## 26 25 24 19 18 17 15 14 13 12 11 10  8  7  6  5  4  2  1 B6  A 
##  5  5 10  5  5  5  5  5 10 10  5  5 10  5  5  5 10 10  9  9  8
spsq <- sum((n-1)*s^2)/sum(n-1)
spsq
## [1] 0.9927934
xsq <- (sum(n)-length(n)) * log(spsq) - sum((n-1)*log(s^2))
xsq
## [1] 79.8956
K <- 1 + (sum(1/(n-1)) - 1/sum(n-1))/(3*(length(n)-1))
K
## [1] 1.067525
stat <- xsq/K
stat
## [1] 74.84187
pchisq(stat, length(n)-1, lower.tail=FALSE)
## [1] 2.894759e-08

Using the built-in function.

bartlett.test(il10$IL10 ~ il10$Strain) 
## 
##  Bartlett test of homogeneity of variances
## 
## data:  il10$IL10 by il10$Strain
## Bartlett's K-squared = 74.842, df = 20, p-value = 2.895e-08

The brute-force method for log10(IL10).

logs <- tapply(il10$logIL10, il10$Strain, sd)
logs
##         26         25         24         19         18         17         15         14         13         12 
## 0.11822716 0.29026204 0.27849516 0.25272549 0.16997770 0.19775197 0.35512698 0.05671307 0.20586828 0.34798329 
##         11         10          8          7          6          5          4          2          1         B6 
## 0.27747688 0.41181170 0.24434276 0.08611903 0.50270177 0.31693434 0.28082376 0.18508639 0.34149839 0.22880190 
##          A 
## 0.22178466
logspsq <- sum((n-1)*logs^2)/sum(n-1)
logspsq
## [1] 0.07429708
logxsq <- (sum(n)-length(n)) * log(logspsq) - sum((n-1)*log(logs^2))
logxsq
## [1] 33.99216
logstat <- logxsq/K
logstat
## [1] 31.84202
pchisq(logstat, length(n)-1, lower.tail=FALSE)
## [1] 0.04501105

Using the built-in function.

bartlett.test(il10$logIL10 ~ il10$Strain) 
## 
##  Bartlett test of homogeneity of variances
## 
## data:  il10$logIL10 by il10$Strain
## Bartlett's K-squared = 31.842, df = 20, p-value = 0.04501