Statistics for Laboratory Scientists ( 140.615 )

Analysis of Variance - Non-Parametric Methods

Example from class: IL10 cytokines

Read the data, and include log10 of IL10 as a column.

library(SPH.140.615)
il10 <- cbind(il10, logIL10=log10(il10$IL10))

The analysis of variance tables.

aov.il10 <- aov(IL10 ~ Strain, data=il10)
anova(aov.il10)
## Analysis of Variance Table
## 
## Response: IL10
##            Df    Sum Sq Mean Sq F value  Pr(>F)  
## Strain     20  33757424 1687871  1.7001 0.04154 *
## Residuals 125 124099169  992793                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
aov.logil10 <- aov(logIL10 ~ Strain, data=il10)
anova(aov.logil10)
## Analysis of Variance Table
## 
## Response: logIL10
##            Df Sum Sq  Mean Sq F value   Pr(>F)   
## Strain     20 3.3476 0.167380  2.2528 0.003575 **
## Residuals 125 9.2871 0.074297                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The Kruskal-Wallis test.

kruskal.test(IL10 ~ Strain, data=il10)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  IL10 by Strain
## Kruskal-Wallis chi-squared = 41.325, df = 20, p-value = 0.003383
kruskal.test(logIL10 ~ Strain, data=il10)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  logIL10 by Strain
## Kruskal-Wallis chi-squared = 41.325, df = 20, p-value = 0.003383

Example from class: diets and blood coagulation times

coag <- c(62, 60, 63, 59,
          63, 67, 71, 64, 65, 66,
          68, 66, 71, 67, 68, 68,
          56, 62, 60, 61, 63, 64, 63, 59)
ttt <- factor(rep(LETTERS[1:4],c(4,6,6,8)))

The analysis of variance table.

summary(aov(coag~ttt)) 
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## ttt          3    228    76.0   13.57 4.66e-05 ***
## Residuals   20    112     5.6                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The Kruskal-Wallis test by brute-force.

ranks <- rank(coag)
ranks
##  [1]  7.5  4.5 10.5  2.5 10.5 18.5 23.5 13.5 15.0 16.5 21.0 16.5 23.5 18.5 21.0 21.0  1.0  7.5  4.5  6.0 10.5
## [22] 13.5 10.5  2.5
rbar <- tapply(ranks, ttt, mean)
rbar
##     A     B     C     D 
##  6.25 16.25 20.25  7.00
nt <- tapply(ranks, ttt, length)
nt
## A B C D 
## 4 6 6 8
N <- sum(nt)
N
## [1] 24
e <- (N+1)/2
e
## [1] 12.5
H <- 12/(N*(N+1))*sum(nt*(rbar-e)^2)
H
## [1] 16.86
ties <- table(coag)
ties
## coag
## 56 59 60 61 62 63 64 65 66 67 68 71 
##  1  2  2  1  2  4  2  1  2  2  3  2
D <- 1-sum(ties^3-ties)/(N^3-N)
D
## [1] 0.9908696
kw <- H/D
kw
## [1] 17.01536
pchisq(kw, length(nt)-1, lower.tail=FALSE)
## [1] 0.000701621

The Kruskal-Wallis test with the built-in function.

kruskal.test(coag ~ ttt)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  coag by ttt
## Kruskal-Wallis chi-squared = 17.015, df = 3, p-value = 0.0007016

Example from class: the fake data with the outlier

Read and plot the data.

datA <- data.frame(x=c(33,34,35,36,39,  30,31,32,32,33,  31,33,33,34,36),
                   g=factor(rep(c("A","B","C"), rep(5,3))))
datB <- datA
datB$x[5] <- 50

set.seed(1)
par(las=1, mfrow=c(1,2))
stripchart(x ~ g, data=datA, pch=1, method="jitter", jitter=0.05, xlim=c(30,50))
abline(h=1:3, lty="dotted", col="lightgrey")
stripchart(x ~ g, data=datB, pch=1, method="jitter", jitter=0.05, xlim=c(30,50))
abline(h=1:3, lty="dotted", col="lightgrey")

The ANOVA with dataset A.

anova(aov(x ~ g, data=datA))
## Analysis of Variance Table
## 
## Response: x
##           Df Sum Sq Mean Sq F value  Pr(>F)  
## g          2 36.133  18.067  5.4747 0.02044 *
## Residuals 12 39.600   3.300                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The ANOVA with dataset B.

anova(aov(x ~ g, data=datB))
## Analysis of Variance Table
## 
## Response: x
##           Df Sum Sq Mean Sq F value Pr(>F)
## g          2   94.8  47.400  2.6382 0.1123
## Residuals 12  215.6  17.967

The Kruskal-Wallis test.

kruskal.test(x ~ g, data=datA)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  x by g
## Kruskal-Wallis chi-squared = 7.6359, df = 2, p-value = 0.02197
kruskal.test(x ~ g, data=datB)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  x by g
## Kruskal-Wallis chi-squared = 7.6359, df = 2, p-value = 0.02197