Statistics for Laboratory Scientists ( 140.615 )

Analysis of Variance - Advanced

Example from class: diets and blood coagulation times

coag <- c(62, 60, 63, 59,
          63, 67, 71, 64, 65, 66,
          68, 66, 71, 67, 68, 68,
          56, 62, 60, 61, 63, 64, 63, 59)
ttt <- factor(rep(LETTERS[1:4],c(4,6,6,8)))

The output from the built-in function. Note that ‘summary’ creates a list, while ‘anova’ does not.

aov(coag~ttt)
## Call:
##    aov(formula = coag ~ ttt)
## 
## Terms:
##                 ttt Residuals
## Sum of Squares  228       112
## Deg. of Freedom   3        20
## 
## Residual standard error: 2.366432
## Estimated effects may be unbalanced
summary(aov(coag~ttt)) 
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## ttt          3    228    76.0   13.57 4.66e-05 ***
## Residuals   20    112     5.6                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
str(summary(aov(coag~ttt))) 
## List of 1
##  $ :Classes 'anova' and 'data.frame':    2 obs. of  5 variables:
##   ..$ Df     : num [1:2] 3 20
##   ..$ Sum Sq : num [1:2] 228 112
##   ..$ Mean Sq: num [1:2] 76 5.6
##   ..$ F value: num [1:2] 13.6 NA
##   ..$ Pr(>F) : num [1:2] 4.66e-05 NA
##  - attr(*, "class")= chr [1:2] "summary.aov" "listof"
summary(aov(coag~ttt))[[1]]
##             Df Sum Sq Mean Sq F value    Pr(>F)    
## ttt          3    228    76.0  13.571 4.658e-05 ***
## Residuals   20    112     5.6                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(coag~ttt))[[1]][1]
##             Df
## ttt          3
## Residuals   20
summary(aov(coag~ttt))[[1]]$Df
## [1]  3 20
summary(aov(coag~ttt))[[1]][2]
##             Sum Sq
## ttt            228
## Residuals      112
summary(aov(coag~ttt))[[1]][3]
##             Mean Sq
## ttt            76.0
## Residuals       5.6
summary(aov(coag~ttt))[[1]][4]
##             F value
## ttt          13.571
## Residuals
summary(aov(coag~ttt))[[1]][5]
##                Pr(>F)    
## ttt         4.659e-05 ***
## Residuals                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(coag~ttt))[[1]][1:5]
##             Df Sum Sq Mean Sq F value    Pr(>F)    
## ttt          3    228    76.0  13.571 4.658e-05 ***
## Residuals   20    112     5.6                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(aov(coag~ttt)) 
## Analysis of Variance Table
## 
## Response: coag
##           Df Sum Sq Mean Sq F value    Pr(>F)    
## ttt        3    228    76.0  13.571 4.658e-05 ***
## Residuals 20    112     5.6                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
str(anova(aov(coag~ttt)))
## Classes 'anova' and 'data.frame':    2 obs. of  5 variables:
##  $ Df     : int  3 20
##  $ Sum Sq : num  228 112
##  $ Mean Sq: num  76 5.6
##  $ F value: num  13.6 NA
##  $ Pr(>F) : num  4.66e-05 NA
##  - attr(*, "heading")= chr [1:2] "Analysis of Variance Table\n" "Response: coag"
anova(aov(coag~ttt))[1] 
##           Df
## ttt        3
## Residuals 20
anova(aov(coag~ttt))$Df
## [1]  3 20
anova(aov(coag~ttt))[2] 
##           Sum Sq
## ttt          228
## Residuals    112
anova(aov(coag~ttt))[1,] 
## Analysis of Variance Table
## 
## Response: coag
##     Df Sum Sq Mean Sq F value    Pr(>F)    
## ttt  3    228      76  13.571 4.658e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(aov(coag~ttt))[2,] 
## Analysis of Variance Table
## 
## Response: coag
##           Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 20    112     5.6
anova(aov(coag~ttt))[1,4] 
## [1] 13.57143
anova(aov(coag~ttt))[1,5] 
## [1] 4.658471e-05

Example from class: meioses in females and males

The meiosis data used in this lecture are available in the SPH.140.615 R package.

library(SPH.140.615)
meioses$family <- as.factor(meioses$family)
out.fem <- aov(female~family,data=meioses)
summary(out.fem)
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## family       7   1485  212.15   4.601 0.000216 ***
## Residuals   84   3873   46.11                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
out.mal <- aov(male~family,data=meioses)
summary(out.mal)
##             Df Sum Sq Mean Sq F value Pr(>F)
## family       7    114   16.28    1.23  0.296
## Residuals   84   1112   13.24

Permutation tests.

set.seed(1)
n.sim <- 1000
perm.fem <- perm.mal <- 1:n.sim
simdat <- meioses
for(i in 1:n.sim){
  simdat$family <- sample(simdat$family)
  perm.fem[i] <- anova(aov(female~family,data=simdat))[1,4]
  perm.mal[i] <- anova(aov(male ~ family,data=simdat))[1,4]
}

The observed F statistics,

obs.fem <- anova(out.fem)[1,4]
obs.fem
## [1] 4.601104
obs.mal <- anova(out.mal)[1,4]
obs.mal 
## [1] 1.23019

The estimated p-values.

mean(perm.fem>=obs.fem)
## [1] 0
mean(perm.mal>=obs.mal)
## [1] 0.281

Plots of the permutation F statistics, and the parametric F distribution.

hist(perm.fem,breaks=seq(0,5,by=0.25),prob=TRUE)
curve(df(x,7,84),add=TRUE,col="red",lwd=2)
abline(v=obs.fem,col="red",lwd=2,lty=2)

hist(perm.mal,breaks=seq(0,5,by=0.25),prob=TRUE)
curve(df(x,7,84),add=TRUE,col="blue",lwd=2)
abline(v=obs.mal,col="blue",lwd=2,lty=2)

Random effects.

k <- 8
n <- tapply(meioses$female,meioses$family,length)
n
##  102  884 1331 1332 1347 1362 1413 1416 
##   14   12   11   10   10   11   15    9
N <- sum(n)
N
## [1] 92
n0 <- (N-sum(n^2)/sum(n))/(k-1)
n0
## [1] 11.45342

Female meioses.

anova(out.fem)
## Analysis of Variance Table
## 
## Response: female
##           Df Sum Sq Mean Sq F value    Pr(>F)    
## family     7 1485.1 212.152  4.6011 0.0002157 ***
## Residuals 84 3873.2  46.109                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(out.fem)[,3]
## [1] 212.15211  46.10896
MB <- anova(out.fem)[1,3]
MB
## [1] 212.1521
MW <- anova(out.fem)[2,3]
MW
## [1] 46.10896
sig <- sqrt(MW)
sig
## [1] 6.790358
sigA <- sqrt((MB-MW)/n0)
sigA
## [1] 3.807527

Male meioses.

MB <- anova(out.mal)[1,3]
MB
## [1] 16.28352
MW <- anova(out.mal)[2,3]
MW
## [1] 13.2366
sig <- sqrt(MW)
sig
## [1] 3.638213
sigA <- sqrt((MB-MW)/n0)
sigA
## [1] 0.515779