```{r setup, include=FALSE}
knitr::opts_chunk$set(echo=TRUE, fig.align="center")
```

#### Statistics for Laboratory Scientists ( 140.615 )

## Analysis of Variance - Model Assumptions and Diagnostics

#### Quantile-quantile plots 

For the example, we take 100 random draws from a standard Normal distribution.

```{r}
set.seed(1)
n <- 100
y <- rnorm(n)
plot(sort(y))
```

The expected normal quantiles. Note that other possibilities have been proposed as well, for example qnorm((1:n)/(n+1)).

```{r}
p <- ((1:n)-0.5)/n
p
x <- qnorm(p) 
x
plot(x, p)
abline(h=c(0,1))
axis(4, p, rep("",n))
```

The qq-plot 'from scratch', and the built-in function.

```{r}
par(mfrow=c(1,2))
plot(x, sort(y), main="from scratch")
qqnorm(y)
```

#### Example from class: IL10 cytokines

Read the data, and include log10 of IL10 as a column.

```{r}
library(SPH.140.615)
head(il10)
il10 <- cbind(il10, logIL10=log10(il10$IL10))
head(il10)
```

Plot the data.

```{r,fig.height=7}
par(las=1, mfrow=c(1,2))
stripchart(il10$IL10 ~ il10$Strain, 
           method="jitter", pch=1, ylab="Strain", xlab="IL10 response", jitter=0.2)
abline(h=1:21, lty=2, col="gray")
stripchart(il10$logIL10 ~ il10$Strain, 
           method="jitter", pch=1, ylab="Strain", xlab="log10 IL10 response", jitter=0.2)
abline(h=1:21, lty=2, col="gray")
```

The analysis of variance tables.

```{r}
aov.il10 <- aov(IL10 ~ Strain, data=il10)
anova(aov.il10)
aov.logil10 <- aov(logIL10 ~ Strain, data=il10)
anova(aov.logil10)
```

Plot the residuals. Note that you can get the residuals and fitted values from the output of the 'aov' function.

```{r,fig.height=7}
attributes(aov.il10)
par(las=1, mfrow=c(1,2)) 
stripchart(aov.il10$residuals ~ il10$Strain, 
           method="jitter", pch=1, ylab="Strain", xlab="residuals (IL10)", jitter=0.2)
abline(h=1:21, lty=2, col="gray")
abline(v=0, lty=2, col="red")
stripchart(aov.logil10$residuals ~ il10$Strain, 
           method="jitter", pch=1, ylab="Strain", xlab="log10 residuals (IL10)", jitter=0.2)
abline(h=1:21, lty=2, col="gray")
abline(v=0, lty=2, col="red")
```

The qq-plots of all residuals, with histograms Note: 'mfcol' versus 'mfrow' leads you to fill up the different plots working down columns rather than across rows, and 'qqline' adds a line to the 'qqnorm' plot.

```{r,fig.width=8,fig.height=8}
par(las=1, mfcol=c(2,2))  
hist(aov.il10$residuals, breaks=30, yaxt="n", ylab="", xlab="Residuals", main="IL10")
qqnorm(aov.il10$residuals, main="IL10")
qqline(aov.il10$residuals, col="blue", lty=2, lwd=1)
hist(aov.logil10$residuals, breaks=30, yaxt="n", ylab="", xlab="Residuals", main="log10 IL10")
qqnorm(aov.logil10$residuals, main="log10 IL10")
qqline(aov.logil10$residuals, col="blue", lty=2, lwd=1)
```

Plot the residuals versus the fitted values.

```{r}
par(las=1, mfrow=c(1,2))
plot(aov.il10$fitted, aov.il10$residuals, 
     pch=1, xlab="fitted values (IL10)",ylab="residuals (IL10)")
abline(h=0, lty=2, col="red")
plot(aov.logil10$fitted, aov.logil10$residuals, 
     pch=1, xlab="fitted values (log10 IL10)",ylab="residuals (log10 IL10)")
abline(h=0, lty=2, col="red")
```

Plot the standard deviations versus the means.

```{r}
m <- tapply(il10$IL10, il10$Strain, mean)
s <- tapply(il10$IL10, il10$Strain, sd)  
logm <- tapply(il10$logIL10, il10$Strain, mean)
logs <- tapply(il10$logIL10, il10$Strain, sd)

par(las=1, mfrow=c(1,2))
plot(m, s, pch=1, xlab="Mean (IL10)", ylab="SD (IL10)")
plot(logm, logs, pch=1, xlab="Mean (log10 IL10)", ylab="SD (log10 IL10)")
```

#### Bartlett's test for equality of variances

The simple way, using the built-in function.

```{r}
bartlett.test(il10$IL10 ~ il10$Strain) 
bartlett.test(il10$logIL10 ~ il10$Strain) 
```