```{r setup, include=FALSE} knitr::opts_chunk$set(echo=TRUE, fig.align="center") ``` #### Statistics for Laboratory Scientists ( 140.615 ) ## Analysis of Variance - Model Assumptions and Diagnostics #### Quantile-quantile plots For the example, we take 100 random draws from a standard Normal distribution. ```{r} set.seed(1) n <- 100 y <- rnorm(n) plot(sort(y)) ``` The expected normal quantiles. Note that other possibilities have been proposed as well, for example qnorm((1:n)/(n+1)). ```{r} p <- ((1:n)-0.5)/n p x <- qnorm(p) x plot(x, p) abline(h=c(0,1)) axis(4, p, rep("",n)) ``` The qq-plot 'from scratch', and the built-in function. ```{r} par(mfrow=c(1,2)) plot(x, sort(y), main="from scratch") qqnorm(y) ``` #### Example from class: IL10 cytokines Read the data, and include log10 of IL10 as a column. ```{r} library(SPH.140.615) head(il10) il10 <- cbind(il10, logIL10=log10(il10$IL10)) head(il10) ``` Plot the data. ```{r,fig.height=7} par(las=1, mfrow=c(1,2)) stripchart(il10$IL10 ~ il10$Strain, method="jitter", pch=1, ylab="Strain", xlab="IL10 response", jitter=0.2) abline(h=1:21, lty=2, col="gray") stripchart(il10$logIL10 ~ il10$Strain, method="jitter", pch=1, ylab="Strain", xlab="log10 IL10 response", jitter=0.2) abline(h=1:21, lty=2, col="gray") ``` The analysis of variance tables. ```{r} aov.il10 <- aov(IL10 ~ Strain, data=il10) anova(aov.il10) aov.logil10 <- aov(logIL10 ~ Strain, data=il10) anova(aov.logil10) ``` Plot the residuals. Note that you can get the residuals and fitted values from the output of the 'aov' function. ```{r,fig.height=7} attributes(aov.il10) par(las=1, mfrow=c(1,2)) stripchart(aov.il10$residuals ~ il10$Strain, method="jitter", pch=1, ylab="Strain", xlab="residuals (IL10)", jitter=0.2) abline(h=1:21, lty=2, col="gray") abline(v=0, lty=2, col="red") stripchart(aov.logil10$residuals ~ il10$Strain, method="jitter", pch=1, ylab="Strain", xlab="log10 residuals (IL10)", jitter=0.2) abline(h=1:21, lty=2, col="gray") abline(v=0, lty=2, col="red") ``` The qq-plots of all residuals, with histograms Note: 'mfcol' versus 'mfrow' leads you to fill up the different plots working down columns rather than across rows, and 'qqline' adds a line to the 'qqnorm' plot. ```{r,fig.width=8,fig.height=8} par(las=1, mfcol=c(2,2)) hist(aov.il10$residuals, breaks=30, yaxt="n", ylab="", xlab="Residuals", main="IL10") qqnorm(aov.il10$residuals, main="IL10") qqline(aov.il10$residuals, col="blue", lty=2, lwd=1) hist(aov.logil10$residuals, breaks=30, yaxt="n", ylab="", xlab="Residuals", main="log10 IL10") qqnorm(aov.logil10$residuals, main="log10 IL10") qqline(aov.logil10$residuals, col="blue", lty=2, lwd=1) ``` Plot the residuals versus the fitted values. ```{r} par(las=1, mfrow=c(1,2)) plot(aov.il10$fitted, aov.il10$residuals, pch=1, xlab="fitted values (IL10)",ylab="residuals (IL10)") abline(h=0, lty=2, col="red") plot(aov.logil10$fitted, aov.logil10$residuals, pch=1, xlab="fitted values (log10 IL10)",ylab="residuals (log10 IL10)") abline(h=0, lty=2, col="red") ``` Plot the standard deviations versus the means. ```{r} m <- tapply(il10$IL10, il10$Strain, mean) s <- tapply(il10$IL10, il10$Strain, sd) logm <- tapply(il10$logIL10, il10$Strain, mean) logs <- tapply(il10$logIL10, il10$Strain, sd) par(las=1, mfrow=c(1,2)) plot(m, s, pch=1, xlab="Mean (IL10)", ylab="SD (IL10)") plot(logm, logs, pch=1, xlab="Mean (log10 IL10)", ylab="SD (log10 IL10)") ``` #### Bartlett's test for equality of variances The simple way, using the built-in function. ```{r} bartlett.test(il10$IL10 ~ il10$Strain) bartlett.test(il10$logIL10 ~ il10$Strain) ```