For the example, we take 100 random draws from a standard Normal distribution.
set.seed(1)
n <- 100
y <- rnorm(n)
plot(sort(y))
The expected normal quantiles. Note that other possibilities have been proposed as well, for example qnorm((1:n)/(n+1)).
p <- ((1:n)-0.5)/n
p
## [1] 0.005 0.015 0.025 0.035 0.045 0.055 0.065 0.075 0.085 0.095 0.105 0.115 0.125 0.135 0.145 0.155 0.165
## [18] 0.175 0.185 0.195 0.205 0.215 0.225 0.235 0.245 0.255 0.265 0.275 0.285 0.295 0.305 0.315 0.325 0.335
## [35] 0.345 0.355 0.365 0.375 0.385 0.395 0.405 0.415 0.425 0.435 0.445 0.455 0.465 0.475 0.485 0.495 0.505
## [52] 0.515 0.525 0.535 0.545 0.555 0.565 0.575 0.585 0.595 0.605 0.615 0.625 0.635 0.645 0.655 0.665 0.675
## [69] 0.685 0.695 0.705 0.715 0.725 0.735 0.745 0.755 0.765 0.775 0.785 0.795 0.805 0.815 0.825 0.835 0.845
## [86] 0.855 0.865 0.875 0.885 0.895 0.905 0.915 0.925 0.935 0.945 0.955 0.965 0.975 0.985 0.995
x <- qnorm(p)
x
## [1] -2.57582930 -2.17009038 -1.95996398 -1.81191067 -1.69539771 -1.59819314 -1.51410189 -1.43953147
## [9] -1.37220381 -1.31057911 -1.25356544 -1.20035886 -1.15034938 -1.10306256 -1.05812162 -1.01522203
## [17] -0.97411388 -0.93458929 -0.89647336 -0.85961736 -0.82389363 -0.78919165 -0.75541503 -0.72247905
## [25] -0.69030882 -0.65883769 -0.62800601 -0.59776013 -0.56805150 -0.53883603 -0.51007346 -0.48172685
## [33] -0.45376219 -0.42614801 -0.39885507 -0.37185609 -0.34512553 -0.31863936 -0.29237490 -0.26631061
## [41] -0.24042603 -0.21470157 -0.18911843 -0.16365849 -0.13830421 -0.11303854 -0.08784484 -0.06270678
## [49] -0.03760829 -0.01253347 0.01253347 0.03760829 0.06270678 0.08784484 0.11303854 0.13830421
## [57] 0.16365849 0.18911843 0.21470157 0.24042603 0.26631061 0.29237490 0.31863936 0.34512553
## [65] 0.37185609 0.39885507 0.42614801 0.45376219 0.48172685 0.51007346 0.53883603 0.56805150
## [73] 0.59776013 0.62800601 0.65883769 0.69030882 0.72247905 0.75541503 0.78919165 0.82389363
## [81] 0.85961736 0.89647336 0.93458929 0.97411388 1.01522203 1.05812162 1.10306256 1.15034938
## [89] 1.20035886 1.25356544 1.31057911 1.37220381 1.43953147 1.51410189 1.59819314 1.69539771
## [97] 1.81191067 1.95996398 2.17009038 2.57582930
plot(x, p)
abline(h=c(0,1))
axis(4, p, rep("",n))
The qq-plot ‘from scratch’, and the built-in function.
par(mfrow=c(1,2))
plot(x, sort(y), main="from scratch")
qqnorm(y)
Read the data, and include log10 of IL10 as a column.
library(SPH.140.615)
head(il10)
## Strain IL10
## 1 7 985.425
## 2 7 1057.254
## 3 7 989.480
## 4 7 816.093
## 5 7 1410.767
## 6 25 1159.141
il10 <- cbind(il10, logIL10=log10(il10$IL10))
head(il10)
## Strain IL10 logIL10
## 1 7 985.425 2.993624
## 2 7 1057.254 3.024179
## 3 7 989.480 2.995407
## 4 7 816.093 2.911740
## 5 7 1410.767 3.149455
## 6 25 1159.141 3.064136
Plot the data.
par(las=1, mfrow=c(1,2))
stripchart(il10$IL10 ~ il10$Strain,
method="jitter", pch=1, ylab="Strain", xlab="IL10 response", jitter=0.2)
abline(h=1:21, lty=2, col="gray")
stripchart(il10$logIL10 ~ il10$Strain,
method="jitter", pch=1, ylab="Strain", xlab="log10 IL10 response", jitter=0.2)
abline(h=1:21, lty=2, col="gray")
The analysis of variance tables.
aov.il10 <- aov(IL10 ~ Strain, data=il10)
anova(aov.il10)
## Analysis of Variance Table
##
## Response: IL10
## Df Sum Sq Mean Sq F value Pr(>F)
## Strain 20 33757424 1687871 1.7001 0.04154 *
## Residuals 125 124099169 992793
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
aov.logil10 <- aov(logIL10 ~ Strain, data=il10)
anova(aov.logil10)
## Analysis of Variance Table
##
## Response: logIL10
## Df Sum Sq Mean Sq F value Pr(>F)
## Strain 20 3.3476 0.167380 2.2528 0.003575 **
## Residuals 125 9.2871 0.074297
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Plot the residuals. Note that you can get the residuals and fitted values from the output of the ‘aov’ function.
attributes(aov.il10)
## $names
## [1] "coefficients" "residuals" "effects" "rank" "fitted.values" "assign"
## [7] "qr" "df.residual" "contrasts" "xlevels" "call" "terms"
## [13] "model"
##
## $class
## [1] "aov" "lm"
par(las=1, mfrow=c(1,2))
stripchart(aov.il10$residuals ~ il10$Strain,
method="jitter", pch=1, ylab="Strain", xlab="residuals (IL10)", jitter=0.2)
abline(h=1:21, lty=2, col="gray")
abline(v=0, lty=2, col="red")
stripchart(aov.logil10$residuals ~ il10$Strain,
method="jitter", pch=1, ylab="Strain", xlab="log10 residuals (IL10)", jitter=0.2)
abline(h=1:21, lty=2, col="gray")
abline(v=0, lty=2, col="red")
The qq-plots of all residuals, with histograms Note: ‘mfcol’ versus ‘mfrow’ leads you to fill up the different plots working down columns rather than across rows, and ‘qqline’ adds a line to the ‘qqnorm’ plot.
par(las=1, mfcol=c(2,2))
hist(aov.il10$residuals, breaks=30, yaxt="n", ylab="", xlab="Residuals", main="IL10")
qqnorm(aov.il10$residuals, main="IL10")
qqline(aov.il10$residuals, col="blue", lty=2, lwd=1)
hist(aov.logil10$residuals, breaks=30, yaxt="n", ylab="", xlab="Residuals", main="log10 IL10")
qqnorm(aov.logil10$residuals, main="log10 IL10")
qqline(aov.logil10$residuals, col="blue", lty=2, lwd=1)
Plot the residuals versus the fitted values.
par(las=1, mfrow=c(1,2))
plot(aov.il10$fitted, aov.il10$residuals,
pch=1, xlab="fitted values (IL10)",ylab="residuals (IL10)")
abline(h=0, lty=2, col="red")
plot(aov.logil10$fitted, aov.logil10$residuals,
pch=1, xlab="fitted values (log10 IL10)",ylab="residuals (log10 IL10)")
abline(h=0, lty=2, col="red")
Plot the standard deviations versus the means.
m <- tapply(il10$IL10, il10$Strain, mean)
s <- tapply(il10$IL10, il10$Strain, sd)
logm <- tapply(il10$logIL10, il10$Strain, mean)
logs <- tapply(il10$logIL10, il10$Strain, sd)
par(las=1, mfrow=c(1,2))
plot(m, s, pch=1, xlab="Mean (IL10)", ylab="SD (IL10)")
plot(logm, logs, pch=1, xlab="Mean (log10 IL10)", ylab="SD (log10 IL10)")
The simple way, using the built-in function.
bartlett.test(il10$IL10 ~ il10$Strain)
##
## Bartlett test of homogeneity of variances
##
## data: il10$IL10 by il10$Strain
## Bartlett's K-squared = 74.842, df = 20, p-value = 2.895e-08
bartlett.test(il10$logIL10 ~ il10$Strain)
##
## Bartlett test of homogeneity of variances
##
## data: il10$logIL10 by il10$Strain
## Bartlett's K-squared = 31.842, df = 20, p-value = 0.04501