```{r setup, include=FALSE} knitr::opts_chunk$set(echo=TRUE, fig.align="center") ``` #### Statistics for Laboratory Scientists ( 140.615 ) ## Analysis of Variance - Non-Parametric Methods - Advanced Functions for the permutation tests using the ANOVA and Kruskal-Wallis test statistics. ```{r} perm.aov <- function(x,g,n.perm=1000){ obs <- anova(aov(x~g))[1,4] perm <- 1:n.perm for(i in 1:n.perm) { perm[i] <- anova(aov(x~sample(g)))[1,4] } return(list(obs=obs,perm=perm)) } perm.kw <- function(x,g,n.perm=1000){ obs <- kruskal.test(x~g)$stat perm <- 1:n.perm for(i in 1:n.perm) { perm[i] <- kruskal.test(x~sample(g))$stat } return(list(obs=obs,perm=perm)) } ``` #### Example from class: IL10 cytokines ```{r} library(SPH.140.615) il10 <- cbind(il10, logIL10=log10(il10$IL10)) ``` The permutation test with the ANOVA F-statistic, on the original scale. ```{r} set.seed(1) pm <- perm.aov(il10$IL10, il10$Strain) str(pm) ``` The estimated p-value. ```{r} mean(pm$perm>pm$obs) ``` Plot the histogram of the permuted test statistics, the observed test statistic (blue), and the parametric F distribution. ```{r} hist(pm$perm, breaks=seq(0,4,by=0.1), main="", xlab="F statistic", prob=TRUE) abline(v=pm$obs, col="blue", lwd=2) curve(df(x,20,125), col="red", add=TRUE, lwd=2) ``` Here is the permutation test with the ANOVA F-statistic, on the log10 scale. ```{r} pm <- perm.aov(il10$logIL10, il10$Strain) mean(pm$perm>pm$obs) hist(pm$perm, breaks=seq(0,4,by=0.1), main="", xlab="F statistic", prob=TRUE) abline(v=pm$obs, col="blue", lwd=2) curve(df(x,20,125), col="red", add=TRUE, lwd=2) ``` Here is the permutation test with Kruskal-Wallis test statistic. Note, it does not matter whether or not you use the log-transformed data. ```{r} pm <- perm.kw(il10$logIL10, il10$Strain) mean(pm$perm>pm$obs) hist(pm$perm, breaks=seq(0,50,by=1), main="", xlab="KW statistic", prob=TRUE) abline(v=pm$obs, col="blue", lwd=2) curve(dchisq(x,20), col="red", add=TRUE, lwd=2) ``` #### Example from class: the fake data with the outlier ```{r} datA <- data.frame(x=c(33,34,35,36,39, 30,31,32,32,33, 31,33,33,34,36), g=factor(rep(c("A","B","C"), rep(5,3)))) datB <- datA datB$x[5] <- 50 ``` Plot the data. ```{r, fig.width=8} set.seed(1) par(las=1, mfrow=c(1,2)) stripchart(x ~ g, data=datA, pch=1, method="jitter", jitter=0.05, xlim=c(30,50)) abline(h=1:3, lty="dotted", col="lightgrey") stripchart(x ~ g, data=datB, pch=1, method="jitter", jitter=0.05, xlim=c(30,50)) abline(h=1:3, lty="dotted", col="lightgrey") ``` The permutation test with the ANOVA F-statistic for dataset A. ```{r} set.seed(1) pm <- perm.aov(datA$x, datA$g) mean(pm$perm>pm$obs) hist(pm$perm, breaks=seq(0,10,by=0.25), main="", xlab="F statistic", prob=TRUE) abline(v=pm$obs, col="blue", lwd=2) curve(df(x,2,12), col="red", add=TRUE, lwd=2) ``` The permutation test with the ANOVA F-statistic for dataset B. ```{r} set.seed(1) pm <- perm.aov(datB$x, datB$g) mean(pm$perm>pm$obs) hist(pm$perm, breaks=seq(0,10,by=0.25), main="", xlab="F statistic", prob=TRUE) abline(v=pm$obs, col="blue", lwd=2) curve(df(x,2,12), col="red", add=TRUE, lwd=2) ``` The permutation test with the Kruskal-Wallis test statistic. Note, with the same random seed you get the same results for datasets A and B. Here is dataset A. ```{r} set.seed(1) pm <- perm.kw(datA$x, datA$g) mean(pm$perm>pm$obs) hist(pm$perm, breaks=seq(0,10,by=0.25), main="", xlab="KW statistic", prob=TRUE) abline(v=pm$obs, col="blue", lwd=2) curve(dchisq(x,2), col="red", add=TRUE, lwd=2) ``` And here is dataset B. ```{r} set.seed(1) pm <- perm.kw(datB$x, datB$g) mean(pm$perm>pm$obs) hist(pm$perm, breaks=seq(0,10,by=0.25), main="", xlab="KW statistic", prob=TRUE) abline(v=pm$obs, col="blue", lwd=2) curve(dchisq(x,2), col="red", add=TRUE, lwd=2) ```