We are starting to use functions and data from the SPH.140.615 GitHub
package. Uncomment and execute the line calling the function
install.packages
if you have not yet installed
devtools.
# install.packages("devtools")
library(devtools,quiet=TRUE)
devtools::install_github("bllfrg/SPH.140.615",quiet=TRUE)
library(SPH.140.615)
The data.
xa <- c(672, 747, 749, 792, 875, 888, 930, 962, 994,1295)
xb <- c(290, 359, 384, 466, 510, 516, 522, 532, 595, 706)
Plot the data, using the custom function dot.plot
from
the SPH.140.615 package.
dot.plot(xa,xb, includeCI=FALSE)
Plot the data, using the stripchart
function. Look at
the help file to explore what all the arguments do.
par(las=1)
stripchart(list(xa,xb), method="jitter", pch=1, vertical=TRUE, xlim=c(0.5,2.5), group.names=c("A","B"))
abline(v=1:2,lty=2)
The 95% confidence interval for the ratio of the population variances, the pedestrian way.
ratio <- var(xa)/var(xb)
ratio
## [1] 2.14067
na <- length(xa)
na
## [1] 10
nb <- length(xb)
nb
## [1] 10
L <- qf(0.025,na-1,nb-1)
L
## [1] 0.2483859
U <- qf(0.975,na-1,nb-1)
U
## [1] 4.025994
c(ratio/U,ratio/L)
## [1] 0.5317123 8.6183268
The 95% confidence interval for the ratio of the population standard deviations.
sqrt(c(ratio/U,ratio/L))
## [1] 0.729186 2.935699
The built-in function.
var.test(xa,xb)
##
## F test to compare two variances
##
## data: xa and xb
## F = 2.1407, num df = 9, denom df = 9, p-value = 0.2723
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.5317123 8.6183268
## sample estimates:
## ratio of variances
## 2.14067
attributes(var.test(xa,xb))
## $names
## [1] "statistic" "parameter" "p.value" "conf.int" "estimate" "null.value" "alternative"
## [8] "method" "data.name"
##
## $class
## [1] "htest"
var.test(xa,xb)$estimate
## ratio of variances
## 2.14067
var.test(xa,xb)$p.value
## [1] 0.2722659
var.test(xa,xb)$conf.int
## [1] 0.5317123 8.6183268
## attr(,"conf.level")
## [1] 0.95
sqrt(var.test(xa,xb)$conf.int)
## [1] 0.729186 2.935699
## attr(,"conf.level")
## [1] 0.95
coag <- c(62, 60, 63, 59,
63, 67, 71, 64, 65, 66,
68, 66, 71, 67, 68, 68,
56, 62, 60, 61, 63, 64, 63, 59)
ttt <- c("A","A","A","A",
"B","B","B","B","B","B",
"C","C","C","C","C","C",
"D","D","D","D","D","D","D","D")
ttt
## [1] "A" "A" "A" "A" "B" "B" "B" "B" "B" "B" "C" "C" "C" "C" "C" "C" "D" "D" "D" "D" "D" "D" "D" "D"
LETTERS[1:4]
## [1] "A" "B" "C" "D"
ttt <- rep(LETTERS[1:4],c(4,6,6,8))
ttt
## [1] "A" "A" "A" "A" "B" "B" "B" "B" "B" "B" "C" "C" "C" "C" "C" "C" "D" "D" "D" "D" "D" "D" "D" "D"
Make the treatment “ttt” a factor.
str(ttt)
## chr [1:24] "A" "A" "A" "A" "B" "B" "B" "B" "B" "B" "C" "C" "C" "C" "C" "C" "D" "D" "D" "D" "D" "D" "D" ...
ttt <- factor(ttt,levels=c("A","B","C","D"))
str(ttt)
## Factor w/ 4 levels "A","B","C","D": 1 1 1 1 2 2 2 2 2 2 ...
Plot the data. In the stripchart
function you want to
jitter the data points horizontally, vertically, or stack them, since
there are identical coagulation times recorded in some of the
groups.
set.seed(1)
par(las=1)
stripchart(coag~ttt, method="jitter", pch=1, vertical=TRUE, xlab="diet", ylab="coagulation time [ sec ]")
stripchart(jitter(coag,factor=2)~ttt, pch=1, vertical=TRUE, xlab="diet", ylab="coagulation time [ sec ]")
stripchart(coag~ttt, method="stack", pch=1, vertical=TRUE, xlab="diet", ylab="coagulation time [ sec ]")
Note: the function tapply
splits the first vector into
groups according to the values in the second vector, and then “applies”
the function, here ‘mean’, to each group.
The within-group means.
tapply(coag,ttt,mean)
## A B C D
## 61 66 68 61
The number of data points per group.
tapply(coag,ttt,length)
## A B C D
## 4 6 6 8
One-way ANOVA, the pedestrain way.
k <- 4
m <- tapply(coag,ttt,mean)
m
## A B C D
## 61 66 68 61
gm <- mean(coag)
gm
## [1] 64
n <- tapply(coag,ttt,length)
n
## A B C D
## 4 6 6 8
N <- sum(n)
N
## [1] 24
m-gm
## A B C D
## -3 2 4 -3
SB <- sum(n*(m-gm)^2)
SB
## [1] 228
coag
## [1] 62 60 63 59 63 67 71 64 65 66 68 66 71 67 68 68 56 62 60 61 63 64 63 59
rep(m,n)
## A A A A B B B B B B C C C C C C D D D D D D D D
## 61 61 61 61 66 66 66 66 66 66 68 68 68 68 68 68 61 61 61 61 61 61 61 61
coag-rep(m,n)
## A A A A B B B B B B C C C C C C D D D D D D D D
## 1 -1 2 -2 -3 1 5 -2 -1 0 0 -2 3 -1 0 0 -5 1 -1 0 2 3 2 -2
SW <- sum((coag-rep(m,n))^2)
SW
## [1] 112
MB <- SB/(k-1)
MB
## [1] 76
MW <- SW/(N-k)
MW
## [1] 5.6
Fstat <- MB/MW
Fstat
## [1] 13.57143
p <- pf(Fstat,k-1,N-k,lower=FALSE)
p
## [1] 4.658471e-05
The built-in function.
aov(coag~ttt)
## Call:
## aov(formula = coag ~ ttt)
##
## Terms:
## ttt Residuals
## Sum of Squares 228 112
## Deg. of Freedom 3 20
##
## Residual standard error: 2.366432
## Estimated effects may be unbalanced
summary(aov(coag~ttt))
## Df Sum Sq Mean Sq F value Pr(>F)
## ttt 3 228 76.0 13.57 4.66e-05 ***
## Residuals 20 112 5.6
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(aov(coag~ttt))
## Analysis of Variance Table
##
## Response: coag
## Df Sum Sq Mean Sq F value Pr(>F)
## ttt 3 228 76.0 13.571 4.658e-05 ***
## Residuals 20 112 5.6
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The meiosis
data used in this lecture are available in
the SPH.140.615 R package you installed and loaded above.
Reference: Broman et al. (1998) Am J Hum Genet 63:861-869.
The data have three columns: family, female, male. These last two give the total number of crossovers on the 22 autosomes in each of the male and female meioses.
head(meioses)
## family female male
## 1 102 28 24
## 2 102 50 22
## 3 102 32 26
## 4 102 35 14
## 5 102 39 20
## 6 102 44 21
str(meioses)
## 'data.frame': 92 obs. of 3 variables:
## $ family: int 102 102 102 102 102 102 102 102 102 102 ...
## $ female: int 28 50 32 35 39 44 33 40 28 37 ...
## $ male : int 24 22 26 14 20 21 21 22 30 20 ...
We need to make the family column a “factor” to get the ANOVA to work properly.
meioses$family <- as.factor(meioses$family)
str(meioses)
## 'data.frame': 92 obs. of 3 variables:
## $ family: Factor w/ 8 levels "102","884","1331",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ female: int 28 50 32 35 39 44 33 40 28 37 ...
## $ male : int 24 22 26 14 20 21 21 22 30 20 ...
Plot the data for the female meioses, get the within-group means, and add the means to the plot.
par(las=1)
stripchart(meioses$female~meioses$family,method="jitter",pch=1)
female.me <- tapply(meioses$female,meioses$family,mean)
segments(female.me,(1:8)-0.25,female.me,(1:8)+0.25,lwd=2,col="blue")
Same for the male counts.
par(las=1)
stripchart(meioses$male~meioses$family,method="jitter",pch=1)
male.me <- tapply(meioses$male,meioses$family,mean)
segments(male.me,(1:8)-0.25,male.me,(1:8)+0.25,lwd=2,col="blue")
The ANOVA tables.
out.fem <- aov(female~family,data=meioses)
summary(out.fem)
## Df Sum Sq Mean Sq F value Pr(>F)
## family 7 1485 212.15 4.601 0.000216 ***
## Residuals 84 3873 46.11
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
out.mal <- aov(male~family,data=meioses)
summary(out.mal)
## Df Sum Sq Mean Sq F value Pr(>F)
## family 7 114 16.28 1.23 0.296
## Residuals 84 1112 13.24
xA <- c(47,48,51,54)
xB <- c(39,44,38,39)
xC <- c(47,49,41,43)
x <- c(xA,xB,xC)
treat <- rep(1:3,rep(4,3))
x
## [1] 47 48 51 54 39 44 38 39 47 49 41 43
treat
## [1] 1 1 1 1 2 2 2 2 3 3 3 3
ex <- data.frame(rsp=x,trt=treat)
ex
## rsp trt
## 1 47 1
## 2 48 1
## 3 51 1
## 4 54 1
## 5 39 2
## 6 44 2
## 7 38 2
## 8 39 2
## 9 47 3
## 10 49 3
## 11 41 3
## 12 43 3
par(las=1)
stripchart(ex$rsp~ex$trt, method="jitter", pch=1, vertical=TRUE)
stripchart(jitter(ex$rsp,factor=2)~ex$trt, pch=1, vertical=TRUE)
The following is wrong.
aov.out <- aov(rsp~trt,data=ex)
summary(aov.out)
## Df Sum Sq Mean Sq F value Pr(>F)
## trt 1 50 50.0 2.066 0.181
## Residuals 10 242 24.2
The correct way.
treat <- factor(rep(1:3,rep(4,3)))
ex.c <- data.frame(rsp=x,trt=treat)
aov.out.c <- aov(rsp~trt,data=ex.c)
summary(aov.out.c)
## Df Sum Sq Mean Sq F value Pr(>F)
## trt 2 200 100.00 9.783 0.00553 **
## Residuals 9 92 10.22
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Compare.
summary(aov.out)
## Df Sum Sq Mean Sq F value Pr(>F)
## trt 1 50 50.0 2.066 0.181
## Residuals 10 242 24.2
summary(aov.out.c)
## Df Sum Sq Mean Sq F value Pr(>F)
## trt 2 200 100.00 9.783 0.00553 **
## Residuals 9 92 10.22
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
str(ex)
## 'data.frame': 12 obs. of 2 variables:
## $ rsp: num 47 48 51 54 39 44 38 39 47 49 ...
## $ trt: int 1 1 1 1 2 2 2 2 3 3 ...
str(ex.c)
## 'data.frame': 12 obs. of 2 variables:
## $ rsp: num 47 48 51 54 39 44 38 39 47 49 ...
## $ trt: Factor w/ 3 levels "1","2","3": 1 1 1 1 2 2 2 2 3 3 ...
summary(ex)
## rsp trt
## Min. :38.00 Min. :1
## 1st Qu.:40.50 1st Qu.:1
## Median :45.50 Median :2
## Mean :45.00 Mean :2
## 3rd Qu.:48.25 3rd Qu.:3
## Max. :54.00 Max. :3
summary(ex.c)
## rsp trt
## Min. :38.00 1:4
## 1st Qu.:40.50 2:4
## Median :45.50 3:4
## Mean :45.00
## 3rd Qu.:48.25
## Max. :54.00