The data are in the SPH.140.615 package.
library(SPH.140.615)
example(spiders)
##
## spidrs> spiders
## dose n.dead n
## 1 0.0 7 25
## 2 0.5 6 25
## 3 1.0 13 25
## 4 1.5 20 25
## 5 2.0 19 25
## 6 2.5 23 25
## 7 3.0 24 25
##
## spidrs> spiders$n.dead/spiders$n
## [1] 0.28 0.24 0.52 0.80 0.76 0.92 0.96
Fit the logistic regression model.
glm.out <- glm(n.dead/n ~ dose, data=spiders, weights=n, family=binomial(link=logit))
summary(glm.out)$coef
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.329592 0.3274858 -4.059999 4.907289e-05
## dose 1.448457 0.2303103 6.289153 3.192028e-10
Plot the data with the fitted curve.
par(las=1)
plot(n.dead/n ~ dose, data=spiders, lwd=2, ylim=c(0,1), ylab="proportion dead")
u <- par("usr")
x <- seq(0,u[2],len=250)
y <- predict(glm.out, data.frame(dose=x), type="response")
lines(x, y, col="blue", lwd=2)
The model parameters from the logistic model.
params <- glm.out$coef
params
## (Intercept) dose
## -1.329592 1.448457
When the dose is equal to zero, the log odds of death among the spider mites is
params[1]
## (Intercept)
## -1.329592
When the dose is equal to zero, the probability of death among the spider mites is
exp(params[1])/(1+exp(params[1]))
## (Intercept)
## 0.2092268
Comparing a spider mite receiving a dose one unit larger than another spider mite, the log odds ratio (or equivalently, difference in log odds) of death is
params[2]
## dose
## 1.448457
In other words, the odds of death are
exp(params[2])
## dose
## 4.256541
times larger for the spider mite receiving the higher dose.
ld50 <- -params[1]/params[2]
ld50
## (Intercept)
## 0.917937
Calculate the 95% confidence interval using a function from the SPH.140.615 package to calculate the estimated standard error.
se.ld50 <- ld50se(glm.out)
se.ld50
## (Intercept)
## 0.1359084
ci.ld50 <- ld50 + c(-1,1) * qnorm(0.975) * se.ld50
ci.ld50
## [1] 0.6515615 1.1843125
Sometimes the data are recorded for each experimental unit, with individual outcome (e.g., 0/1, alive/dead, etc).
d <- spiders$dose
n0 <- 25-spiders$n.dead
n1 <- spiders$n.dead
d0 <- rep(d,n0)
d1 <- rep(d,n1)
y0 <- rep(0,sum(n0))
y1 <- rep(1,sum(n1))
dat <- data.frame(y=c(y0,y1),dose=c(d0,d1))
dim(dat)
## [1] 175 2
head(dat)
## y dose
## 1 0 0
## 2 0 0
## 3 0 0
## 4 0 0
## 5 0 0
## 6 0 0
tail(dat)
## y dose
## 170 1 3
## 171 1 3
## 172 1 3
## 173 1 3
## 174 1 3
## 175 1 3
table(dat$y,dat$dose)
##
## 0 0.5 1 1.5 2 2.5 3
## 0 18 19 12 5 6 2 1
## 1 7 6 13 20 19 23 24
The logistic regression for this data format.
glm.out <- glm(y ~ dose, data=dat, family=binomial(link=logit))
summary(glm.out)$coef
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.329592 0.3274702 -4.060193 4.903219e-05
## dose 1.448457 0.2302846 6.289855 3.177630e-10
The data are in the SPH.140.615 package, and use log2 dose.
worms
## dose n.dead n sex
## 1 1 1 20 male
## 2 2 4 20 male
## 3 4 9 20 male
## 4 8 13 20 male
## 5 16 18 20 male
## 6 32 20 20 male
## 7 1 0 20 female
## 8 2 2 20 female
## 9 4 6 20 female
## 10 8 10 20 female
## 11 16 12 20 female
## 12 32 16 20 female
worms$log2dose <- log2(worms$dose)
worms
## dose n.dead n sex log2dose
## 1 1 1 20 male 0
## 2 2 4 20 male 1
## 3 4 9 20 male 2
## 4 8 13 20 male 3
## 5 16 18 20 male 4
## 6 32 20 20 male 5
## 7 1 0 20 female 0
## 8 2 2 20 female 1
## 9 4 6 20 female 2
## 10 8 10 20 female 3
## 11 16 12 20 female 4
## 12 32 16 20 female 5
Fit the model, sexes completely different.
glm.out <- glm(n.dead/n ~ sex*log2dose, weights=n, data=worms, family=binomial(link=logit))
summary(glm.out)$coef
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.9935418 0.5526997 -5.4162175 6.087304e-08
## sexmale 0.1749868 0.7783100 0.2248292 8.221122e-01
## log2dose 0.9060364 0.1671016 5.4220678 5.891353e-08
## sexmale:log2dose 0.3529130 0.2699902 1.3071324 1.911678e-01
Fit the model with different slopes but common intercept.
glm.out <- glm(n.dead/n ~ log2dose + sex:log2dose, weights=n, data=worms, family=binomial(link=logit))
summary(glm.out)$coef
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.9072994 0.3892857 -7.468293 8.124171e-14
## log2dose 0.8823333 0.1275071 6.919875 4.520414e-12
## log2dose:sexmale 0.4070062 0.1247047 3.263759 1.099447e-03
Plot the data with the fitted curves.
par(las=1)
plot(n.dead/n ~ log2dose, data=worms, col=ifelse(sex=="male","blue","red"), lwd=2, ylim=c(0,1),
xlab="log2 dose", ylab="proprtion dead")
u <- par("usr")
x <- seq(0,u[2],len=250)
ym <- predict(glm.out, data.frame(log2dose=x,sex=factor(rep("male",length(x)))), type="response")
lines(x, ym, col="blue")
yf <- predict(glm.out, data.frame(log2dose=x,sex=factor(rep("female",length(x)))), type="response")
lines(x, yf, col="red")
legend(u[2], u[3], col=c("blue","red"), lty=1, xjust=1, yjust=0, c("Male","Female"))
The model parameters from the logistic model.
params <- glm.out$coef
params
## (Intercept) log2dose log2dose:sexmale
## -2.9072994 0.8823333 0.4070062
The intercept and slope for the female tobacco budworms.
params[1:2]
## (Intercept) log2dose
## -2.9072994 0.8823333
The slope for the male tobacco budworms.
params[2]+params[3]
## log2dose
## 1.28934
The intercept and slope for the male tobacco budworms.
c(params[1], params[2]+params[3])
## (Intercept) log2dose
## -2.907299 1.289340
When the dose is equal to one (i.e., the log2 dose is 0), the log odds of death among the tobacco budworms (both male and female) is
params[1]
## (Intercept)
## -2.907299
When the dose is equal to one (i.e., the log2 dose is 0), the probability of death among the tobacco budworms (both male and female) is
exp(params[1])/(1+exp(params[1]))
## (Intercept)
## 0.05179391
Comparing a female tobacco budworm receiving a dose twice as large as another female tobacco budworm (i.e., the log2 dose difference is 1), the log odds ratio of death is
params[2]
## log2dose
## 0.8823333
In other words, the odds of death are
exp(params[2])
## log2dose
## 2.416532
times larger for the female tobacco budworm receiving the higher dose.
Comparing a male tobacco budworm receiving a dose twice as large as another male tobacco budworm (i.e., the log2 dose difference is 1), the log odds ratio of death is
params[2]+params[3]
## log2dose
## 1.28934
In other words, the odds of death are
exp(params[2]+params[3])
## log2dose
## 3.630388
times larger for the male tobacco budworm receiving the higher dose.
The data.
ticks
## Tsex Leg Dsex T U
## 1 ma fo fe 24 5
## 2 fe fo fe 18 5
## 3 ma fo ma 23 4
## 4 fe fo ma 20 4
## 5 ma hi fe 17 8
## 6 fe hi fe 25 3
## 7 ma hi ma 21 6
## 8 fe hi ma 25 2
Calculating the total number of ticks under the respective conditions.
ticks$n <- ticks$T + ticks$U
ticks
## Tsex Leg Dsex T U n
## 1 ma fo fe 24 5 29
## 2 fe fo fe 18 5 23
## 3 ma fo ma 23 4 27
## 4 fe fo ma 20 4 24
## 5 ma hi fe 17 8 25
## 6 fe hi fe 25 3 28
## 7 ma hi ma 21 6 27
## 8 fe hi ma 25 2 27
This is a \(\text{2}^\text{3}\) factorial design (three factors with two levels each). These designs in particular allow you to investigate the interactions between the factors.
Two-way interactions:
Does the tick sex effect differ across the levels of the factor leg?
Does the tick sex effect differ across the levels of the factor deer sex?
Does the leg effect differ across the levels of the factor deer sex?
Three-way interaction:
Does the tick sex / leg interaction differ across the levels of the factor deer sex?
Equivalently:
Does the tick sex / deer sex interaction differ across the levels of the factor leg?
Does the leg / deer sex interaction differ across the levels of the factor tick sex?
Fit the full model, will all higher order interactions. If you fit higher order interactions (e.g., the three-way interaction) always include the lower order interactions (e.g., the two-way interactions).
glm.out1 <- glm(T/n ~ Tsex * Leg * Dsex, data=ticks, weights=n, family=binomial(link=logit))
summary(glm.out1)$coef
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.28093385 0.5055250 2.5338683 0.01128111
## Tsexma 0.28768207 0.7051399 0.4079787 0.68328928
## Leghi 0.83932969 0.7930252 1.0583898 0.28987779
## Dsexma 0.32850407 0.7453560 0.4407345 0.65940525
## Tsexma:Leghi -1.65417381 1.0268296 -1.6109525 0.10719007
## Tsexma:Dsexma -0.14792013 1.0443661 -0.1416363 0.88736730
## Leghi:Dsexma 0.07696104 1.2119773 0.0635004 0.94936804
## Tsexma:Leghi:Dsexma 0.24144619 1.5498849 0.1557833 0.87620383
The three-way interaction is not significant. Fit the model without the three-way interaction.
glm.out2 <- glm(T/n ~ Tsex * Leg * Dsex - Tsex:Leg:Dsex, data=ticks, weights=n, family=binomial(link=logit))
summary(glm.out2)$coef
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.30673406 0.4811558 2.71582291 0.006611127
## Tsexma 0.23785126 0.6284471 0.37847461 0.705078048
## Leghi 0.77665144 0.6796707 1.14268790 0.253168194
## Dsexma 0.27293367 0.6532456 0.41781172 0.676084782
## Tsexma:Leghi -1.54898015 0.7701768 -2.01120062 0.044304270
## Tsexma:Dsexma -0.03859667 0.7736434 -0.04988948 0.960210457
## Leghi:Dsexma 0.22481996 0.7570753 0.29695850 0.766498193
Alternative syntax.
glm.out2 <- glm(T/n ~ (Tsex + Leg + Dsex)^2, data=ticks, weights=n, family=binomial(link=logit))
summary(glm.out2)$coef
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.30673406 0.4811558 2.71582291 0.006611127
## Tsexma 0.23785126 0.6284471 0.37847461 0.705078048
## Leghi 0.77665144 0.6796707 1.14268790 0.253168194
## Dsexma 0.27293367 0.6532456 0.41781172 0.676084782
## Tsexma:Leghi -1.54898015 0.7701768 -2.01120062 0.044304270
## Tsexma:Dsexma -0.03859667 0.7736434 -0.04988948 0.960210457
## Leghi:Dsexma 0.22481996 0.7570753 0.29695850 0.766498193
The two-way interactions are not very significant. Fit an additive model, and test for a significant decrease in the likelihood.
glm.out3 <- glm(T/n ~ Tsex + Leg + Dsex, data=ticks, weights=n, family=binomial(link=logit))
summary(glm.out3)$coef
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.71152444 0.3854051 4.4408448 8.960639e-06
## Tsexma -0.53662365 0.3729989 -1.4386736 1.502430e-01
## Leghi -0.05517661 0.3656716 -0.1508912 8.800616e-01
## Dsexma 0.33388122 0.3669443 0.9098961 3.628773e-01
anova(glm.out3, glm.out2, test="Chisq")
## Analysis of Deviance Table
##
## Model 1: T/n ~ Tsex + Leg + Dsex
## Model 2: T/n ~ (Tsex + Leg + Dsex)^2
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 4 4.2926
## 2 1 0.0242 3 4.2684 0.2339
Calculate the confidence intervals.
confint(glm.out3)
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) 0.9912551 2.5101459
## Tsexma -1.2878749 0.1841138
## Leghi -0.7783154 0.6636186
## Dsexma -0.3816322 1.0656641
Summary: the ticks seems to have a preference for the treated tubes (since 0 is not in the 95% CI for the intercept), but neither tick sex, deer sex, or type of leg seem to matter as far as rates are concerned. So we fit a model with an intercept only.
glm.out <- glm(T/n ~ 1, data=ticks, weights=n, family=binomial(link=logit))
summary(glm.out)$coef
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.542374 0.181128 8.515378 1.660476e-17
Estimate the proportion of ticks running to the treated tube on average.
b0 <- summary(glm.out)$coef[1,1]
b0
## [1] 1.542374
exp(b0)/(1+exp(b0))
## [1] 0.8238095
Get the confidence interval for the proportion of ticks running to the treated tube on average.
ci <- confint(glm.out)
## Waiting for profiling to be done...
ci
## 2.5 % 97.5 %
## 1.199959 1.912093
exp(ci)/(1+exp(ci))
## 2.5 % 97.5 %
## 0.7685174 0.8712541