The data are in the SPH.140.615 package.
library(SPH.140.615)
example(spiders)
##
## spidrs> spiders
## dose n.dead n
## 1 0.0 7 25
## 2 0.5 6 25
## 3 1.0 13 25
## 4 1.5 20 25
## 5 2.0 19 25
## 6 2.5 23 25
## 7 3.0 24 25
##
## spidrs> spiders$n.dead/spiders$n
## [1] 0.28 0.24 0.52 0.80 0.76 0.92 0.96
Fit the logistic regression model.
glm.out <- glm(n.dead/n ~ dose, data=spiders, weights=n, family=binomial(link=logit))
summary(glm.out)$coef
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.329592 0.3274858 -4.059999 4.907289e-05
## dose 1.448457 0.2303103 6.289153 3.192028e-10
Plot the data with the fitted curve.
par(las=1)
plot(n.dead/n ~ dose, data=spiders, lwd=2, ylim=c(0,1), ylab="proportion dead")
u <- par("usr")
x <- seq(0,u[2],len=250)
y <- predict(glm.out, data.frame(dose=x), type="response")
lines(x, y, col="blue", lwd=2)
The model parameters from the logistic model.
params <- glm.out$coef
params
## (Intercept) dose
## -1.329592 1.448457
When the dose is equal to zero, the log odds of death among the spider mites is
params[1]
## (Intercept)
## -1.329592
When the dose is equal to zero, the probability of death among the spider mites is
exp(params[1])/(1+exp(params[1]))
## (Intercept)
## 0.2092268
Comparing a spider mite receiving a dose one unit larger than another spider mite, the log odds ratio (or equivalently, difference in log odds) of death is
params[2]
## dose
## 1.448457
In other words, the odds of death are
exp(params[2])
## dose
## 4.256541
times larger for the spider mite receiving the higher dose.
ld50 <- -params[1]/params[2]
ld50
## (Intercept)
## 0.917937
Calculate the 95% confidence interval using a function from the SPH.140.615 package to calculate the estimated standard error.
se.ld50 <- ld50se(glm.out)
se.ld50
## (Intercept)
## 0.1359084
ci.ld50 <- ld50 + c(-1,1) * qnorm(0.975) * se.ld50
ci.ld50
## [1] 0.6515615 1.1843125
Sometimes the data are recorded for each experimental unit, with individual outcome (e.g., 0/1, alive/dead, etc).
d <- spiders$dose
n0 <- 25-spiders$n.dead
n1 <- spiders$n.dead
d0 <- rep(d,n0)
d1 <- rep(d,n1)
y0 <- rep(0,sum(n0))
y1 <- rep(1,sum(n1))
dat <- data.frame(y=c(y0,y1),dose=c(d0,d1))
dim(dat)
## [1] 175 2
head(dat)
## y dose
## 1 0 0
## 2 0 0
## 3 0 0
## 4 0 0
## 5 0 0
## 6 0 0
tail(dat)
## y dose
## 170 1 3
## 171 1 3
## 172 1 3
## 173 1 3
## 174 1 3
## 175 1 3
table(dat$y,dat$dose)
##
## 0 0.5 1 1.5 2 2.5 3
## 0 18 19 12 5 6 2 1
## 1 7 6 13 20 19 23 24
The logistic regression for this data format.
glm.out <- glm(y ~ dose, data=dat, family=binomial(link=logit))
summary(glm.out)$coef
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.329592 0.3274702 -4.060193 4.903219e-05
## dose 1.448457 0.2302846 6.289855 3.177630e-10
The data are in the SPH.140.615 package, and use log2 dose.
worms
## dose n.dead n sex
## 1 1 1 20 male
## 2 2 4 20 male
## 3 4 9 20 male
## 4 8 13 20 male
## 5 16 18 20 male
## 6 32 20 20 male
## 7 1 0 20 female
## 8 2 2 20 female
## 9 4 6 20 female
## 10 8 10 20 female
## 11 16 12 20 female
## 12 32 16 20 female
worms$log2dose <- log2(worms$dose)
worms
## dose n.dead n sex log2dose
## 1 1 1 20 male 0
## 2 2 4 20 male 1
## 3 4 9 20 male 2
## 4 8 13 20 male 3
## 5 16 18 20 male 4
## 6 32 20 20 male 5
## 7 1 0 20 female 0
## 8 2 2 20 female 1
## 9 4 6 20 female 2
## 10 8 10 20 female 3
## 11 16 12 20 female 4
## 12 32 16 20 female 5
Fit the model, sexes completely different.
glm.out <- glm(n.dead/n ~ sex*log2dose, weights=n, data=worms, family=binomial(link=logit))
summary(glm.out)$coef
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.9935418 0.5526997 -5.4162175 6.087304e-08
## sexmale 0.1749868 0.7783100 0.2248292 8.221122e-01
## log2dose 0.9060364 0.1671016 5.4220678 5.891353e-08
## sexmale:log2dose 0.3529130 0.2699902 1.3071324 1.911678e-01
Fit the model with different slopes but common intercept.
glm.out <- glm(n.dead/n ~ log2dose + sex:log2dose, weights=n, data=worms, family=binomial(link=logit))
summary(glm.out)$coef
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.9072994 0.3892857 -7.468293 8.124171e-14
## log2dose 0.8823333 0.1275071 6.919875 4.520414e-12
## log2dose:sexmale 0.4070062 0.1247047 3.263759 1.099447e-03
Plot the data with the fitted curves.
par(las=1)
plot(n.dead/n ~ log2dose, data=worms, col=ifelse(sex=="male","blue","red"), lwd=2, ylim=c(0,1),
xlab="log2 dose", ylab="proprtion dead")
u <- par("usr")
x <- seq(0,u[2],len=250)
ym <- predict(glm.out, data.frame(log2dose=x,sex=factor(rep("male",length(x)))), type="response")
lines(x, ym, col="blue")
yf <- predict(glm.out, data.frame(log2dose=x,sex=factor(rep("female",length(x)))), type="response")
lines(x, yf, col="red")
legend(u[2], u[3], col=c("blue","red"), lty=1, xjust=1, yjust=0, c("Male","Female"))
The model parameters from the logistic model.
params <- glm.out$coef
params
## (Intercept) log2dose log2dose:sexmale
## -2.9072994 0.8823333 0.4070062
The intercept and slope for the female tobacco budworms.
params[1:2]
## (Intercept) log2dose
## -2.9072994 0.8823333
The slope for the male tobacco budworms.
params[2]+params[3]
## log2dose
## 1.28934
The intercept and slope for the male tobacco budworms.
c(params[1], params[2]+params[3])
## (Intercept) log2dose
## -2.907299 1.289340
When the dose is equal to one (i.e., the log2 dose is 0), the log odds of death among the tobacco budworms (both male and female) is
params[1]
## (Intercept)
## -2.907299
When the dose is equal to one (i.e., the log2 dose is 0), the probability of death among the tobacco budworms (both male and female) is
exp(params[1])/(1+exp(params[1]))
## (Intercept)
## 0.05179391
Comparing a female tobacco budworm receiving a dose twice as large as another female tobacco budworm (i.e., the log2 dose difference is 1), the log odds ratio of death is
params[2]
## log2dose
## 0.8823333
In other words, the odds of death are
exp(params[2])
## log2dose
## 2.416532
times larger for the female tobacco budworm receiving the higher dose.
Comparing a male tobacco budworm receiving a dose twice as large as another male tobacco budworm (i.e., the log2 dose difference is 1), the log odds ratio of death is
params[2]+params[3]
## log2dose
## 1.28934
In other words, the odds of death are
exp(params[2]+params[3])
## log2dose
## 3.630388
times larger for the male tobacco budworm receiving the higher dose.
The data.
ticks
## Tsex Leg Dsex T U
## 1 ma fo fe 24 5
## 2 fe fo fe 18 5
## 3 ma fo ma 23 4
## 4 fe fo ma 20 4
## 5 ma hi fe 17 8
## 6 fe hi fe 25 3
## 7 ma hi ma 21 6
## 8 fe hi ma 25 2
Calculating the total number of ticks under the respective conditions.
ticks$n <- ticks$T + ticks$U
ticks
## Tsex Leg Dsex T U n
## 1 ma fo fe 24 5 29
## 2 fe fo fe 18 5 23
## 3 ma fo ma 23 4 27
## 4 fe fo ma 20 4 24
## 5 ma hi fe 17 8 25
## 6 fe hi fe 25 3 28
## 7 ma hi ma 21 6 27
## 8 fe hi ma 25 2 27
Fit the full model, will all higher order interactions.
glm.out1 <- glm(T/n ~ Tsex * Leg * Dsex, data=ticks, weights=n, family=binomial(link=logit))
summary(glm.out1)$coef
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.28093385 0.5055250 2.5338683 0.01128111
## Tsexma 0.28768207 0.7051399 0.4079787 0.68328928
## Leghi 0.83932969 0.7930252 1.0583898 0.28987779
## Dsexma 0.32850407 0.7453560 0.4407345 0.65940525
## Tsexma:Leghi -1.65417381 1.0268296 -1.6109525 0.10719007
## Tsexma:Dsexma -0.14792013 1.0443661 -0.1416363 0.88736730
## Leghi:Dsexma 0.07696104 1.2119773 0.0635004 0.94936804
## Tsexma:Leghi:Dsexma 0.24144619 1.5498849 0.1557833 0.87620383
The three-way interaction is not significant. Fit the model without the three-way interaction.
glm.out2 <- glm(T/n ~ Tsex * Leg * Dsex - Tsex:Leg:Dsex, data=ticks, weights=n, family=binomial(link=logit))
summary(glm.out2)$coef
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.30673406 0.4811558 2.71582291 0.006611127
## Tsexma 0.23785126 0.6284471 0.37847461 0.705078048
## Leghi 0.77665144 0.6796707 1.14268790 0.253168194
## Dsexma 0.27293367 0.6532456 0.41781172 0.676084782
## Tsexma:Leghi -1.54898015 0.7701768 -2.01120062 0.044304270
## Tsexma:Dsexma -0.03859667 0.7736434 -0.04988948 0.960210457
## Leghi:Dsexma 0.22481996 0.7570753 0.29695850 0.766498193
Alternative syntax.
glm.out2 <- glm(T/n ~ (Tsex + Leg + Dsex)^2, data=ticks, weights=n, family=binomial(link=logit))
summary(glm.out2)$coef
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.30673406 0.4811558 2.71582291 0.006611127
## Tsexma 0.23785126 0.6284471 0.37847461 0.705078048
## Leghi 0.77665144 0.6796707 1.14268790 0.253168194
## Dsexma 0.27293367 0.6532456 0.41781172 0.676084782
## Tsexma:Leghi -1.54898015 0.7701768 -2.01120062 0.044304270
## Tsexma:Dsexma -0.03859667 0.7736434 -0.04988948 0.960210457
## Leghi:Dsexma 0.22481996 0.7570753 0.29695850 0.766498193
The two-way interactions are not very significant. Fit an additive model, and test for a significant decrease in the likelihood.
glm.out3 <- glm(T/n ~ Tsex + Leg + Dsex, data=ticks, weights=n, family=binomial(link=logit))
summary(glm.out3)$coef
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.71152444 0.3854051 4.4408448 8.960639e-06
## Tsexma -0.53662365 0.3729989 -1.4386736 1.502430e-01
## Leghi -0.05517661 0.3656716 -0.1508912 8.800616e-01
## Dsexma 0.33388122 0.3669443 0.9098961 3.628773e-01
anova(glm.out3, glm.out2, test="Chisq")
## Analysis of Deviance Table
##
## Model 1: T/n ~ Tsex + Leg + Dsex
## Model 2: T/n ~ (Tsex + Leg + Dsex)^2
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 4 4.2926
## 2 1 0.0242 3 4.2684 0.2339
Calculate the confidence intervals.
confint(glm.out3)
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) 0.9912551 2.5101459
## Tsexma -1.2878749 0.1841138
## Leghi -0.7783154 0.6636186
## Dsexma -0.3816322 1.0656641
Summary: the ticks seems to have a preference for the treated tubes (since 0 is not in the 95% CI for the intercept), but neither tick sex, deer sex, or type of leg seem to matter as far as rates are concerned. So we fit a model with an intercept only.
glm.out <- glm(T/n ~ 1, data=ticks, weights=n, family=binomial(link=logit))
summary(glm.out)$coef
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.542374 0.181128 8.515378 1.660476e-17
Estimate the proportion of ticks running to the treated tube on average.
b0 <- summary(glm.out)$coef[1,1]
b0
## [1] 1.542374
exp(b0)/(1+exp(b0))
## [1] 0.8238095
Get the confidence interval for the proportion of ticks running to the treated tube on average.
ci <- confint(glm.out)
## Waiting for profiling to be done...
ci
## 2.5 % 97.5 %
## 1.199959 1.912093
exp(ci)/(1+exp(ci))
## 2.5 % 97.5 %
## 0.7685174 0.8712541