Statistics for Laboratory Scientists ( 140.615 )

Logistic Regression

Example from class: tobacco budworms

The data.

library(SPH.140.615)
worms
##    dose n.dead  n    sex
## 1     1      1 20   male
## 2     2      4 20   male
## 3     4      9 20   male
## 4     8     13 20   male
## 5    16     18 20   male
## 6    32     20 20   male
## 7     1      0 20 female
## 8     2      2 20 female
## 9     4      6 20 female
## 10    8     10 20 female
## 11   16     12 20 female
## 12   32     16 20 female

Plot the data.

par(las=1)
plot(n.dead/n ~ dose, data=worms, col=ifelse(sex=="male","blue","red"), lwd=2, ylim=c(0,1), 
     ylab="proportion dead")
u <- par("usr")
legend(u[2], u[3], c("Male","Female"), pch=1, col=c("blue","red"), xjust=1, yjust=0, cex=1.3)

Fit the model without sex differences.

glm.outA <- glm(n.dead/n ~ dose, weights=n, data=worms, family=binomial(link=logit))
summary(glm.outA)$coef
##               Estimate Std. Error   z value     Pr(>|z|)
## (Intercept) -1.5739248 0.23127861 -6.805319 1.008254e-11
## dose         0.1530269 0.02247849  6.807705 9.916801e-12

Sexes completely different.

glm.outB <- glm(n.dead/n ~ sex*dose, weights=n, data=worms, family=binomial(link=logit))
summary(glm.outB)$coef
##                Estimate Std. Error    z value     Pr(>|z|)
## (Intercept)  -1.7157796 0.32233098 -5.3230368 1.020491e-07
## sexmale      -0.2119351 0.51523286 -0.4113384 6.808244e-01
## dose          0.1156765 0.02378865  4.8626745 1.158102e-06
## sexmale:dose  0.1815579 0.06691613  2.7132151 6.663383e-03

Different slopes but common intercept.

glm.outC <- glm(n.dead/n ~ dose + sex:dose, weights=n, data=worms, family=binomial(link=logit))
summary(glm.outC)$coef
##                Estimate Std. Error   z value     Pr(>|z|)
## (Intercept)  -1.8015388 0.25126742 -7.169807 7.510379e-13
## dose          0.1203152 0.02138318  5.626626 1.837681e-08
## dose:sexmale  0.1616116 0.04427579  3.650112 2.621257e-04

Plot the data with the fitted curves.

par(las=1)
plot(n.dead/n ~ dose, data=worms, col=ifelse(sex=="male","blue","red"), lwd=2, ylim=c(0,1), 
     ylab="proportion dead")
u <- par("usr")
x <- seq(0,u[2],len=250)
y <- predict(glm.outA, data.frame(dose=x), type="response")
lines(x, y, col="green3")
ym <- predict(glm.outB, data.frame(dose=x,sex=factor(rep("male",length(x)))), type="response")
lines(x, ym, col="blue", lty=2)
yf <- predict(glm.outB, data.frame(dose=x,sex=factor(rep("female",length(x)))), type="response")
lines(x, yf, col="red", lty=2)
ym <- predict(glm.outC, data.frame(dose=x,sex=factor(rep("male",length(x)))), type="response")
lines(x, ym, col="blue")
yf <- predict(glm.outC, data.frame(dose=x,sex=factor(rep("female",length(x)))), type="response")
lines(x, yf, col="red")
legend(u[2], u[3], col=c("blue","blue","red","red","green3"), lty=c(1,2,1,2,1), xjust=1, yjust=0,
       c("common intercept (M)", "separate intercepts (M)", 
         "common intercept (F)", "separate intercepts (F)",
         "same curve"))

The fit is poor, especially at the low doses. Let’s convert dose to log2(dose).

worms$log2dose <- log2(worms$dose)
worms
##    dose n.dead  n    sex log2dose
## 1     1      1 20   male        0
## 2     2      4 20   male        1
## 3     4      9 20   male        2
## 4     8     13 20   male        3
## 5    16     18 20   male        4
## 6    32     20 20   male        5
## 7     1      0 20 female        0
## 8     2      2 20 female        1
## 9     4      6 20 female        2
## 10    8     10 20 female        3
## 11   16     12 20 female        4
## 12   32     16 20 female        5

Fit the model without sex differences.

glm.outA <- glm(n.dead/n ~ log2dose, weights=n, data=worms, family=binomial(link=logit))
summary(glm.outA)$coef
##              Estimate Std. Error   z value     Pr(>|z|)
## (Intercept) -2.766087  0.3701265 -7.473356 7.817524e-14
## log2dose     1.006807  0.1235859  8.146619 3.742416e-16

Sexes completely different.

glm.outB <- glm(n.dead/n ~ sex*log2dose, weights=n, data=worms, family=binomial(link=logit))
summary(glm.outB)$coef
##                    Estimate Std. Error    z value     Pr(>|z|)
## (Intercept)      -2.9935418  0.5526997 -5.4162175 6.087304e-08
## sexmale           0.1749868  0.7783100  0.2248292 8.221122e-01
## log2dose          0.9060364  0.1671016  5.4220678 5.891353e-08
## sexmale:log2dose  0.3529130  0.2699902  1.3071324 1.911678e-01

Different slopes but common intercept.

glm.outC <- glm(n.dead/n ~ log2dose + sex:log2dose, weights=n, data=worms, family=binomial(link=logit))
summary(glm.outC)$coef
##                    Estimate Std. Error   z value     Pr(>|z|)
## (Intercept)      -2.9072994  0.3892857 -7.468293 8.124171e-14
## log2dose          0.8823333  0.1275071  6.919875 4.520414e-12
## log2dose:sexmale  0.4070062  0.1247047  3.263759 1.099447e-03

Plot the data with the fitted curves.

par(las=1)
plot(n.dead/n ~ log2dose, data=worms, col=ifelse(sex=="male","blue","red"), lwd=2, ylim=c(0,1), 
     xlab="log2 dose", ylab="proportion dead")
u <- par("usr")
x <- seq(0,u[2],len=250)
y <- predict(glm.outA, data.frame(log2dose=x), type="response")
lines(x, y, col="green3")
ym <- predict(glm.outB, data.frame(log2dose=x,sex=factor(rep("male",length(x)))), type="response")
lines(x, ym, col="blue", lty=2)
yf <- predict(glm.outB, data.frame(log2dose=x,sex=factor(rep("female",length(x)))), type="response")
lines(x, yf, col="red", lty=2)
ym <- predict(glm.outC, data.frame(log2dose=x,sex=factor(rep("male",length(x)))), type="response")
lines(x, ym, col="blue")
yf <- predict(glm.outC, data.frame(log2dose=x,sex=factor(rep("female",length(x)))), type="response")
lines(x, yf, col="red")
legend(u[2], u[3], col=c("blue","blue","red","red","green3"), lty=c(1,2,1,2,1), xjust=1, yjust=0,
       c("common intercept (M)", "separate intercepts (M)", 
         "common intercept (F)", "separate intercepts (F)",
         "same curve"))