Statistics for Laboratory Scientists ( 140.615 )

Non-linear Regression

Example from class: Michaelis-Menten equations

The data.

library(SPH.140.615)
puro
##    conc vel     state
## 1  0.02  76   treated
## 2  0.02  47   treated
## 3  0.06  97   treated
## 4  0.06 107   treated
## 5  0.11 123   treated
## 6  0.11 139   treated
## 7  0.22 159   treated
## 8  0.22 152   treated
## 9  0.56 191   treated
## 10 0.56 201   treated
## 11 1.10 207   treated
## 12 1.10 200   treated
## 13 0.02  67 untreated
## 14 0.02  51 untreated
## 15 0.06  84 untreated
## 16 0.06  86 untreated
## 17 0.11  98 untreated
## 18 0.11 115 untreated
## 19 0.22 131 untreated
## 20 0.22 124 untreated
## 21 0.56 144 untreated
## 22 0.56 158 untreated
## 23 1.10 160 untreated

Plot the data, just for the untreated cells.

plot(vel ~ conc, data=puro, subset=(state=="untreated"), xlab="concentration", ylab="initial velocity", lwd=2)

The regression of 1/V on 1/C, just with the data on untreated cells.

temp <- lm(1/vel ~ I(1/conc), data=puro, subset=(state=="untreated"))
summary(temp)$coef
##                 Estimate   Std. Error   t value     Pr(>|t|)
## (Intercept) 0.0069721337 5.678297e-04 12.278565 6.331192e-07
## I(1/conc)   0.0002150012 2.479421e-05  8.671428 1.156213e-05
vmax <- 1/temp$coef[1]
vmax
## (Intercept) 
##    143.4281
km <- temp$coef[2]*vmax
km
##  I(1/conc) 
## 0.03083721

Plot the data with the regression line.

plot(I(1/vel) ~ I(1/conc), data=puro, subset=(state=="untreated"), 
     xlab="1 / concentration", ylab="1 / initial velocity", lwd=2)
abline(temp$coef, col="red", lty=2, lwd=2)

The non-linear fit.

nls.out <- nls(vel ~ (Vm*conc)/(K+conc), data=puro, subset=(state=="untreated"), start = c(Vm=143,K=0.031))
summary(nls.out)$param
##        Estimate  Std. Error   t value     Pr(>|t|)
## Vm 160.28001125 6.480243120 24.733642 1.384612e-09
## K    0.04770813 0.007781868  6.130678 1.727056e-04

Plot the data, with both fitted curves.

plot(vel ~ conc, data=puro, subset=(state=="untreated"), xlab="concentration", ylab="initial velocity", lwd=2)
u <- par("usr")
x <- seq(u[1],u[2],length=250)
y1 <- 1/predict(temp, data.frame(conc=x))
y2 <- predict(nls.out, data.frame(conc=x))
lines(x,y2, lwd=2, col="blue")
lines(x,y1, lwd=2, col="red", lty=2)

Non-linear regression estimation for just the treated cells.

nls.outB <- nls(vel ~ (Vm*conc)/(K+conc), data=puro, subset=(puro$state=="treated"), start=c(Vm=160,K=0.048))
summary(nls.outB)$param
##        Estimate  Std. Error   t value     Pr(>|t|)
## Vm 212.68377342 6.947156655 30.614507 3.241166e-11
## K    0.06412133 0.008280954  7.743229 1.565132e-05

Fit both regressions all in one.

puro$x <- ifelse(puro$state=="treated",1,0)
nls.outC <- nls(vel ~ ((Vm+dV*x)*conc)/(K+dK*x+conc), data=puro, start=c(Vm=160,K=0.048,dV=0,dK=0))
summary(nls.outC)$param
##        Estimate  Std. Error   t value     Pr(>|t|)
## Vm 160.28012878 6.896020420 23.242409 2.040955e-15
## K    0.04770831 0.008281174  5.761057 1.496420e-05
## dV  52.40364441 9.551022356  5.486705 2.712600e-05
## dK   0.01641302 0.011428988  1.436087 1.672387e-01

Fit with the K’s constrained to be equal.

nls.outD <- nls(vel ~ ((Vm+dV*x)*conc)/(K+conc), data=puro, start=c(Vm=160,K=0.048,dV=0))
summary(nls.outD)$param
##        Estimate  Std. Error   t value     Pr(>|t|)
## Vm 166.60407675 5.807428478 28.688098 1.006288e-17
## K    0.05797179 0.005910173  9.808816 4.372679e-09
## dV  42.02596380 6.272141186  6.700417 1.605939e-06

Plot data and both fits.

plot(vel ~ conc, data=puro, xlab="concentration", ylab="initial velocity", lwd=2,
     pch=as.numeric(puro$state)-1, col=ifelse(puro$state=="treated","blue","red"))
u <- par("usr")
x <- seq(u[1],u[2],len=250)
y1 <- predict(nls.out, data.frame(conc=x))
y2 <- predict(nls.outB, data.frame(conc=x))
y3 <- predict(nls.outD, data.frame(conc=x, x=rep(0,length(x))))
y4 <- predict(nls.outD, data.frame(conc=x, x=rep(1,length(x))))
lines(x, y1, lwd=2, col="red", lty=2)
lines(x, y2, lwd=2, col="blue", lty=2)
lines(x, y3, lwd=2, col="red")
lines(x, y4, lwd=2, col="blue")

Example from class: the mud snails

The data.

sediment
##    time pellets
## 1     6     119
## 2     6     111
## 3     6      87
## 4    12     348
## 5    12     210
## 6    12     272
## 7    18     438
## 8    18     778
## 9    18     548
## 10   24     774
## 11   24     896
## 12   24     670
## 13   42     896
## 14   42     729
## 15   42     886
## 16   78     763
## 17   78     954
## 18   78     954

Fit a cubic model. We need to use I() to get the square and cubic terms to work.

lm.out <- lm(pellets ~ time + I(time^2) + I(time^3), data=sediment)
summary(lm.out)
## 
## Call:
## lm(formula = pellets ~ time + I(time^2) + I(time^3), data = sediment)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -154.63  -83.57   28.61   54.98  196.21 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -3.389e+02  1.274e+02  -2.660 0.018668 *  
## time         7.573e+01  1.543e+01   4.907 0.000231 ***
## I(time^2)   -1.545e+00  4.804e-01  -3.215 0.006228 ** 
## I(time^3)    9.942e-03  3.945e-03   2.520 0.024479 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 112.2 on 14 degrees of freedom
## Multiple R-squared:  0.8961, Adjusted R-squared:  0.8738 
## F-statistic: 40.23 on 3 and 14 DF,  p-value: 3.925e-07

Fit a “linear spline” model.

f <- function(x,b0,b1,x0) ifelse(x<x0, b0+b1*x, b0+b1*x0)
nls.out <- nls(pellets ~ f(time,b0,b1,x0), data=sediment, start=c(b0=0,b1=50,x0=30))
summary(nls.out)$param
##      Estimate Std. Error   t value     Pr(>|t|)
## b0 -146.00000  71.292691 -2.047896 5.849712e-02
## b1   38.90556   4.338735  8.967027 2.055707e-07
## x0   25.95173   1.780619 14.574561 2.911998e-10

Plot the data and the fitted curves.

plot(pellets ~ time, data=sediment)
u <- par("usr") 
x <- seq(u[1],u[2],length=250)
y <- predict(lm.out, data.frame(time=x))
lines(x, y, col="blue", lwd=2)
y2 <- predict(nls.out, data.frame(time=x))
lines(x,y2,col="red",lwd=2)

Example from class: Michaelis-Menten equations (revisited)

A closer look at the residual sum of squares and the inference for the parameters Vmax and K.

We are using the data from the untreated cells first.

nls.out <- nls(vel ~ (Vm*conc)/(K+conc), data=puro, subset=(state=="untreated"), start = c(Vm=143,K=0.031))
summary(nls.out)$param
##        Estimate  Std. Error   t value     Pr(>|t|)
## Vm 160.28001125 6.480243120 24.733642 1.384612e-09
## K    0.04770813 0.007781868  6.130678 1.727056e-04

Visualize the residual sum of squares in a neighborhood of the parameter estimates, show the location of the least squares estimates, and show the confidence intervals from the confint function. We are truncating the RSS in the image (red colors).

dat <- subset(puro,state=="untreated")
f <- function(z,dat) sum((dat$vel-(z[2]*dat$conc)/(z[1]+dat$conc))^2)
hm <- 101
K <- seq(0.01,0.1,length=hm)
V <- seq(130,200,length=hm)
z <- expand.grid(K,V)
rss <- apply(z,1,f,dat=dat)
cut <- 5000
rss[rss>cut] <- cut
fit <- summary(nls.out)$param[,1]
cis <- suppressMessages(confint(nls.out))
par(las=1)
image(V,K,matrix(rss,ncol=hm,byrow=T),xlab="Vmax",col=rev(rainbow(256,start=0,end=2/3)))
text(fit[1],fit[2],"x",col="white")
lines(cis[1,],rep(0.012,2),col="white",lwd=3)
lines(rep(fit[1],2),c(0.011,0.013),col="white",lwd=3)
lines(rep(132,2),cis[2,],col="white",lwd=3)
lines(c(131,133),rep(fit[2],2),col="white",lwd=3)

Notice that the confidence intervals are not symmetric! The confint function uses a more complicated method (profiling) to generate these intervals. The methods are described in ore detail in the book Modern Applied Statistics with S by Venables and Ripley.

https://www.stats.ox.ac.uk/pub/MASS4/

confint(nls.out)
## Waiting for profiling to be done...
##           2.5%        97.5%
## Vm 145.6369544 176.54440158
## K    0.0313746   0.07006131

Below are the symmetric confidence intervals based on a normality assumption of the parameter estimates. Note that the p-values in the summary are derived making that normality assumption!

fit <- summary(nls.out)$param
fit[1,1]+c(-1,1)*qt(0.975,9)*fit[1,2]
## [1] 145.6207 174.9393
fit[2,1]+c(-1,1)*qt(0.975,9)*fit[2,2]
## [1] 0.03010432 0.06531194

Revisiting our model using both the treated and untreated cells.

nls.outC <- nls(vel ~ ((Vm+dV*x)*conc)/(K+dK*x+conc), data=puro, start=c(Vm=160,K=0.048,dV=0,dK=0))
summary(nls.outC)$param
##        Estimate  Std. Error   t value     Pr(>|t|)
## Vm 160.28012878 6.896020420 23.242409 2.040955e-15
## K    0.04770831 0.008281174  5.761057 1.496420e-05
## dV  52.40364441 9.551022356  5.486705 2.712600e-05
## dK   0.01641302 0.011428988  1.436087 1.672387e-01
confint(nls.outC)
## Waiting for profiling to be done...
##            2.5%        97.5%
## Vm 145.85284480 176.28020955
## K    0.03158687   0.06965928
## dV  31.33677121  73.09295345
## dK  -0.01042896   0.04186662

The confidence interval for dK contains zero, so it is plausible that the rate constant K is the same for the treated and untreated cells.