The data.
library(SPH.140.615)
puro
## conc vel state
## 1 0.02 76 treated
## 2 0.02 47 treated
## 3 0.06 97 treated
## 4 0.06 107 treated
## 5 0.11 123 treated
## 6 0.11 139 treated
## 7 0.22 159 treated
## 8 0.22 152 treated
## 9 0.56 191 treated
## 10 0.56 201 treated
## 11 1.10 207 treated
## 12 1.10 200 treated
## 13 0.02 67 untreated
## 14 0.02 51 untreated
## 15 0.06 84 untreated
## 16 0.06 86 untreated
## 17 0.11 98 untreated
## 18 0.11 115 untreated
## 19 0.22 131 untreated
## 20 0.22 124 untreated
## 21 0.56 144 untreated
## 22 0.56 158 untreated
## 23 1.10 160 untreated
Plot the data, just for the untreated cells.
plot(vel ~ conc, data=puro, subset=(state=="untreated"), xlab="concentration", ylab="initial velocity", lwd=2)
The regression of 1/V on 1/C, just with the data on untreated cells.
temp <- lm(1/vel ~ I(1/conc), data=puro, subset=(state=="untreated"))
summary(temp)$coef
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0069721337 5.678297e-04 12.278565 6.331192e-07
## I(1/conc) 0.0002150012 2.479421e-05 8.671428 1.156213e-05
vmax <- 1/temp$coef[1]
vmax
## (Intercept)
## 143.4281
km <- temp$coef[2]*vmax
km
## I(1/conc)
## 0.03083721
Plot the data with the regression line.
plot(I(1/vel) ~ I(1/conc), data=puro, subset=(state=="untreated"),
xlab="1 / concentration", ylab="1 / initial velocity", lwd=2)
abline(temp$coef, col="red", lty=2, lwd=2)
The non-linear fit.
nls.out <- nls(vel ~ (Vm*conc)/(K+conc), data=puro, subset=(state=="untreated"), start = c(Vm=143,K=0.031))
summary(nls.out)$param
## Estimate Std. Error t value Pr(>|t|)
## Vm 160.28001125 6.480243120 24.733642 1.384612e-09
## K 0.04770813 0.007781868 6.130678 1.727056e-04
Plot the data, with both fitted curves.
plot(vel ~ conc, data=puro, subset=(state=="untreated"), xlab="concentration", ylab="initial velocity", lwd=2)
u <- par("usr")
x <- seq(u[1],u[2],length=250)
y1 <- 1/predict(temp, data.frame(conc=x))
y2 <- predict(nls.out, data.frame(conc=x))
lines(x,y2, lwd=2, col="blue")
lines(x,y1, lwd=2, col="red", lty=2)
Non-linear regression estimation for just the treated cells.
nls.outB <- nls(vel ~ (Vm*conc)/(K+conc), data=puro, subset=(puro$state=="treated"), start=c(Vm=160,K=0.048))
summary(nls.outB)$param
## Estimate Std. Error t value Pr(>|t|)
## Vm 212.68377342 6.947156655 30.614507 3.241166e-11
## K 0.06412133 0.008280954 7.743229 1.565132e-05
Fit both regressions all in one.
puro$x <- ifelse(puro$state=="treated",1,0)
nls.outC <- nls(vel ~ ((Vm+dV*x)*conc)/(K+dK*x+conc), data=puro, start=c(Vm=160,K=0.048,dV=0,dK=0))
summary(nls.outC)$param
## Estimate Std. Error t value Pr(>|t|)
## Vm 160.28012878 6.896020420 23.242409 2.040955e-15
## K 0.04770831 0.008281174 5.761057 1.496420e-05
## dV 52.40364441 9.551022356 5.486705 2.712600e-05
## dK 0.01641302 0.011428988 1.436087 1.672387e-01
Fit with the K’s constrained to be equal.
nls.outD <- nls(vel ~ ((Vm+dV*x)*conc)/(K+conc), data=puro, start=c(Vm=160,K=0.048,dV=0))
summary(nls.outD)$param
## Estimate Std. Error t value Pr(>|t|)
## Vm 166.60407675 5.807428478 28.688098 1.006288e-17
## K 0.05797179 0.005910173 9.808816 4.372679e-09
## dV 42.02596380 6.272141186 6.700417 1.605939e-06
Plot data and both fits.
plot(vel ~ conc, data=puro, xlab="concentration", ylab="initial velocity", lwd=2,
pch=as.numeric(puro$state)-1, col=ifelse(puro$state=="treated","blue","red"))
u <- par("usr")
x <- seq(u[1],u[2],len=250)
y1 <- predict(nls.out, data.frame(conc=x))
y2 <- predict(nls.outB, data.frame(conc=x))
y3 <- predict(nls.outD, data.frame(conc=x, x=rep(0,length(x))))
y4 <- predict(nls.outD, data.frame(conc=x, x=rep(1,length(x))))
lines(x, y1, lwd=2, col="red", lty=2)
lines(x, y2, lwd=2, col="blue", lty=2)
lines(x, y3, lwd=2, col="red")
lines(x, y4, lwd=2, col="blue")
The data.
sediment
## time pellets
## 1 6 119
## 2 6 111
## 3 6 87
## 4 12 348
## 5 12 210
## 6 12 272
## 7 18 438
## 8 18 778
## 9 18 548
## 10 24 774
## 11 24 896
## 12 24 670
## 13 42 896
## 14 42 729
## 15 42 886
## 16 78 763
## 17 78 954
## 18 78 954
Fit a cubic model. We need to use I() to get the square and cubic terms to work.
lm.out <- lm(pellets ~ time + I(time^2) + I(time^3), data=sediment)
summary(lm.out)
##
## Call:
## lm(formula = pellets ~ time + I(time^2) + I(time^3), data = sediment)
##
## Residuals:
## Min 1Q Median 3Q Max
## -154.63 -83.57 28.61 54.98 196.21
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.389e+02 1.274e+02 -2.660 0.018668 *
## time 7.573e+01 1.543e+01 4.907 0.000231 ***
## I(time^2) -1.545e+00 4.804e-01 -3.215 0.006228 **
## I(time^3) 9.942e-03 3.945e-03 2.520 0.024479 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 112.2 on 14 degrees of freedom
## Multiple R-squared: 0.8961, Adjusted R-squared: 0.8738
## F-statistic: 40.23 on 3 and 14 DF, p-value: 3.925e-07
Fit a “linear spline” model.
f <- function(x,b0,b1,x0) ifelse(x<x0, b0+b1*x, b0+b1*x0)
nls.out <- nls(pellets ~ f(time,b0,b1,x0), data=sediment, start=c(b0=0,b1=50,x0=30))
summary(nls.out)$param
## Estimate Std. Error t value Pr(>|t|)
## b0 -146.00000 71.292691 -2.047896 5.849712e-02
## b1 38.90556 4.338735 8.967027 2.055707e-07
## x0 25.95173 1.780619 14.574561 2.911998e-10
Plot the data and the fitted curves.
plot(pellets ~ time, data=sediment)
u <- par("usr")
x <- seq(u[1],u[2],length=250)
y <- predict(lm.out, data.frame(time=x))
lines(x, y, col="blue", lwd=2)
y2 <- predict(nls.out, data.frame(time=x))
lines(x,y2,col="red",lwd=2)
A closer look at the residual sum of squares and the inference for the parameters Vmax and K.
We are using the data from the untreated cells first.
nls.out <- nls(vel ~ (Vm*conc)/(K+conc), data=puro, subset=(state=="untreated"), start = c(Vm=143,K=0.031))
summary(nls.out)$param
## Estimate Std. Error t value Pr(>|t|)
## Vm 160.28001125 6.480243120 24.733642 1.384612e-09
## K 0.04770813 0.007781868 6.130678 1.727056e-04
Visualize the residual sum of squares in a neighborhood of the
parameter estimates, show the location of the least squares estimates,
and show the confidence intervals from the confint
function. We are truncating the RSS in the image (red colors).
dat <- subset(puro,state=="untreated")
f <- function(z,dat) sum((dat$vel-(z[2]*dat$conc)/(z[1]+dat$conc))^2)
hm <- 101
K <- seq(0.01,0.1,length=hm)
V <- seq(130,200,length=hm)
z <- expand.grid(K,V)
rss <- apply(z,1,f,dat=dat)
cut <- 5000
rss[rss>cut] <- cut
fit <- summary(nls.out)$param[,1]
cis <- suppressMessages(confint(nls.out))
par(las=1)
image(V,K,matrix(rss,ncol=hm,byrow=T),xlab="Vmax",col=rev(rainbow(256,start=0,end=2/3)))
text(fit[1],fit[2],"x",col="white")
lines(cis[1,],rep(0.012,2),col="white",lwd=3)
lines(rep(fit[1],2),c(0.011,0.013),col="white",lwd=3)
lines(rep(132,2),cis[2,],col="white",lwd=3)
lines(c(131,133),rep(fit[2],2),col="white",lwd=3)
Notice that the confidence intervals are not symmetric! The
confint
function uses a more complicated method (profiling)
to generate these intervals. The methods are described in ore detail in
the book Modern Applied Statistics with S by Venables and
Ripley.
https://www.stats.ox.ac.uk/pub/MASS4/
confint(nls.out)
## Waiting for profiling to be done...
## 2.5% 97.5%
## Vm 145.6369544 176.54440158
## K 0.0313746 0.07006131
Below are the symmetric confidence intervals based on a normality assumption of the parameter estimates. Note that the p-values in the summary are derived making that normality assumption!
fit <- summary(nls.out)$param
fit[1,1]+c(-1,1)*qt(0.975,9)*fit[1,2]
## [1] 145.6207 174.9393
fit[2,1]+c(-1,1)*qt(0.975,9)*fit[2,2]
## [1] 0.03010432 0.06531194
Revisiting our model using both the treated and untreated cells.
nls.outC <- nls(vel ~ ((Vm+dV*x)*conc)/(K+dK*x+conc), data=puro, start=c(Vm=160,K=0.048,dV=0,dK=0))
summary(nls.outC)$param
## Estimate Std. Error t value Pr(>|t|)
## Vm 160.28012878 6.896020420 23.242409 2.040955e-15
## K 0.04770831 0.008281174 5.761057 1.496420e-05
## dV 52.40364441 9.551022356 5.486705 2.712600e-05
## dK 0.01641302 0.011428988 1.436087 1.672387e-01
confint(nls.outC)
## Waiting for profiling to be done...
## 2.5% 97.5%
## Vm 145.85284480 176.28020955
## K 0.03158687 0.06965928
## dV 31.33677121 73.09295345
## dK -0.01042896 0.04186662
The confidence interval for dK contains zero, so it is plausible that the rate constant K is the same for the treated and untreated cells.