The data.
library(SPH.140.615)
puro
## conc vel state
## 1 0.02 76 treated
## 2 0.02 47 treated
## 3 0.06 97 treated
## 4 0.06 107 treated
## 5 0.11 123 treated
## 6 0.11 139 treated
## 7 0.22 159 treated
## 8 0.22 152 treated
## 9 0.56 191 treated
## 10 0.56 201 treated
## 11 1.10 207 treated
## 12 1.10 200 treated
## 13 0.02 67 untreated
## 14 0.02 51 untreated
## 15 0.06 84 untreated
## 16 0.06 86 untreated
## 17 0.11 98 untreated
## 18 0.11 115 untreated
## 19 0.22 131 untreated
## 20 0.22 124 untreated
## 21 0.56 144 untreated
## 22 0.56 158 untreated
## 23 1.10 160 untreated
Plot the data, just for the untreated cells.
plot(vel ~ conc, data=puro, subset=(state=="untreated"), xlab="concentration", ylab="initial velocity", lwd=2)
The regression of 1/V on 1/C, just with the data on untreated cells. Under the Michaelis-Menten model \(V = V_{max} \, C / (K + C)\), taking reciprocals gives \(1/V = 1/V_{max} + (K/V_{max}) \cdot (1/C)\), so \(V_{max}\) and \(K\) can be recovered from the intercept and slope.
temp <- lm(1/vel ~ I(1/conc), data=puro, subset=(state=="untreated"))
summary(temp)$coef
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0069721337 5.678297e-04 12.278565 6.331192e-07
## I(1/conc) 0.0002150012 2.479421e-05 8.671428 1.156213e-05
vmax <- 1/temp$coef[1]
vmax
## (Intercept)
## 143.4281
km <- temp$coef[2]*vmax
km
## I(1/conc)
## 0.03083721
Plot the data with the regression line.
plot(I(1/vel) ~ I(1/conc), data=puro, subset=(state=="untreated"),
xlab="1 / concentration", ylab="1 / initial velocity", lwd=2)
abline(temp$coef, col="red", lty=2, lwd=2)
The non-linear fit, using the estimates from the linearized regression as starting values.
nls.out <- nls(vel ~ (Vm*conc)/(K+conc), data=puro, subset=(state=="untreated"), start = c(Vm=143,K=0.031))
summary(nls.out)$param
## Estimate Std. Error t value Pr(>|t|)
## Vm 160.28001125 6.480243120 24.733642 1.384612e-09
## K 0.04770813 0.007781868 6.130678 1.727056e-04
Plot the data, with both fitted curves.
plot(vel ~ conc, data=puro, subset=(state=="untreated"), xlab="concentration", ylab="initial velocity", lwd=2)
u <- par("usr")
x <- seq(u[1],u[2],length=250)
y1 <- 1/predict(temp, data.frame(conc=x))
y2 <- predict(nls.out, data.frame(conc=x))
lines(x,y2, lwd=2, col="blue")
lines(x,y1, lwd=2, col="red", lty=2)
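The two fits differ because they minimize different criteria: the linearized regression minimizes squared error on the 1/V scale, so on the original velocity scale its residual sum of squares cannot beat the non-linear fit. A quick numeric check (a sketch; datU is just a temporary variable):
datU <- subset(puro, state=="untreated")
rss.lin <- sum((datU$vel - 1/predict(temp, datU))^2) # back-transformed linearized fit
rss.nls <- deviance(nls.out)                         # RSS of the non-linear fit
c(linearized=rss.lin, nls=rss.nls)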
Non-linear regression estimation for just the treated cells.
nls.outB <- nls(vel ~ (Vm*conc)/(K+conc), data=puro, subset=(state=="treated"), start=c(Vm=160,K=0.048))
summary(nls.outB)$param
## Estimate Std. Error t value Pr(>|t|)
## Vm 212.68377342 6.947156655 30.614507 3.241166e-11
## K 0.06412133 0.008280954 7.743229 1.565132e-05
Fit both regressions in one model, using an indicator variable for the treated cells.
puro$x <- ifelse(puro$state=="treated",1,0)
nls.outC <- nls(vel ~ ((Vm+dV*x)*conc)/(K+dK*x+conc), data=puro, start=c(Vm=160,K=0.048,dV=0,dK=0))
summary(nls.outC)$param
## Estimate Std. Error t value Pr(>|t|)
## Vm 160.28012878 6.896020420 23.242409 2.040955e-15
## K 0.04770831 0.008281174 5.761057 1.496420e-05
## dV 52.40364441 9.551022356 5.486705 2.712600e-05
## dK 0.01641302 0.011428988 1.436087 1.672387e-01
Fit with the K’s constrained to be equal.
nls.outD <- nls(vel ~ ((Vm+dV*x)*conc)/(K+conc), data=puro, start=c(Vm=160,K=0.048,dV=0))
summary(nls.outD)$param
## Estimate Std. Error t value Pr(>|t|)
## Vm 166.60407675 5.807428478 28.688098 1.006288e-17
## K 0.05797179 0.005910173 9.808816 4.372679e-09
## dV 42.02596380 6.272141186 6.700417 1.605939e-06
Plot the data with the separate fits (dashed) and the constrained fit (solid).
plot(vel ~ conc, data=puro, xlab="concentration", ylab="initial velocity", lwd=2,
pch=as.numeric(puro$state)-1, col=ifelse(puro$state=="treated","blue","red"))
u <- par("usr")
x <- seq(u[1],u[2],len=250)
y1 <- predict(nls.out, data.frame(conc=x))
y2 <- predict(nls.outB, data.frame(conc=x))
y3 <- predict(nls.outD, data.frame(conc=x, x=rep(0,length(x))))
y4 <- predict(nls.outD, data.frame(conc=x, x=rep(1,length(x))))
lines(x, y1, lwd=2, col="red", lty=2)
lines(x, y2, lwd=2, col="blue", lty=2)
lines(x, y3, lwd=2, col="red")
lines(x, y4, lwd=2, col="blue")
The data.
sediment
## time pellets
## 1 6 119
## 2 6 111
## 3 6 87
## 4 12 348
## 5 12 210
## 6 12 272
## 7 18 438
## 8 18 778
## 9 18 548
## 10 24 774
## 11 24 896
## 12 24 670
## 13 42 896
## 14 42 729
## 15 42 886
## 16 78 763
## 17 78 954
## 18 78 954
Fit a cubic model. We need to wrap the squared and cubed terms in I() so that ^ is interpreted arithmetically rather than as a formula operator.
lm.out <- lm(pellets ~ time + I(time^2) + I(time^3), data=sediment)
summary(lm.out)
##
## Call:
## lm(formula = pellets ~ time + I(time^2) + I(time^3), data = sediment)
##
## Residuals:
## Min 1Q Median 3Q Max
## -154.63 -83.57 28.61 54.98 196.21
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.389e+02 1.274e+02 -2.660 0.018668 *
## time 7.573e+01 1.543e+01 4.907 0.000231 ***
## I(time^2) -1.545e+00 4.804e-01 -3.215 0.006228 **
## I(time^3) 9.942e-03 3.945e-03 2.520 0.024479 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 112.2 on 14 degrees of freedom
## Multiple R-squared: 0.8961, Adjusted R-squared: 0.8738
## F-statistic: 40.23 on 3 and 14 DF, p-value: 3.925e-07
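As a side note (a sketch), the same cubic model can be specified with poly(); raw=TRUE requests the raw rather than orthogonal polynomial terms, reproducing the coefficients above:
lm(pellets ~ poly(time, 3, raw=TRUE), data=sediment)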
Fit a “linear spline” model: the mean increases linearly up to a changepoint x0 and is constant beyond it.
f <- function(x,b0,b1,x0) ifelse(x<x0, b0+b1*x, b0+b1*x0)
nls.out <- nls(pellets ~ f(time,b0,b1,x0), data=sediment, start=c(b0=0,b1=50,x0=30))
summary(nls.out)$param
## Estimate Std. Error t value Pr(>|t|)
## b0 -146.00000 71.292691 -2.047896 5.849712e-02
## b1 38.90556 4.338735 8.967027 2.055707e-07
## x0 25.95173 1.780619 14.574561 2.911998e-10
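Beyond the changepoint x0 the fitted mean is constant at the level b0 + b1*x0. A quick computation of that plateau (a sketch):
est <- coef(nls.out)
unname(est["b0"] + est["b1"]*est["x0"]) # fitted plateau level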
Plot the data and the fitted curves.
plot(pellets ~ time, data=sediment)
u <- par("usr")
x <- seq(u[1],u[2],length=250)
y <- predict(lm.out, data.frame(time=x))
lines(x, y, col="blue", lwd=2)
y2 <- predict(nls.out, data.frame(time=x))
lines(x,y2,col="red",lwd=2)
A closer look at the residual sum of squares and the inference for the parameters Vmax and K.
We are using the data from the untreated cells first.
nls.out <- nls(vel ~ (Vm*conc)/(K+conc), data=puro, subset=(state=="untreated"), start = c(Vm=143,K=0.031))
summary(nls.out)$param
## Estimate Std. Error t value Pr(>|t|)
## Vm 160.28001125 6.480243120 24.733642 1.384612e-09
## K 0.04770813 0.007781868 6.130678 1.727056e-04
Visualize the residual sum of squares in a neighborhood of the
parameter estimates, show the location of the least squares estimates,
and show the confidence intervals from the confint
function. We are truncating the RSS in the image (red colors).
dat <- subset(puro,state=="untreated")
f <- function(z,dat) sum((dat$vel-(z[2]*dat$conc)/(z[1]+dat$conc))^2)
hm <- 101
K <- seq(0.01,0.1,length=hm)
V <- seq(130,200,length=hm)
z <- expand.grid(K,V)
rss <- apply(z,1,f,dat=dat)
cut <- 5000
rss[rss>cut] <- cut
fit <- summary(nls.out)$param[,1]
cis <- suppressMessages(confint(nls.out))
par(las=1)
image(V,K,matrix(rss,ncol=hm,byrow=T),xlab="Vmax",col=rev(rainbow(256,start=0,end=2/3)))
text(fit[1],fit[2],"x",col="white")
lines(cis[1,],rep(0.012,2),col="white",lwd=3)
lines(rep(fit[1],2),c(0.011,0.013),col="white",lwd=3)
lines(rep(132,2),cis[2,],col="white",lwd=3)
lines(c(131,133),rep(fit[2],2),col="white",lwd=3)
Notice that the confidence intervals are not symmetric! The confint function uses a more involved method (profiling) to generate these intervals. The methods are described in more detail in the book Modern Applied Statistics with S by Venables and Ripley.
https://www.stats.ox.ac.uk/pub/MASS4/
confint(nls.out)
## Waiting for profiling to be done...
## 2.5% 97.5%
## Vm 145.6369544 176.54440158
## K 0.0313746 0.07006131
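To see where the asymmetry comes from, you can inspect the profiles directly; profile() for nls fits returns an object with a plot method (a sketch):
pr <- profile(nls.out)
par(mfrow=c(1,2))
plot(pr)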
Below are the symmetric confidence intervals based on a normality assumption for the parameter estimates. Note that the p-values in the summary above are derived under that same normality assumption!
fit <- summary(nls.out)$param
fit[1,1]+c(-1,1)*qt(0.975,9)*fit[1,2]
## [1] 145.6207 174.9393
fit[2,1]+c(-1,1)*qt(0.975,9)*fit[2,2]
## [1] 0.03010432 0.06531194
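For comparison (a sketch), confint.default() gives similar Wald-type intervals, using normal rather than t quantiles:
confint.default(nls.out)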
Revisiting our model using both the treated and untreated cells.
nls.outC <- nls(vel ~ ((Vm+dV*x)*conc)/(K+dK*x+conc), data=puro, start=c(Vm=160,K=0.048,dV=0,dK=0))
summary(nls.outC)$param
## Estimate Std. Error t value Pr(>|t|)
## Vm 160.28012878 6.896020420 23.242409 2.040955e-15
## K 0.04770831 0.008281174 5.761057 1.496420e-05
## dV 52.40364441 9.551022356 5.486705 2.712600e-05
## dK 0.01641302 0.011428988 1.436087 1.672387e-01
confint(nls.outC)
## Waiting for profiling to be done...
## 2.5% 97.5%
## Vm 145.85284480 176.28020955
## K 0.03158687 0.06965928
## dV 31.33677121 73.09295345
## dK -0.01042896 0.04186662
The confidence interval for dK contains zero, so it is plausible that the rate constant K is the same for the treated and untreated cells.
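This can be tested formally with an F test comparing the two nested fits, here using the anova method for nls objects (a sketch):
anova(nls.outD, nls.outC)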
For some experiments the standard curves are not linear over the entire range of concentrations. This can happen if we reach the limit of detection at the lower concentrations or the measurement limits of our instrument at the higher concentrations. Below is one such example, where the log10 fluorescence intensity is not entirely linear in the log10 spike-in concentrations.
x <- c(-0.155, 0.204, 0.982, 1.453, 2.107, 2.716, 3.289)
y <- c( 1.557, 1.753, 2.343, 2.828, 3.421, 3.856, 4.084)
dat <- data.frame(x, y)
par(las=1, xaxs="i", yaxs="i")
plot(dat, xlab="log10 ( concentration )", ylab="log10 ( fluorescence intensity )", xlim=c(-1,4), ylim=c(1,5))
In this setting a generalized logistic function is often helpful. One common choice is the 4 parameter logistic (“4PL” or “Hill”) model, also called an “S-curve”. The expected value of the log10 response is given by the following equation, where \(x\) is the log10 concentration:
\[f(x|\theta) = \theta_1 + (\theta_4-\theta_1) / (1+10^{\theta_3 (x-\theta_2)})\]
f.4 <- function(x,p1,p2,p3,p4) p1 + (p4-p1)/(1+(10^(p3*(x-p2))))
In these types of experiments the spiked-in concentrations often span multiple orders of magnitude and fold changes are used (1, 10, 100, etc). Thus logarithmic transformations of concentration and output should be used. Vendor software packages sometimes do these calculations on the un-transformed concentrations and readings, however. Make sure you understand what you are looking at!
The parameters \(\theta_1\) and \(\theta_4\) are called the upper asymptote and lower asymptote respectively, since the curve approaches \(\theta_1\) when the log10 concentrations approach \(+ \infty\) and \(\theta_4\) when the log10 concentrations approach \(- \infty\).
The S-curve is symmetric around \(\theta_2\), which is the log10 concentration where you have reached half the overall log10 fluorescence intensity increase.
The parameter \(\theta_3\) determines the steepness of the S-curve: the slope of the tangent at \(x = \theta_2\) is proportional to \(\theta_3\).
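Differentiating \(f\) and evaluating at \(x = \theta_2\) makes this precise:
\[f'(\theta_2) = \frac{\ln(10) \, \theta_3 \, (\theta_1 - \theta_4)}{4}\]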
To illustrate what these S-curves look like, we plot some examples with \(\theta_1\) = 5 and \(\theta_4\) = 0 below.
In the first example we chose \(\theta_3\) = 1 and three different values for \(\theta_2\).
par(las=1)
curve(f.4(x,p1=5,p2=0,p3=1,p4=0),
from=-5,to=5, lwd=2,
xlab="log10 ( concentration )",
ylab="log10 ( fluorescence intensity )")
curve(f.4(x,p1=5,p2=-1,p3=1,p4=0), col="red", add=TRUE, lwd=2)
curve(f.4(x,p1=5,p2=1,p3=1,p4=0), col="blue", add=TRUE, lwd=2)
axis(4,0,expression(theta[4]))
axis(4,5,expression(theta[1]))
legend("topleft",
c(expression(paste(theta[2]," = -1")),
expression(paste(theta[2]," = 0")),
expression(paste(theta[2]," = 1"))),
col=c("red","black","blue"), lty=1, lwd=2)
In the second example we chose \(\theta_2\) = 0 and three different values for \(\theta_3\).
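# equivalent parameterization: (10^(x-p2))^p3 is identical to 10^(p3*(x-p2))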
f.4 <- function(x,p1,p2,p3,p4) p1 + (p4-p1)/(1+(10^(x-p2))^p3)
par(las=1,xaxs="i")
curve(f.4(x,p1=5,p2=0,p3=1,p4=0),
from=-5,to=5, lwd=2,
xlab="log10 ( concentration ) ",
ylab="log10 ( fluorescence intensity )")
curve(f.4(x,p1=5,p2=0,p3=0.5,p4=0), col="red", add=TRUE, lwd=2)
curve(f.4(x,p1=5,p2=0,p3=2,p4=0), col="blue", add=TRUE, lwd=2)
axis(4,0,expression(theta[4]))
axis(4,5,expression(theta[1]))
legend("topleft",
c(expression(paste(theta[3]," = 1/2")),
expression(paste(theta[3]," = 1")),
expression(paste(theta[3]," = 2"))),
col=c("red","black","blue"),
lty=1, lwd=2)
For our observed data, let’s fit the 4 parameter S-curve using non-linear least squares.
nls.out <- nls(y ~ f.4(x,p1,p2,p3,p4), data=dat, start = c(p1=5,p2=1,p3=1,p4=1))
summary(nls.out)
##
## Formula: y ~ f.4(x, p1, p2, p3, p4)
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## p1 4.35541 0.03578 121.73 1.22e-06 ***
## p2 1.39761 0.02020 69.19 6.65e-06 ***
## p3 0.54817 0.02103 26.07 0.000124 ***
## p4 1.16722 0.04559 25.60 0.000131 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0123 on 3 degrees of freedom
##
## Number of iterations to convergence: 5
## Achieved convergence tolerance: 2.723e-07
The parameter estimates we obtained using non-linear least squares:
p <- summary(nls.out)$parameters[,1]
p
## p1 p2 p3 p4
## 4.3554127 1.3976129 0.5481687 1.1672242
Let’s plot the data and add the fitted curve.
par(las=1)
plot(dat, xlab="log10 ( concentration )", ylab="log10 ( fluorescence intensity )")
curve(f.4(x,p1=p[1],p2=p[2],p3=p[3],p4=p[4]), col="green3", add=TRUE, lwd=2)
You can see the S-shape better when plotting the curve over a wider range.
par(las=1)
plot(dat, xlab="log10 ( concentration )", ylab="log10 ( fluorescence intensity )", xlim=c(-4,7), ylim=c(1,5))
curve(f.4(x,p1=p[1],p2=p[2],p3=p[3],p4=p[4]), col="green3", add=TRUE, lwd=2)
abline(h=p[c(1,4)],lty="dotted")
Assume you get log10 fluorescence intensities for five more samples.
new.y <- c(3.796, 2.463, 3.506, 1.985, 3.238)
If you solve the above equation for \(x\) you can get the log10 concentration estimates for these five new measurements.
f.4.inv <- function(y,p1,p2,p3,p4) log10((p4-p1)/(y-p1)-1)/p3+p2
new.x <- round(f.4.inv(new.y,p1=p[1],p2=p[2],p3=p[3],p4=p[4]),3)
new.x
## [1] 2.624 1.098 2.200 0.554 1.886
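As a sanity check (a sketch), plugging these estimates back into the fitted curve should approximately recover the observed intensities (only approximately, since new.x was rounded):
round(f.4(new.x, p1=p[1], p2=p[2], p3=p[3], p4=p[4]), 3)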
Here is a plot.
par(las=1)
plot(dat, xlab="log10 ( concentration )", ylab="log10 ( fluorescence intensity )")
curve(f.4(x,p1=p[1],p2=p[2],p3=p[3],p4=p[4]), col="green3", add=TRUE, lwd=2)
for(k in 1:5) lines(c(-4,new.x[k]),rep(new.y[k],2),lty="dotted")
for(k in 1:5) lines(rep(new.x[k],2),c(new.y[k],1),lty="dotted")
points(new.x,new.y,pch=20)
Note that you can estimate the concentration for any log10 fluorescence intensity between \(\theta_4\) and \(\theta_1\). Sometimes a new intensity falls outside that range, in which case the inversion fails and you end up with a missing value.
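For example (a sketch, with two made-up intensities outside the fitted asymptotes), the inversion produces NaN:
f.4.inv(c(1.0, 4.5), p1=p[1], p2=p[2], p3=p[3], p4=p[4])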
The S-curve defined above offers a lot of flexibility, but assumes that a symmetric S-shaped curve describes your observed data well. This is very often the case. There is an extension of the above model that uses a fifth parameter to allow for a departure from symmetry (the five-parameter logistic). This makes the model fitting more complicated and may or may not improve the fit. See generalized logistic functions.