Generate some random data, 20 design points equally spaced on the x-axis, and assume E[y|x] = 10 + 5x. Assume Gaussian noise with mean zero and standard deviation 10.
set.seed(1)
x <- 1:20
x
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
eps <- rnorm(20, sd=10)
eps
## [1] -6.2645381 1.8364332 -8.3562861 15.9528080 3.2950777 -8.2046838 4.8742905 7.3832471
## [9] 5.7578135 -3.0538839 15.1178117 3.8984324 -6.2124058 -22.1469989 11.2493092 -0.4493361
## [17] -0.1619026 9.4383621 8.2122120 5.9390132
y <- 10 + 5*x + eps
y
## [1] 8.735462 21.836433 16.643714 45.952808 38.295078 31.795316 49.874291 57.383247 60.757814
## [10] 56.946116 80.117812 73.898432 68.787594 57.853001 96.249309 89.550664 94.838097 109.438362
## [19] 113.212212 115.939013
plot(x,y)
Fit a linear regression model.
xydat <- data.frame(x,y)
head(xydat)
## x y
## 1 1 8.735462
## 2 2 21.836433
## 3 3 16.643714
## 4 4 45.952808
## 5 5 38.295078
## 6 6 31.795316
lm.fit <- lm(y ~ x, data=xydat)
summary(lm.fit)
##
## Call:
## lm(formula = y ~ x, data = xydat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.808 -5.168 1.875 4.833 15.450
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.6391 4.3158 2.233 0.0385 *
## x 5.2158 0.3603 14.477 2.33e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.291 on 18 degrees of freedom
## Multiple R-squared: 0.9209, Adjusted R-squared: 0.9165
## F-statistic: 209.6 on 1 and 18 DF, p-value: 2.33e-11
confint(lm.fit)
## 2.5 % 97.5 %
## (Intercept) 0.571931 18.706212
## x 4.458915 5.972736
The fitted values.
lm.fit$fitted
## 1 2 3 4 5 6 7 8 9 10 11
## 14.85490 20.07072 25.28655 30.50237 35.71820 40.93402 46.14985 51.36568 56.58150 61.79733 67.01315
## 12 13 14 15 16 17 18 19 20
## 72.22898 77.44480 82.66063 87.87645 93.09228 98.30810 103.52393 108.73976 113.95558
fitted(lm.fit)
## 1 2 3 4 5 6 7 8 9 10 11
## 14.85490 20.07072 25.28655 30.50237 35.71820 40.93402 46.14985 51.36568 56.58150 61.79733 67.01315
## 12 13 14 15 16 17 18 19 20
## 72.22898 77.44480 82.66063 87.87645 93.09228 98.30810 103.52393 108.73976 113.95558
predict(lm.fit)
## 1 2 3 4 5 6 7 8 9 10 11
## 14.85490 20.07072 25.28655 30.50237 35.71820 40.93402 46.14985 51.36568 56.58150 61.79733 67.01315
## 12 13 14 15 16 17 18 19 20
## 72.22898 77.44480 82.66063 87.87645 93.09228 98.30810 103.52393 108.73976 113.95558
Make a picture with the regression line and the fitted values.
plot(xydat$x, xydat$y, xlab="x", ylab="y")
abline(lsfit(xydat$x, xydat$y))
points(xydat$x, predict(lm.fit), pch=21, bg="grey")
You can use the ‘predict’ function to get predicted values and confidence limits for the mean response for any value of x.
predict(lm.fit, data.frame(x=c(5,10,20)), interval="confidence")
## fit lwr upr
## 1 35.71820 29.68662 41.74978
## 2 61.79733 57.41639 66.17826
## 3 113.95558 105.54399 122.36717
predict(lm.fit, data.frame(x=c(5,10,20)), interval="confidence", level=0.99)
## fit lwr upr
## 1 35.71820 27.45442 43.98198
## 2 61.79733 55.79508 67.79958
## 3 113.95558 102.43100 125.48016
Plot the confidence band for the mean response.
xx <- seq(1, 20, by=0.1)
predict.mean <- predict(lm.fit, data.frame(x=xx), interval="confidence")
plot(xydat$x, xydat$y, xlab="x", ylab="y", ylim=range(predict.mean), col="lightgrey")
lines(xx, predict.mean[,1], lwd=2)
lines(xx, predict.mean[,2], lty=2)
lines(xx, predict.mean[,3], lty=2)
You can also use the ‘predict’ function to get prediction intervals for new observations.
predict(lm.fit, data.frame(x=c(5,10,20)), interval="prediction")
## fit lwr upr
## 1 35.71820 15.28863 56.14777
## 2 61.79733 41.79283 81.80182
## 3 113.95558 92.70136 135.20980
Plot the confidence band for the mean response, and the prediction interval for new observations.
predict.new <- predict(lm.fit, data.frame(x=xx), interval="prediction")
plot(xydat$x, xydat$y, xlab="x", ylab="y", ylim=range(predict.new), col="lightgrey")
lines(xx, predict.mean[,1], lwd=2)
lines(xx, predict.mean[,2], lty=2)
lines(xx, predict.mean[,3], lty=2)
lines(xx, predict.new[,2], lty=2, col="blue")
lines(xx, predict.new[,3], lty=2, col="blue")
h2o2 <- rep(c(0,10,25,50), each=3)
pf3d7 <- c(0.3399,0.3563,0.3538,
0.3168,0.3054,0.3174,
0.2460,0.2618,0.2848,
0.1535,0.1613,0.1525)
dat <- data.frame(conc=h2o2, od=pf3d7)
The regression of optical density on hydrogen peroxide concentration.
lm.fit <- lm(od ~ conc, data=dat)
summary(lm.fit)$coef
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.353050073 0.0049955194 70.67335 7.844454e-15
## conc -0.003870984 0.0001759324 -22.00268 8.426407e-10
Predict the optical density at H202 concentration 30.
predict(lm.fit, data.frame(conc=30))
## 1
## 0.2369206
Predict the optical density at H202 concentrations 20, 30, and 40.
predict(lm.fit, data.frame(conc=c(20,30,40)))
## 1 2 3
## 0.2756304 0.2369206 0.1982107
Predict the optical density at H202 concentrations 20, 30, and 40, and get the 95% confidence intervals.
predict(lm.fit, data.frame(conc=c(20,30,40)), interval="confidence")
## fit lwr upr
## 1 0.2756304 0.2682315 0.2830293
## 2 0.2369206 0.2287800 0.2450611
## 3 0.1982107 0.1877931 0.2086283
Predict the optical density at H202 concentrations 20, 30, and 40, and get the 95% prediction intervals.
predict(lm.fit, data.frame(conc=c(20,30,40)),interval="prediction")
## fit lwr upr
## 1 0.2756304 0.2490074 0.3022533
## 2 0.2369206 0.2100820 0.2637591
## 3 0.1982107 0.1705961 0.2258253
Plot the confidence band for the mean response, and the prediction interval for new observations.
xx <- seq(0, 50, length=101)
predict.mean <- predict(lm.fit, data.frame(conc=xx), interval="confidence")
predict.new <- predict(lm.fit, data.frame(conc=xx), interval="prediction")
plot(dat$conc, dat$od, xlab="H2O2 concentration", ylab="OD", ylim=range(predict.new), col="lightgrey")
lines(xx, predict.mean[,1], lwd=2)
lines(xx, predict.mean[,2], lty=2)
lines(xx, predict.mean[,3], lty=2)
lines(xx, predict.new[,2], lty=2, col="blue")
lines(xx, predict.new[,3], lty=2, col="blue")
dat <- data.frame(quinine=c( 0, 0, 0,
12,12,12,
24,24,24,
36,36,36),
fluor=c(100.45, 98.92,101.33,
133.19,127.33,126.78,
152.72,157.43,160.81,
188.73,191.96,183.70))
dat
## quinine fluor
## 1 0 100.45
## 2 0 98.92
## 3 0 101.33
## 4 12 133.19
## 5 12 127.33
## 6 12 126.78
## 7 24 152.72
## 8 24 157.43
## 9 24 160.81
## 10 36 188.73
## 11 36 191.96
## 12 36 183.70
Plot the data.
plot(dat)
abline(lsfit(dat$quinine, dat$fluor), col="blue")
Using the ‘calibrate’ function from the SPH.140.615 package, estimate the concentration corresponding to the fluorescence of 143.70.
library(SPH.140.615)
calibrate(dat$quinine, dat$fluor, 143.70)
## est lo hi
## 18.03601 14.97447 21.09755
Estimate the concentration on the basis of 3 independent measurements.
newy <- c(148.56, 149.36, 150.29)
calibrate(dat$quinine, dat$fluor, newy)
## est lo hi
## 20.38325 18.46910 22.29739