Statistics for Laboratory Scientists ( 140.615 )

Linear Regression - Advanced

Example from class: David Sullivan’s heme data

h2o2 <- rep(c(0,10,25,50), each=3)
pf3d7 <- c(0.3399,0.3563,0.3538,
           0.3168,0.3054,0.3174,
           0.2460,0.2618,0.2848,
           0.1535,0.1613,0.1525)
The regression of optical density on hydrogen peroxide concentration, from scratch
n <- length(h2o2)
n
## [1] 12
yb <- mean(pf3d7)
yb
## [1] 0.2707917
xb <- mean(h2o2)
xb
## [1] 21.25
sxy <- sum(h2o2*pf3d7) - n*xb*yb
sxy
## [1] -16.47587
# alternatively
sum((h2o2-xb) * (pf3d7-yb))
## [1] -16.47587
sxx <- sum(h2o2^2) - n*xb^2
sxx
## [1] 4256.25
# alternatively
sum( (h2o2-xb)^2 )
## [1] 4256.25
b1hat <- sxy/sxx
b1hat
## [1] -0.003870984
b0hat <- yb-b1hat*xb
b0hat
## [1] 0.3530501
yhat <- b0hat + b1hat*h2o2
yhat
##  [1] 0.3530501 0.3530501 0.3530501 0.3143402 0.3143402 0.3143402 0.2562755 0.2562755 0.2562755 0.1595009
## [11] 0.1595009 0.1595009
rss <- sum((pf3d7-yhat)^2)
rss
## [1] 0.001317403
sigmahat <- sqrt(rss/(n-2))
sigmahat
## [1] 0.01147782

Compare to the results from the built-in ‘lm’ function.

lm.out <- lm(pf3d7 ~ h2o2)
lm.sum <- summary(lm.out)
lm.sum$coef[,1]
##  (Intercept)         h2o2 
##  0.353050073 -0.003870984
lm.sum$sigma
## [1] 0.01147782
Getting estimated standard errors for the regression parameters

The parameter estimate variance-covariance matrix (not including sigma).

lm.sum$cov.unscaled   
##              (Intercept)          h2o2
## (Intercept)  0.189427313 -0.0049926579
## h2o2        -0.004992658  0.0002349486

The square root of the diagonal elements.

diag(lm.sum$cov)
##  (Intercept)         h2o2 
## 0.1894273128 0.0002349486
sqrt(diag(lm.sum$cov))
## (Intercept)        h2o2 
##  0.43523248  0.01532803

The estimated parameter standard errors.

lm.sum$sigma * sqrt(diag(lm.sum$cov))
##  (Intercept)         h2o2 
## 0.0049955194 0.0001759324

Compare to the output from the ‘lm’ function.

lm.sum$coefficients
##                 Estimate   Std. Error   t value     Pr(>|t|)
## (Intercept)  0.353050073 0.0049955194  70.67335 7.844454e-15
## h2o2        -0.003870984 0.0001759324 -22.00268 8.426407e-10
lm.sum$coefficients[,2]
##  (Intercept)         h2o2 
## 0.0049955194 0.0001759324
Building confidence intervals for the regression parameters

The residual degrees of freedom.

df <- lm.out$df.residual 
df
## [1] 10

The t-statistic for a 95% confidence interval.

tmult <- qt(0.975, df)  
tmult
## [1] 2.228139

The parameter estimates.

beta <- lm.sum$coef[,1] 
beta
##  (Intercept)         h2o2 
##  0.353050073 -0.003870984

The estimated standard errors.

se <- lm.sum$coef[,2] 
se
##  (Intercept)         h2o2 
## 0.0049955194 0.0001759324

The 95% confidence interval for the intercept.

beta[1] - tmult*se[1]
## (Intercept) 
##   0.3419194
beta[1] + tmult*se[1]
## (Intercept) 
##   0.3641808
beta[1] + c(-1,1)*tmult*se[1]
## [1] 0.3419194 0.3641808

The 95% confidence interval for the slope.

beta[2] - tmult*se[2]
##         h2o2 
## -0.004262986
beta[2] + tmult*se[2]
##         h2o2 
## -0.003478982
beta[2] + c(-1,1)*tmult*se[2]
## [1] -0.004262986 -0.003478982

Compare to the output from the ‘confint’ function.

confint(lm.out)
##                    2.5 %       97.5 %
## (Intercept)  0.341919363  0.364180784
## h2o2        -0.004262986 -0.003478982