Loading the SPH.140.615 package.
library(SPH.140.615)
The Pearson & Lee (1906) data.
# Run the help-page example for the pear data (echoes str() and summary()).
example("pear")
##
## pear> str(pear)
## 'data.frame': 1376 obs. of 2 variables:
## $ father : num 63.6 64 65.5 58.8 59.4 62.5 62.9 65.7 67.3 58.7 ...
## $ daughter: num 52.6 53.9 55.8 56.2 56.1 56.1 56.9 56.3 56.7 57.2 ...
##
## pear> summary(pear)
## father daughter
## Min. :58.40 Min. :52.60
## 1st Qu.:65.80 1st Qu.:62.10
## Median :67.80 Median :63.80
## Mean :67.68 Mean :63.84
## 3rd Qu.:69.60 3rd Qu.:65.60
## Max. :76.00 Max. :72.60
Calculate the regression of daughter’s height on father’s height (i.e., for predicting daughter from father).
# Fit A: daughter's height is the response, father's height the predictor.
lm.outA <- lm(formula = daughter ~ father, data = pear)
summary(lm.outA)
##
## Call:
## lm(formula = daughter ~ father, data = pear)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.2148 -1.4814 0.0221 1.4914 8.3047
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 30.26347 1.49937 20.18 <2e-16 ***
## father 0.49609 0.02214 22.41 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.274 on 1374 degrees of freedom
## Multiple R-squared: 0.2677, Adjusted R-squared: 0.2672
## F-statistic: 502.3 on 1 and 1374 DF, p-value: < 2.2e-16
Calculate the regression of father’s height on daughter’s height (i.e., for predicting father from daughter).
# Fit B: father's height is the response, daughter's height the predictor.
lm.outB <- lm(formula = father ~ daughter, data = pear)
summary(lm.outB)
##
## Call:
## lm(formula = father ~ daughter, data = pear)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.8487 -1.5024 0.0714 1.6102 7.5563
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 33.23102 1.53840 21.60 <2e-16 ***
## daughter 0.53961 0.02408 22.41 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.371 on 1374 degrees of freedom
## Multiple R-squared: 0.2677, Adjusted R-squared: 0.2672
## F-statistic: 502.3 on 1 and 1374 DF, p-value: < 2.2e-16
The intercept and slope for regression A.
# Extract the fitted intercept and slope with the coef() accessor; `$coef`
# only works because `$` partially matches the `coefficients` component.
coA <- coef(lm.outA)
coA
## (Intercept) father
## 30.2634711 0.4960904
The intercept and slope for regression B.
# Extract the fitted intercept and slope with the coef() accessor; `$coef`
# only works because `$` partially matches the `coefficients` component.
coB <- coef(lm.outB)
coB
## (Intercept) daughter
## 33.231025 0.539609
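Transform the regression B coefficients so that its line can be drawn on the same axes as regression A (father on the x-axis, daughter on the y-axis). Regression B has the form \(y = mx + b\) with father as the response; solving for the predictor gives \(x = y/m - b/m\), i.e., intercept \(-b/m\) and slope \(1/m\).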
# Invert the fitted line: the intercept becomes -b/m and the slope 1/m.
# The original element names are kept, so the printed labels are unchanged.
coB <- setNames(c(-coB[1], 1) / coB[2], names(coB))
coB
## (Intercept) daughter
## -61.583525 1.853194
Plot the data with the two regression lines.
# Scatterplot of the two columns of pear, then overlay both fitted lines,
# each given as an (intercept, slope) pair.
plot(pear)
abline(coef = coA, col = "green", lwd = 2)
abline(coef = coB, col = "orange", lwd = 2)
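The span data (span and stature, both in inches), with the least-squares line for predicting stature from span.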
# Scatterplot of the span data with the least-squares line of stature on
# span drawn as a dashed red line.
plot(span, xlab = "span [ inches ]", ylab = "height [ inches ]")
abline(
  lsfit(x = span[["span"]], y = span[["stature"]]),
  lwd = 2, lty = 2, col = "red"
)
Predicting height from span.
# Linear model with stature as the response and span as the predictor.
lm.fit <- lm(formula = stature ~ span, data = span)
summary(lm.fit)
##
## Call:
## lm(formula = stature ~ span, data = span)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.3080 -1.1447 -0.0186 1.1502 7.5810
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 21.70025 1.13670 19.09 <2e-16 ***
## span 0.66939 0.01653 40.49 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.707 on 1048 degrees of freedom
## Multiple R-squared: 0.6101, Adjusted R-squared: 0.6097
## F-statistic: 1640 on 1 and 1048 DF, p-value: < 2.2e-16
# Pull the estimates out of the coefficient table by column name, via the
# coef() accessor, instead of relying on `$coef` partial matching and a
# positional column index.
coef(summary(lm.fit))[, "Estimate"]
## (Intercept) span
## 21.7002483 0.6693937
Residual standard deviation.
# Residual standard error as stored in the model summary.
summary(lm.fit)[["sigma"]]
## [1] 1.707371
The correlation.
# Correlation between the span and stature columns.
r <- cor(x = span[["span"]], y = span[["stature"]])
r
## [1] 0.7810676
# Full correlation matrix of the data frame's columns.
cor(x = span, method = "pearson")
## span stature
## span 1.0000000 0.7810676
## stature 0.7810676 1.0000000
Slope of the regression line, computed as \(r \cdot SD_y / SD_x\); it matches the span coefficient from the fitted model above.
# Slope = correlation times the ratio of the standard deviations (y over x).
r * sd(span[["stature"]]) / sd(span[["span"]])
## [1] 0.6693937
Height standard deviation.
# Standard deviation of the stature column.
sd(span[["stature"]])
## [1] 2.732911
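Typical prediction error using span to predict height, computed as \(SD_y \sqrt{1 - r^2}\). This is slightly smaller than the residual standard error reported above (1.707371) because the standard deviation divides by \(n - 1\) whereas the residual standard error divides by \(n - 2\).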
# Standard deviation of stature scaled by sqrt(1 - r^2).
sqrt(1 - r^2) * sd(span[["stature"]])
## [1] 1.706557