Statistics for Laboratory Scientists (140.615)

Correlation

Example from class: fathers’ and daughters’ heights

The Pearson & Lee (1906) data.

# Attach the course package used throughout this worked example.
library(SPH.140.615)
# Run the examples from the `pear` help topic; the echoed calls (prefixed
# with "pear>") and their printed results are reproduced below.
example(pear)
## 
## pear> str(pear)
## 'data.frame':    1376 obs. of  2 variables:
##  $ father  : num  63.6 64 65.5 58.8 59.4 62.5 62.9 65.7 67.3 58.7 ...
##  $ daughter: num  52.6 53.9 55.8 56.2 56.1 56.1 56.9 56.3 56.7 57.2 ...
## 
## pear> summary(pear)
##      father         daughter    
##  Min.   :58.40   Min.   :52.60  
##  1st Qu.:65.80   1st Qu.:62.10  
##  Median :67.80   Median :63.80  
##  Mean   :67.68   Mean   :63.84  
##  3rd Qu.:69.60   3rd Qu.:65.60  
##  Max.   :76.00   Max.   :72.60

Plot the data.

# Scatterplot of the daughters' heights (y-axis) against the fathers'
# heights (x-axis), labelling the axes with the column names.
plot(pear$father, pear$daughter, xlab = "father", ylab = "daughter")

Calculate the sample means and the sample standard deviations of the fathers’ and daughters’ heights.

# Column-wise sample means. colMeans() is the dedicated helper for this and
# avoids routing a data frame through apply().
mes <- colMeans(pear)
mes
##   father daughter 
## 67.67871 63.83823
# Column-wise sample standard deviations. vapply() visits each column of the
# data frame directly and guarantees exactly one numeric value per column.
sds <- vapply(pear, sd, numeric(1))
sds
##   father daughter 
## 2.770190 2.656137

Calculate the Pearson correlation between the fathers’ and daughters’ heights.

# Full correlation matrix of the two columns.
cor(pear)
##             father  daughter
## father   1.0000000 0.5173923
## daughter 0.5173923 1.0000000
# Pull the off-diagonal entry out of the matrix by row and column name.
cor(pear)["father", "daughter"]
## [1] 0.5173923
# The same coefficient computed straight from the two height vectors.
with(pear, cor(father, daughter))
## [1] 0.5173923

Calculate Spearman’s rank correlation coefficient.

# Rank-based (Spearman) correlation between the two height vectors.
cor(x = pear$father, y = pear$daughter, method = "spearman")
## [1] 0.5081574

Plot the ranks.

# Scatterplot of the ranks of the daughters' heights against the ranks of
# the fathers' heights; cex=0.5 draws the plotting symbols at half size.
plot(rank(pear$father), rank(pear$daughter), cex=0.5)

# Pearson correlation of the ranks. The printed value is the same as the
# one returned by cor(..., method="spearman") for these data.
cor(rank(pear$father), rank(pear$daughter))
## [1] 0.5081574

Test for association between the fathers’ and daughters’ heights.

# Test based on the Pearson correlation: the printed result reports the
# t statistic, its degrees of freedom, the p-value for the two-sided
# alternative "true correlation is not equal to 0", a 95 percent confidence
# interval for the correlation, and the sample estimate.
cor.test(pear$father, pear$daughter)
## 
##  Pearson's product-moment correlation
## 
## data:  pear$father and pear$daughter
## t = 22.411, df = 1374, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.4776055 0.5550614
## sample estimates:
##       cor 
## 0.5173923