Statistics for Laboratory Scientists ( 140.615 )

Random Variables

The Binomial distribution

The example from class: the number of red balls in 9 draws with replacement from an urn for which the proportion of red balls is 20%.

The Binomial coefficient

One red ball.

choose(9,1)
## [1] 9

Two red balls.

choose(9,2)
## [1] 36
factorial(9)
## [1] 362880
factorial(2)
## [1] 2
factorial(9-2)
## [1] 5040
factorial(9)/(factorial(2)*factorial(9-2))
## [1] 36

All possibilities.

choose(9,0:9)
##  [1]   1   9  36  84 126 126  84  36   9   1
The outcome probabilities

No red balls.

(1-0.2)^9
## [1] 0.1342177
choose(9,0)*(0.2^0)*(0.8^9)
## [1] 0.1342177
dbinom(0,9,0.2)
## [1] 0.1342177

All red balls.

0.2^9
## [1] 5.12e-07
choose(9,9)*(0.2^9)*(0.8^0)
## [1] 5.12e-07
dbinom(9,9,0.2)
## [1] 5.12e-07

One red ball.

choose(9,1)*(0.2^1)*(0.8^8)
## [1] 0.3019899
dbinom(1,9,0.2)
## [1] 0.3019899

All possibilities.

dbinom(0:9,9,0.2)
##  [1] 0.134217728 0.301989888 0.301989888 0.176160768 0.066060288 0.016515072 0.002752512 0.000294912
##  [9] 0.000018432 0.000000512
barplot(dbinom(0:9,9,0.2),names=0:9)

The cumulative distribution function
dbinom(0:4,9,0.2)
## [1] 0.13421773 0.30198989 0.30198989 0.17616077 0.06606029
sum(dbinom(0:4,9,0.2))
## [1] 0.9804186
pbinom(4,9,0.2)
## [1] 0.9804186
1- pbinom(4,9,0.2)
## [1] 0.01958144
pbinom(4,9,0.2,lower=FALSE)
## [1] 0.01958144
Simulate Binomial outcomes
set.seed(4)
rbinom(1,9,0.2)
## [1] 2
rbinom(10,9,0.2)
##  [1] 0 1 1 3 1 2 3 4 0 3
x <- rbinom(100,9,0.2)
table(x)
## x
##  0  1  2  3  4  5 
## 11 24 33 21  9  2
prop.table(table(x))
## x
##    0    1    2    3    4    5 
## 0.11 0.24 0.33 0.21 0.09 0.02
table(x)/length(x)
## x
##    0    1    2    3    4    5 
## 0.11 0.24 0.33 0.21 0.09 0.02
round(dbinom(0:9,9,0.2),2)
##  [1] 0.13 0.30 0.30 0.18 0.07 0.02 0.00 0.00 0.00 0.00

Poisson distribution

The probability function
dpois(3,5)
## [1] 0.1403739
exp(-5)*(5^3)/factorial(3)
## [1] 0.1403739
dpois(0:15,5)
##  [1] 0.0067379470 0.0336897350 0.0842243375 0.1403738958 0.1754673698 0.1754673698 0.1462228081 0.1044448630
##  [9] 0.0652780393 0.0362655774 0.0181327887 0.0082421767 0.0034342403 0.0013208616 0.0004717363 0.0001572454
barplot(dpois(0:15,5),names=0:15)

The cumulative distribution function
dpois(0:7,5)
## [1] 0.006737947 0.033689735 0.084224337 0.140373896 0.175467370 0.175467370 0.146222808 0.104444863
sum(dpois(0:7,5))
## [1] 0.8666283
ppois(7,5)
## [1] 0.8666283
1 - ppois(7,5)
## [1] 0.1333717
ppois(7,5,lower=FALSE)
## [1] 0.1333717
Simulate Poisson outcomes
rpois(1,5)
## [1] 4
rpois(20,5)
##  [1]  4  5  6  5  4  7  8  2  3  4  7  3  2  6 10  3  5  4  4  3
x <- rpois(100,5)
table(x)
## x
##  0  1  2  3  4  5  6  7  8  9 10 11 
##  1  3  8 14 17 18 16 10  5  5  2  1
prop.table(table(x))
## x
##    0    1    2    3    4    5    6    7    8    9   10   11 
## 0.01 0.03 0.08 0.14 0.17 0.18 0.16 0.10 0.05 0.05 0.02 0.01
round(dpois(0:10,5),2)
##  [1] 0.01 0.03 0.08 0.14 0.18 0.18 0.15 0.10 0.07 0.04 0.02

Poisson approximation for the Binomial

n <- 50000
p <- 1/100000
lambda <- n*p
lambda
## [1] 0.5
dbinom(0:5,n,p)
## [1] 0.6065291434 0.3032676044 0.0758161429 0.0126356447 0.0015793766 0.0001579266
dpois(0:5,lambda)
## [1] 0.6065306597 0.3032653299 0.0758163325 0.0126360554 0.0015795069 0.0001579507
dbinom(0:5,n,p) - dpois(0:5,lambda)
## [1] -1.516335e-06  2.274509e-06 -1.895494e-07 -4.106761e-07 -1.303081e-07 -2.408655e-08

The Gaussian distribution

The N(0,1) density function
curve(dnorm(x), from=-3, to=3)

curve(dnorm(x), from=-3, to=3, ylab="f(x)", main="Standard Gaussian")

dnorm(0)
## [1] 0.3989423
dnorm(1)
## [1] 0.2419707
dnorm(3)
## [1] 0.004431848
The N(0,1) cumulative distribution function
curve(pnorm(x), from=-3, to=3, ylab="P(X < x)", main="CDF")

pnorm(0)
## [1] 0.5
pnorm(2)
## [1] 0.9772499
pnorm(-2)
## [1] 0.02275013
pnorm(2) - pnorm(-2)
## [1] 0.9544997
The N(0,1) inverse cumulative distribution function
curve(qnorm(x), from=0, to=1, main="Inverse CDF", xlab="p", ylab="qnorm(p)")

qnorm(0.5)
## [1] 0
qnorm(0.95)
## [1] 1.644854
qnorm(0.975)
## [1] 1.959964
qnorm(0.025)
## [1] -1.959964
Area under the curve

Highlight 95% of the area under the standard Gaussian N(0,1).

L <- qnorm(0.025)
L
## [1] -1.959964
U <- qnorm(0.975)
U
## [1] 1.959964
curve(dnorm(x), from=-4, to=4)
abline(h=0)
abline(v=c(L,U), col="red", lty="dotted")

Simulation

Simulate some standard Gaussian data, plot the histogram, and superimpose the density.

rnorm(10)
##  [1] -0.7977097 -0.6041972  1.7150106 -0.7159483 -0.1332356 -0.9997651  1.8737601 -0.3373884  0.9732703
## [10]  0.9878279
hist(rnorm(100), breaks=11)

y <- rnorm(10000)
hist(y, breaks=seq(-4.5,4.5,by=0.25))

hist(y, breaks=seq(-4.5,4.5,by=0.25), prob=TRUE)
curve(dnorm(x), from=-4.5, to=4.5, add=TRUE, col="red", lwd=2)

The Normal with a mean of 5 and standard deviation of 2
curve(dnorm(x,mean=5,sd=2), from=-2, to=12, ylab="f(x)", main="Normal(mean=5,sd=2)")

curve(pnorm(x,mean=5,sd=2), from=-2, to=12, ylab="P(X < x)", main="CDF")

curve(qnorm(x,mean=5,sd=2), from=0, to=1, main="Inverse CDF", xlab="p", ylab="qnorm(p)")

Examples from class
curve(dnorm(x,mean=69,sd=3), from=57, to=81, ylab="f(x)", main="Normal(mean=69,sd=3)")

What proportion of men are taller than 5’7”?

pnorm(67,mean=69,sd=3,lower=FALSE)
## [1] 0.7475075
pnorm(67,69,3,lower=FALSE)
## [1] 0.7475075
1 - pnorm(67,69,3)
## [1] 0.7475075
pnorm((67-69)/3,lower=FALSE)
## [1] 0.7475075
pnorm(2/3)
## [1] 0.7475075

What proportion of men are between 5’3” and 6’?

pnorm(72,69,3)
## [1] 0.8413447
pnorm(63,69,3)
## [1] 0.02275013
pnorm(72,69,3) - pnorm(63,69,3)
## [1] 0.8185946
pnorm((72-69)/3) - pnorm((63-69)/3)
## [1] 0.8185946
pnorm(1) - pnorm(-2)
## [1] 0.8185946

The Uniform distribution

The Uniform (0,1) density function
curve(dunif(x), from=-1, to=2, ylab="f(x)", main="Uniform(0,1)")

dunif(1)
## [1] 1
dunif(0)
## [1] 1
dunif(0.5)
## [1] 1
dunif(2)
## [1] 0
dunif(-1)
## [1] 0
The Uniform (0,1) cumulative distribution function
curve(punif(x), from=-1, to=2, ylab="P(X < x)", main="CDF")

punif(1)
## [1] 1
punif(0)
## [1] 0
punif(0.5)
## [1] 0.5
The Uniform (0,1) inverse cumulative distribution function
curve(qunif(x), main="Inverse CDF")

qunif(0.5)
## [1] 0.5
qunif(0.95)
## [1] 0.95
qunif(0.05)
## [1] 0.05
The Uniform (0,360)
curve(dunif(x,0,360), from=-40, to=400, ylab="f(x)", main="Uniform(0,360)")

1/360
## [1] 0.002777778
curve(punif(x,0,360), from=-40, to=400, ylab="P(X < x)", main="CDF")

curve(qunif(x,0,360), main="Inverse CDF")

qunif(0.5,0,360)
## [1] 180