c.rvd.knit

Random Variables

The Binomial distribution

The example from class: the number of red balls in 9 draws with replacement from an urn for which the proportion of red balls is 20%.

The Binomial coefficient

One red ball.

choose(9,1)

## [1] 9

Two red balls.

choose(9,2)

## [1] 36

factorial(9)

## [1] 362880

factorial(2)

## [1] 2

factorial(9-2)

## [1] 5040

factorial(9)/(factorial(2)*factorial(9-2))

## [1] 36

All possibilities.

choose(9,0:9)

##  [1]   1   9  36  84 126 126  84  36   9   1

The outcome probabilities

No red balls.

(1-0.2)^9

## [1] 0.1342177

choose(9,0)*(0.2^0)*(0.8^9)

## [1] 0.1342177

dbinom(0,9,0.2)

## [1] 0.1342177

All red balls.

0.2^9

## [1] 5.12e-07

choose(9,9)*(0.2^9)*(0.8^0)

## [1] 5.12e-07

dbinom(9,9,0.2)

## [1] 5.12e-07

One red ball.

choose(9,1)*(0.2^1)*(0.8^8)

## [1] 0.3019899

dbinom(1,9,0.2)

## [1] 0.3019899

All possibilities.

dbinom(0:9,9,0.2)

##  [1] 0.134217728 0.301989888 0.301989888 0.176160768 0.066060288 0.016515072 0.002752512 0.000294912
##  [9] 0.000018432 0.000000512

barplot(dbinom(0:9,9,0.2),names=0:9)

The cumulative distribution function

dbinom(0:4,9,0.2)

## [1] 0.13421773 0.30198989 0.30198989 0.17616077 0.06606029

sum(dbinom(0:4,9,0.2))

## [1] 0.9804186

pbinom(4,9,0.2)

## [1] 0.9804186

1- pbinom(4,9,0.2)

## [1] 0.01958144

pbinom(4,9,0.2,lower=FALSE)

## [1] 0.01958144

Simulate Binomial outcomes

set.seed(4)
rbinom(1,9,0.2)

## [1] 2

rbinom(10,9,0.2)

##  [1] 0 1 1 3 1 2 3 4 0 3

x <- rbinom(100,9,0.2)
table(x)

## x
##  0  1  2  3  4  5 
## 11 24 33 21  9  2

prop.table(table(x))

## x
##    0    1    2    3    4    5 
## 0.11 0.24 0.33 0.21 0.09 0.02

table(x)/length(x)

## x
##    0    1    2    3    4    5 
## 0.11 0.24 0.33 0.21 0.09 0.02

round(dbinom(0:9,9,0.2),2)

##  [1] 0.13 0.30 0.30 0.18 0.07 0.02 0.00 0.00 0.00 0.00

Poisson distribution

The probability function

dpois(3,5)

## [1] 0.1403739

exp(-5)*(5^3)/factorial(3)

## [1] 0.1403739

dpois(0:15,5)

##  [1] 0.0067379470 0.0336897350 0.0842243375 0.1403738958 0.1754673698 0.1754673698 0.1462228081 0.1044448630
##  [9] 0.0652780393 0.0362655774 0.0181327887 0.0082421767 0.0034342403 0.0013208616 0.0004717363 0.0001572454

barplot(dpois(0:15,5),names=0:15)

The cumulative distribution function

dpois(0:7,5)

## [1] 0.006737947 0.033689735 0.084224337 0.140373896 0.175467370 0.175467370 0.146222808 0.104444863

sum(dpois(0:7,5))

## [1] 0.8666283

ppois(7,5)

## [1] 0.8666283

1 - ppois(7,5)

## [1] 0.1333717

ppois(7,5,lower=FALSE)

## [1] 0.1333717

Simulate Poisson outcomes

rpois(1,5)

## [1] 4

rpois(20,5)

##  [1]  4  5  6  5  4  7  8  2  3  4  7  3  2  6 10  3  5  4  4  3

x <- rpois(100,5)
table(x)

## x
##  0  1  2  3  4  5  6  7  8  9 10 11 
##  1  3  8 14 17 18 16 10  5  5  2  1

prop.table(table(x))

## x
##    0    1    2    3    4    5    6    7    8    9   10   11 
## 0.01 0.03 0.08 0.14 0.17 0.18 0.16 0.10 0.05 0.05 0.02 0.01

round(dpois(0:10,5),2)

##  [1] 0.01 0.03 0.08 0.14 0.18 0.18 0.15 0.10 0.07 0.04 0.02

Poisson approximation for the Binomial

n <- 50000
p <- 1/100000
lambda <- n*p
lambda

## [1] 0.5

dbinom(0:5,n,p)

## [1] 0.6065291434 0.3032676044 0.0758161429 0.0126356447 0.0015793766 0.0001579266

dpois(0:5,lambda)

## [1] 0.6065306597 0.3032653299 0.0758163325 0.0126360554 0.0015795069 0.0001579507

dbinom(0:5,n,p) - dpois(0:5,lambda)

## [1] -1.516335e-06  2.274509e-06 -1.895494e-07 -4.106761e-07 -1.303081e-07 -2.408655e-08

The Gaussian distribution

The N(0,1) density function

curve(dnorm(x), from=-3, to=3)

curve(dnorm(x), from=-3, to=3, ylab="f(x)", main="Standard Gaussian")

dnorm(0)

## [1] 0.3989423

dnorm(1)

## [1] 0.2419707

dnorm(3)

## [1] 0.004431848

The N(0,1) cumulative distribution function

curve(pnorm(x), from=-3, to=3, ylab="P(X < x)", main="CDF")

pnorm(0)

## [1] 0.5

pnorm(2)

## [1] 0.9772499

pnorm(-2)

## [1] 0.02275013

pnorm(2) - pnorm(-2)

## [1] 0.9544997

The N(0,1) inverse cumulative distribution function

curve(qnorm(x), from=0, to=1, main="Inverse CDF", xlab="p", ylab="qnorm(p)")

qnorm(0.5)

## [1] 0

qnorm(0.95)

## [1] 1.644854

qnorm(0.975)

## [1] 1.959964

qnorm(0.025)

## [1] -1.959964

Area under the curve

Highlight 95% of the area under the standard Gaussian N(0,1).

L <- qnorm(0.025)
L

## [1] -1.959964

U <- qnorm(0.975)
U

## [1] 1.959964

curve(dnorm(x), from=-4, to=4)
abline(h=0)
abline(v=c(L,U), col="red", lty="dotted")

Simulation

Simulate some standard Gaussian data, plot the histogram, and superimpose the density.

rnorm(10)

##  [1] -0.7977097 -0.6041972  1.7150106 -0.7159483 -0.1332356 -0.9997651  1.8737601 -0.3373884  0.9732703
## [10]  0.9878279

hist(rnorm(100), breaks=11)

y <- rnorm(10000)
hist(y, breaks=seq(-4.5,4.5,by=0.25))

hist(y, breaks=seq(-4.5,4.5,by=0.25), prob=TRUE)
curve(dnorm(x), from=-4.5, to=4.5, add=TRUE, col="red", lwd=2)

The Normal with a mean of 5 and standard deviation of 2

curve(dnorm(x,mean=5,sd=2), from=-2, to=12, ylab="f(x)", main="Normal(mean=5,sd=2)")

curve(pnorm(x,mean=5,sd=2), from=-2, to=12, ylab="P(X < x)", main="CDF")

curve(qnorm(x,mean=5,sd=2), from=0, to=1, main="Inverse CDF", xlab="p", ylab="qnorm(p)")

Examples from class

curve(dnorm(x,mean=69,sd=3), from=57, to=81, ylab="f(x)", main="Normal(mean=69,sd=3)")

What proportion of men are taller than 5’7”?

pnorm(67,mean=69,sd=3,lower=FALSE)

## [1] 0.7475075

pnorm(67,69,3,lower=FALSE)

## [1] 0.7475075

1 - pnorm(67,69,3)

## [1] 0.7475075

pnorm((67-69)/3,lower=FALSE)

## [1] 0.7475075

pnorm(2/3)

## [1] 0.7475075

What proportion of men are between 5’3” and 6’?

pnorm(72,69,3)

## [1] 0.8413447

pnorm(63,69,3)

## [1] 0.02275013

pnorm(72,69,3) - pnorm(63,69,3)

## [1] 0.8185946

pnorm((72-69)/3) - pnorm((63-69)/3)

## [1] 0.8185946

pnorm(1) - pnorm(-2)

## [1] 0.8185946

The Uniform distribution

The Uniform (0,1) density function

curve(dunif(x), from=-1, to=2, ylab="f(x)", main="Uniform(0,1)")

dunif(1)

## [1] 1

dunif(0)

## [1] 1

dunif(0.5)

## [1] 1

dunif(2)

## [1] 0

dunif(-1)

## [1] 0

The Uniform (0,1) cumulative distribution function

curve(punif(x), from=-1, to=2, ylab="P(X < x)", main="CDF")

punif(1)

## [1] 1

punif(0)

## [1] 0

punif(0.5)

## [1] 0.5

The Uniform (0,1) inverse cumulative distribution function

curve(qunif(x), main="Inverse CDF")

qunif(0.5)

## [1] 0.5

qunif(0.95)

## [1] 0.95

qunif(0.05)

## [1] 0.05

The Uniform (0,360)

curve(dunif(x,0,360), from=-40, to=400, ylab="f(x)", main="Uniform(0,360)")

1/360

## [1] 0.002777778

curve(punif(x,0,360), from=-40, to=400, ylab="P(X < x)", main="CDF")

curve(qunif(x,0,360), main="Inverse CDF")

qunif(0.5,0,360)

## [1] 180

Statistics for Laboratory Scientists ( 140.615 )

Random Variables

The Binomial distribution

The Binomial coefficient

The outcome probabilities

The cumulative distribution function

Simulate Binomial outcomes

Poisson distribution

The probability function

The cumulative distribution function

Simulate Poisson outcomes

Poisson approximation for the Binomial

The Gaussian distribution

The N(0,1) density function

The N(0,1) cumulative distribution function

The N(0,1) inverse cumulative distribution function

Area under the curve

Simulation

The Normal with a mean of 5 and standard deviation of 2

Examples from class

The Uniform distribution

The Uniform (0,1) density function

The Uniform (0,1) cumulative distribution function

The Uniform (0,1) inverse cumulative distribution function

The Uniform (0,360)