Statistics for Laboratory Scientists ( 140.615 )

Random Variables - Advanced

Another Binomial example, with plots side by side

dbinom(0:7,7,0.2)
## [1] 0.2097152 0.3670016 0.2752512 0.1146880 0.0286720 0.0043008 0.0003584 0.0000128
dbinom(0:7,7,0.6)
## [1] 0.0016384 0.0172032 0.0774144 0.1935360 0.2903040 0.2612736 0.1306368 0.0279936
barplot(dbinom(0:7,7,0.2),names=0:7)

barplot(dbinom(0:7,7,0.6),names=0:7)

par(mfrow=c(1,2))
barplot(dbinom(0:7,7,0.2),main="Binomial(n=7,p=0.1)",ylim=c(0,0.45),names=0:7,col="red")
barplot(dbinom(0:7,7,0.6),main="Binomial(n=7,p=0.6)",ylim=c(0,0.45),names=0:7,col="blue")

z <- rbind(dbinom(0:7,7,0.2),dbinom(0:7,7,0.6))
z
##           [,1]      [,2]      [,3]     [,4]     [,5]      [,6]      [,7]      [,8]
## [1,] 0.2097152 0.3670016 0.2752512 0.114688 0.028672 0.0043008 0.0003584 0.0000128
## [2,] 0.0016384 0.0172032 0.0774144 0.193536 0.290304 0.2612736 0.1306368 0.0279936
par(mfrow=c(1,1))
barplot(z,beside=T,names=0:7,col=c("red","blue"),space=c(0,0.5),ylim=c(0,0.45))
legend("topright",legend=c("Binomial(n=7,p=0.2)","Binomial(n=7,p=0.6)"),
       pch=22,pt.bg=c("red","blue"),pt.cex=2,bty="n")

Remember: you can look at the help file using the ‘?’ character. Type ‘?legend’ to see all options. Type ‘example(legend)’ to see examples.

The Poisson approximation for the Binomial

n <- 50000
p <- 1/100000
lambda <- n*p
lambda
## [1] 0.5
dbinom(0:5,n,p)
## [1] 0.6065291434 0.3032676044 0.0758161429 0.0126356447 0.0015793766 0.0001579266
dpois(0:5,lambda)
## [1] 0.6065306597 0.3032653299 0.0758163325 0.0126360554 0.0015795069 0.0001579507
z <- rbind(dbinom(0:5,n,p),dpois(0:5,lambda))
z
##           [,1]      [,2]       [,3]       [,4]        [,5]         [,6]
## [1,] 0.6065291 0.3032676 0.07581614 0.01263564 0.001579377 0.0001579266
## [2,] 0.6065307 0.3032653 0.07581633 0.01263606 0.001579507 0.0001579507
barplot(z,beside=T,names=0:5,col=c("red","blue"),space=c(0,0.5),ylim=c(0,0.65))
legend(4,0.65,legend=c("Binomial","Poisson"),pch=22,pt.bg=c("red","blue"),pt.cex=2,bty="n")

Non-Binomial vs Binomial

This is the example from class, about Mendel’s pea experiments.

We will be using the for() loop, and you will also need the sample() and ifelse() functions.

Here is how for() loops work.

a <- 1:10
b <- 2:11
d <- rep(NA,10)
d
##  [1] NA NA NA NA NA NA NA NA NA NA
# multiply two vectors
for(i in 1:10){
  d[i]<-a[i]*b[i]
}
d
##  [1]   2   6  12  20  30  42  56  72  90 110
# compute the inner product
s<-0
for(i in 1:10){
  s<-s+d[i]
}
s
## [1] 440

Note that for loops can often be avoided by using vectorized operations.

# multiply two vectors
d <- a*b
d
##  [1]   2   6  12  20  30  42  56  72  90 110
# compute the inner product
s <- sum(a*b)
s
## [1] 440

Note that for() can iterate over any sequence:

for(j in c(3,1,4,1,5,9,2,7)) print(j)
## [1] 3
## [1] 1
## [1] 4
## [1] 1
## [1] 5
## [1] 9
## [1] 2
## [1] 7
for(j in LETTERS[1:10]) print(j)
## [1] "A"
## [1] "B"
## [1] "C"
## [1] "D"
## [1] "E"
## [1] "F"
## [1] "G"
## [1] "H"
## [1] "I"
## [1] "J"

This is how the sample() and ifelse() function work.

?sample
sample(1:10)
##  [1]  1  4  2  5  8  7  3  9 10  6
sample(1:10,replace=TRUE)
##  [1] 5 5 3 9 7 1 4 5 6 1
sample(1:10,repl=T)
##  [1] 3 5 7 8 7 7 6 5 4 8
f1 <- c("A","a")
f2 <- sample(f1,2,repl=T)
f2
## [1] "a" "A"
?ifelse
ifelse(3+5<9,"Yes","No")
## [1] "Yes"
ifelse(3+8<9,"Yes","No")
## [1] "No"
f2
## [1] "a" "A"
ifelse("A"%in%f2,1,0)
## [1] 1

Consider Mendel’s pea experiments.

Purple or white flowers, purple dominant to white.

F0 genotypes are PP and ww, F1 genotypes are Pw.

Pick a random F2.

Self it and acquire 10 progeny.

The number of progeny with purple flowers is not binomial.

nit <- 10000
npurple <- rep(0,nit)
f1 <- c("A","a")
for(k in 1:nit){
  f2 <- sample(f1,2,repl=T)
  purple <- rep(0,10)
  for(j in 1:10){
    f3 <- sample(f2,2,repl=T)
    purple[j] <- ifelse("A"%in%f3,1,0)
  }
  npurple[k] <- sum(purple)
}

table(npurple)
## npurple
##    0    2    3    4    5    6    7    8    9   10 
## 2496    2   17   90  273  715 1246 1418  991 2752
table(factor(npurple,levels=0:10))
## 
##    0    1    2    3    4    5    6    7    8    9   10 
## 2496    0    2   17   90  273  715 1246 1418  991 2752
par(mfrow=c(1,1))
barplot(table(factor(npurple,levels=0:10)))

Pick 10 random F2’s.

Self each and take a child from each.

The number of progeny with purple flowers is binomial.

nit <- 10000
npurple2 <- rep(0,nit)
f1 <- c("A","a")
for(k in 1:nit){
  purple <- rep(0,10)
  for(j in 1:10){
    f2 <- sample(f1,2,repl=T)
    f3 <- sample(f2,2,repl=T)
    purple[j] <- ifelse("A"%in%f3,1,0)
  }
  npurple2[k] <- sum(purple)
}

table(npurple2)
## npurple2
##    1    2    3    4    5    6    7    8    9   10 
##   10   63  296  868 1714 2526 2398 1463  580   82
table(factor(npurple2,levels=0:10))
## 
##    0    1    2    3    4    5    6    7    8    9   10 
##    0   10   63  296  868 1714 2526 2398 1463  580   82
barplot(table(factor(npurple2,levels=0:10)))

par(mfrow=c(1,2))
barplot(table(factor(npurple,levels=0:10)),ylim=c(0,3000),main="Non - Binomial")
barplot(table(factor(npurple2,levels=0:10)),ylim=c(0,3000),main="Binomial")