```{r setup, include=FALSE}
knitr::opts_chunk$set(echo=TRUE, fig.align="center")
```

#### Statistics for Laboratory Scientists ( 140.615 )

## Random Variables - Advanced                                      

#### Another Binomial example, with plots side by side

```{r}
dbinom(0:7,7,0.2)
dbinom(0:7,7,0.6)

barplot(dbinom(0:7,7,0.2),names=0:7)
barplot(dbinom(0:7,7,0.6),names=0:7)

par(mfrow=c(1,2))
barplot(dbinom(0:7,7,0.2),main="Binomial(n=7,p=0.1)",ylim=c(0,0.45),names=0:7,col="red")
barplot(dbinom(0:7,7,0.6),main="Binomial(n=7,p=0.6)",ylim=c(0,0.45),names=0:7,col="blue")

z <- rbind(dbinom(0:7,7,0.2),dbinom(0:7,7,0.6))
z

par(mfrow=c(1,1))
barplot(z,beside=T,names=0:7,col=c("red","blue"),space=c(0,0.5),ylim=c(0,0.45))
legend("topright",legend=c("Binomial(n=7,p=0.2)","Binomial(n=7,p=0.6)"),
       pch=22,pt.bg=c("red","blue"),pt.cex=2,bty="n")
```

Remember: you can look at the help file using the '?' character. Type '?legend' to see all options. Type 'example(legend)' to see examples.

#### The Poisson approximation for the Binomial

```{r}
n <- 50000
p <- 1/100000
lambda <- n*p
lambda

dbinom(0:5,n,p)
dpois(0:5,lambda)

z <- rbind(dbinom(0:5,n,p),dpois(0:5,lambda))
z

barplot(z,beside=T,names=0:5,col=c("red","blue"),space=c(0,0.5),ylim=c(0,0.65))
legend(4,0.65,legend=c("Binomial","Poisson"),pch=22,pt.bg=c("red","blue"),pt.cex=2,bty="n")
```

#### Non-Binomial vs Binomial

This is the example from class, about Mendel's pea experiments.

We will be using the for() loop, and you will also need the sample() and ifelse() functions.

Here is how for() loops work.

```{r}
a <- 1:10
b <- 2:11
d <- rep(NA,10)
d

# multiply two vectors
for(i in 1:10){
  d[i]<-a[i]*b[i]
}
d

# compute the inner product
s<-0
for(i in 1:10){
  s<-s+d[i]
}
s
```

Note that for loops can often be avoided by using vectorized operations. 

```{r}
# multiply two vectors
d <- a*b
d

# compute the inner product
s <- sum(a*b)
s
```

Note that for() can iterate over any sequence:

```{r}
for(j in c(3,1,4,1,5,9,2,7)) print(j)
for(j in LETTERS[1:10]) print(j)
```

This is how the sample() and ifelse() function work.

```{r}
?sample
sample(1:10)
sample(1:10,replace=TRUE)
sample(1:10,repl=T)

f1 <- c("A","a")
f2 <- sample(f1,2,repl=T)
f2

?ifelse
ifelse(3+5<9,"Yes","No")
ifelse(3+8<9,"Yes","No")

f2
ifelse("A"%in%f2,1,0)
```

Consider Mendel's pea experiments.

Purple or white flowers, purple dominant to white.

F0 genotypes are PP and ww, F1 genotypes are Pw.

Pick a random F2.

Self it and acquire 10 progeny.

The number of progeny with purple flowers is not binomial.

```{r}
nit <- 10000
npurple <- rep(0,nit)
f1 <- c("A","a")
for(k in 1:nit){
  f2 <- sample(f1,2,repl=T)
  purple <- rep(0,10)
  for(j in 1:10){
    f3 <- sample(f2,2,repl=T)
    purple[j] <- ifelse("A"%in%f3,1,0)
  }
  npurple[k] <- sum(purple)
}

table(npurple)
table(factor(npurple,levels=0:10))

par(mfrow=c(1,1))
barplot(table(factor(npurple,levels=0:10)))
```

Pick 10 random F2's.

Self each and take a child from each.

The number of progeny with purple flowers is binomial.

```{r}
nit <- 10000
npurple2 <- rep(0,nit)
f1 <- c("A","a")
for(k in 1:nit){
  purple <- rep(0,10)
  for(j in 1:10){
    f2 <- sample(f1,2,repl=T)
    f3 <- sample(f2,2,repl=T)
    purple[j] <- ifelse("A"%in%f3,1,0)
  }
  npurple2[k] <- sum(purple)
}

table(npurple2)
table(factor(npurple2,levels=0:10))
barplot(table(factor(npurple2,levels=0:10)))

par(mfrow=c(1,2))
barplot(table(factor(npurple,levels=0:10)),ylim=c(0,3000),main="Non - Binomial")
barplot(table(factor(npurple2,levels=0:10)),ylim=c(0,3000),main="Binomial")
```