```{r setup, include=FALSE}
knitr::opts_chunk$set(echo=TRUE, fig.align="center")
```

#### Statistics for Laboratory Scientists ( 140.615 )

## Sample Size and Power Calculations - Advanced

#### The power curves from class

Special case: equal standard deviations and sample sizes.

```{r}
par(yaxs="i")
delta <- seq(-2.3,2.3,length=501)
se5 <- sqrt(2/5)
se10 <- sqrt(2/10)
se20 <- sqrt(2/20)
C <- qnorm(0.975)
pow5 <- 1-pnorm(C-delta/se5)+pnorm(-C-delta/se5)
pow10 <- 1-pnorm(C-delta/se10)+pnorm(-C-delta/se10)
pow20 <- 1-pnorm(C-delta/se20)+pnorm(-C-delta/se20)
plot(delta,pow5*100,type="l",xlab="",bty="n",ylab="Power",lwd=2,xaxt="n",ylim=c(0,100))
abline(h=c(0,20,40,60,80,100),lty=3,col="gray40")
abline(h=0)
lines(delta,pow10*100,lwd=2,col="blue",lty=2)
lines(delta,pow20*100,lwd=2,col="red",lty=3)
axis(1,-2:2,c(expression(paste(-2*sigma,"  ")),
              expression(paste(-sigma,"  ")),
              expression(0),
              expression(sigma),
              expression(2*sigma)))
mtext(expression(Delta),cex=1.3,side=1,line=2.5)
legend("bottomright",rev(c("n=5","n=10","n=20")),lty=3:1,lwd=2,col=rev(c("black","blue","red")))
```

#### Example from hypothesis testing, continued

Simulate ten cases, ten controls, under the **alternative**. The difference in population means is 0.5, and the population standard deviations are one in each group. How much power do we have to detect the difference in means?

A simulation experiment.

```{r,fig.width=9,fig.height=5}
n <- 10
nit <- 10000
p2 <- t2 <- rep(NA,nit)
set.seed(1)
for(j in 1:nit){
  x <- rnorm(n)  
  y <- rnorm(n)-0.5
  tmp <- t.test(x,y,var.equal=TRUE)
  t2[j] <- tmp$stat
  p2[j] <- tmp$p.value
}

par(mfrow=c(1,2),las=1)
xr <- c(-6,6)
cval <- qt(0.975,2*n-2)
hist(t2,xlim=xr,ylim=c(0,0.45),prob=TRUE,col=grey(0.95),breaks=21,xlab="test statistic",ylab="",main="")
curve(dt(x,2*n-2,ncp=0.5/sqrt(2/n)),col="red",lwd=3,add=TRUE)
abline(v=c(-cval,cval),lty="dotted",col="red",lwd=1.5)
title("Under the alternative") 
hist(p2,xlim=c(0,1),ylim=c(0,5),prob=TRUE,col="lightgray",breaks=21,xlab="p-value",ylab="",main="")
title(paste0("Power : ",formatC(mean(ifelse(p2<0.05,1,0)),format="f",digits=2)))
```

Estimated power.

```{r}
cval <- qt(0.975,2*n-2)
mean(ifelse(t2>cval,1,0))
mean(ifelse(t2<(-cval),1,0))
mean(ifelse(t2>cval,1,0))+mean(ifelse(t2<(-cval),1,0))
mean(ifelse(p2<0.05,1,0))
```

The above is an estimate of the power. What is the true value for the power?

```{r}
power.t.test(n=10,delta=0.5)$power
power.t.test(n=10,delta=0.5,strict=TRUE)$power
```

How much power would we have if the difference in means was one within-group standard deviation?

```{r}
power.t.test(n=10,delta=1,strict=TRUE)
```

How many experimental units would we need per group to have 80% power, assuming the difference in means was one within-group standard deviation?

```{r}
power.t.test(power=0.8,delta=1,strict=TRUE)
power.t.test(power=0.8,delta=1,strict=TRUE)$n
ceiling(power.t.test(power=0.8,delta=1,strict=TRUE)$n)
```

Let's make some power curves for n=10 and n=20 per group.

```{r}
nx <- 101
d <- seq(-2,2,length=nx)
pwr10 <- rep(NA,nx)
pwr20 <- rep(NA,nx)
for(k in 1:nx) pwr10[k] <- power.t.test(n=10,delta=d[k],strict=TRUE)$power
for(k in 1:nx) pwr20[k] <- power.t.test(n=20,delta=d[k],strict=TRUE)$power
plot(range(d),c(0,1),xlab=expression(Delta/sigma),ylab="power",type="n")
abline(h=seq(0,1,0.1),lty="dotted",col="lightgray")
abline(v=seq(-2,2,0.5),lty="dotted",col="lightgray")
lines(d,pwr10,col="green3",lwd=2)
lines(d,pwr20,col="blue",lwd=2)
```

#### Power in case population SDs are known

Population SDs.

```{r}
sigA <- 2
sigB <- 4
```

Sample sizes.

```{r}
n <- 5
m <- 15
```

Standard deviation of $\bar{X}$ - $\bar{Y}$.

```{r}
se <- sqrt(sigA^2/n + sigB^2/m)
```

The difference between the population means.

```{r}
delta <- 3
```

The critical value for $\alpha$ = 0.05.

```{r}
C <- qnorm(0.975)
```

The power.

```{r}
1 - pnorm(C - delta/se) + pnorm(-C - delta/se)
```

#### Power in case population SDs are not known but are equal

Same parameters as above, but assume we don't know $\sigma_A$ and $\sigma_B$.

The critical value for $\alpha$ = 0.05.

```{r}
C <- qt(0.975, n+m-2)
```

The power.

```{r}
1 - pt(C, n+m-2, delta/se) + pt(-C, n+m-2, delta/se)
```

#### Computer simulation to estimate power in the case that population SDs are neither known nor equal

A function to simulate normal data and get the p-value from the t-test.

```{r}
# n,m = sample sizes
# sigA,sigB = population SDs
# delta = difference between population means

sim <- function(n, m, sigA, sigB, delta){
  x <- rnorm(n, delta, sigA)
  y <- rnorm(m, 0, sigB)
  t.test(x,y)$p.value
}
```

For n=5, m=10, $\sigma_A$ = 1, $\sigma_B$ = 2, and $\Delta$ = 0, 0.5, 1.0, ..., 2.5, run 1000 simulations and get the p-value.

```{r}
delta <- c(0.0, 0.5, 1.0, 1.5, 2.0, 2.5)
p <- matrix(nrow=1000,ncol=length(delta))
for(i in 1:length(delta)){
  for(j in 1:nrow(p)){
    p[j,i] <- sim(5, 10, 1, 2, delta[i])
  }
}
```

Estimate the power.

```{r,fig.width=6}
pow <- apply(p, 2, function(a) mean(a<0.05))
print(pow)
par(las=1)
plot(delta,100*pow,xlab=expression(Delta),ylab="power [ % ]",ylim=c(0,100),pch=21,bg="grey",cex=1.25)
abline(h=seq(0,100,10),lty="dotted",col="lightgrey")
```