```{r setup, include=FALSE} knitr::opts_chunk$set(echo=TRUE, fig.align="center") ``` #### Statistics for Laboratory Scientists ( 140.615 ) ## Sample Size and Power Calculations - Advanced #### The power curves from class Special case: equal standard deviations and sample sizes. ```{r} par(yaxs="i") delta <- seq(-2.3,2.3,length=501) se5 <- sqrt(2/5) se10 <- sqrt(2/10) se20 <- sqrt(2/20) C <- qnorm(0.975) pow5 <- 1-pnorm(C-delta/se5)+pnorm(-C-delta/se5) pow10 <- 1-pnorm(C-delta/se10)+pnorm(-C-delta/se10) pow20 <- 1-pnorm(C-delta/se20)+pnorm(-C-delta/se20) plot(delta,pow5*100,type="l",xlab="",bty="n",ylab="Power",lwd=2,xaxt="n",ylim=c(0,100)) abline(h=c(0,20,40,60,80,100),lty=3,col="gray40") abline(h=0) lines(delta,pow10*100,lwd=2,col="blue",lty=2) lines(delta,pow20*100,lwd=2,col="red",lty=3) axis(1,-2:2,c(expression(paste(-2*sigma," ")), expression(paste(-sigma," ")), expression(0), expression(sigma), expression(2*sigma))) mtext(expression(Delta),cex=1.3,side=1,line=2.5) legend("bottomright",rev(c("n=5","n=10","n=20")),lty=3:1,lwd=2,col=rev(c("black","blue","red"))) ``` #### Example from hypothesis testing, continued Simulate ten cases, ten controls, under the **alternative**. The difference in population means is 0.5, and the population standard deviations are one in each group. How much power do we have to detect the difference in means? A simulation experiment. ```{r,fig.width=9,fig.height=5} n <- 10 nit <- 10000 p2 <- t2 <- rep(NA,nit) set.seed(1) for(j in 1:nit){ x <- rnorm(n) y <- rnorm(n)-0.5 tmp <- t.test(x,y,var.equal=TRUE) t2[j] <- tmp$stat p2[j] <- tmp$p.value } par(mfrow=c(1,2),las=1) xr <- c(-6,6) cval <- qt(0.975,2*n-2) hist(t2,xlim=xr,ylim=c(0,0.45),prob=TRUE,col=grey(0.95),breaks=21,xlab="test statistic",ylab="",main="") curve(dt(x,2*n-2,ncp=0.5/sqrt(2/n)),col="red",lwd=3,add=TRUE) abline(v=c(-cval,cval),lty="dotted",col="red",lwd=1.5) title("Under the alternative") hist(p2,xlim=c(0,1),ylim=c(0,5),prob=TRUE,col="lightgray",breaks=21,xlab="p-value",ylab="",main="") title(paste0("Power : ",formatC(mean(ifelse(p2<0.05,1,0)),format="f",digits=2))) ``` Estimated power. ```{r} cval <- qt(0.975,2*n-2) mean(ifelse(t2>cval,1,0)) mean(ifelse(t2<(-cval),1,0)) mean(ifelse(t2>cval,1,0))+mean(ifelse(t2<(-cval),1,0)) mean(ifelse(p2<0.05,1,0)) ``` The above is an estimate of the power. What is the true value for the power? ```{r} power.t.test(n=10,delta=0.5)$power power.t.test(n=10,delta=0.5,strict=TRUE)$power ``` How much power would we have if the difference in means was one within-group standard deviation? ```{r} power.t.test(n=10,delta=1,strict=TRUE) ``` How many experimental units would we need per group to have 80% power, assuming the difference in means was one within-group standard deviation? ```{r} power.t.test(power=0.8,delta=1,strict=TRUE) power.t.test(power=0.8,delta=1,strict=TRUE)$n ceiling(power.t.test(power=0.8,delta=1,strict=TRUE)$n) ``` Let's make some power curves for n=10 and n=20 per group. ```{r} nx <- 101 d <- seq(-2,2,length=nx) pwr10 <- rep(NA,nx) pwr20 <- rep(NA,nx) for(k in 1:nx) pwr10[k] <- power.t.test(n=10,delta=d[k],strict=TRUE)$power for(k in 1:nx) pwr20[k] <- power.t.test(n=20,delta=d[k],strict=TRUE)$power plot(range(d),c(0,1),xlab=expression(Delta/sigma),ylab="power",type="n") abline(h=seq(0,1,0.1),lty="dotted",col="lightgray") abline(v=seq(-2,2,0.5),lty="dotted",col="lightgray") lines(d,pwr10,col="green3",lwd=2) lines(d,pwr20,col="blue",lwd=2) ``` #### Power in case population SDs are known Population SDs. ```{r} sigA <- 2 sigB <- 4 ``` Sample sizes. ```{r} n <- 5 m <- 15 ``` Standard deviation of $\bar{X}$ - $\bar{Y}$. ```{r} se <- sqrt(sigA^2/n + sigB^2/m) ``` The difference between the population means. ```{r} delta <- 3 ``` The critical value for $\alpha$ = 0.05. ```{r} C <- qnorm(0.975) ``` The power. ```{r} 1 - pnorm(C - delta/se) + pnorm(-C - delta/se) ``` #### Power in case population SDs are not known but are equal Same parameters as above, but assume we don't know $\sigma_A$ and $\sigma_B$. The critical value for $\alpha$ = 0.05. ```{r} C <- qt(0.975, n+m-2) ``` The power. ```{r} 1 - pt(C, n+m-2, delta/se) + pt(-C, n+m-2, delta/se) ``` #### Computer simulation to estimate power in the case that population SDs are neither known nor equal A function to simulate normal data and get the p-value from the t-test. ```{r} # n,m = sample sizes # sigA,sigB = population SDs # delta = difference between population means sim <- function(n, m, sigA, sigB, delta){ x <- rnorm(n, delta, sigA) y <- rnorm(m, 0, sigB) t.test(x,y)$p.value } ``` For n=5, m=10, $\sigma_A$ = 1, $\sigma_B$ = 2, and $\Delta$ = 0, 0.5, 1.0, ..., 2.5, run 1000 simulations and get the p-value. ```{r} delta <- c(0.0, 0.5, 1.0, 1.5, 2.0, 2.5) p <- matrix(nrow=1000,ncol=length(delta)) for(i in 1:length(delta)){ for(j in 1:nrow(p)){ p[j,i] <- sim(5, 10, 1, 2, delta[i]) } } ``` Estimate the power. ```{r,fig.width=6} pow <- apply(p, 2, function(a) mean(a<0.05)) print(pow) par(las=1) plot(delta,100*pow,xlab=expression(Delta),ylab="power [ % ]",ylim=c(0,100),pch=21,bg="grey",cex=1.25) abline(h=seq(0,100,10),lty="dotted",col="lightgrey") ```