```{r setup, include=FALSE} knitr::opts_chunk$set(echo=TRUE, fig.align="center") ``` #### Statistics for Laboratory Scientists ( 140.615 ) ## Goodness of Fit - Advanced ### Example from class: simulations ```{r} o <- c(35,43,22) p <- c(0.25,0.50,0.25) e <- sum(o)*p e ``` Functions to calculate the test statistics. ```{r} f.xsq <- function(o,p){ e <- (p*sum(o)) sum((o-e)^2/e) } f.lrt <- function(o,p){ e <- (p*sum(o)) 2*sum(o*log(o/e),na.rm=TRUE) } xsq <- f.xsq(o,p) xsq lrt <- f.lrt(o,p) lrt ``` The Multinomial distribution. ```{r} set.seed(1) rmultinom(1,100,c(0.25,0.50,0.25)) rmultinom(5,100,c(0.25,0.50,0.25)) ``` Simulate 10,000 test statistics under the null. ```{r} set.seed(45) simdat <- rmultinom(10000,100,c(0.25,0.50,0.25)) dim(simdat) res.xsq <- apply(simdat,2,f.xsq,p=c(0.25,0.50,0.25)) str(res.xsq) res.lrt <- apply(simdat,2,f.lrt,p=c(0.25,0.50,0.25)) str(res.lrt) ``` Plot the histograms of the test statistics, and estimate the p-values. ```{r} hist(res.xsq,col="lightgrey",prob=TRUE,breaks=seq(0,20,1)) abline(v=xsq,col="red",lwd=2) mean(res.xsq>xsq) hist(res.lrt,col="lightgrey",prob=TRUE,breaks=seq(0,20,1)) abline(v=lrt,col="red",lwd=2) mean(res.lrt>lrt) ``` ### Example from class: fit of Poisson counts ```{r} x <- c(2, 2, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 4, 0, 0, 3, 0, 0, 0, 3, 0, 5, 0, 1, 0, 1, 2, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0) ob <- table(x) ob mu <- mean(x) mu ``` Binning 5+ together. ```{r} dpois(0:10,mu) dpois(0:4,mu) ppois(4,mu) ppois(4,mu,lower=FALSE) ppois(4,mu)+ppois(4,mu,lower=FALSE) p <- c(dpois(0:4,mu),ppois(4,mu,lower=FALSE)) p e <- p*length(x) e ``` The parametric hypothesis tests. ```{r} lrt <- f.lrt(ob,p) lrt pchisq(lrt,4,lower=FALSE) xsq <- f.xsq(ob,p) xsq pchisq(xsq,4,lower=FALSE) ``` Inference via simulations. ```{r} n <- length(x) n set.seed(1) tmp <- rpois(n,mu) tmp table(tmp) table(factor(tmp)) table(factor(tmp,levels=0:10)) tmp[tmp>5] <-5 table(factor(tmp,levels=0:5)) nit <- 10000 res.lrt <- rep(NA,nit) res.xsq <- rep(NA,nit) set.seed(1) for(j in 1:nit){ tmp <- rpois(n,mu) tmp[tmp>5] <-5 x <- table(factor(tmp,levels=0:5)) res.lrt[j] <- f.lrt(x,p) res.xsq[j] <- f.xsq(x,p) } ``` Plot the histograms of the test statistics, and estimate the p-values. ```{r} hist(res.lrt,col="lightgrey",prob=TRUE,breaks=101) abline(v=lrt,col="red",lwd=2) mean(res.lrt>lrt) hist(res.xsq,col="lightgrey",prob=TRUE,breaks=101) abline(v=xsq,col="red",lwd=2) mean(res.xsq>xsq) ```