First some utility functions.
# returns binary representation of 1:(2^n)
binary.v <- function(n){
x <- 1:(2^n)
mx <- max(x)
digits <- floor(log2(mx))
ans <- 0:(digits-1); lx <- length(x)
x <- matrix(rep(x,rep(digits, lx)),ncol=lx)
x <- (x %/% 2^ans) %% 2
}
# function to perform a paired permutation test.
paired.perm.test <- function(d,n.perm=1000){
n <- length(d)
if(is.null(n.perm)) { # do exact test
ind <- binary.v(n)
allt <- apply(ind,2,function(x,y)
t.test((2*x-1)*y)$statistic,d)
}
else { # do n.perm samples
allt <- 1:n.perm
for(i in 1:n.perm)
allt[i] <- t.test(d*sample(c(-1,1),n,repl=TRUE))$statistic
}
allt
}
The paired data.
x <- c(117.3, 100.1, 94.5, 135.5, 92.9, 118.9, 144.8, 103.9, 103.8, 153.6, 163.1)
y <- c(145.9, 94.8, 108.0, 122.6, 130.2, 143.9, 149.9, 138.5, 91.7, 162.6, 202.5)
d <- y-x
d
## [1] 28.6 -5.3 13.5 -12.9 37.3 25.0 5.1 34.6 -12.1 9.0 39.4
Permutation test, all possible permutations.
set.seed(1)
tall <- paired.perm.test(d,n.perm=NULL)
# p-value
tobs <- t.test(d)$statistic
mean(abs(tall) >= abs(tobs))
## [1] 0.03710938
Permutation test, 1000 permutations.
set.seed(2)
tsamp <- paired.perm.test(d,n.perm=1000)
# p-value
mean(abs(tsamp) >= abs(tobs))
## [1] 0.043
A utility function.
# function to perform permutation test
perm.test <- function(x, y, n.perm=1000, var.equal=TRUE){
# number of data points
kx <- length(x)
ky <- length(y)
n <- kx + ky
# Data re-compiled
X <- c(x,y)
z <- rep(1:0,c(kx,ky))
if(is.null(n.perm)) { # do exact permutation test
o <- binary.v(n) # indicator of all possible samples
o <- o[,apply(o,2,sum)==kx]
nc <- choose(n,kx)
allt <- 1:nc
for(i in 1:nc) {
xn <- X[o[,i]==1]
yn <- X[o[,i]==0]
allt[i] <- t.test(xn,yn,var.equal=var.equal)$statistic
}
}
else { # do 1000 permutations of the data
allt <- 1:n.perm
for(i in 1:n.perm) {
z <- sample(z)
xn <- X[z==1]
yn <- X[z==0]
allt[i] <- t.test(xn,yn,var.equal=var.equal)$statistic
}
}
allt
}
The two-sample data.
x <- c(43.3, 57.1, 35.0, 50.0, 38.2, 61.2)
y <- c(51.9, 95.1, 90.0, 49.7, 101.5, 74.1, 84.5, 46.8, 75.1)
Permutation test, all possible permutations.
set.seed(3)
tall <- perm.test(x,y,n.perm=NULL)
# p-value
tobs <- t.test(x,y,var.equal=TRUE)$statistic
mean(abs(tall) >= abs(tobs))
## [1] 0.01498501
Permutation test, 1000 permutations.
set.seed(4)
tsamp <- perm.test(x,y,n.perm=1000)
# p-value
mean(abs(tsamp) >= abs(tobs))
## [1] 0.024
xA <- c(27.0,54.6,33.5,27.6,46.0,22.2,44.2,17.3,15.9,32.8)
xB <- c(17.4,20.5,13.9,14.8,27.9,10.6,33.7,15.4,25.0,24.1)
stripchart(list(xA,xB),vertical=T,pch=21,xlim=c(0.5,2.5))
segments(0.9,mean(xA),1.1,mean(xA),lwd=2,col="red")
segments(1.9,mean(xB),2.1,mean(xB),lwd=2,col="blue")
abline(h=mean(c(xA,xB)),lty=2)
xAnew <- xA
xAnew[2] <- 99.9
stripchart(list(xAnew,xB),vertical=T,pch=21,xlim=c(0.5,2.5))
segments(0.9,mean(xAnew),1.1,mean(xAnew),lwd=2,col="red")
segments(1.9,mean(xB),2.1,mean(xB),lwd=2,col="blue")
abline(h=mean(c(xAnew,xB)),lty=2)
For the permutation test, we use the test statistic from the two sample t-test, but we do not use the parametric null distribution.
t.test(xA,xB)$statistic
## t
## 2.533564
Let’s write a function first, to make things easier. Note that the input are two vectors, generically called xa and xb.
my.perm <- function(xa,xb){
# combine the two vectors
x <- c(xa,xb)
# calculate the total length of observations
n <- length(x)
# calculate the length of the first vector
na <- length(xa)
# shuffle the order of the observations across groups
z <- x[sample(1:n)]
# return the t test statistic from the shuffled data
return(t.test(z[1:na],z[-(1:na)])$stat)
}
Example:
set.seed(1)
# let's try one with our xA and xB
my.perm(xA,xB)
## t
## 1.12726
# and onther one
my.perm(xA,xB)
## t
## 1.238832
# and onther one
my.perm(xA,xB)
## t
## 0.06278934
The permutation test.
# let's do 10000 shuffles
nit <- 10000
# create a vector of length nit (10000)
myres <- rep(NA,nit)
# fill in the slots of that vector with the permutation test t statistics
for(j in 1:nit) myres[j] <- my.perm(xA,xB)
Let’s plot the distribution of the t statistics, add the parametric t, and show the observed test statistic.
hist(myres,breaks=51,col="lightgrey",probability=TRUE)
tstat <- t.test(xA,xB)$stat
tstat
## t
## 2.533564
df <- t.test(xA,xB)$parameter
df
## df
## 14.22405
curve(dt(x,df),lwd=2,col="blue",add=TRUE)
abline(v=tstat,col="red")
Calculate the permutation p-value.
mean(abs(myres)>abs(tstat))
## [1] 0.021
Let’s do the same for the data with the outlier.
myres <- rep(NA,nit)
for(j in 1:nit) myres[j] <- my.perm(xAnew,xB)
hist(myres,breaks=51,col="lightgrey",probability=TRUE)
curve(dt(x,df),lwd=2,col="blue",add=TRUE)
abline(v=tstat,col="red")
mean(abs(myres)>abs(tstat))
## [1] 0.0011