library(MASS)
library(RColorBrewer)
library(class)

mycols <- brewer.pal(8, "Dark2")[c(3, 2)]

sink("classification-out.txt")

#########
# Plots showing decision boundaries

s <- sqrt(1/5)
set.seed(30)

# draw n points, each from a bivariate normal centered at a randomly
# chosen row of the means matrix M
makeX <- function(M, n=100, sigma=diag(2)*s) {
  z <- sample(1:nrow(M), n, replace=TRUE)
  m <- M[z, ]
  return(t(apply(m, 1, function(mu) mvrnorm(1, mu, sigma))))
}

M0 <- mvrnorm(10, c(1, 0), diag(2))  # generate 10 means for class 0
x0 <- makeX(M0)                      # class 0 observations
M1 <- mvrnorm(10, c(0, 1), diag(2))  # 10 means for class 1
x1 <- makeX(M1)                      # class 1 observations

x <- rbind(x0, x1)
y <- c(rep(0, 100), rep(1, 100))
cols <- mycols[y + 1]

GS <- 75  # evaluate the classifiers on a GS x GS grid
XLIM <- range(x[, 1])
tmpx <- seq(XLIM[1], XLIM[2], len=GS)
YLIM <- range(x[, 2])
tmpy <- seq(YLIM[1], YLIM[2], len=GS)
newx <- expand.grid(tmpx, tmpy)

# linear regression on the 0/1 labels; these are the covariates
X1 <- x[, 1]
X2 <- x[, 2]
linear.fit <- lm(y ~ X1 + X2)
# slope and intercept of the yhat = 0.5 decision boundary
m <- -linear.fit$coef[2] / linear.fit$coef[3]
b <- (0.5 - linear.fit$coef[1]) / linear.fit$coef[3]

# prediction on the grid
yhat <- predict(linear.fit, newdata=data.frame(X1=newx[, 1], X2=newx[, 2]))
yhat <- as.numeric(yhat > 0.5)
colshat <- mycols[yhat + 1]

# four-panel figure: linear regression, kNN(1), kNN(15), Bayes rule
png("Plots/classification-plots-01.png", width=864, height=864, pointsize=12)
par(mfrow=c(2, 2), mgp=c(1.25, .5, 0), mar=c(2.25, 2.1, 1, 1))

plot(x, col=cols, xlab="X1", ylab="X2", xlim=XLIM, ylim=YLIM, type="n")
abline(b, m)
points(newx, col=colshat, pch=".")
points(x, col=cols)
title("Linear regression")

# KNN (1)
yhat <- knn(x, newx, y, k=1)
colshat <- mycols[as.numeric(yhat)]
plot(x, xlab="X1", ylab="X2", xlim=XLIM, ylim=YLIM, type="n")
points(newx, col=colshat, pch=".")
contour(tmpx, tmpy, matrix(as.numeric(yhat), GS, GS),
        levels=c(1, 2), add=TRUE, drawlabels=FALSE)
points(x, col=cols)
title("KNN (1)")

# KNN (15)
yhat <- knn(x, newx, y, k=15)
colshat <- mycols[as.numeric(yhat)]
plot(x, xlab="X1", ylab="X2", xlim=XLIM, ylim=YLIM, type="n")
points(newx, col=colshat, pch=".")
contour(tmpx, tmpy, matrix(as.numeric(yhat), GS, GS),
        levels=c(1, 2), add=TRUE, drawlabels=FALSE)
points(x, col=cols)
title("KNN (15)")

# Bayes classifier: probability of Y = 1 given X under the mixture model.
# NB: makeX() passes diag(2)*s to mvrnorm() as a *covariance* matrix, so
# the per-coordinate sd of the simulated data is sqrt(s); the dnorm()
# calls below use sd = s, which instead corresponds to covariance s^2 * I.
p <- function(x) {
  p0 <- mean(dnorm(x[1], M0[, 1], s) * dnorm(x[2], M0[, 2], s))
  p1 <- mean(dnorm(x[1], M1[, 1], s) * dnorm(x[2], M1[, 2], s))
  p1 / (p0 + p1)
}
bayesrule <- apply(newx, 1, p)
colshat <- mycols[as.numeric(bayesrule > 0.5) + 1]
plot(x, xlab="X1", ylab="X2", xlim=XLIM, ylim=YLIM, type="n")
points(newx, col=colshat, pch=".")
contour(tmpx, tmpy, matrix(round(bayesrule), GS, GS),
        levels=c(1, 2), add=TRUE, drawlabels=FALSE)
points(x, col=cols)
title("Bayes rule")
dev.off()

##################
# Plots showing linear regression boundaries (three classes)

s <- sqrt(1/500)
s1 <- sqrt(1/350)
mycols <- brewer.pal(8, "Dark2")[c(6, 3, 2)]
set.seed(30)
GS <- 300  # finer grid for these figures
N <- 333

M1 <- mvrnorm(10, c(-1/2, 0), matrix(c(3*s1, 0, 0, s1), 2, 2))
M1[1, ] <- c(0, 1.5)
M1[2, ] <- c(0, .5)
M1[3, ] <- c(-.5, .2)
x1 <- makeX(M1, n=N, sigma=diag(2)*s)
M2 <- mvrnorm(10, c(-1, 1), diag(2)*s1)
x2 <- makeX(M2, n=N, sigma=diag(2)*s)
M3 <- mvrnorm(10, c(1, .5), matrix(c(s1/10, 0, 0, s1*4), 2, 2))
M3[1, ] <- c(0.5, -0.25)
M3[2, ] <- c(0.4, 1.75)
x3 <- makeX(M3, n=N, sigma=diag(2)*s)

X <- rbind(x1, x2, x3)
Index <- rep(1:3, c(N, N, N))
X1 <- X[, 1]
X2 <- X[, 2]
XLIM <- range(X1)
tmpx <- seq(XLIM[1], XLIM[2], len=GS)
YLIM <- range(X2)
tmpy <- seq(YLIM[1], YLIM[2], len=GS)
newx <- as.matrix(expand.grid(tmpx, tmpy))

# indicator (one-hot) response matrix
Y <- t(sapply(Index, function(x) as.numeric((1:3) %in% x)))

# linear regression on the indicator matrix
B <- apply(Y, 2, function(y) lm(y ~ X1 + X2)$coef)
GridY <- cbind(1, newx) %*% B
GridG <- apply(GridY, 1, which.max)
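## Added sketch (not in the original): training-set predictions and
## misclassification rate for the indicator-matrix regression above.
## TrainG is a new helper variable; print() output goes to the sink()
## file opened at the top of the script.
TrainG <- apply(cbind(1, X1, X2) %*% B, 1, which.max)
print(mean(TrainG != Index))  # training error rate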
png("Plots/classification-plots-02.png", width=864, height=432, pointsize=12) par(mfrow=c(1,2), mgp=c(1.25,.5,0), mar=c(2.25, 2.1, 1,1)) plot(X1, X2, col=mycols[Index], pch=as.character(Index)) contour(tmpx, tmpy, matrix(GridG, GS, GS), levels=c(1,2,3), add=TRUE, drawlabels=FALSE) # linear regression with quadratic terms X3 <- X1^2 X4 <- X2^2 X5 <- X1*X2 B <- apply(Y, 2, function(y) lm(y~X1+X2+X3+X4+X5)$coef) GridY <- cbind(1,newx,newx^2,newx[,1]*newx[,2]) %*% B GridG <- apply(GridY, 1, which.max) plot(X1, X2, col=mycols[Index], pch=as.character(Index)) contour(tmpx, tmpy, matrix(GridG,GS,GS), levels=c(1,2), add=TRUE, drawlabels=FALSE) dev.off() ### # plots showing lda and qda decision boundaries Y <- as.factor(Index) # lda fit1 <- lda(Y~X1+X2) Ghat <- as.numeric(predict(fit1, newdata=data.frame(X1=newx[,1], X2=newx[,2]))$class) png("Plots/classification-plots-07.png", width=864, height=864, pointsize=12) par(mfrow=c(2,2), mgp=c(1.25,.5,0), mar=c(2.25, 2.1, 1,1)) plot(X1,X2, col=mycols[Index], pch=as.character(Index),cex=1.25) contour(tmpx, tmpy, matrix(Ghat,GS,GS), levels=c(1,2), add=TRUE, drawlabels=FALSE) title("LDA") # lda with quadratic terms fit2 <- lda(Y~X1+X2+X3+X4+X5) Ghat <- as.numeric(predict(fit2, newdata=data.frame(X1=newx[,1],X2=newx[,2],X3=newx[,1]^2,X4=newx[,2]^2,X5=newx[,1]*newx[,2]))$class) plot(X1,X2, col=mycols[Index], pch=as.character(Index), cex=1.25) contour(tmpx, tmpy, matrix(Ghat,GS,GS), levels=c(1,2), add=TRUE, drawlabels=FALSE) title("LDA with quadratic terms") # qda fit3 <- qda(Y~X1+X2) Ghat <- as.numeric(predict(fit3, newdata=data.frame(X1=newx[,1],X2=newx[,2]))$class) plot(X1, X2, col=mycols[Index], pch=as.character(Index), cex=1.25) contour(tmpx, tmpy, matrix(Ghat, GS, GS), levels=c(1,2), add=TRUE, drawlabels=FALSE) title("QDA") dev.off() ###### # Figure showing problem with indicator matrix regression N <- 300 Gs <- 300 set.seed(3) sigma <- 0.10 rho <- -0.15 Index <- sample(1:3, N, replace=TRUE) mx <- c(-1,0,1)[Index] X <- t(sapply(mx, function(mu) mvrnorm(1, c(mu,mu), Sigma=sigma*matrix(c(1,rho,rho,1),2,2)))) X1 <- X[,1] X2 <- X[,2] Y <- t(sapply(Index, function(x) as.numeric(1:3 %in% x))) XLIM <- range(X1) tmpx <- seq(XLIM[1], XLIM[2], len=GS) YLIM <- range(X2) tmpy <- seq(YLIM[1], YLIM[2], len=GS) newx <- as.matrix(expand.grid(tmpx, tmpy)) B <- apply(Y, 2, function(y) lm(y~X1+X2)$coef) Yhat1 <- cbind(1,X) %*% B GridY <- cbind(1,newx) %*% B GridG <- apply(GridY, 1, which.max) png("Plots/classification-plots-03.png", width=864, height=432, pointsize=12) par(mfrow=c(1,2), mgp=c(1.25,.5,0), mar=c(2.25, 2.1, 1,1)) plot(X1, X2, col=mycols[Index], pch=as.character(Index)) contour(tmpx, tmpy, matrix(GridG, GS, GS), levels=c(1,2,3), add=TRUE, drawlabels=FALSE) X3 <- X[,1]^2 X4 <- X[,2]^2 X5 <- X[,1] * X[,2] B <- apply(Y, 2, function(y) lm(y~X1+X2+X3+X4+X5)$coef) Yhat2 <- cbind(1,X,X^2,X[,1]*X[,2]) %*% B GridY <- cbind(1,newx,newx^2,newx[,1]*newx[,2]) %*% B GridG <- apply(GridY, 1, which.max) plot(X1, X2, col=mycols[Index], pch=as.character(Index)) contour(tmpx, tmpy, matrix(GridG, GS, GS), levels=c(1,2,3), add=TRUE, drawlabels=FALSE) dev.off() png("Plots/classification-plots-04.png", width=864, height=432, pointsize=12) par(mfrow=c(1,2), mgp=c(1.25,.5,0), mar=c(2.25, 2.1, 1,1)) U1 <- X %*% c(1/sqrt(2), 1/sqrt(2)) matplot(U1, Yhat1, col=mycols) for (i in 1:3) rug(U1[Index==i], col=mycols[i]) matplot(U1, Yhat2, col=mycols) for (i in 1:3) rug(U1[Index==i], col=mycols[i]) dev.off() ##################### # vowel data library(ElemStatLearn) data(vowel.train) mycols2 <- 
#####################
# Vowel data

library(ElemStatLearn)  # archived from CRAN; install from the CRAN archive if needed
data(vowel.train)
mycols2 <- brewer.pal(11, "Paired")

png("Plots/classification-plots-05.png", width=432, height=432, pointsize=12)
par(mfrow=c(1, 1), mgp=c(1.25, .5, 0), mar=c(2.25, 2.1, 1, 1))
plot(vowel.train$x.1, vowel.train$x.2, type="n",
     xlab="First Coordinate", ylab="Second Coordinate")
text(vowel.train$x.1, vowel.train$x.2, as.character(vowel.train$y),
     col=mycols2[vowel.train$y], cex=1.2)
means <- apply(vowel.train[c("x.1", "x.2")], 2,
               function(x) tapply(x, vowel.train$y, mean, simplify=TRUE))
points(means, col=mycols2, pch=16, cex=2)  # class means
dev.off()

########################
# lda plots

sigma <- .15
rho <- 0.5
Sigma <- matrix(c(1, rho, rho, 1), 2, 2) * sigma
mu <- matrix(c(-1/sqrt(2), 0, 1/sqrt(2), 0, 1, 0), 3, 2)  # one mean per row
XLIM <- c(-2, 2)
YLIM <- c(-2, 2)
tmpx <- seq(XLIM[1], XLIM[2], len=GS)
tmpy <- seq(YLIM[1], YLIM[2], len=GS)
newx <- as.matrix(expand.grid(tmpx, tmpy))

# LDA discriminants:
# delta_k(x) = x' Sigma^-1 mu_k - (1/2) mu_k' Sigma^-1 mu_k + log pi_k,
# with equal priors pi_k = 1/3 (a constant, so it does not change which.max)
iSigma <- solve(Sigma)
dkx <- newx %*% iSigma %*% t(mu)
tmp <- diag(0.5 * mu %*% iSigma %*% t(mu)) - log(1/3)
Yhat <- sweep(dkx, 2, tmp)
Ghat <- apply(Yhat, 1, which.max)

library(ellipse)
png("Plots/classification-plots-06.png", width=864, height=432, pointsize=12)
par(mfrow=c(1, 2), mgp=c(1.25, .5, 0), mar=c(2.25, 2.1, 1, 1))
plot(0, 0, type="n", ylim=YLIM, xlim=XLIM, xlab="", ylab="")
for (i in 1:3) {
  lines(ellipse(Sigma, centre=mu[i, ]), col=mycols[i], lwd=2)
}
points(mu, pch=3)
contour(tmpx, tmpy, matrix(Ghat, GS, GS),
        levels=c(1, 2, 3), add=TRUE, drawlabels=FALSE)

# now estimate the boundary from a sample; Index (length 300) is reused
# from the previous section
K <- 3
X <- t(apply(mu[Index, ], 1,
             function(mu) mvrnorm(1, c(mu[1], mu[2]), Sigma=Sigma)))
muhat <- t(sapply(1:3, function(i) colMeans(X[Index == i, ])))
Sigmahat <- sapply(1:3, function(i) {
  X <- sweep(X[Index == i, ], 2, muhat[i, ])
  cov(X)
})
Sigmahat <- matrix(rowMeans(Sigmahat), 2, 2)  # average of the class covariances

plot(X, col=mycols[Index], pch=as.character(Index), xlab="", ylab="")
contour(tmpx, tmpy, matrix(Ghat, GS, GS),
        levels=c(1, 2, 3), add=TRUE, drawlabels=FALSE, lty=2)  # true boundary
iSigma <- solve(Sigmahat)
dkx <- newx %*% iSigma %*% t(muhat)
tmp <- diag(0.5 * muhat %*% iSigma %*% t(muhat)) - log(1/3)
Yhat <- sweep(dkx, 2, tmp)
Ghat <- apply(Yhat, 1, which.max)
contour(tmpx, tmpy, matrix(Ghat, GS, GS),
        levels=c(1, 2, 3), add=TRUE, drawlabels=FALSE)  # estimated boundary
dev.off()

######
# Separating hyperplanes

s <- sqrt(1/5)
set.seed(23)
M0 <- mvrnorm(3, c(1, 1), diag(2))
x0 <- makeX(M0, n=10, sigma=diag(2)*s)
M1 <- mvrnorm(3, c(-1, -1), diag(2))
x1 <- makeX(M1, n=10, sigma=diag(2)*s)
mycols <- brewer.pal(8, "Dark2")[c(3, 2)]
x <- rbind(x0, x1)
y <- c(rep(-1, 10), rep(1, 10))
cols <- mycols[y/2 + 1.5]  # maps y = -1 -> color 1, y = 1 -> color 2

# two runs of Rosenblatt's perceptron from different starting values
source("rosenblatt.R")
beta1 <- rosen(cbind(1, x), y, beta0=1)
beta2 <- rosen(cbind(1, x), y, beta0=2)

png("Plots/classification-plots-08.png", width=432, height=432, pointsize=12)
par(mgp=c(1.25, .5, 0), mar=c(2.25, 2.1, 1, 1))
plot(x, col=cols, pch=19)
m <- -beta1[2] / beta1[3]
b <- -beta1[1] / beta1[3]
abline(b, m)
m <- -beta2[2] / beta2[3]
b <- -beta2[1] / beta2[3]
abline(b, m)

# least squares boundary for comparison
X1 <- x[, 1]
X2 <- x[, 2]
y <- ifelse(y == -1, 0, 1)
lmfit <- lm(y ~ X1 + X2)
beta3 <- lmfit$coef
m <- -beta3[2] / beta3[3]
b <- (0.5 - beta3[1]) / beta3[3]
abline(b, m, col="orange")
dev.off()

sink()  # close the sink opened at the top of the script
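## Added sketch (not in the original): check that the two perceptron
## solutions separate the training data. This assumes rosen() (defined in
## the external rosenblatt.R, not shown here) returns a length-3
## coefficient vector (intercept, slope1, slope2), as the plotting code
## above implies, and that sign(x'beta) encodes the predicted class.
yy <- 2*y - 1  # recover the -1/+1 labels (y was recoded to 0/1 for lm)
cat("fraction separated, run 1:", mean(sign(cbind(1, x) %*% beta1) == yy), "\n")
cat("fraction separated, run 2:", mean(sign(cbind(1, x) %*% beta2) == yy), "\n")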