###
### R Lab 1 4/2/10 - Data Loading and Normalization
###
### For further examples see:
### http://www.bioconductor.org/workshops/2003/Milan/PDF/Lab3b.pdf
###
### Walks through reading Affymetrix CEL files into an AffyBatch, inspecting
### the raw intensities, and normalizing them with RMA.

## Load the appropriate libraries
library(Biobase)
library(genefilter)
library(affy)

## Directory holding the CEL files.
## The path is passed explicitly to the readers below instead of calling
## setwd(), so running this script does not change the session's working
## directory as a side effect.
cel_dir <- "./celfiles"

## Check help files on how to list celfiles
?list.celfiles
help.search("celfiles")

## Get the list of cel files (bare file names, relative to cel_dir)
celfilenames <- list.celfiles(cel_dir)

## Load in the data into an AffyBatch
affybatch1 <- ReadAffy(filenames = celfilenames, celfile.path = cel_dir)
affybatch1

## Visualize the affybatch
image(affybatch1)

## Figure out the dimensions of the raw data
dim(exprs(affybatch1))

## Boxplot of the first 10,000 features for the raw data.
## head() keeps at most 10,000 rows, so this also works on chips with fewer
## features (rawexprs[1:10000, ] would fail with "subscript out of bounds").
rawexprs <- exprs(affybatch1)
boxplot(head(rawexprs, 10000), col = 1:4)

## Boxplot of the first 10,000 features for the raw data log2 transformed
lrawexprs <- log2(rawexprs)
boxplot(head(lrawexprs, 10000), col = 1:4)

## Make density plots of the log2 intensities for the first three arrays
density1 <- density(lrawexprs[, 1])
plot(density1)
density2 <- density(lrawexprs[, 2])
lines(density2, col = "red")
density3 <- density(lrawexprs[, 3])
lines(density3, col = "blue")

## Get the gene names
gnames <- geneNames(affybatch1)
gnames[1:10]

## Find the number of probes per gene.
## NOTE(review): this divides the number of rows of the raw intensity matrix
## by the number of gene names, so it is an average number of features per
## gene rather than an exact per-gene probe count -- confirm this is the
## intended quantity.
nrow(rawexprs) / length(gnames)

## Normalize with rma
eset <- expresso(
  affybatch1,
  bgcorrect.method = "rma",
  normalize.method = "quantiles",
  pmcorrect.method = "pmonly",
  summary.method = "medianpolish"
)
eset

## Options for normalization
# Remember we talked about rma background correction
bgcorrect.methods()
# Remember we talked about quantile normalization
normalize.AffyBatch.methods()
# Remember we talked about median polish
express.summary.stat.methods()

## Alternative loading (quite a bit faster)
eset2 <- justRMA(filenames = celfilenames, celfile.path = cel_dir)
?justRMA

## Look at the normalized expression values
## (note they have been log2 transformed)
normexprs <- exprs(eset)
boxplot(normexprs, col = 1:4)

## Load in a different data set
library(affydata)
data(Dilution)
class(Dilution)
Dilution

## Look at pheno data for Affybatch (this is the covariate table)
pData(Dilution)