###
###  R Lab 1 4/2/10 - Data Loading and Normalization
###
###  For further examples see: http://www.bioconductor.org/workshops/2003/Milan/PDF/Lab3b.pdf
###


## Load the appropriate libraries

library(Biobase)
library(genefilter)
library(affy)

## Set your working directory

# NOTE: this changes the working directory for the rest of the session. The
# CEL file names collected below are bare names, so later steps that reuse
# them presumably rely on still being in this directory.
setwd("./celfiles")

## Look up the documentation on how to list CEL files

help("list.celfiles")
help.search("celfiles")

## Collect the names of the CEL files

celfilenames <- list.celfiles()

## Read the CEL files into an AffyBatch, then print its summary

affybatch1 <- ReadAffy(filenames = celfilenames)
affybatch1

## Visualize the affybatch

image(affybatch1)


## Figure out the dimensions of the raw data
## (the matrix is extracted once and reused for the plots below)

rawexprs <- exprs(affybatch1)
dim(rawexprs)

## Boxplot of the first 10,000 rows of the raw intensity matrix, one box per
## column. head() keeps the matrix shape and returns every row when there are
## fewer than 10,000 of them, where rawexprs[1:10000, ] would stop with
## "subscript out of bounds" (or collapse to a vector for a single column).

boxplot(head(rawexprs, 10000), col = 1:4)

## Same boxplot for the log2-transformed raw data

lrawexprs <- log2(rawexprs)

boxplot(head(lrawexprs, 10000), col = 1:4)

## Make density plots

# Estimate the three densities first so the axes can be sized to fit all of
# them; plot() alone would size the axes to the first curve and clip the
# others wherever they extend beyond it.
density1 <- density(lrawexprs[, 1])
density2 <- density(lrawexprs[, 2])
density3 <- density(lrawexprs[, 3])

plot(
  density1,
  xlim = range(density1$x, density2$x, density3$x),
  ylim = range(0, density1$y, density2$y, density3$y)
)

# Overlay the second and third columns on the same axes
lines(density2, col = "red")
lines(density3, col = "blue")


## Get the "gene names" (for an AffyBatch these are the probe set IDs from the
## CDF, not gene symbols) and show the first few
gnames <- geneNames(affybatch1)
head(gnames, 10)


## Rows of the raw intensity matrix per probe set.
## The matrix has one row per cell on the chip (PM, MM and control cells), so
## this ratio is only a rough upper bound on the probes per probe set.
nrow(rawexprs) / length(gnames)

## Average number of PM probes per probe set.
## probeNames() gives the probe set ID of every PM probe, so its length is the
## number of PM probes actually assigned to a probe set.
length(probeNames(affybatch1)) / length(gnames)


## Normalize with rma

# Run each step explicitly through expresso(): RMA background correction,
# quantile normalization, PM-only correction and median polish summarization.
eset <- expresso(
  affybatch1,
  bgcorrect.method = "rma",
  normalize.method = "quantiles",
  pmcorrect.method = "pmonly",
  summary.method = "medianpolish"
)
eset

## Available choices for each step

# Background correction methods (remember we talked about "rma")
bgcorrect.methods()

# Normalization methods (remember we talked about quantile normalization)
normalize.AffyBatch.methods()

# Summarization methods (remember we talked about median polish)
express.summary.stat.methods()


## Alternative loading (quite a bit faster)

# Pass the file names through the documented `filenames` argument rather than
# positionally via `...`, matching the ReadAffy() call that built affybatch1.
eset2 <- justRMA(filenames = celfilenames)
?justRMA

## Look at the normalized expression values from the expresso() result
## (note they have been log2 transformed)
normexprs <- exprs(eset)
boxplot(normexprs, col = 1:4)

## Load in a different data set

library(affydata)

# Load the example data set, then check its class and print its summary
data("Dilution")
class(Dilution)
Dilution

## Inspect the pheno data of the AffyBatch (the covariate table)
pData(Dilution)