Report Subset1 UVV

Loading the packages:

# devtools provides install_bitbucket().
library(devtools)
# Install from Bitbucket only when the package is missing, so that re-running
# the report does not download and reinstall it every time.
# NOTE(review): assumes the repository installs the "metabolomicsUM" package
# that is loaded on the next line -- confirm.
if (!requireNamespace("metabolomicsUM", quietly = TRUE)) {
  install_bitbucket("chrisbcl/metabolomicsPackage")
}
library(metabolomicsUM)

Reading .csv files and creating the dataset:

# Work from the project folder: the relative paths used in this report
# ("metadata_uvv.R", "uvv_samples", "metadata_uvv.csv") are resolved against it.
setwd("~/Dropbox/fernanda")
# Run the helper script metadata_uvv.R from that folder.
source("metadata_uvv.R")
# Read the sample .csv files found in the "uvv_samples" folder.
samplelist <- read.csvs.folder("uvv_samples")

## [1] "Reading sample  uvv_samples/varfruglut.csv"
## [1] "Reading sample  uvv_samples/varfrutose.csv"
## [1] "Reading sample  uvv_samples/varglicose.csv"
## [1] "Reading sample  uvv_samples/vargliglut.csv"
## [1] "Reading sample  uvv_samples/varsacarose.csv"
## [1] "Reading sample  uvv_samples/varsacglut.csv"

# Produce the metadata file for the samples folder, then read it back.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
get.metadata("uvv_samples", write.file = TRUE, file.name = "metadata_uvv.csv")
metadata <- read.metadata("metadata_uvv.csv")

# Build the UV-Vis spectra dataset from the samples and their metadata,
# and label the axes.
ds <- dataset.from.peaks(samplelist, type = "uvv-spectra", metadata = metadata)
ds$labels$x <- "wavelength"
ds$labels$val <- "absorbance"

# Clean up the sample names: drop the "var" prefix and rename "gli" -> "glu"
# and "sac" -> "suc".
# "sacarose" must be replaced before "sac", otherwise it would become
# "sucarose" and the full-word replacement would no longer match.
sample.names <- get.sample.names(ds)
sample.names <- gsub("var", "", sample.names)
sample.names <- gsub("gli", "glu", sample.names)
sample.names <- gsub("sacarose", "sucrose", sample.names)
sample.names <- gsub("sac", "suc", sample.names)
ds <- set.sample.names(ds, sample.names)

# Print the dataset summary.
sum.dataset(ds)

## Dataset summary:
## Valid dataset
## Description:   
## Type of data:  uvv-spectra 
## Number of samples:  6 
## Number of data points 601 
## Number of metadata variables:  2 
## Label of x-axis values:  wavelength 
## Label of data points:  absorbance 
## Number of missing values in data:  0 
## Mean of data values:  0.6119147 
## Median of data values:  0.3575127 
## Standard deviation:  0.668702 
## Range of values:  0.01892327 2.269357 
## Quantiles: 
##         0%        25%        50%        75%       100% 
## 0.01892327 0.13567692 0.35751266 0.82376963 2.26935750

# Subset 1: restrict the dataset to the x-values (wavelengths) in the
# 280-450 interval.
sub1.ds <- subset.x.values.by.interval(ds, 280, 450)

Plotting the spectra:

# Plot the spectra of the subset.
# NOTE(review): "treatment" is presumably the metadata variable used to
# group/colour the samples -- confirm against plot.spectra().
plot.spectra(sub1.ds, "treatment", cex = 0.7)

Baseline correction (ALS method) was applied, followed by Savitzky-Golay smoothing. The Savitzky-Golay first derivative of the baseline-corrected spectra was also calculated:

# Baseline-correct the subset with the "als" method and plot the result.
sub1.bl <- baseline.correction(sub1.ds, method = "als")
plot.spectra(sub1.bl, "treatment", cex = 0.7)

# Savitzky-Golay smoothing (deriv = 0) of the baseline-corrected spectra.
sub1.bl.sg <- smoothing.interpolation(
  sub1.bl,
  method = "savitzky.golay", window = 15, p.order = 3, deriv = 0
)
plot.spectra(sub1.bl.sg, "treatment", cex = 0.7)

# Savitzky-Golay first derivative (deriv = 1). It is computed from the
# baseline-corrected spectra (sub1.bl), not from the already smoothed ones.
sub1.bl.sg.fd <- smoothing.interpolation(
  sub1.bl,
  method = "savitzky.golay", window = 15, p.order = 3, deriv = 1
)
plot.spectra(sub1.bl.sg.fd, "treatment", cex = 0.7)

Univariate analysis. Fold change and t-tests were calculated:

# t-tests on the baseline-corrected Savitzky-Golay first derivative,
# using "glutamine" as the grouping variable.
ttest.bl.sg.fd <- tTests.dataset(sub1.bl.sg.fd, "glutamine")
# Show the first ten rows of the result.
ttest.bl.sg.fd[1:10, ]

##         p.value   -log10       fdr
## 381 0.001111538 2.954076 0.1900730
## 306 0.014228405 1.846844 0.7592543
## 380 0.015673657 1.804830 0.7592543
## 298 0.033239795 1.478342 0.7592543
## 341 0.042528407 1.371321 0.7592543
## 340 0.052104739 1.283123 0.7592543
## 387 0.053814215 1.269103 0.7592543
## 342 0.063142777 1.199676 0.7592543
## 396 0.068175041 1.166375 0.7592543
## 299 0.073182686 1.135592 0.7592543

# Plot the t-test results for the first-derivative dataset with a
# threshold of 0.01.
# NOTE(review): tt.threshold is presumably a p-value cut-off -- confirm.
plot.ttests(sub1.bl.sg.fd, ttest.bl.sg.fd, tt.threshold = 0.01)

PCA Analysis:

# PCA on the baseline-corrected, Savitzky-Golay smoothed spectra.
pca.bl.sg <- pca.analysis.dataset(sub1.bl.sg)
# Print the importance of each principal component.
summary(pca.bl.sg)

## Importance of components:
##                            PC1    PC2     PC3     PC4     PC5       PC6
## Standard deviation     10.2594 6.3149 4.11109 2.45370 1.71612 1.192e-14
## Proportion of Variance  0.6155 0.2332 0.09884 0.03521 0.01722 0.000e+00
## Cumulative Proportion   0.6155 0.8487 0.94757 0.98278 1.00000 1.000e+00

# 2D PCA scores plot for the smoothed spectra, using the "glutamine" variable,
# with ellipses and sample labels enabled and the legend position set to "none".
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
pca.scoresplot2D(sub1.bl.sg, pca.bl.sg, "glutamine",
                 ellipses = TRUE, labels = TRUE, leg.pos = "none")

# PCA on the baseline-corrected Savitzky-Golay first derivative.
pca.bl.sg.fd <- pca.analysis.dataset(sub1.bl.sg.fd)
# Print the importance of each principal component.
summary(pca.bl.sg.fd)

## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5       PC6
## Standard deviation     8.1502 6.7484 6.0021 3.71989 3.02838 1.015e-14
## Proportion of Variance 0.3885 0.2663 0.2107 0.08092 0.05363 0.000e+00
## Cumulative Proportion  0.3885 0.6548 0.8655 0.94637 1.00000 1.000e+00

# 2D PCA scores plot for the first-derivative spectra, using the "glutamine"
# variable, with ellipses and sample labels enabled and the legend position
# set to "none".
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
pca.scoresplot2D(sub1.bl.sg.fd, pca.bl.sg.fd, "glutamine",
                 ellipses = TRUE, labels = TRUE, leg.pos = "none")

Clustering analysis with hierarchical clustering and k-means:

# Hierarchical clustering on the baseline-corrected, Savitzky-Golay smoothed
# spectra, followed by a dendrogram using the "glutamine" variable.
hc.bl.ds <- clustering(sub1.bl.sg, method = "hc")
dendrogram.plot.col(sub1.bl.sg, hc.bl.ds, "glutamine", leg.pos = "none")

# Hierarchical clustering on the baseline-corrected Savitzky-Golay first
# derivative, followed by a dendrogram using the "glutamine" variable.
hc.bl.sg.fd <- clustering(sub1.bl.sg.fd, method = "hc")
dendrogram.plot.col(sub1.bl.sg.fd, hc.bl.sg.fd, "glutamine", leg.pos = "none")

# k-means (2 clusters) on the baseline-corrected, Savitzky-Golay smoothed
# spectra.
# Fix the RNG seed so the cluster assignment and numbering are reproducible
# between runs of the report.
# NOTE(review): assumes clustering(method = "kmeans") draws its starting
# centres from R's random number generator, as stats::kmeans does -- confirm.
set.seed(1234)
kmeans.bl.sg <- clustering(sub1.bl.sg, method = "kmeans", num.clusters = 2)
kmeans.plot(sub1.bl.sg, kmeans.bl.sg)

# Tabulate which samples fall in each of the 2 k-means clusters
# (smoothed spectra).
kmeans.result.df(kmeans.bl.sg, 2)

##   cluster                 samples
## 1       1 fruglut frutose glucose
## 2       2 gluglut sucrose sucglut

# k-means (2 clusters) on the baseline-corrected Savitzky-Golay first
# derivative.
# Fix the RNG seed so the cluster assignment and numbering are reproducible
# between runs of the report.
# NOTE(review): assumes clustering(method = "kmeans") draws its starting
# centres from R's random number generator, as stats::kmeans does -- confirm.
set.seed(1234)
kmeans.bl.sg.fd <- clustering(sub1.bl.sg.fd, method = "kmeans", num.clusters = 2)
kmeans.plot(sub1.bl.sg.fd, kmeans.bl.sg.fd)

# Tabulate which samples fall in each of the 2 k-means clusters
# (first-derivative spectra).
kmeans.result.df(kmeans.bl.sg.fd, 2)

##   cluster                 samples
## 1       1 gluglut sucrose sucglut
## 2       2 fruglut frutose glucose