R plot

Con R podemos también visualizar los resultados. El script mg_classifier en sus últimas versiones (v1.7.7) genera un archivo (families_plot.csv) con las 15 familias más abundantes listas para ser graficadas con un script en R, ya probado en R Studio.

El archivo families_plot.csv tiene la siguiente estructura, la primer línea es un comentario, por eso empieza con #

# Only the 15 most abundant families
phylum-family,F3D0,F3D150,F3D9,Mock
Bacteroidetes-S24-7_f,3813,3086,2575,10
Firmicutes-Lachnospiraceae,2143,1305,2124,2
Bacteroidetes-Bacteroidaceae,189,216,686,274
Firmicutes-Ruminococcaceae,477,203,483,0
Bacteroidetes-Rikenellaceae,198,138,490,0
Firmicutes-Lactobacillaceae,123,157,256,141
Firmicutes-Staphylococcaceae,0,0,0,628
Firmicutes-Streptococcaceae,5,0,0,492
Firmicutes-Clostridiaceae,132,7,0,310
Proteobacteria-Moraxellaceae,0,0,0,412
Firmicutes-Bacillaceae,0,0,0,389
Actinobacteria-Actinomycetaceae,0,0,0,366
Proteobacteria-Helicobacteraceae,0,0,0,363
Proteobacteria-Neisseriaceae,0,0,0,305
Proteobacteria-Enterobacteriaceae,6,0,2,200

El script families_plot.r puedes copiar y pegarlo en un archivo o bien descargarlo de aquí

# To be used with the families_plot.csv file produced by mg_classifier.v1.7.7
# Import csv with headings
families_plot <- read.csv("families_plot.csv")
leyenda <- (families_plot$phylum.family) # gets the data for the legend
mymat <- t(families_plot[-1]) # convert table to matrix except column 1
data_percentage=apply(t(mymat), 2, function(x){x*100/sum(x,na.rm=T)}) # calculates the percentages
library(RColorBrewer) # library for color pallets
coul = brewer.pal(12, "Set3") #12 color pallete
barplot(
  data_percentage,
  main = "Family", family = "mono",
  #AXES FORMAT
  las = 2, # sample name rotated
  ylab = "percentage", # y axis label
  #xlim = c(0, 35), # reduces the width of the x axis
  cex.axis = 1, # font size of y axis
  cex.names = 0.8, # font of the bar labels (x)
  # LEGEND
  legend.text = leyenda, # legend from the variable above
  args.legend = list(x = "right", cex = 1, title ="Phylum-Family"),
  # PALLET
  col=coul) # color pallet

Phyla

También tenemos scripts para graficar con R o R-Studio, los datos de los archivos con número de phyla.

phyla15plot.r

# To be used with the phyla15_plot file produced by mg_classifier.v1.7.7
# Import with headings
leyenda <- (phyla15_plot$phylum) # gets the data for the legend, row 1
mymat <- t(phyla15_plot[-1]) # convert table to matrix except column 1
data_percentage=apply(t(mymat), 2, function(x){x*100/sum(x,na.rm=T)}) # calculates the percentages
par(mar = c(6, 4, 4, 2) + 0.1) # set bottom margin to a higher value
barplot(
  data_percentage,
  main = "95% of phyla", family = "mono",
  #AXES FORMAT
  #las = 2, # sample name rotated
  ylab = "percentage", # y axis label
  xlim = c(0, 6.5), # reduces the width of the x axis, higher value shorter x
  cex.axis = 1.1, # font size of y axis
  cex.names = 1.1, # font of the bar labels (x)
  # LEGEND
  legend.text = leyenda, # legend from the variable above
  args.legend = list(x = "right", cex = 1.2, title ="Phylum", ncol = 1, inset = 0, xpd=TRUE),
 # args.legend = list(x = "right", ncol = 1, cex = 1.3, inset = 0, xpd=TRUE),
  # COLOR PALLET
  col=c("#8DD3C7", "#FFFFB3", "#BEBADA", "#FB8072", "#80B1D3", "#FDB462", "#B3DE69", "#FCCDE5", "#D9D9D9", "#BC80BD", "#CCEBC5", "#FFED6F", "#FF9864", "#9997FC", "#9C9C9C")
) # end of plot
# Jugar con xlim e inset para ajustar el tamano del grafico y la legenda

phyla_plor.r

#!/usr/bin/env Rscript
# To be used with the phyla_plot.csv file produced by mg_classifier.v1.7.9
# Import with headings
phyla_plot <- read.csv("phyla_plot.csv", comment.char="#")
leyenda <- (phyla_plot$phylum) # gets the data for the legend, row 1
mymat <- t(phyla_plot[-1]) # convert table to matrix except column 1
data_percentage=apply(t(mymat), 2, function(x){x*100/sum(x,na.rm=T)}) # calculates the percentages
# PLOT
png(file="phyla.png",width=2000,height=1600,res=300)
barplot(
  data_percentage,
  main = "Phyla", family = "mono", cex = 0.6,
  # BARS
  width = 1, # width of each bar
  #AXES FORMAT
  las = 2, # sample name rotated
  ylab = "percentage", cex = 0.6, # y axis label
  xlim = c(1, 40), # reduces the width of the x axis, higher value shorter x
  cex.axis = 0.6, # font size of y axis
  cex.names = 0.8, # font of the bar labels (x)
  # LEGEND
  legend.text = leyenda, # legend from the variable above
  args.legend = list(x = "topright", cex = 0.6, title ="Phylum", inset=c(-0.02,0), xpd=TRUE),
  # COLOR PALLET
  col=c("#8DD3C7", "#FFFFB3", "#BEBADA", "#FB8072", "#80B1D3", "#FDB462", "#B3DE69", "#FCCDE5", "#D9D9D9", "#BC80BD", "#CCEBC5", "#FFED6F", "#FF9864", "#9997FC", "#9C9C9C")
) # end of plot
dev.off()
# Jugar con xlim e inset para ajustar el tamano del grafico y la legenda