# Jacob Mills - 2020-2021 ## Rare genera differentiate urban green space soil bacterial communities in three cities across the world ### R s script for statistical analysis #### Things to set up before starting # make sure your taxonomic table is named "taxmat.csv" # make sure your otu table is named "otumat.csv" # make sure your metadata table is named "metadata.csv" # you'll need to manually input column names for otumat and taxmat # you'll need to change your metadata variable of interest throughout, here mine is labelled "veg_type" # paste 'usearch' executable into your working directory # make a folder within your working directory called "results" dir.create("alpha") dir.create("ordination") dir.create("mvabund") dir.create("da.heatmap") dir.create("./da.heatmap/genus") dir.create("./da.heatmap/ASV") library(phyloseq) library(ggplot2) library(RColorBrewer) library(dplyr) library(plyr) library(vegan) library(reshape2) library(DESeq2) library(readr) library(knitr) library(pairwiseAdonis) library(Biostrings) library(microbiome) library(reltools) library(mvabund) library(pheatmap) library(viridis) library(readxl) library(MASS) library(magrittr) library(pscl) library(lsmeans) library(ape) library(psych) library(usdm) library(pairwiseAdonis) # set theme for ggplot2 theme_set(theme_bw()) # import data (tax_table, otu_table, sample_data) taxmat = as.data.frame(read_csv("taxmat.csv")) otumat = as.data.frame(read_csv("otumat.csv")) metadata = as.data.frame(read_csv("metadata.csv")) refseqsdata = as.data.frame(read_csv("refseqs.csv")) ref.seqs = as.factor(refseqsdata$refseqs) # set otu sample names colnames(otumat) = c("OTUID", "B.Lo1", "B.Lo2", "B.Lo3", "B.Lo4", "B.Lo5", "B.Hi1", "B.Hi2", "B.Hi3", "B.Hi4", "B.Hi5", "B.Hi6", "H.Lo1", "H.Lo2", "H.Lo3", "H.Lo4", "H.Lo5", "H.Lo6", "H.Hi1", "H.Hi2", "H.Hi3", "H.Hi4", "H.Hi5", "P.Lo1", "P.Lo2", "P.Lo3", "P.Lo4", "P.Lo5", "P.Hi1", "P.Hi2", "P.Hi3", "P.Hi4", "P.Hi5") # set taxonomy rank names colnames(taxmat) = c("OTUID", "Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species") # clean taxmat data frame taxmat[ ,-1] = data.frame(lapply(taxmat[ ,-1], function(x) {gsub("k__", "", x)})) taxmat[ ,-1] = data.frame(lapply(taxmat[ ,-1], function(x) {gsub("p__", "", x)})) taxmat[ ,-1] = data.frame(lapply(taxmat[ ,-1], function(x) {gsub("c__", "", x)})) taxmat[ ,-1] = data.frame(lapply(taxmat[ ,-1], function(x) {gsub("o__", "", x)})) taxmat[ ,-1] = data.frame(lapply(taxmat[ ,-1], function(x) {gsub("f__", "", x)})) taxmat[ ,-1] = data.frame(lapply(taxmat[ ,-1], function(x) {gsub("g__", "", x)})) taxmat[ ,-1] = data.frame(lapply(taxmat[ ,-1], function(x) {gsub("s__", "", x)})) taxmat[ ,-1] = data.frame(lapply(taxmat[ ,-1], function(x) {gsub("[(0-9)]", "", x)})) # check that otumat and taxmat rows are aligned by "OTUID" identical(taxmat$OTUID, otumat$OTUID) identical(taxmat$OTUID, refseqsdata$OTUID) # create objects to merge into phyloseq object OTU = otu_table(otumat[,-1], taxa_are_rows = TRUE) TAX = tax_table(taxmat) colnames(TAX) = c("OTUID", "Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species") # combine OTU, TAX into one phyloseq object physeq = phyloseq(OTU, TAX) # create metadata object META for input to physeq object META = sample_data(metadata) row.names(META) = sample_names(physeq) META$City = factor(META$City, levels = c("Bournemouth", "Haikou", "Playford")) META$VegetationType = factor(META$VegetationType, levels = c("Parkland", "Lawn", "Regenerated", "Remnant", "Revegetated", "OldRegen", "YoungRegen", "Vacant")) META$VegetationDiv = factor(META$VegetationDiv, levels = c("Low", "High")) physeq = merge_phyloseq(physeq, META) refseqs = DNAStringSet(ref.seqs, use.names = FALSE) names(refseqs) = taxa_names(physeq) physeq = merge_phyloseq(physeq, refseqs) # set META sample names to otumat sample names rownames(META) = c("B.Lo1", "B.Lo2", "B.Lo3", "B.Lo4", "B.Lo5", "B.Hi1", "B.Hi2", "B.Hi3", "B.Hi4", "B.Hi5", "B.Hi6", "H.Lo1", "H.Lo2", "H.Lo3", "H.Lo4", "H.Lo5", "H.Lo6", "H.Hi1", "H.Hi2", "H.Hi3", "H.Hi4", "H.Hi5", "P.Lo1", "P.Lo2", "P.Lo3", "P.Lo4", "P.Lo5", "P.Hi1", "P.Hi2", "P.Hi3", "P.Hi4", "P.Hi5") ### preprocess data i.e. remove rare taxa and rarefy #-------- # remove rare taxa physeq = prune_taxa(taxa_sums(physeq) >= 10, physeq) # construct phylogenetic tree library(msa) refseqs.phy = refseq(physeq) refseqs.phy mult <- msa(refseqs.phy, method="ClustalW", type="dna", order="input") library("phangorn") phang.align <- as.phyDat(mult, type="DNA", names=names(refseqs.phy)) dm <- dist.ml(phang.align) treeNJ <- NJ(dm) # Note, tip order != sequence order fit = pml(treeNJ, data=phang.align) ## negative edges length changed to 0! fitGTR <- update(fit, k=4, inv=0.2) fitGTR <- optim.pml(fitGTR, model="GTR", optInv=TRUE, optGamma=TRUE, rearrangement = "stochastic", control = pml.control(trace = 0)) # merge phylogenetic tree into physeq object physeq = merge_phyloseq(physeq, phy_tree(fitGTR$tree)) # rarefaction for alpha-diversity # calculate sample sums and delete to check adequate read depth sample_sums(physeq) min(sample_sums(physeq)) seed = 123 r.physeq = rarefy_even_depth(physeq, sample.size = min(sample_sums(physeq)), rngseed = seed, replace = FALSE, trimOTUs = TRUE, verbose = TRUE) #export rarefied pruned otumat for use in heatmaps and alpha write_phyloseq(r.physeq, type = "all", path = "./da.heatmap/ASV/") write_phyloseq(r.physeq, type = "all", path = "./alpha/") ###!!!! use unrarefied 'physeq' object for beta-diversity # checks min(taxa_sums(physeq)) ntaxa(physeq) sample_sums(physeq) nsamples(physeq) sample_names(physeq) rank_names(physeq) sample_variables(physeq) # checks min(taxa_sums(r.physeq)) ntaxa(r.physeq) sample_sums(r.physeq) nsamples(r.physeq) sample_names(r.physeq) rank_names(r.physeq) sample_variables(r.physeq) #export pruned otumat for use in Tax4Fun2 functional assignment write_phyloseq(physeq, type = "all", path = "./") #export unrarefied and pruned otumat for use in mvabund write_phyloseq(physeq, type = "all", path = "./mvabund") # checks min(taxa_sums(genus.glom)) ntaxa(genus.glom) sample_sums(genus.glom) nsamples(genus.glom) sample_names(genus.glom) rank_names(genus.glom) sample_variables(genus.glom) #------ # Differential abundance testing using DESeq2 #------ # perform diff abund test with Genera ## agglomerate ASVs to genera # make new phyloseq object from original physeq.g = physeq # Define the tax_table so exclude the OTU IDs in first column tax_table(physeq.g) = tax_table(physeq.g)[,2:8] # Agglomerate all ASVs within a genus to get total reads for each genus genus.glom = tax_glom(physeq.g,taxrank = "Genus") genus.glom # Do differential abundance test on unrarefied data library(DESeq2) library(raster) library(BiocGenerics) diagdds.g = phyloseq_to_deseq2(genus.glom, ~ VegetationDiv) diagdds.g = DESeq(diagdds.g, test = "Wald", fitType = "parametric") res = results(diagdds.g, cooksCutoff = FALSE) alpha = 0.05 sigtab.g = res[which(res$padj < alpha),] sigtab.g = cbind(as(sigtab.g, "data.frame"), as(tax_table(genus.glom)[rownames(sigtab.g),], "matrix")) head(sigtab.g) dim(sigtab.g) # write the sigtab to .csv, use this to tell you which genera to use in heatmaps write.csv(sigtab.g, file = "./da.heatmap/genus/diff.abund.test.g.csv") min(sample_sums(genus.glom)) # rarefy and write for data to use in heatmaps (NB: DESeq2 does a normalisation within itself) set.seed(123) r.genus.glom = rarefy_even_depth(genus.glom, sample.size = min(sample_sums(genus.glom)), rngseed = seed, replace = FALSE, trimOTUs = TRUE, verbose = TRUE) # use these data to make heatmaps write_phyloseq(r.genus.glom, type = "all", path = "./da.heatmap/genus/") # in excel, combine the otu and tax outputs from above onto a single sheet (make sure they're aligned correctly) and delete any that didn't show up as differentially abundant # make a new otu table with genus as row names and samples as columns > use this for heatmaps ##### # Plot heatmap of differentially abundant genera in pheatmap library(pheatmap) library(dplyr) da.otumat.g = read.csv(file = "./da.heatmap/genus/da.otumat.g.csv", header = TRUE, row.names = 1) metadata = read.csv(file = "metadata.csv", header = TRUE, row.names = 1) meta.veg = select(metadata, VegetationDiv) meta.veg$VegetationDiv = factor(meta.veg$VegetationDiv, levels = c("Low", "High")) VegetationCol = c("#999999", "#000000") names(VegetationCol) = levels(meta.veg$VegetationDiv) AnnColour = list(VegetationDiv = VegetationCol) da.heat.g = pheatmap(da.otumat.g, color = colorRampPalette(rev(brewer.pal(n = 7, name = "BuPu")))(100), scale = "row", clustering_distance_rows = "manhattan", clustering_distance_cols = "manhattan", clustering_method = "average", annotation_col = meta.veg, annotation_colors = AnnColour, cutree_rows = 1, main = "All cities") da.heat.g ggsave(plot = da.heat.g, filename = paste0("./da.heatmap/genus/da.heat.g.pdf"), width = 14.5, height = 6, units = "cm", dpi = 600) ########### GENUS AGGREGATED ANALYSIS ############## dir.create("./alpha/genus") r.genus.glom = rarefy_even_depth(genus.glom, sample.size = min(sample_sums(genus.glom)), rngseed = seed, replace = FALSE, trimOTUs = TRUE, verbose = TRUE) write_phyloseq(r.genus.glom, type = "all", path = "./alpha/genus/") # checks min(taxa_sums(r.genus.glom)) ntaxa(r.genus.glom) sample_sums(r.genus.glom) nsamples(r.genus.glom) sample_names(r.genus.glom) rank_names(r.genus.glom) sample_variables(r.genus.glom) sample_data(r.genus.glom)$CityVegComp = factor(c("B.Low","B.Low","B.Low","B.Low","B.Low","B.High","B.High","B.High","B.High","B.High","B.High","H.Low","H.Low","H.Low","H.Low","H.Low","H.Low","H.High","H.High","H.High","H.High","H.High","P.Low","P.Low","P.Low","P.Low","P.Low","P.High","P.High","P.High","P.High","P.High")) #alpha-diversity # manually give "./alpha/otu_table.csv" a column 1 title -> OTUID g.otumat.alpha = as.matrix(read_csv("./alpha/genus/otu_table.csv")) # transpose data frame for picante g.otumat.alpha.t = t(g.otumat.alpha) # write csv of transposed df so you can manually remove top row added during transposition write.csv(g.otumat.alpha.t, file = "./alpha/genus/g.otumat.alpha.t.csv") # manually delete top row in "./alpha/otumat.alpha.t.csv" and reinsert g.otumat.alpha.t = as.matrix(read_csv("./alpha/genus/g.otumat.alpha.t.csv")) library(ape) # rarefied tree as phylo objecf seed = 123 g.phylo.tree.r = as.phylo(phy_tree(r.genus.glom)) g.phylo.tree.r # calculate phylogenetic diversity (PD) and species richness (SR) library(picante) g.pd.pic = pd(g.otumat.alpha.t, g.phylo.tree.r, include.root = FALSE) g.pd.pic capture.output(g.pd.pic, file = "./alpha/g.PD.SR.txt") ### alpha-diversity AFTER RARefaction g.a.div.r = estimate_richness(r.genus.glom, measures = c("Observed", "Chao1", "Shannon")) g.a.div.r$VegetationType = metadata$VegetationType g.a.div.r$VegetationDiv = metadata$VegetationDiv g.a.div.r$City = metadata$City g.a.div.r$PD = g.pd.pic$PD g.a.div.r$SR = g.pd.pic$SR g.a.div.r write.csv(g.a.div.r, file = "./alpha/genus/g.alpha.csv") capture.output(g.a.div.r, file = "./alpha/genus/g.Alpha diversity.txt") g.molt.a.div.r = melt(as.matrix(g.a.div.r), varnames = c('Sample', 'Measure'), value.name = 'Alpha diversity') g.molt.a.div.r # create an appropriate input file from "Alpha diversity.txt" # put an 'a' infront of revegetation in Vegetation column to make sure it is intercept in GLMs g.alpha <- as.data.frame(read_csv("./alpha/genus/g.alpha.csv")) View(g.alpha) g.alpha$VegetationDiv = factor(g.alpha$VegetationDiv) g.alpha$VegetationType = factor(g.alpha$VegetationType) g.alpha$City = factor(g.alpha$City) g.alpha$CityVD = factor(g.alpha$CityVD) g.alpha$Site = factor(g.alpha$Site) ## GLMEs # histograms hist(g.alpha[g.alpha$City == "Bournemouth" & g.alpha$VegetationDiv == "High",]$Observed) hist(g.alpha[g.alpha$City == "Bournemouth" & g.alpha$VegetationDiv == "Low",]$Observed) hist(g.alpha[g.alpha$City == "Haikou" & g.alpha$VegetationDiv == "High",]$Observed) hist(g.alpha[g.alpha$City == "Haikou" & g.alpha$VegetationDiv == "Low",]$Observed) hist(g.alpha[g.alpha$City == "Playford" & g.alpha$VegetationDiv == "High",]$Observed) hist(g.alpha[g.alpha$City == "Playford" & g.alpha$VegetationDiv == "Low",]$Observed) hist(g.alpha[g.alpha$City == "Bournemouth" & g.alpha$VegetationDiv == "High",]$PD) hist(g.alpha[g.alpha$City == "Bournemouth" & g.alpha$VegetationDiv == "Low",]$PD) hist(g.alpha[g.alpha$City == "Haikou" & g.alpha$VegetationDiv == "High",]$PD) hist(g.alpha[g.alpha$City == "Haikou" & g.alpha$VegetationDiv == "Low",]$PD) hist(g.alpha[g.alpha$City == "Playford" & g.alpha$VegetationDiv == "High",]$PD) hist(g.alpha[g.alpha$City == "Playford" & g.alpha$VegetationDiv == "Low",]$PD) hist(g.alpha[g.alpha$City == "Bournemouth" & g.alpha$VegetationDiv == "High",]$Shannon) hist(g.alpha[g.alpha$City == "Bournemouth" & g.alpha$VegetationDiv == "Low",]$Shannon) hist(g.alpha[g.alpha$City == "Haikou" & g.alpha$VegetationDiv == "High",]$Shannon) hist(g.alpha[g.alpha$City == "Haikou" & g.alpha$VegetationDiv == "Low",]$Shannon) hist(g.alpha[g.alpha$City == "Playford" & g.alpha$VegetationDiv == "High",]$Shannon) hist(g.alpha[g.alpha$City == "Playford" & g.alpha$VegetationDiv == "Low",]$Shannon) library(lme4) #Pairwise library(multcomp) library(lsmeans) ####### GLM #### Observed richness # negative binomial model g.glm.nb.obs = glm.nb(Observed ~ City*VegetationDiv, g.alpha) g.glm.nb.obs.sum = summary(g.glm.nb.obs) g.glm.nb.obs.sum # goodness-of-fit of model g.obs.nb.chi.gof = 1 - pchisq(summary(g.glm.nb.obs)$deviance, summary(g.glm.nb.obs)$df.residual) g.obs.nb.chi.gof #capture capture.output(g.glm.nb.obs.sum, file = "./alpha/genus/g.obs glm.nb.txt") capture.output(g.obs.nb.chi.gof, file = "./alpha/genus/g.obs nb.chi.gof.txt") ## GLM obs ANOVA glme.obs.anova = Anova(glme.obs) glme.obs.anova capture.output(glme.obs.anova, file = "./alpha/genus/ GLM obs ANOVA.txt") #post-hoc comparison g.glm.nb.obs.pw = summary(glht(g.glm.nb.obs, lsm(pairwise ~ City*VegetationDiv)), test=adjusted(type = "holm")) g.glm.nb.obs.pw capture.output(g.glm.nb.obs.pw, file = "./alpha/genus/ GLM obs pairwise.txt") #### Chao1 # negative binomial model g.glm.nb.ch = glm.nb(Chao1 ~ VegetationDiv, g.alpha) g.glm.nb.ch.sum = summary(g.glm.nb.ch) g.glm.nb.ch.sum # goodness-of-fit of model g.ch.nb.chi.gof = 1 - pchisq(summary(g.glm.nb.ch)$deviance, summary(g.glm.nb.ch)$df.residual) g.ch.nb.chi.gof #capture capture.output(g.glm.nb.ch.sum, file = "./alpha/genus/g.ch glm.nb.txt") capture.output(g.ch.nb.chi.gof, file = "./alpha/genus/g.ch nb.chi.gof.txt") #post-hoc comparison g.ch.leastsquares = lsmeans(g.glm.nb.ch, pairwise ~ VegetationDiv, adjust = "tukey") g.ch.leastsquares capture.output(g.ch.leastsquares, file = "./alpha/genus/g.ch post.txt") g.ch.confint = confint(g.ch.leastsquares$contrasts) g.ch.confint capture.output(g.ch.confint, file = "./alpha/genus/g.ch confint.txt") ### Shannons # negative binomial model g.glm.nb.h = glm.nb(Shannon ~ City*VegetationDiv, g.alpha) g.glm.nb.h.sum = summary(g.glm.nb.h) g.glm.nb.h.sum # goodness-of-fit of model g.h.nb.gof.poi = 1 - pchisq(summary(g.glm.nb.h)$deviance, summary(g.glm.nb.h)$df.residual) g.h.nb.gof.poi #capture capture.output(g.glm.nb.h.sum, file = "./alpha/genus/g.h nb glm.txt") capture.output(g.h.nb.gof.poi, file = "./alpha/genus/g.h nb gof.txt") # anova g.glm.nb.h.anova = Anova(g.glm.nb.h) g.glm.nb.h.anova capture.output(g.glm.nb.h.anova, file = "./alpha/genus/ GLM h ANOVA.txt") #post-hoc comparison g.glm.nb.h.pw = summary(glht(g.glm.nb.h, lsm(pairwise ~ City*VegetationDiv)), test=adjusted(type = "holm")) g.glm.nb.h.pw capture.output(g.glm.nb.h.pw, file = "./alpha/genus/ GLM h pairwise.txt") #### Phylogenetic diversity # negative binomial model g.glm.nb.pd = glm.nb(PD ~ City*VegetationDiv, g.alpha) g.glm.nb.pd.sum = summary(g.glm.nb.pd) g.glm.nb.pd.sum # goodness-of-fit of model g.pd.nb.chi.gof = 1 - pchisq(summary(g.glm.nb.pd)$deviance, summary(g.glm.nb.pd)$df.residual) g.pd.nb.chi.gof #capture capture.output(g.glm.nb.pd.sum, file = "./alpha/genus/g.pd glm.nb.txt") capture.output(g.pd.nb.chi.gof, file = "./alpha/genus/g.pd nb.chi.gof.txt") # anova g.glm.nb.pd.anova = Anova(g.glm.nb.pd) g.glm.nb.pd.anova capture.output(g.glm.nb.pd.anova, file = "./alpha/genus/ GLM pd ANOVA.txt") #post-hoc comparison g.glm.nb.pd.pw = summary(glht(g.glm.nb.pd, lsm(pairwise ~ City*VegetationDiv)), test=adjusted(type = "holm")) g.glm.nb.pd.pw capture.output(g.glm.nb.pd.pw, file = "./alpha/genus/ GLM pd pairwise.txt") #### Species richness # negative binomial model g.glm.nb.sr = glm.nb(SR ~ VegetationDiv, g.alpha) g.glm.nb.sr.sum = summary(g.glm.nb.sr) g.glm.nb.sr.sum # goodness-of-fit of model g.sr.nb.chi.gof = 1 - pchisq(summary(g.glm.nb.sr)$deviance, summary(g.glm.nb.sr)$df.residual) g.sr.nb.chi.gof #capture capture.output(g.glm.nb.sr.sum, file = "./alpha/genus/g.sr glm.nb.txt") capture.output(g.sr.nb.chi.gof, file = "./alpha/genus/g.sr nb.chi.gof.txt") #post-hoc comparison g.sr.leastsquares = lsmeans(g.glm.nb.sr, pairwise ~ VegetationDiv, adjust = "tukey") g.sr.leastsquares capture.output(g.sr.leastsquares, file = "./alpha/genus/g.sr post.txt") g.sr.confint = confint(g.sr.leastsquares$contrasts) g.sr.confint capture.output(g.sr.confint, file = "./alpha/genus/g.sr confint.txt") # Poisson model g.glm.h = glm(Shannon ~ VegetationDiv, g.alpha, family = poisson) g.glm.h.sum = summary(g.glm.h) g.glm.h.sum # goodness-of-fit of model g.h.gof.poi = 1 - pchisq(summary(g.glm.h)$deviance, summary(g.glm.h)$df.residual) g.h.gof.poi #capture capture.output(g.glm.h.sum, file = "./alpha/genus/g.h glm.txt") capture.output(g.h.gof.poi, file = "./alpha/genus/g.h gof.txt") #post-hoc comparison g.h.leastsquares = lsmeans(g.glm.h, pairwise ~ Vegetation, adjust = "tukey") g.h.leastsquares capture.output(g.h.leastsquares, file = "./alpha/genus/g.h post.txt") g.h.confint = confint(g.h.leastsquares$contrasts) capture.output(g.h.confint, file = "./alpha/genus/g.h confint.txt") ####### GENUS ordinations ######### dir.create("./ordination/genus") ### PCA (bray) #colour blind friendly palette # the FULL model set.seed(123) g.pca.bray.ord = ordinate(genus.glom, "PCoA", "bray") g.pca.bray.ord.sum = summary(g.pca.bray.ord) g.pca.bray.ord.sum capture.output(g.pca.bray.ord.sum, file = "./ordination/genus/g.pca summary.txt") # the plot g.pca.bray = plot_ordination(genus.glom, g.pca.bray.ord, type = "samples", color = "VegetationDiv", shape = "City", label = "") g.pca.bray.p = g.pca.bray + theme_bw() + theme(panel.grid = element_blank()) + ggtitle("Bray-Curtis") + geom_point(size = 2) + scale_color_manual(values = c("#999999", "#000000"), name = "Vegetation complexity") g.pca.bray.p ggsave("./ordination/genus/g.pca.bray.p.pdf", g.pca.bray.p, width = 12, height = 8.5, units = "cm", dpi = 600) ### PCA (wunifrac) #colour blind friendly palette # the FULL model set.seed(123) g.pca.wun.ord = ordinate(genus.glom, "PCoA", "wunifrac") g.pca.wun.ord.sum = summary(g.pca.wun.ord) g.pca.wun.ord.sum capture.output(g.pca.wun.ord.sum, file = "./ordination/genus/g.pca summary.txt") # the plot g.pca.wun = plot_ordination(genus.glom, g.pca.wun.ord, type = "samples", color = "VegetationDiv", shape = "City", label = "") g.pca.wun.p = g.pca.wun + theme_bw() + theme(panel.grid = element_blank()) + ggtitle("Weighted unifrac") + geom_point(size = 2) + scale_color_manual(values = c("#999999", "#000000"), name = "Vegetation complexity") g.pca.wun.p ggsave("./ordination/genus/g.pca.wun.p.pdf", g.pca.wun.p, width = 12, height = 8.5, units = "cm", dpi = 600) ### PCA (unifrac) #colour blind friendly palette # the FULL model set.seed(123) g.pca.un.ord = ordinate(genus.glom, "PCoA", "unifrac") g.pca.un.ord.sum = summary(g.pca.un.ord) g.pca.un.ord.sum capture.output(g.pca.un.ord.sum, file = "./ordination/genus/g.pca summary.txt") # the plot g.pca.un = plot_ordination(genus.glom, g.pca.un.ord, type = "samples", color = "VegetationDiv", shape = "City", label = "") g.pca.un.p = g.pca.un + theme_bw() + theme(panel.grid = element_blank()) + ggtitle("Unweighted unifrac") + geom_point(size = 2) + scale_color_manual(values = c("#999999", "#000000"), name = "Vegetation complexity") g.pca.un.p ggsave("./ordination/genus/g.pca.un.p.pdf", g.pca.un.p, width = 12, height = 8.5, units = "cm", dpi = 600) ### PCA (jaccard) jaccard = phyloseq::distance(genus.glom, method = "jaccard", binary = TRUE) #colour blind friendly palette # the FULL model set.seed(123) g.pca.jac.ord = ordinate(genus.glom, "PCoA", "jaccard") g.pca.jac.ord.sum = summary(g.pca.jac.ord) g.pca.jac.ord.sum # the plot g.pca.jac = plot_ordination(genus.glom, g.pca.jac.ord, type = "samples", color = "VegetationDiv", shape = "City", label = "") g.pca.jac.p = g.pca.jac + theme_bw() + theme(panel.grid = element_blank()) + ggtitle("Jaccard") + geom_point(size = 2) + scale_color_manual(values = c("#999999", "#000000"), name = "Vegetation complexity") g.pca.jac.p ggsave("./ordination/genus/g.pca.jac.p.pdf", g.pca.jac.p, width = 12, height = 8.5, units = "cm", dpi = 600) dir.create("./mvabund/genus") g.sampledf = data.frame(sample_data(genus.glom)) # create dummy variable to represent Vegetation complexity nested within City - 'CityVegDiv' g.sampledf$CityVegDiv = factor(c("B.Low","B.Low","B.Low","B.Low","B.Low","B.High","B.High","B.High","B.High","B.High","B.High","H.Low","H.Low","H.Low","H.Low","H.Low","H.Low","H.High","H.High","H.High","H.High","H.High","P.Low","P.Low","P.Low","P.Low","P.Low","P.High","P.High","P.High","P.High","P.High")) g.sampledf$VegetationDiv = factor(g.sampledf$VegetationDiv) g.sampledf$City = factor(g.sampledf$City) g.sampledf$Site = factor(g.sampledf$SampleID) ##### PERMANOVA on GENUS composition ##### set.seed(123) g.dist_uni = phyloseq::distance(genus.glom, method = "unifrac") g.adonis.uni = adonis(g.dist_uni ~ City/VegetationDiv, data = g.sampledf) g.adonis.uni capture.output(g.adonis.uni, file = "./mvabund/genus/permanova GENUS unifrac.txt") #check beta dispersion g.beta.uni <- betadisper(g.dist_uni, g.sampledf$CityVegDiv) g.beta.disp.test.uni = permutest(g.beta.uni) g.beta.disp.test.uni capture.output(g.beta.disp.test.uni, file = "./mvabund/genus/permanova.beta.disper GENUS unifrac.txt") #pairwise g.ord.uni.pw = pairwise.adonis2(g.dist_uni ~ CityVegDiv, data = g.sampledf) g.ord.uni.pw capture.output(g.ord.uni.pw, file = "./mvabund/genus/uni cvd pairwise.txt") #PERMANOVA on GENUS composition set.seed(123) g.dist_wun = phyloseq::distance(genus.glom, method = "wunifrac") g.adonis.wun = adonis(g.dist_wun ~ City/VegetationDiv, data = g.sampledf) g.adonis.wun capture.output(g.adonis.wun, file = "./mvabund/genus/permanova GENUS wunifrac.txt") #check beta dispersion g.beta.wun <- betadisper(g.dist_wun, g.sampledf$CityVegDiv) g.beta.disp.test.wun = permutest(g.beta.wun) g.beta.disp.test.wun capture.output(g.beta.disp.test.wun, file = "./mvabund/genus/permanova.beta.disper GENUS wunifrac.txt") #pairwise g.ord.wun.pw = pairwise.adonis2(g.dist_wun ~ CityVegDiv, data = g.sampledf) g.ord.wun.pw capture.output(g.ord.wun.pw, file = "./mvabund/genus/wun cvd pairwise.txt") #PERMANOVA on GENUS composition set.seed(123) g.dist_bray = phyloseq::distance(genus.glom, method = "bray") g.adonis.bray = adonis(g.dist_bray ~ City/VegetationDiv, data = g.sampledf) g.adonis.bray capture.output(g.adonis.bray, file = "./mvabund/genus/permanova GENUS bray.txt") #check beta dispersion g.beta.bray <- betadisper(g.dist_bray, g.sampledf$CityVegDiv) g.beta.disp.test.bray = permutest(g.beta.bray) g.beta.disp.test.bray capture.output(g.beta.disp.test.bray, file = "./mvabund/genus/permanova.beta.disper GENUS bray.txt") #pairwise g.ord.bray.pw = pairwise.adonis2(g.dist_bray ~ CityVegDiv, data = g.sampledf) g.ord.bray.pw capture.output(g.ord.bray.pw, file = "./mvabund/genus/bray cvd pairwise.txt") #PERMANOVA on GENUS composition set.seed(123) g.dist_jac = phyloseq::distance(genus.glom, method = "jaccard", binary = TRUE) g.adonis.jac = adonis(g.dist_jac ~ City/VegetationDiv, data = g.sampledf) g.adonis.jac capture.output(g.adonis.jac, file = "./mvabund/genus/permanova GENUS jac.txt") #check beta dispersion g.beta.jac.cvd <- betadisper(g.dist_jac, g.sampledf$CityVegDiv) g.beta.disp.test.jac.cvd = permutest(g.beta.jac) g.beta.disp.test.jac.cvd capture.output(g.beta.disp.test.jac, file = "./mvabund/genus/permanova.beta.disper GENUS cvd jac.txt") #pairwise g.ord.jac.pw = pairwise.adonis2(g.dist_jac ~ CityVegDiv, data = g.sampledf) g.ord.jac.pw capture.output(g.ord.jac.pw, file = "./mvabund/genus/jac cvd pairwise.txt") # rare genera beta RAREra = filter_taxa(genus.glom, function(x) sum(x) <= 763, prune = TRUE) RAREra genus.glom RAREjaccard.asv = phyloseq::distance(RAREra, method = "jaccard", binary = TRUE) ### PCA (bray) #colour blind friendly palette # the FULL model set.seed(123) RAREpca.bray.ord = ordinate(RAREra, "PCoA", "bray") # the plot RAREpca.bray = plot_ordination(RAREra, RAREpca.bray.ord, type = "samples", color = "VegetationDiv", shape = "City", label = "") RAREpca.bray.p = RAREpca.bray + theme_bw() + theme(panel.grid = element_blank()) + ggtitle("Bray-Curtis") + geom_point(size = 2) + scale_color_manual(values = c("#999999", "#000000"), name = "Vegetation Diversity") RAREpca.bray.p ggsave("./ordination/genus/RAREgenus.pca.bray.p.pdf", RAREpca.bray.p, width = 12, height = 8.5, units = "cm", dpi = 600) ### PCA (jaccard) #colour blind friendly palette # the FULL model set.seed(123) RAREpca.jac.ord = ordinate(RAREra, "PCoA", "RAREjaccard.asv") # the plot RAREpca.jac = plot_ordination(RAREra, RAREpca.jac.ord, type = "samples", color = "VegetationDiv", shape = "City", label = "") RAREpca.jac.p = RAREpca.jac + theme_bw() + theme(panel.grid = element_blank()) + ggtitle("Jaccard") + geom_point(size = 2) + scale_color_manual(values = c("#999999", "#000000"), name = "Vegetation Diversity") RAREpca.jac.p ggsave("./ordination/genus/RAREgenus.pca.jac.p.pdf", RAREpca.jac.p, width = 12, height = 8.5, units = "cm", dpi = 600) #PERMANOVA on GENUS composition set.seed(123) RAREg.dist_bray = phyloseq::distance(RAREra, method = "bray") RAREg.adonis.bray = adonis(RAREg.dist_bray ~ City/VegetationDiv, data = g.sampledf) RAREg.adonis.bray capture.output(RAREg.adonis.bray, file = "./mvabund/genus/RAREpermanova GENUS bray.txt") #check beta dispersion RAREg.beta.bray <- betadisper(RAREg.dist_bray, g.sampledf$CityVegDiv) RAREg.beta.disp.test.bray = permutest(RAREg.beta.bray) RAREg.beta.disp.test.bray capture.output(RAREg.beta.disp.test.bray, file = "./mvabund/genus/RAREpermanova.beta.disper GENUS bray.txt") #pairwise RAREg.ord.bray.pw = pairwise.adonis2(RAREg.dist_bray ~ CityVegDiv, data = g.sampledf) RAREg.ord.bray.pw capture.output(RAREg.ord.bray.pw, file = "./mvabund/genus/RAREbray cvd pairwise.txt") #PERMANOVA on GENUS composition set.seed(123) RAREg.dist_jac = phyloseq::distance(RAREra, method = "jaccard", binary = TRUE) RAREg.adonis.jac = adonis(RAREg.dist_jac ~ City/VegetationDiv, data = g.sampledf) RAREg.adonis.jac capture.output(RAREg.adonis.jac, file = "./mvabund/genus/RAREpermanova GENUS jac.txt") #check beta dispersion RAREg.beta.jac.cvd <- betadisper(RAREg.dist_jac, g.sampledf$CityVegDiv) RAREg.beta.disp.test.jac.cvd = permutest(RAREg.beta.jac.cvd) RAREg.beta.disp.test.jac.cvd capture.output(RAREg.beta.disp.test.jac.cvd, file = "./mvabund/genus/RAREpermanova.beta.disper GENUS cvd jac.txt") #pairwise RAREg.ord.jac.pw = pairwise.adonis2(RAREg.dist_jac ~ CityVegDiv, data = g.sampledf) RAREg.ord.jac.pw capture.output(RAREg.ord.jac.pw, file = "./mvabund/genus/RAREjac cvd pairwise.txt")