#################################################### #Appendix S3 #R code to reproduce analysis from Guerin et al. 'Environmental correlates of key functional traits in Australian plant communities based on relative abundance in plots'. ########################### #load packages library(ausplotsR) library(raster) library(usdm) library(corrplot) library(ape) library(nlme) library(piecewiseSEM) library(MuMIn) library(reshape2) library(scatterplot3d) library(ggplot2) library(data.table) ########################### #load datasets (.csv files provided as Appendix S2) #sites and species abundances v sites: #species and site data from TERN Ausplots ausplots <- list() ausplots$site.info <- read.csv("Appendix_S2_C_sites.csv") ausplots$matrix <- read.csv("Appendix_S2_D_veg.csv", row.names=1) #traits v species traits_by_spp <- read.csv("Appendix_S2_A_traits.csv") traits_by_spp$name_F <- gsub(" ", ".", as.character(traits_by_spp$name), fixed=TRUE, useBytes=TRUE) #change the species format to match abundance data matched_traits <- na.omit(traits_by_spp[match(colnames(ausplots$matrix), traits_by_spp$name_F),c(8, 5, 6, 7)]) #gap-filled trait values matched to species in the abundance data ###################################### #Add a cut-off for trait coverage in plots by abundance: ausplots$matrix_prop <- ausplots$matrix/rowSums(ausplots$matrix) #add a matrix of proportional abundances (abundances divided by total site abundances) ausplots$matrix_prop_match <- ausplots$matrix_prop[,names(ausplots$matrix_prop) %in% matched_traits$name_F] #add a matrix of proportional abundances that is cut down to species that are matched in trait the data ausplots$matrix_prop_match <- ausplots$matrix_prop_match[rowSums(ausplots$matrix_prop_match) > 0.8,] #cut down to high coverage sites by abundance (80%) now that species with no trait data have been removed ausplots$matrix_prop_match <- ausplots$matrix_prop_match[,which(colSums(ausplots$matrix_prop_match) > 0)] #remove also species that don't occur in the dataset when trimmed to 80% coverage #Figure S1 pdf("Fig_S1_Trait_coverage_proportion_cover abundance.pdf", width=6, height=4.2) hist(rowSums(ausplots$matrix_prop_match), breaks=20, col="yellow3", xlab="Trait coverage as a proportion of total cover abundance", ylab="Number of plots", las=1, main="", ylim=c(0,50)) dev.off() ###################################### #Calculate 'community weighted means' (CWMs) by multiplying species proportional abundances by matched trait values and summing by site ausplots$matrix4traits <- ausplots$matrix[rownames(ausplots$matrix) %in% rownames(ausplots$matrix_prop_match), colnames(ausplots$matrix) %in% colnames(ausplots$matrix_prop_match)] #absolute abundances for those sites and species included in the trait analysis as above ausplots$matrix4traits_prop <- ausplots$matrix4traits/rowSums(ausplots$matrix4traits) #convert the above into proportional abundance based on only species included in the analysis so that abundances sum to 1 for each community leaf_cwm <- as.data.frame((as.matrix(ausplots$matrix4traits_prop) %*% matched_traits$leaf_area_gap[match(colnames(ausplots$matrix4traits_prop), matched_traits$name_F)])) #leaf area seed_cwm <- as.data.frame((as.matrix(ausplots$matrix4traits_prop) %*% matched_traits$seed_mass_gap[match(colnames(ausplots$matrix4traits_prop), matched_traits$name_F)])) #seed mass height_cwm <- as.data.frame((as.matrix(ausplots$matrix4traits_prop) %*% matched_traits$max_height_gap[match(colnames(ausplots$matrix4traits_prop), matched_traits$name_F)])) #maximum height ######################################## #Calculate 'community weighted variances' (CWVs), the sum for all species within a site of their proportion times the square of the distance between their trait value and the CWM. #leaf area ausplots$matrix_leaf_var <- ausplots$matrix4traits_prop #copy the proportional abundance matrix to store leaf var components for(i in 1:nrow(ausplots$matrix_leaf_var)) { #for each site for(j in 1:ncol(ausplots$matrix_leaf_var)) { #for each species prop <- ausplots$matrix_leaf_var[i,j] #store the abundance of species j in site i if(prop > 0) { #if the species occurs CWM <- leaf_cwm[i,1] #store the relevant CWM for that site from above value <- matched_traits$leaf_area_gap[match(names(ausplots$matrix_leaf_var)[j], matched_traits$name_F)] #extract the trait value that matches the current species ausplots$matrix_leaf_var[i,j] <- prop * (value - CWM)^2 } } # end j } #end i leaf_cwv <- rowSums(ausplots$matrix_leaf_var) #vector of CWV leaf area for all plots #seed mass ausplots$matrix_seed_var <- ausplots$matrix4traits_prop #copy the proportional abundance matrix to store seed var components for(i in 1:nrow(ausplots$matrix_seed_var)) { #for each site for(j in 1:ncol(ausplots$matrix_seed_var)) { #for each species prop <- ausplots$matrix_seed_var[i,j] if(prop > 0) { CWM <- seed_cwm[i,1] value <- matched_traits$seed_mass_gap[match(names(ausplots$matrix_seed_var)[j], matched_traits$name_F)] ausplots$matrix_seed_var[i,j] <- prop * (value - CWM)^2 } } # end j } #end i seed_cwv <- rowSums(ausplots$matrix_seed_var) #vector of CWV seed mass for all plots #maximum height ausplots$matrix_height_var <- ausplots$matrix4traits_prop #copy the proportional abundance matrix to store height var components for(i in 1:nrow(ausplots$matrix_height_var)) { #for each site for(j in 1:ncol(ausplots$matrix_height_var)) { #for each species prop <- ausplots$matrix_height_var[i,j] if(prop > 0) { CWM <- height_cwm[i,1] value <- matched_traits$max_height_gap[match(names(ausplots$matrix_height_var)[j], matched_traits$name_F)] ausplots$matrix_height_var[i,j] <- prop * (value - CWM)^2 } } # end j } #end i height_cwv <- rowSums(ausplots$matrix_height_var) #vector of CWV max height for all plots ##################################################### ##################################################### #Assess environmental associations ################################################### #site and environmental data #get sites and xy coords for subset of sites included in the analysis trait_sites <- ausplots$site.info[na.omit(match(rownames(ausplots$matrix_prop_match), ausplots$site.info$site_unique)) ,c("site_unique", "longitude", "latitude")] trait_sites_enviro <- read.csv("Appendix_S2_B_environment.csv", row.names=1) #pre-cooked environmental data for the sites ################################################ #calculate species niche centroids (SNCs) and check whether they relate to species traits ausplots$matrix_prop_spp <- t(t(ausplots$matrix4traits)/colSums(ausplots$matrix4traits)) #abundance for each species as a proportion of total for that species over all sites (as opposed to site totals) SNC <- t(as.matrix(ausplots$matrix_prop_spp)) %*% as.matrix(trait_sites_enviro) #multiply that by the environmental values at corresponding sites and sum to get abundance-weighted centroid on each gradient matched_traits_2 <- subset(matched_traits, name_F %in% rownames(SNC)) #subset the matched traits to include only the species we are considering #Relationships of a species trait value to its SNC. pdf("Fig_Species_trait_niche_regressions.pdf", width=6, height=7) par(mfrow=c(3,2), mar=c(5,4,3,1)) #MAX HEIGHT keepers_MH <- c("delete") #vector that will store significant variables for(i in colnames(SNC)) { #for each variable meep <- lm(matched_traits_2$max_height_gap ~ SNC[,i]) #model species trait value against their SNC if(summary(meep)$coefficients[2,4] < 0.05/ncol(SNC)) { #if significant with Bonferroni correction (divide allowed p-value by the number of hypotheses we are testing) keepers_MH <- c(keepers_MH, colnames(SNC)[colnames(SNC)==i]) plot(matched_traits_2$max_height_gap ~ SNC[,i], main=i, pch=20, col="red", cex=0.8, bty="l", ylab="Species maximum height", xlab=i) # abline(meep) title(sub=round(summary(meep)$r.squared, digits=2)) } } keepers_MH <- keepers_MH[2:length(keepers_MH)] # #LEAF AREA keepers_LA <- c("delete") for(i in colnames(SNC)) { meep <- lm(matched_traits_2$leaf_area_gap ~ SNC[,i]) if(summary(meep)$coefficients[2,4] < 0.05/ncol(SNC)) { keepers_LA <- c(keepers_LA, colnames(SNC)[colnames(SNC)==i]) plot(matched_traits_2$leaf_area_gap ~ SNC[,i], main=i, pch=20, col="red", cex=0.8, bty="l", ylab="Species leaf area", xlab=i) abline(meep) title(sub=round(summary(meep)$r.squared, digits=2)) } } keepers_LA <- keepers_LA[2:length(keepers_LA)] # #SEED MASS keepers_SM <- c("delete") for(i in colnames(SNC)) { meep <- lm(matched_traits_2$seed_mass_gap ~ SNC[,i]) if(summary(meep)$coefficients[2,4] < 0.05/44) { keepers_SM <- c(keepers_SM, colnames(SNC)[colnames(SNC)==i]) plot(matched_traits_2$seed_mass_gap ~ SNC[,i], main=i, pch=20, col="red", cex=0.8, bty="l", ylab="Species seed mass", xlab=i) abline(meep) title(sub=round(summary(meep)$r.squared, digits=2)) } } keepers_SM <- keepers_SM[2:length(keepers_SM)] # dev.off() ######################################################## #Plot out relationships for the six response variables (CWM and CWV for three traits) against environmental variables, including only those variables for CWM that were significant above for trait ~ SNC regressions; and that are significant with Bonferroni corrections to account for multiple hypothesis testing. OLS linear models are compared to GLS models that account for spatial autocorrelation and heteroskedastic residuals. #gather response and predictor variables for regressions. model.df <- data.frame(longitude = trait_sites$longitude, latitude = trait_sites$latitude, site_unique = trait_sites$site_unique, Leaf_area_CWM = leaf_cwm$V1, Seed_mass_CWM = seed_cwm$V1, Maximum_height_CWM = height_cwm$V1, Leaf_area_CWV = leaf_cwv, Seed_mass_CWV = seed_cwv, Maximum_height_CWV = height_cwv) model.df <- cbind(model.df, trait_sites_enviro) model.df <- model.df[!duplicated(model.df[,c(1:2)]),] #log transform selected variables model.df.logs <- model.df #copy the dataset in order to log-transform selected variables with extreme values model.df.logs[,c("PTS1")] <- model.df.logs[,c("PTS1")] + 2 + abs(min(model.df.logs[,c("PTS1")])) #due to scaling, making this a positive non-zero vector model.df.logs[,c("PTA", "PTI", "PTX", "PTS1", "ADI", "ADX", "ADM", "EAA", "EAAS", "ECE", "PTO", "SOC", "NTO", "SLOPEDEG", "ELVR1000", "SLPFM300")] <- log(model.df.logs[,c("PTA", "PTI", "PTX", "PTS1", "ADI", "ADX", "ADM", "EAA", "EAAS", "ECE", "PTO", "SOC", "NTO", "SLOPEDEG", "ELVR1000", "SLPFM300")]) #log-transform those variables ### #regressions #Figure S3 pdf("Fig_S3_CWM_CWV_enviro_regressions_with_LOG.pdf", height = 7, width = 6) # par(mfrow=c(3,2), mar=c(5,4,3,1)) # for(j in names(model.df.logs)[4:9]) { if(j == "Leaf_area_CWM") {keepers <- keepers_LA} if(j == "Seed_mass_CWM") {keepers <- keepers_SM} if(j == "Maximum_height_CWM") {keepers <- keepers_MH} if(j %in% c("Leaf_area_CWV", "Seed_mass_CWV", "Maximum_height_CWV")) {keepers <- names(model.df.logs)[10:ncol(model.df.logs)]} for(i in names(model.df.logs)[names(model.df) %in% keepers]) { meep <- lm(get(j) ~ get(i), data = model.df.logs) if(summary(meep)$coefficients[2,4] < 0.05/ncol(trait_sites_enviro)) { if(i %in% c("PTA", "PTI", "PTX", "PTS1", "ADI", "ADX", "ADM", "EAA", "EAAS", "ECE", "PTO", "SOC", "NTO", "SLOPEDEG", "ELVR1000", "SLPFM300")){ plot(get(j) ~ get(i), pch=20, col="red", cex=0.8, bty="l", ylab=j, xlab=paste0("ln(", i, ")"), data=model.df.logs, font.lab=2) } else { plot(get(j) ~ get(i), pch=20, col="red", cex=0.8, bty="l", ylab=j, xlab=i, data=model.df.logs, font.lab=2) } abline(meep, lwd=1.5) r2_lm <- round(summary(meep)$r.squared, digits=2) # if(car::ncvTest(meep)$p < 0.05) { meep <- nlme::gls(as.formula(paste0(j, " ~", i)), correlation = corGaus(form = ~ longitude + latitude, nugget=TRUE), weights = varFixed(as.formula(paste0("~", i))), data = model.df.logs) #gls with spatial correlation and heteroskedastic residuals } else { meep <- nlme::gls(as.formula(paste0(j, " ~", i)), correlation = corGaus(form = ~ longitude + latitude, nugget=TRUE), data = model.df.logs) #gls with spatial correlation } # abline(meep, col="blue", lwd=2, lty=2) title(main=paste(r2_lm, ", ", round(piecewiseSEM::rsquared(meep)[5], digits=2)), col="blue") } # end if significant cat(i, " complete\n") } # end i } #end j # dev.off() ################################################## #Figure S4 - relationships to latitude pdf("Fig_S4_Trait_latitude_plots.pdf", height=5, width=6) # par(mfrow=c(3,2), mar=c(5,4,2,1)) # meep <- lm(Leaf_area_CWM ~ latitude + I(latitude^2), data=model.df) plot(Leaf_area_CWM ~ latitude, cex=0.8, col="darkred", bty="l", xlab="Latitude", ylab="Leaf area CWM", pch=20, data=model.df) meepmeep <- data.frame(x=model.df$latitude, y=predict(meep)) meepmeep <- meepmeep[order(meepmeep$x),] points(meepmeep$x, meepmeep$y, type="l", lwd=1.5) title(main=round(summary(meep)$r.squared, digits=2)) # meep <- lm(Leaf_area_CWV ~ latitude + I(latitude^2), data=model.df) plot(Leaf_area_CWV ~ latitude, cex=0.8, col="darkred", bty="l", xlab="Latitude", ylab="Leaf area CWV", pch=20, data=model.df) meepmeep <- data.frame(x=model.df$latitude, y=predict(meep)) meepmeep <- meepmeep[order(meepmeep$x),] points(meepmeep$x, meepmeep$y, type="l", lwd=1.5) title(main=round(summary(meep)$r.squared, digits=2)) # meep <- lm(Seed_mass_CWM ~ latitude, data=model.df) plot(Seed_mass_CWM ~ latitude, cex=0.8, col="darkred", bty="l", xlab="Latitude", ylab="Seed mass CWM", pch=20, data=model.df) abline(meep, lwd=1.5) title(main=round(summary(meep)$r.squared, digits=2)) # meep <- lm(Seed_mass_CWV ~ latitude + I(latitude^2), data=model.df) plot(Seed_mass_CWV ~ latitude, cex=0.8, col="darkred", bty="l", xlab="Latitude", ylab="Seed mass CWV", pch=20, data=model.df) meepmeep <- data.frame(x=model.df$latitude, y=predict(meep)) meepmeep <- meepmeep[order(meepmeep$x),] points(meepmeep$x, meepmeep$y, type="l", lwd=1.5) title(main=round(summary(meep)$r.squared, digits=2)) # meep <- lm(Maximum_height_CWM ~ latitude + I(latitude^2), data=model.df) plot(Maximum_height_CWM ~ latitude, cex=0.8, col="darkred", bty="l", xlab="Latitude", ylab="Maximum height CWM", pch=20, data=model.df) meepmeep <- data.frame(x=model.df$latitude, y=predict(meep)) meepmeep <- meepmeep[order(meepmeep$x),] points(meepmeep$x, meepmeep$y, type="l", lwd=1.5) title(main=round(summary(meep)$r.squared, digits=2)) # meep <- lm(Maximum_height_CWV ~ latitude + I(latitude^2), data=model.df) plot(Maximum_height_CWV ~ latitude, cex=0.8, col="darkred", bty="l", xlab="Latitude", ylab="Maximum height CWV", pch=20, data=model.df) meepmeep <- data.frame(x=model.df$latitude, y=predict(meep)) meepmeep <- meepmeep[order(meepmeep$x),] points(meepmeep$x, meepmeep$y, type="l", lwd=1.5) title(main=round(summary(meep)$r.squared, digits=2)) # dev.off() ###################################################### #Pearson correlations pdf("Fig_1c_Trait_enviro_corrplot.pdf", height = 2, width = 7) cor.mat <- cor(model.df.logs[c(10:ncol(model.df), 2, 4:9)])[46:51, 1:45] colnames(cor.mat)[colnames(cor.mat) %in% c("PTA", "PTI", "PTX", "PTS1", "ADI", "ADX", "ADM", "EAA", "EAAS", "ECE", "PTO", "SOC", "NTO", "SLOPEDEG", "ELVR1000", "SLPFM300")] <- paste0("ln(", colnames(cor.mat)[colnames(cor.mat) %in% c("PTA", "PTI", "PTX", "PTS1", "ADI", "ADX", "ADM", "EAA", "EAAS", "ECE", "PTO", "SOC", "NTO", "SLOPEDEG", "ELVR1000", "SLPFM300")], ")") corrplot(cor.mat, type="full", diag=T, tl.cex=0.7, cl.pos="r", cl.cex=0.7, cl.length=5) dev.off() ####################################################### #scatterplot of CWM / CWV for the three traits - colours highlight relationship to EAA (MODIS actual evapotranspiration), which was important in the regressions # #Figure S5 pdf("Fig_S5_Three_trait_visuals.pdf", width = 6, height = 6) #par(mfrow=c(1,2)) #par(mar=c(1,1,1,1)) scatterplot3d(data.frame(LA=leaf_cwm, SM=seed_cwm, MH=height_cwm), pch=20, xlab="LA", ylab="SM", zlab="MH", angle=25, type="h", box=F, color=rev(terrain.colors(20))[as.numeric(cut(trait_sites_enviro$EAA, breaks = 20))], las=1) scatterplot3d(data.frame(LA=leaf_cwv, SM=seed_cwv, MH=height_cwv), pch=20, xlab="LA", ylab="SM", zlab="MH", angle=25, type="h", box=F, color=rev(terrain.colors(20))[as.numeric(cut(trait_sites_enviro$EAA, breaks = 20))]) dev.off() #################################################### #Constrained ordination (rda) of CWM and CWV for three traits, combined and separately # #using a set of non-colinear variables: all_rda_cwm <- rda(setnames(model.df.logs[,4:6], old=c("Leaf_area_CWM", "Seed_mass_CWM", "Maximum_height_CWM"), new=c("Leaf area", "Seed mass", "Max. height")) ~ ., exclude(model.df.logs[,10:53], vifstep(model.df.logs[,10:53]))) RsquareAdj(all_rda_cwm) #total 36%: 28 5 4 % RDA1-3 | AdjR2 0.32 # all_rda_cwv <- rda(setnames(model.df.logs[,7:9], old=c("Leaf_area_CWV", "Seed_mass_CWV", "Maximum_height_CWV"), new=c("Leaf area", "Seed mass", "Max. height")) ~ ., exclude(model.df.logs[,10:53], vifstep(model.df.logs[,10:53]))) RsquareAdj(all_rda_cwv) #total 24%: 17 5 2 % RDA1-3 ||AdjR2 0.20 # #using all variables: #all_rda_cwm <- rda(model.df[,4:6] ~ ., model.df[,10:53]) #RsquareAdj(all_rda_cwm) #total 43%: x x x % RDA1-3 || AdjR2 0.37 #all_rda_cwv <- rda(model.df[,7:9] ~ ., model.df[,10:53]) #RsquareAdj(all_rda_cwv) #total 33%: x x x% RDA1-3 || AdjR2 0.25 # ### # pdf("Fig_1b_Contrained_ordinations_cwm.pdf", height=5, width=6) par(mar=c(4,4,2,1)) # plot(all_rda_cwm, type="n") points(all_rda_cwm, pch=21, col="darkblue", bg="pink", cex=0.3) text(all_rda_cwm, dis="bp", cex=1.1, col="red", select=rownames(subset(as.data.frame(all_rda_cwm$CCA$biplot), abs(RDA1) > 0.4 | abs(RDA2) > 0.4))) text(all_rda_cwm, "species", col="blue", cex=1.3) # dev.off() # pdf("Fig_S2_Contrained_ordinations.pdf", height=5, width=6) par(mar=c(4,4,2,1)) # plot(all_rda_cwv, type="n") points(all_rda_cwv, pch=21, col="darkblue", bg="pink", cex=0.3) text(all_rda_cwv, dis="bp", cex=1.1, col="red", select=rownames(subset(as.data.frame(all_rda_cwv$CCA$biplot), abs(RDA1) > 0.3 | abs(RDA2) > 0.3))) text(all_rda_cwv, "species", col="blue", cex=1.3, scaling=3) # dev.off() ################################################ #Map figure pdf("Fig_map.pdf", height=5, width=6) ggplot() + borders("world", colour="gray50",fill="gray85") + xlim(110, 155) + ylim(-45, -10) + geom_point(shape=23, data=trait_sites, aes(longitude, latitude), colour = alpha("magenta", 1/2), size = 1.1, fill = 'red4', alpha = .9) + coord_map("azequalarea",orientation=c(-26,135,0)) + labs(x="Longitude") + labs(y="Latitude") dev.off() ############################################## #trait-trait plot #http://personality-project.org/r/Rfunc/pairs.panels.R panel.lm <- function (x, y, pch = par("pch"), col.lm = "red", ...) { ymin <- min(y) ymax <- max(y) xmin <- min(x) xmax <- max(x) ylim <- c(min(ymin,xmin),max(ymax,xmax)) xlim <- ylim points(x, y, pch = pch,ylim = ylim, xlim= xlim,...) ok <- is.finite(x) & is.finite(y) if (any(ok)) abline(lm(y[ok]~ x[ok]), col = col.lm, ...) } pdf("Fig_S6_trait_trait_scatters.pdf", width=7, height=7) pairs(model.df.logs[,c(4:9)], panel=panel.lm, pch=20, cex=0.5) dev.off() ############################################# #############################################